> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# List all jobs

> Get the list of all existing job configurations.



## OpenAPI

````yaml /api-reference/5.9/fusion-api-job-rest-server.json get /spark/configurations
# OpenAPI Specification version this document is written against.
openapi: 3.0.1
# API-level metadata: title, description, contact, license, and API version.
info:
  title: Job REST Server API
  description: This group of APIs allows you to manage jobs, recommendations, and signals.
  contact:
    name: Lucidworks
    # The Contact Object's `url` must be in URL format, so the scheme is
    # spelled out; a bare host name can be resolved as a relative link.
    url: https://www.lucidworks.com
    email: support@lucidworks.com
  license:
    # NOTE(review): this name reads like a generator placeholder; confirm the
    # intended license name against the agreement linked below.
    name: License of the API
    url: https://lucidworks.com/legal/developer-license-agreement/
  # Quoted so the version stays a string instead of being parsed as a float.
  version: '5.9'
# Base URLs for this API. Each `url` is a template whose {...} placeholders
# are filled in from the entries of the same name under `variables`.
servers:
  # Base URL ending in /api.
  - description: Fusion
    url: 'https://{FUSION HOST}/api'
    variables:
      'FUSION HOST':
        description: Your environment host.
        default: FUSION_HOST
  # Base URL that additionally carries an application name segment.
  - description: Fusion app
    url: 'https://{FUSION HOST}/api/apps/{APP_NAME}'
    variables:
      'FUSION HOST':
        description: Your environment host.
        default: FUSION_HOST
      APP_NAME:
        description: The name of your Fusion application.
        default: APP_NAME
# Default security requirements for the API. The list entries are
# alternatives: satisfying any one of them authorizes a request. The empty
# lists mean no scopes are requested.
# NOTE(review): these scheme names are presumably declared under
# components.securitySchemes, which is outside this view; confirm they match.
security:
  - Basic auth: []
  - API key: []
# Tag catalogue used to group operations; operations refer to a tag by its
# exact `name` value.
tags:
  - name: Recommendation Controller API
    description: Endpoints for the recommendation controller.
  - name: Signals Controller API
    description: Endpoints for the signals controller.
  - name: Spark Job Config Controller API
    description: >-
      Use these endpoints to manage Spark job configurations.
  - name: Spark Job Controller API
    description: >-
      Use these endpoints to start, stop, and check the status of Spark
      jobs.
# Pointer to additional documentation hosted outside this specification.
externalDocs:
  description: Lucidworks Documentation
  url: https://doc.lucidworks.com/
paths:
  /spark/configurations:
    get:
      tags:
        - Spark Job Config Controller API
      summary: List all jobs
      description: Get the list of all existing job configurations.
      operationId: findJobConfigs
      responses:
        '200':
          description: OK
          content:
            '*/*':
              schema:
                type: array
                items:
                  type: object
              example:
                - id: product_demo_spell_correction
                  trainingDataFilterQuery: '*:*'
                  trainingCollection: product_demo_signals
                  outputCollection: product_demo_query_rewrite_staging
                  overwriteOutput: true
                  maxDistance: 2
                  analyzerConfigQuery: >-
                    { "analyzers": [ { "name":
                    "LetterTokLowerStem","charFilters": [ { "type": "htmlstrip"
                    } ],"tokenizer": { "type": "letter" },"filters": [{ "type":
                    "lowercase" },{ "type": "KStem" }] }],"fields": [{ "regex":
                    ".+", "analyzer": "LetterTokLowerStem" } ]}
                  analyzerConfigDictionary: >-
                    { "analyzers": [ { "name":
                    "LetterTokLowerStem","charFilters": [ { "type": "htmlstrip"
                    } ],"tokenizer": { "type": "letter" },"filters": [{ "type":
                    "lowercase" },{ "type": "KStem" }] }],"fields": [{ "regex":
                    ".+", "analyzer": "LetterTokLowerStem" } ]}
                  lenScale: 5
                  fieldToVectorize: query
                  mainType: click
                  filterType: response
                  signalTypeField: type
                  minCountMain: 1
                  minCountFilter: 10
                  trainingDataSamplingFraction: 1
                  countField: count_i
                  dictionaryDataFilterQuery: '*:*'
                  minPrefix: 1
                  minMispellingLen: 5
                  correctionThreshold: 0.8
                  misspellingThreshold: 0.8
                  lastCharMatchBoost: 1
                  soundMatchBoost: 3
                  correctCntBoost: 2
                  editDistBoost: 2
                  corMisRatio: 3
                  randomSeed: 1234
                  signalDataIndicator: true
                  sparkPartitions: 200
                  enableAutoPublish: false
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  type: tokenPhraseSpellCorrection
                  updates:
                    - userId: eric.redman@lucidworks.com
                      timestamp: '2025-04-15T15:32:22.962021895Z'
                - id: query-query-recommendations
                  inputCollection: Docs_Site_2_signals
                  sourceCatchup: true
                  sourceRemove: false
                  sql: "SELECT DISTINCT d.doc_id as doc_s,\n\tcollect_set(d.query_orig_s) as query_ss,\n\tfirst(d.id) as id,\n\tapprox_count_distinct(d.query_orig_s) as count_i\n\tFROM ${inputCollection} as d\n\tGROUP BY doc_s"
                  skipCheckEnabled: true
                  skipJobIfSignalsEmpty: true
                  selectQuery: '*:*'
                  rows: 10000
                  outputCollection: Docs_Site_2_queries_query_recs
                  useNaturalKey: true
                  optimizeSegments: 0
                  dataFormat: solr
                  sparkSQL: SELECT * from spark_input
                  sparkPartitions: 200
                  type: aggregation
                  updates:
                    - userId: docs
                      timestamp: '2023-11-16T22:11:06.157793Z'
                    - userId: docs
                      timestamp: '2023-11-16T22:12:50.565353Z'
                    - userId: docs
                      timestamp: '2023-11-16T22:12:50.568009Z'
                    - userId: docs
                      timestamp: '2023-11-16T22:25:02.487439Z'
                    - userId: docs
                      timestamp: '2023-11-16T22:25:02.489696Z'
                    - userId: docs
                      timestamp: '2023-11-16T22:29:23.298780Z'
                    - userId: docs
                      timestamp: '2023-11-16T22:29:23.301201Z'
                    - userId: docs
                      timestamp: '2023-11-16T22:49:57.502412Z'
                    - userId: docs
                      timestamp: '2023-11-16T22:49:57.504789Z'
                    - userId: docs
                      timestamp: '2023-11-16T23:08:31.127377Z'
                    - userId: docs
                      timestamp: '2023-11-16T23:08:31.129831Z'
                    - userId: docs
                      timestamp: '2023-11-17T00:13:06.160988Z'
                    - userId: docs
                      timestamp: '2023-11-17T00:13:06.163055Z'
                    - userId: docs
                      timestamp: '2023-11-17T00:51:44.405324Z'
                    - userId: docs
                      timestamp: '2023-11-17T00:51:44.407660Z'
                    - userId: docs
                      timestamp: '2023-11-17T01:04:32.624678Z'
                    - userId: docs
                      timestamp: '2023-11-17T01:04:32.626958Z'
                    - userId: docs
                      timestamp: '2023-11-20T16:49:16.543368Z'
                    - userId: docs
                      timestamp: '2023-11-20T16:49:16.551262Z'
                - id: api-test-app_click_signals_aggregation
                  inputCollection: api-test-app_signals
                  outputCollection: api-test-app_signals_aggr
                  sql: |-
                    WITH sigs_with_filters AS (
                       SELECT c.query as query,
                              c.doc_id,
                              q.filters_s as filters,
                              c.type,
                              c.ref_time,
                              coalesce(c.count_i,1) as count_i,
                              c.timestamp_tdt,
                              greatest(coalesce(c.weight_d,0.1),0.0) as weight_d
                         FROM ${inputCollection} c
                     LEFT JOIN (SELECT id, filters_s FROM ${inputCollection} WHERE type='response') q ON q.id = c.fusion_query_id
                        WHERE c.type IN (${signalTypes}) AND c.timestamp_tdt >= c.catchup_timestamp_tdt
                     ), signal_type_groups AS (
                         SELECT SUM(count_i) AS typed_aggr_count_i,
                                query,
                                doc_id,
                                type,
                                filters,
                                time_decay(count_i, timestamp_tdt, "30 days", ref_time, weight_d) AS typed_weight_d
                           FROM sigs_with_filters
                       GROUP BY doc_id, query, filters, type
                     ) SELECT concat_ws('|', query, doc_id, filters) as id,
                              SUM(typed_aggr_count_i) AS aggr_count_i,
                              query AS query_s,
                              query AS query_t,
                              doc_id AS doc_id_s,
                              filters AS filters_s,
                              SPLIT(filters, ' \\$ ') AS filters_ss,
                              weighted_sum(typed_weight_d, type, '${signalTypeWeights}') AS weight_d
                         FROM signal_type_groups
                     GROUP BY query, doc_id, filters
                  rollupSql: |-
                    SELECT concat_ws('|', query_s, doc_id_s, filters_s) as id,
                      query_s,
                      query_s as query_t,
                      doc_id_s,
                      filters_s,
                      first(aggr_type_s) AS aggr_type_s,
                      SPLIT(filters_s, ' \\$ ') AS filters_ss,
                      SUM(weight_d) AS weight_d,
                      SUM(aggr_count_i) AS aggr_count_i
                      FROM api-test-app_signals_aggr
                      GROUP BY query_s, doc_id_s, filters_s
                  parameters:
                    - key: signalTypeWeights
                      value: click:1.0,cart:10.0,purchase:25.0
                  selectQuery: '*:*'
                  sourceRemove: false
                  sourceCatchup: true
                  hiddenParameters:
                    - key: signalTypes
                      value: >-
                        _regex/signalTypeWeights/([\w\-\.]*):([\d\.\-]*)(,|$)/'$1'$3/g
                  skipCheckEnabled: true
                  notes: >-
                    Computes an aggregated weight for each query / item
                    combination found in the signals collection. The weight for
                    each group is computed using an exponential time-decay on
                    signal count (30 day half-life) and a weighted sum based on
                    the signal type. Use the 'signalTypeWeights' parameter to
                    set the correct signal types and weights for your dataset.
                    The results of this job can be used to boost documents at
                    query time using the boosting stage.
                  useNaturalKey: true
                  optimizeSegments: 0
                  dataFormat: solr
                  sparkSQL: SELECT * from spark_input
                  sparkPartitions: 200
                  type: sql_template
                  updates:
                    - userId: laureltraining
                      timestamp: '2025-10-02T19:41:38.015534417Z'
                - id: api-test-app_session_rollup
                  inputCollection: api-test-app_signals
                  outputCollection: api-test-app_signals
                  sql: |-
                    WITH session_agg AS (
                         SELECT COUNT(1) AS activity_count,
                                MIN(timestamp_tdt) AS start,
                                MAX(timestamp_tdt) AS end,
                                timediff(MAX(timestamp_tdt), MIN(timestamp_tdt), "MINUTES") AS duration,
                                'session' AS type,
                                first(user_id) AS user,
                                session_keywords(query) AS keywords,
                                session
                           FROM ${inputCollection}
                          WHERE timestamp_tdt IS NOT NULL
                            AND type != 'session'
                            AND session IS NOT NULL
                            AND session NOT IN (SELECT session FROM ${inputCollection} WHERE type = 'session' AND session IS NOT NULL)
                       GROUP BY session
                         HAVING timediff(current_timestamp(), MAX(timestamp_tdt), "SECONDS") >= ${elapsedSecsSinceLastActivity} OR timediff(current_timestamp(), MIN(timestamp_tdt), "SECONDS") >= ${elapsedSecsSinceSessionStart})
                     SELECT activity_count, start, end, duration, type, user, keywords, session FROM session_agg
                  parameters:
                    - key: elapsedSecsSinceLastActivity
                      value: '360'
                    - key: elapsedSecsSinceSessionStart
                      value: '3600'
                  selectQuery: '*:*'
                  sourceRemove: false
                  sourceCatchup: false
                  hiddenParameters:
                    - key: outputProjectedFieldsOnly
                      value: 'true'
                  skipCheckEnabled: false
                  notes: >-
                    Aggregate related user activity into a session signal
                    containing activity count, duration, and keywords (based on
                    user search terms). This job is used by the Fusion Insights
                    application to show reports about user sessions. Use the
                    'elapsedSecsSinceLastActivity' and
                    'elapsedSecsSinceSessionStart' parameters to determine when
                    a user session is considered to be complete.
                  useNaturalKey: true
                  optimizeSegments: 0
                  dataFormat: solr
                  sparkSQL: SELECT * FROM spark_input
                  sparkPartitions: 200
                  type: sql_template
                  updates:
                    - userId: laureltraining
                      timestamp: '2025-10-02T19:41:38.027725367Z'
                - id: product_demo_session_rollup
                  inputCollection: product_demo_signals
                  outputCollection: product_demo_signals
                  sql: |-
                    WITH session_agg AS (
                         SELECT COUNT(1) AS activity_count,
                                MIN(timestamp_tdt) AS start,
                                MAX(timestamp_tdt) AS end,
                                timediff(MAX(timestamp_tdt), MIN(timestamp_tdt), "MINUTES") AS duration,
                                'session' AS type,
                                first(user_id) AS user,
                                session_keywords(query) AS keywords,
                                session
                           FROM ${inputCollection}
                          WHERE timestamp_tdt IS NOT NULL
                            AND type != 'session'
                            AND session IS NOT NULL
                            AND session NOT IN (SELECT session FROM ${inputCollection} WHERE type = 'session' AND session IS NOT NULL)
                       GROUP BY session
                         HAVING timediff(current_timestamp(), MAX(timestamp_tdt), "SECONDS") >= ${elapsedSecsSinceLastActivity} OR timediff(current_timestamp(), MIN(timestamp_tdt), "SECONDS") >= ${elapsedSecsSinceSessionStart})
                     SELECT activity_count, start, end, duration, type, user, keywords, session FROM session_agg
                  parameters:
                    - key: elapsedSecsSinceLastActivity
                      value: '360'
                    - key: elapsedSecsSinceSessionStart
                      value: '3600'
                  selectQuery: '*:*'
                  sourceRemove: false
                  sourceCatchup: false
                  hiddenParameters:
                    - key: outputProjectedFieldsOnly
                      value: 'true'
                  skipCheckEnabled: false
                  notes: >-
                    Aggregate related user activity into a session signal
                    containing activity count, duration, and keywords (based on
                    user search terms). This job is used by the Fusion Insights
                    application to show reports about user sessions. Use the
                    'elapsedSecsSinceLastActivity' and
                    'elapsedSecsSinceSessionStart' parameters to determine when
                    a user session is considered to be complete.
                  useNaturalKey: true
                  optimizeSegments: 0
                  dataFormat: solr
                  sparkSQL: SELECT * FROM spark_input
                  sparkPartitions: 200
                  type: sql_template
                  updates:
                    - userId: eric.redman@lucidworks.com
                      timestamp: '2025-04-15T15:32:34.657308899Z'
                - id: product_demo_click_signals_aggregation
                  inputCollection: product_demo_signals
                  outputCollection: product_demo_signals_aggr
                  sql: |-
                    WITH sigs_with_filters AS (
                       SELECT c.query as query,
                              c.doc_id,
                              q.filters_s as filters,
                              c.type,
                              c.ref_time,
                              coalesce(c.count_i,1) as count_i,
                              c.timestamp_tdt,
                              greatest(coalesce(c.weight_d,0.1),0.0) as weight_d
                         FROM ${inputCollection} c
                     LEFT JOIN (SELECT id, filters_s FROM ${inputCollection} WHERE type='response') q ON q.id = c.fusion_query_id
                        WHERE c.type IN (${signalTypes}) AND c.timestamp_tdt >= c.catchup_timestamp_tdt
                     ), signal_type_groups AS (
                         SELECT SUM(count_i) AS typed_aggr_count_i,
                                query,
                                doc_id,
                                type,
                                filters,
                                time_decay(count_i, timestamp_tdt, "30 days", ref_time, weight_d) AS typed_weight_d
                           FROM sigs_with_filters
                       GROUP BY doc_id, query, filters, type
                     ) SELECT concat_ws('|', query, doc_id, filters) as id,
                              SUM(typed_aggr_count_i) AS aggr_count_i,
                              query AS query_s,
                              query AS query_t,
                              doc_id AS doc_id_s,
                              filters AS filters_s,
                              SPLIT(filters, ' \\$ ') AS filters_ss,
                              weighted_sum(typed_weight_d, type, '${signalTypeWeights}') AS weight_d
                         FROM signal_type_groups
                     GROUP BY query, doc_id, filters
                  rollupSql: |-
                    SELECT concat_ws('|', query_s, doc_id_s, filters_s) as id,
                      query_s,
                      query_s as query_t,
                      doc_id_s,
                      filters_s,
                      first(aggr_type_s) AS aggr_type_s,
                      SPLIT(filters_s, ' \\$ ') AS filters_ss,
                      SUM(weight_d) AS weight_d,
                      SUM(aggr_count_i) AS aggr_count_i
                      FROM product_demo_signals_aggr
                      GROUP BY query_s, doc_id_s, filters_s
                  parameters:
                    - key: signalTypeWeights
                      value: click:1.0,cart:10.0,purchase:25.0
                  selectQuery: '*:*'
                  sourceRemove: false
                  sourceCatchup: true
                  hiddenParameters:
                    - key: signalTypes
                      value: >-
                        _regex/signalTypeWeights/([\w\-\.]*):([\d\.\-]*)(,|$)/'$1'$3/g
                  skipCheckEnabled: true
                  notes: >-
                    Computes an aggregated weight for each query / item
                    combination found in the signals collection. The weight for
                    each group is computed using an exponential time-decay on
                    signal count (30 day half-life) and a weighted sum based on
                    the signal type. Use the 'signalTypeWeights' parameter to
                    set the correct signal types and weights for your dataset.
                    The results of this job can be used to boost documents at
                    query time using the boosting stage.
                  useNaturalKey: true
                  optimizeSegments: 0
                  dataFormat: solr
                  sparkSQL: SELECT * from spark_input
                  sparkPartitions: 200
                  type: sql_template
                  updates:
                    - userId: eric.redman@lucidworks.com
                      timestamp: '2025-04-15T15:32:34.631440661Z'
                - id: Synthetic_dataset_test_synonym_detection
                  trainingDataFilterQuery: '*:*'
                  trainingCollection: Synthetic_dataset_test_signals_aggr
                  outputCollection: Synthetic_dataset_test_query_rewrite_staging
                  overwriteOutput: true
                  misspellingCollection: Synthetic_dataset_test_query_rewrite_staging
                  misspellingsFilterQuery: type:spell
                  keyPhraseCollection: Synthetic_dataset_test_query_rewrite_staging
                  keyPhraseFilterQuery: type:phrase
                  countField: aggr_count_i
                  fieldToVectorize: query_s
                  docIdField: doc_id_s
                  overlapThreshold: 0.5
                  minQueryCount: 5
                  similarityThreshold: 0.01
                  trainingDataSamplingFraction: 1
                  analyzerConfigQuery: >-
                    { "analyzers": [ { "name":
                    "LetterTokLowerStem","charFilters": [ { "type": "htmlstrip"
                    } ],"tokenizer": { "type": "letter" },"filters": [{ "type":
                    "lowercase" },{ "type": "KStem" }] }],"fields": [{ "regex":
                    ".+", "analyzer": "LetterTokLowerStem" } ]}
                  randomSeed: 1234
                  sparkPartitions: 200
                  enableAutoPublish: false
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  misspellingSQL: >-

                    SELECT surface_form AS misspelling_s, output AS correction_s

                    FROM spell_input

                    WHERE doc_type  = 'query_rewrite' AND type = 'spell' AND
                    (review = 'approved' OR review = 'auto')

                        
                  misspellingSQLDataFormat: solr
                  phraseSQL: >-

                    SELECT surface_form AS phrases_s, coalesce(confidence, 1) AS
                    likelihood_d, coalesce(word_count,1) AS word_num_i

                    FROM phrase_input

                    WHERE doc_type  = 'query_rewrite' AND type = 'phrase' AND
                    (review = 'approved' OR review = 'auto')

                        
                  phraseSQLDataFormat: solr
                  type: synonymDetection
                  updates:
                    - userId: docs
                      timestamp: '2025-08-08T16:50:31.054004509Z'
                - id: Synthetic_dataset_test_head_tail
                  randomSeed: 1234
                  trainingCollection: Synthetic_dataset_test_signals
                  outputCollection: Synthetic_dataset_test_job_reports
                  overwriteOutput: true
                  trainingDataFilterQuery: '*:*'
                  fieldToVectorize: query
                  analyzerConfigQuery: >-
                    { "analyzers": [ { "name": "StdTokLowerStem","charFilters":
                    [ { "type": "htmlstrip" } ],"tokenizer": { "type":
                    "standard" },"filters": [{ "type": "lowercase" },{ "type":
                    "englishminimalstem" }] }],"fields": [{ "regex": ".+",
                    "analyzer": "StdTokLowerStem" } ]}
                  countField: count_i
                  mainType: click
                  filterType: response
                  signalTypeField: type
                  minCountMain: 1
                  minCountFilter: 20
                  queryLenThreshold: 2
                  userHead: -1
                  userTail: -1
                  topQ:
                    - 100
                    - 0.01
                  trafficPerc:
                    - 0.25
                    - 0.5
                    - 0.75
                  lastTraffic:
                    - 0.01
                  trafficCount:
                    - 5
                  overlapThreshold: 4
                  lenScale: 6
                  overlapNumBoost: 10
                  headQueryCntBoost: 1
                  tailRewrite: true
                  sparkPartitions: 200
                  enableAutoPublish: false
                  tailRewriteCollection: Synthetic_dataset_test_query_rewrite_staging
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  type: headTailAnalysis
                  trainingDataSamplingFraction: 1
                  updates:
                    - userId: docs
                      timestamp: '2025-08-08T16:50:37.976254886Z'
                - id: api-test-app_spell_correction
                  trainingDataFilterQuery: '*:*'
                  trainingCollection: api-test-app_signals
                  outputCollection: api-test-app_query_rewrite_staging
                  overwriteOutput: true
                  maxDistance: 2
                  analyzerConfigQuery: >-
                    { "analyzers": [ { "name":
                    "LetterTokLowerStem","charFilters": [ { "type": "htmlstrip"
                    } ],"tokenizer": { "type": "letter" },"filters": [{ "type":
                    "lowercase" },{ "type": "KStem" }] }],"fields": [{ "regex":
                    ".+", "analyzer": "LetterTokLowerStem" } ]}
                  analyzerConfigDictionary: >-
                    { "analyzers": [ { "name":
                    "LetterTokLowerStem","charFilters": [ { "type": "htmlstrip"
                    } ],"tokenizer": { "type": "letter" },"filters": [{ "type":
                    "lowercase" },{ "type": "KStem" }] }],"fields": [{ "regex":
                    ".+", "analyzer": "LetterTokLowerStem" } ]}
                  lenScale: 5
                  fieldToVectorize: query
                  mainType: click
                  filterType: response
                  signalTypeField: type
                  minCountMain: 1
                  minCountFilter: 10
                  trainingDataSamplingFraction: 1
                  countField: count_i
                  dictionaryDataFilterQuery: '*:*'
                  minPrefix: 1
                  minMispellingLen: 5
                  correctionThreshold: 0.8
                  misspellingThreshold: 0.8
                  lastCharMatchBoost: 1
                  soundMatchBoost: 3
                  correctCntBoost: 2
                  editDistBoost: 2
                  corMisRatio: 3
                  randomSeed: 1234
                  signalDataIndicator: true
                  sparkPartitions: 200
                  enableAutoPublish: false
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  type: tokenPhraseSpellCorrection
                  updates:
                    - userId: laureltraining
                      timestamp: '2025-10-02T19:41:32.857626422Z'
                - id: api-test-app_synonym_detection
                  trainingDataFilterQuery: '*:*'
                  trainingCollection: api-test-app_signals_aggr
                  outputCollection: api-test-app_query_rewrite_staging
                  overwriteOutput: true
                  misspellingCollection: api-test-app_query_rewrite_staging
                  misspellingsFilterQuery: type:spell
                  keyPhraseCollection: api-test-app_query_rewrite_staging
                  keyPhraseFilterQuery: type:phrase
                  countField: aggr_count_i
                  fieldToVectorize: query_s
                  docIdField: doc_id_s
                  overlapThreshold: 0.5
                  minQueryCount: 5
                  similarityThreshold: 0.01
                  trainingDataSamplingFraction: 1
                  analyzerConfigQuery: >-
                    { "analyzers": [ { "name":
                    "LetterTokLowerStem","charFilters": [ { "type": "htmlstrip"
                    } ],"tokenizer": { "type": "letter" },"filters": [{ "type":
                    "lowercase" },{ "type": "KStem" }] }],"fields": [{ "regex":
                    ".+", "analyzer": "LetterTokLowerStem" } ]}
                  randomSeed: 1234
                  sparkPartitions: 200
                  enableAutoPublish: false
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  misspellingSQL: >-

                    SELECT surface_form AS misspelling_s, output AS correction_s

                    FROM spell_input

                    WHERE doc_type  = 'query_rewrite' AND type = 'spell' AND
                    (review = 'approved' OR review = 'auto')

                        
                  misspellingSQLDataFormat: solr
                  phraseSQL: >-

                    SELECT surface_form AS phrases_s, coalesce(confidence, 1) AS
                    likelihood_d, coalesce(word_count,1) AS word_num_i

                    FROM phrase_input

                    WHERE doc_type  = 'query_rewrite' AND type = 'phrase' AND
                    (review = 'approved' OR review = 'auto')

                        
                  phraseSQLDataFormat: solr
                  type: synonymDetection
                  updates:
                    - userId: laureltraining
                      timestamp: '2025-10-02T19:41:32.888732541Z'
                # Example item: job configuration `product_demo_synonym_detection`
                # (type: synonymDetection). Every collection-valued setting except
                # `trainingCollection` points at product_demo_query_rewrite_staging.
                - id: product_demo_synonym_detection
                  trainingDataFilterQuery: '*:*'
                  trainingCollection: product_demo_signals_aggr
                  outputCollection: product_demo_query_rewrite_staging
                  overwriteOutput: true
                  misspellingCollection: product_demo_query_rewrite_staging
                  misspellingsFilterQuery: type:spell
                  keyPhraseCollection: product_demo_query_rewrite_staging
                  keyPhraseFilterQuery: type:phrase
                  countField: aggr_count_i
                  fieldToVectorize: query_s
                  docIdField: doc_id_s
                  overlapThreshold: 0.5
                  minQueryCount: 5
                  similarityThreshold: 0.01
                  trainingDataSamplingFraction: 1
                  # Folded (`>-`) scalar: the wrapped lines below are joined with
                  # single spaces into one string of JSON text.
                  analyzerConfigQuery: >-
                    { "analyzers": [ { "name":
                    "LetterTokLowerStem","charFilters": [ { "type": "htmlstrip"
                    } ],"tokenizer": { "type": "letter" },"filters": [{ "type":
                    "lowercase" },{ "type": "KStem" }] }],"fields": [{ "regex":
                    ".+", "analyzer": "LetterTokLowerStem" } ]}
                  randomSeed: 1234
                  sparkPartitions: 200
                  enableAutoPublish: false
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  # Folded scalar holding SQL that selects from `spell_input`. The
                  # blank lines inside it fold to newlines, and the whitespace-only
                  # final line is indented deeper than the content, so it appears to
                  # be part of the value -- do not trim it when editing.
                  misspellingSQL: >-

                    SELECT surface_form AS misspelling_s, output AS correction_s

                    FROM spell_input

                    WHERE doc_type  = 'query_rewrite' AND type = 'spell' AND
                    (review = 'approved' OR review = 'auto')

                        
                  misspellingSQLDataFormat: solr
                  # Same layout as `misspellingSQL`, selecting from `phrase_input`;
                  # the same whitespace caveat applies.
                  phraseSQL: >-

                    SELECT surface_form AS phrases_s, coalesce(confidence, 1) AS
                    likelihood_d, coalesce(word_count,1) AS word_num_i

                    FROM phrase_input

                    WHERE doc_type  = 'query_rewrite' AND type = 'phrase' AND
                    (review = 'approved' OR review = 'auto')

                        
                  phraseSQLDataFormat: solr
                  type: synonymDetection
                  # NOTE(review): `userId` below looks like a real email address --
                  # confirm it is meant to be published in this example.
                  updates:
                    - userId: eric.redman@lucidworks.com
                      timestamp: '2025-04-15T15:32:23.452449115Z'
                # Example item: job configuration `product_demo_head_tail`
                # (type: headTailAnalysis). Trains on product_demo_signals, writes to
                # product_demo_job_reports, and names
                # product_demo_query_rewrite_staging as `tailRewriteCollection`.
                - id: product_demo_head_tail
                  randomSeed: 1234
                  trainingCollection: product_demo_signals
                  outputCollection: product_demo_job_reports
                  overwriteOutput: true
                  trainingDataFilterQuery: '*:*'
                  fieldToVectorize: query
                  # Folded (`>-`) scalar: the wrapped lines below are joined with
                  # single spaces into one string of JSON text.
                  analyzerConfigQuery: >-
                    { "analyzers": [ { "name": "StdTokLowerStem","charFilters":
                    [ { "type": "htmlstrip" } ],"tokenizer": { "type":
                    "standard" },"filters": [{ "type": "lowercase" },{ "type":
                    "englishminimalstem" }] }],"fields": [{ "regex": ".+",
                    "analyzer": "StdTokLowerStem" } ]}
                  countField: count_i
                  mainType: click
                  filterType: response
                  signalTypeField: type
                  minCountMain: 1
                  minCountFilter: 20
                  queryLenThreshold: 2
                  userHead: -1
                  userTail: -1
                  # The next four settings are list-valued, including the
                  # single-element `lastTraffic` and `trafficCount`.
                  topQ:
                    - 100
                    - 0.01
                  trafficPerc:
                    - 0.25
                    - 0.5
                    - 0.75
                  lastTraffic:
                    - 0.01
                  trafficCount:
                    - 5
                  overlapThreshold: 4
                  lenScale: 6
                  overlapNumBoost: 10
                  headQueryCntBoost: 1
                  tailRewrite: true
                  sparkPartitions: 200
                  enableAutoPublish: false
                  tailRewriteCollection: product_demo_query_rewrite_staging
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  type: headTailAnalysis
                  trainingDataSamplingFraction: 1
                  # NOTE(review): `userId` below looks like a real email address --
                  # confirm it is meant to be published in this example.
                  updates:
                    - userId: eric.redman@lucidworks.com
                      timestamp: '2025-04-15T15:32:34.680465167Z'
                # Example item: job configuration
                # `Synthetic_dataset_test_spell_correction`
                # (type: tokenPhraseSpellCorrection). Trains on
                # Synthetic_dataset_test_signals and writes to
                # Synthetic_dataset_test_query_rewrite_staging.
                - id: Synthetic_dataset_test_spell_correction
                  trainingDataFilterQuery: '*:*'
                  trainingCollection: Synthetic_dataset_test_signals
                  outputCollection: Synthetic_dataset_test_query_rewrite_staging
                  overwriteOutput: true
                  maxDistance: 2
                  # `analyzerConfigQuery` and `analyzerConfigDictionary` are folded
                  # (`>-`) scalars of JSON text; in this example the two hold
                  # identical content.
                  analyzerConfigQuery: >-
                    { "analyzers": [ { "name":
                    "LetterTokLowerStem","charFilters": [ { "type": "htmlstrip"
                    } ],"tokenizer": { "type": "letter" },"filters": [{ "type":
                    "lowercase" },{ "type": "KStem" }] }],"fields": [{ "regex":
                    ".+", "analyzer": "LetterTokLowerStem" } ]}
                  analyzerConfigDictionary: >-
                    { "analyzers": [ { "name":
                    "LetterTokLowerStem","charFilters": [ { "type": "htmlstrip"
                    } ],"tokenizer": { "type": "letter" },"filters": [{ "type":
                    "lowercase" },{ "type": "KStem" }] }],"fields": [{ "regex":
                    ".+", "analyzer": "LetterTokLowerStem" } ]}
                  lenScale: 5
                  fieldToVectorize: query
                  mainType: click
                  filterType: response
                  signalTypeField: type
                  minCountMain: 1
                  minCountFilter: 10
                  trainingDataSamplingFraction: 1
                  countField: count_i
                  dictionaryDataFilterQuery: '*:*'
                  minPrefix: 1
                  # NOTE(review): the key is spelled `minMispellingLen` (single
                  # "s"); presumably this matches the API's field name -- confirm
                  # before "correcting" it.
                  minMispellingLen: 5
                  correctionThreshold: 0.8
                  misspellingThreshold: 0.8
                  lastCharMatchBoost: 1
                  soundMatchBoost: 3
                  correctCntBoost: 2
                  editDistBoost: 2
                  corMisRatio: 3
                  randomSeed: 1234
                  signalDataIndicator: true
                  sparkPartitions: 200
                  enableAutoPublish: false
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  type: tokenPhraseSpellCorrection
                  updates:
                    - userId: docs
                      timestamp: '2025-08-08T16:50:31.008454832Z'
                # Example item: job configuration `api-test-app_phrase_extraction`
                # (type: sip). Trains on api-test-app_signals and writes to
                # api-test-app_query_rewrite_staging.
                - id: api-test-app_phrase_extraction
                  trainingDataFilterQuery: '*:*'
                  randomSeed: 8180
                  # Folded (`>-`) scalar: the wrapped lines below are joined with
                  # single spaces into one string of JSON text. Note the key here is
                  # `analyzerConfig`, not `analyzerConfigQuery`.
                  analyzerConfig: >-
                    { "analyzers": [{ "name": "StdTokLowerStop","charFilters": [
                    { "type": "htmlstrip" } ],"tokenizer": { "type": "standard"
                    },"filters": [{ "type": "lowercase" }] }],"fields": [{
                    "regex": ".+", "analyzer": "StdTokLowerStop" } ]}
                  trainingCollection: api-test-app_signals
                  outputCollection: api-test-app_query_rewrite_staging
                  overwriteOutput: true
                  fieldToVectorize: query
                  trainingDataSamplingFraction: 1
                  ngramSize: 3
                  minmatch: 100
                  attachPhrases: false
                  sparkPartitions: 200
                  enableAutoPublish: false
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  type: sip
                  sourceFields: query
                  updates:
                    - userId: laureltraining
                      timestamp: '2025-10-02T19:41:32.880887211Z'
                # Example item: job configuration `api-test-app_head_tail`
                # (type: headTailAnalysis). Trains on api-test-app_signals, writes to
                # api-test-app_job_reports, and names
                # api-test-app_query_rewrite_staging as `tailRewriteCollection`.
                - id: api-test-app_head_tail
                  randomSeed: 1234
                  trainingCollection: api-test-app_signals
                  outputCollection: api-test-app_job_reports
                  overwriteOutput: true
                  trainingDataFilterQuery: '*:*'
                  fieldToVectorize: query
                  # Folded (`>-`) scalar: the wrapped lines below are joined with
                  # single spaces into one string of JSON text.
                  analyzerConfigQuery: >-
                    { "analyzers": [ { "name": "StdTokLowerStem","charFilters":
                    [ { "type": "htmlstrip" } ],"tokenizer": { "type":
                    "standard" },"filters": [{ "type": "lowercase" },{ "type":
                    "englishminimalstem" }] }],"fields": [{ "regex": ".+",
                    "analyzer": "StdTokLowerStem" } ]}
                  countField: count_i
                  mainType: click
                  filterType: response
                  signalTypeField: type
                  minCountMain: 1
                  minCountFilter: 20
                  queryLenThreshold: 2
                  userHead: -1
                  userTail: -1
                  # The next four settings are list-valued, including the
                  # single-element `lastTraffic` and `trafficCount`.
                  topQ:
                    - 100
                    - 0.01
                  trafficPerc:
                    - 0.25
                    - 0.5
                    - 0.75
                  lastTraffic:
                    - 0.01
                  trafficCount:
                    - 5
                  overlapThreshold: 4
                  lenScale: 6
                  overlapNumBoost: 10
                  headQueryCntBoost: 1
                  tailRewrite: true
                  sparkPartitions: 200
                  enableAutoPublish: false
                  tailRewriteCollection: api-test-app_query_rewrite_staging
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  type: headTailAnalysis
                  trainingDataSamplingFraction: 1
                  updates:
                    - userId: laureltraining
                      timestamp: '2025-10-02T19:41:38.037038966Z'
                # Example item: job configuration `product_demo_phrase_extraction`
                # (type: sip). Trains on product_demo_signals and writes to
                # product_demo_query_rewrite_staging.
                - id: product_demo_phrase_extraction
                  trainingDataFilterQuery: '*:*'
                  randomSeed: 8180
                  # Folded (`>-`) scalar: the wrapped lines below are joined with
                  # single spaces into one string of JSON text. Note the key here is
                  # `analyzerConfig`, not `analyzerConfigQuery`.
                  analyzerConfig: >-
                    { "analyzers": [{ "name": "StdTokLowerStop","charFilters": [
                    { "type": "htmlstrip" } ],"tokenizer": { "type": "standard"
                    },"filters": [{ "type": "lowercase" }] }],"fields": [{
                    "regex": ".+", "analyzer": "StdTokLowerStop" } ]}
                  trainingCollection: product_demo_signals
                  outputCollection: product_demo_query_rewrite_staging
                  overwriteOutput: true
                  fieldToVectorize: query
                  trainingDataSamplingFraction: 1
                  ngramSize: 3
                  minmatch: 100
                  attachPhrases: false
                  sparkPartitions: 200
                  enableAutoPublish: false
                  dataFormat: solr
                  dataOutputFormat: solr
                  sparkSQL: SELECT * from spark_input
                  type: sip
                  sourceFields: query
                  # NOTE(review): `userId` below looks like a real email address --
                  # confirm it is meant to be published in this example.
                  updates:
                    - userId: eric.redman@lucidworks.com
                      timestamp: '2025-04-15T15:32:23.352147553Z'
                # Example item: job configuration
                # `Synthetic_dataset_test_session_rollup` (type: sql_template).
                # `inputCollection` and `outputCollection` name the same collection;
                # the SQL skips rows of type 'session' and sessions that already have
                # a 'session' row. The `sql` literal (`|-`) scalar keeps its line
                # breaks and contains `${...}` placeholders whose names match the
                # `inputCollection` key and the two `parameters` keys; how they are
                # substituted is not shown here (presumably by the job runner).
                # Parameter values are quoted, so they are strings, not numbers.
                - id: Synthetic_dataset_test_session_rollup
                  inputCollection: Synthetic_dataset_test_signals
                  outputCollection: Synthetic_dataset_test_signals
                  sql: |-
                    WITH session_agg AS (
                         SELECT COUNT(1) AS activity_count,
                                MIN(timestamp_tdt) AS start,
                                MAX(timestamp_tdt) AS end,
                                timediff(MAX(timestamp_tdt), MIN(timestamp_tdt), "MINUTES") AS duration,
                                'session' AS type,
                                first(user_id) AS user,
                                session_keywords(query) AS keywords,
                                session
                           FROM ${inputCollection}
                          WHERE timestamp_tdt IS NOT NULL
                            AND type != 'session'
                            AND session IS NOT NULL
                            AND session NOT IN (SELECT session FROM ${inputCollection} WHERE type = 'session' AND session IS NOT NULL)
                       GROUP BY session
                         HAVING timediff(current_timestamp(), MAX(timestamp_tdt), "SECONDS") >= ${elapsedSecsSinceLastActivity} OR timediff(current_timestamp(), MIN(timestamp_tdt), "SECONDS") >= ${elapsedSecsSinceSessionStart})
                     SELECT activity_count, start, end, duration, type, user, keywords, session FROM session_agg
                  parameters:
                    - key: elapsedSecsSinceLastActivity
                      value: '360'
                    - key: elapsedSecsSinceSessionStart
                      value: '3600'
                  selectQuery: '*:*'
                  sourceRemove: false
                  sourceCatchup: false
                  hiddenParameters:
                    - key: outputProjectedFieldsOnly
                      value: 'true'
                  skipCheckEnabled: false
                  notes: >-
                    Aggregate related user activity into a session signal
                    containing activity count, duration, and keywords (based on
                    user search terms). This job is used by the Fusion Insights
                    application to show reports about user sessions. Use the
                    'elapsedSecsSinceLastActivity' and
                    'elapsedSecsSinceSessionStart' parameters to determine when
                    a user session is considered to be complete.
                  useNaturalKey: true
                  optimizeSegments: 0
                  dataFormat: solr
                  sparkSQL: SELECT * FROM spark_input
                  sparkPartitions: 200
                  type: sql_template
                  updates:
                    - userId: docs
                      timestamp: '2025-08-08T16:50:37.961250096Z'
                # Example item: job configuration `index_synthetic_data`
                # (type: parallel-bulk-loader). Configured with `format: json`, a
                # gs:// `path`, and `outputCollection` / `outputIndexPipeline` both
                # set to Synthetic_dataset_test.
                - id: index_synthetic_data
                  format: json
                  # Folded (`>-`) scalar with a single content line, so the value is
                  # just the URL with no trailing newline.
                  path: >-
                    gs://lucidworks-example-data/hardware/1000/1000_synthetic.json
                  outputCollection: Synthetic_dataset_test
                  outputIndexPipeline: Synthetic_dataset_test
                  clearDatasource: false
                  defineFieldsUsingInputSchema: true
                  atomicUpdates: false
                  cacheAfterRead: false
                  continueAfterFailure: false
                  type: parallel-bulk-loader
                  updates:
                    - userId: docs
                      timestamp: '2025-08-14T20:50:09.847634657Z'
# Reusable security scheme definitions. The scheme names below are the ones
# listed under the top-level `security` key of this document.
# NOTE(review): the OpenAPI 3.0 Components Object restricts component keys to
# ^[a-zA-Z0-9\.\-_]+$, and `Basic auth` / `API key` each contain a space.
# Renaming them would also require updating the top-level `security` list, so
# confirm against the spec and tooling before changing either place.
components:
  securitySchemes:
    # HTTP authentication using the `basic` scheme.
    Basic auth:
      type: http
      scheme: basic
    # API key passed in the `x-api-key` request header.
    API key:
      name: x-api-key
      type: apiKey
      in: header

````