> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Get the jobs schema

> Get the configuration schemas for all job types.



## OpenAPI

````yaml /api-reference/5.9/fusion-api-job-rest-server.json get /spark/schema
openapi: 3.0.1
info:
  title: Job REST Server API
  description: This group of APIs allows you to manage jobs, recommendations, and signals.
  contact:
    name: Lucidworks
    url: www.lucidworks.com
    email: support@lucidworks.com
  license:
    name: License of the API
    url: https://lucidworks.com/legal/developer-license-agreement/
  version: '5.9'
servers:
  - url: https://{FUSION HOST}/api
    description: Fusion
    variables:
      FUSION HOST:
        default: FUSION_HOST
        description: Your environment host.
  - url: https://{FUSION HOST}/api/apps/{APP_NAME}
    description: Fusion app
    variables:
      FUSION HOST:
        default: FUSION_HOST
        description: Your environment host.
      APP_NAME:
        default: APP_NAME
        description: The name of your Fusion application.
security:
  - Basic auth: []
  - API key: []
tags:
  - name: Recommendation Controller API
    description: Endpoints for the recommendation controller.
  - name: Signals Controller API
    description: Endpoints for the signals controller.
  - name: Spark Job Config Controller API
    description: Use these endpoints to manage Spark job configurations.
  - name: Spark Job Controller API
    description: Use these endpoints to start, stop, and check the status of Spark jobs.
externalDocs:
  description: Lucidworks Documentation
  url: https://doc.lucidworks.com/
paths:
  /spark/schema:
    get:
      tags:
        - Spark Job Config Controller API
      summary: Get the jobs schema
      description: Get the configuration schemas for all job types.
      operationId: getConfigurationTypes
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ObjectType'
              example:
                type: object
                properties: {}
                oneOf:
                  - type: object
                    title: Query-to-Query Session Based Similarity
                    description: >-
                      Use this job to to batch compute query-query similarities
                      using a co-occurrence based approach
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - docIdField
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Input Collection
                        description: >-
                          Collection containing queries, document id and event
                          counts. Can be either signal aggregation collection or
                          raw signals collection.
                      fieldToVectorize:
                        type: string
                        title: Query Field Name
                        description: Field containing queries.
                        default: query_s
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Data filter query
                        description: >-
                          Solr query to additionally filter the input
                          collection.
                        default: '*:*'
                        hints:
                          - dummy
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output collection
                        description: Collection to store synonym and similar query pairs.
                        hints:
                          - dummy
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - dummy
                          - hidden
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      specialCharsFilterString:
                        type: string
                        title: Special characters to be filtered out
                        description: >-
                          String of special characters to be filtered from
                          queries.
                        default: ~!@#$^%&*\(\)_+={}\[\]|;:"'<,>.?`/\\-
                        hints:
                          - advanced
                      minQueryLength:
                        type: integer
                        title: Minimum query length
                        description: >-
                          Queries below this length (in number of characters)
                          will not be considered for generating recommendations.
                        default: 3
                        minimum: 1
                        exclusiveMinimum: false
                      maxQueryLength:
                        type: integer
                        title: Maximum query length
                        description: >-
                          Queries above this length will not be considered for
                          generating recommendations.
                        default: 50
                        minimum: 1
                        exclusiveMinimum: false
                      countField:
                        type: string
                        title: Event Count Field Name
                        description: >-
                          Solr field containing number of events (e.g., number
                          of clicks).
                        default: count_i
                      docIdField:
                        type: string
                        title: Document id Field Name
                        description: Solr field containing document id that user clicked.
                        default: doc_id_s
                      overlapThreshold:
                        type: number
                        title: Query Similarity Threshold
                        description: >-
                          The threshold above which query pairs are consider
                          similar. Decreasing the value can fetch more pairs at
                          the expense of quality.
                        default: 0.3
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      minQueryCount:
                        type: integer
                        title: Query Clicks Threshold
                        description: >-
                          The minimum number of clicked documents needed for
                          comparing queries.
                        default: 1
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      overlapEnabled:
                        type: boolean
                        title: Boost on token overlap
                        description: >-
                          Maximize score for query pairs with overlapping tokens
                          by setting score to 1.
                        default: true
                        hints:
                          - advanced
                      tokenOverlapValue:
                        type: number
                        title: Minimum match for token overlap
                        description: >-
                          Minimum amount of overlap to consider for boosting. To
                          specify overlap in terms of ratio, specify a value in
                          (0, 1). To specify overlap in terms of exact count,
                          specify a value >= 1. If value is 0, boost will be
                          applied if one query is a substring of its
                          pair.Stopwords are ignored while counting overlaps.
                        default: 1
                        hints:
                          - advanced
                      sessionIdField:
                        type: string
                        title: Session/User ID field
                        description: >-
                          If session id is not available, specify user id field
                          instead. If this field is left blank, session based
                          recommendations will be disabled.
                        default: session_id_s
                      minPairOccCount:
                        type: integer
                        title: Minimum query-recommendation pair occurrence count
                        description: >-
                          Minimum number of times a query pair must be generated
                          to be considered valid.
                        default: 2
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      stopwordsBlobName:
                        type: string
                        title: Stopwords Blob Store
                        description: >-
                          Name of the stopwords blob resource. This is a .txt
                          file with one stopword per line. By default the file
                          is called stopwords/stopwords_nltk_en.txt however a
                          custom file can also be used. Check documentation for
                          more details on format and uploading to blob store.
                        default: stopwords/stopwords_en.txt
                        reference: blob
                        blobType: file:spark
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - similar_queries
                        default: similar_queries
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - countField
                          - docIdField
                          - sessionIdField
                      - label: Model Tuning Parameters
                        properties:
                          - minQueryLength
                          - maxQueryLength
                          - specialCharsFilterString
                          - stopwordsBlobName
                          - overlapThreshold
                          - overlapEnabled
                          - tokenOverlapValue
                          - minQueryCount
                          - minPairOccCount
                  - type: object
                    title: Smart Answers Coldstart Training (deprecated)
                    description: >-
                      Trains Smart Answers model on a cold start (unsupervised)
                      basis with with pre-trained or trained embeddings and
                      deploys the trained model to the ML Model Service
                    required:
                      - id
                      - trainingCollection
                      - trainingFormat
                      - textColName
                      - deployModelName
                      - modelBase
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Training data path
                        description: >-
                          Solr collection or cloud storage path where training
                          data is present.
                        minLength: 1
                      trainingFormat:
                        type: string
                        title: Training data format
                        description: The format of the training data - solr, parquet etc.
                        default: solr
                        minLength: 1
                      secretName:
                        type: string
                        title: Cloud storage secret name
                        description: >-
                          Name of the secret used to access cloud storage as
                          defined in the K8s namespace
                        hints:
                          - advanced
                        minLength: 1
                      trainingDataFilterQuery:
                        type: string
                        title: Training Data Filter Query
                        description: >-
                          Solr or SQL query to filter training data. Use solr
                          query when solr collection is specified in Training
                          Path. Use SQL query when cloud storage location is
                          specified. The table name for SQL is `spark_input`
                        hints:
                          - code/sql
                          - advanced
                      textColName:
                        type: string
                        title: Field which contains the content documents
                        description: >-
                          Field which contains the documents that will be used
                          to learn about the vocabulary. If multiple fields,
                          please separate them by comma, e.g. question,answer.
                      deployModelName:
                        type: string
                        title: Model Deployment Name
                        description: >-
                          Name of the model to be used for deployment (must be a
                          valid lowercased DNS subdomain with no underscores).
                        maxLength: 30
                        pattern: >-
                          ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
                      modelBase:
                        type: string
                        title: Model base
                        description: >-
                          Specify one of these custom embeddings:
                          ['word_custom', 'bpe_custom'] or choose one of the
                          included pre-trained embeddings / models.
                        enum:
                          - word_custom
                          - bpe_custom
                          - word_en_300d_2M
                          - bpe_en_300d_10K
                          - bpe_en_300d_200K
                          - bpe_ja_300d_100K
                          - bpe_ko_300d_100K
                          - bpe_zh_300d_50K
                          - bpe_multi_300d_320K
                          - distilbert_en
                          - distilbert_multi
                          - biobert_v1.1
                        default: word_en_300d_2M
                      testMode:
                        type: boolean
                        title: Test Mode
                        description: >-
                          If set to true, then the training will exit after the
                          first iteration. Useful for ensuring that the
                          end-to-end pipeline is working
                        default: false
                        hints:
                          - hidden
                      modelReplicas:
                        type: integer
                        title: Model replicas
                        description: >-
                          How many replicas of the model should be deployed by
                          Seldon Core
                        default: 1
                      w2vEpochs:
                        type: integer
                        title: Word2Vec training epochs
                        description: Number of epochs to train custom Word2Vec embeddings
                        default: 15
                        hints:
                          - advanced
                      w2vVectorSize:
                        type: integer
                        title: Size of word vectors
                        description: >-
                          Word-vector dimensionality to represent text
                          (suggested dimension ranges: 100~300)
                        default: 150
                        hints:
                          - advanced
                      w2vWindowSize:
                        type: integer
                        title: Word2Vec window size
                        description: >-
                          The window size (context words from [-window, window])
                          for Word2Vec
                        default: 8
                        hints:
                          - advanced
                      trainingSampleFraction:
                        type: number
                        title: Training Data Sampling Fraction
                        description: >-
                          The proportion of data to be sampled from the full
                          dataset. Use a value between 0 and 1 for a proportion
                          (e.g. 0.5 for 50%), or for a specific number of
                          examples, use an integer larger than 1. Leave blank
                          for no sampling
                        hints:
                          - advanced
                      seed:
                        type: integer
                        title: Seed
                        description: Random seed for sampling
                        default: 12345
                        hints:
                          - hidden
                      minTokensNum:
                        type: integer
                        title: Minimum number of words in doc
                        description: >-
                          Drop document if the total words is lower than this
                          value
                        default: 1
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      maxTokensNum:
                        type: integer
                        title: Maximum number of words in doc
                        description: >-
                          Drop document if the total words is greater than this
                          value
                        default: 5000
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      lowerCases:
                        type: boolean
                        title: Lower case all words
                        description: >-
                          Whether to lower case all words in training, i.e.
                          whether to treat upper case and lower case words
                          equally. Only utilized for custom embeddings or for
                          the default model base: word_en_300d_2M.
                        default: true
                      maxVocabSize:
                        type: integer
                        title: Maximum vocabulary size
                        description: >-
                          Maximum number of words in vocabulary, words will be
                          trimmed if frequency is too low
                        default: 100000
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      extraTrainingArgs:
                        type: string
                        title: Extra training args for Python scripts
                        description: >-
                          Add any additional arguments for the Python training
                          scripts in this field
                        hints:
                          - hidden
                      maxLen:
                        type: integer
                        title: Max Length
                        description: Max length of question/answer by number of tokens
                      infBatch:
                        type: integer
                        title: Inference batch size
                        description: The batch size used for encoding during the training
                        hints:
                          - advanced
                      numClusters:
                        type: integer
                        title: Number of clusters
                        description: >-
                          DEPRECATED: please, consider using Milvus for fast
                          dense vector similarity search. Number of clusters to
                          be used for fast dense vector retrieval. Note no
                          clustering will be applied if this is set to 0. If
                          left blank, cluster count will be inferred by the job
                          depending on the data
                        default: 0
                        hints:
                          - advanced
                      topKClusters:
                        type: integer
                        title: Top k of clusters to return
                        description: >-
                          How many closest clusters the model can find for each
                          query. At retrieval time, all answers in top k nearest
                          clusters will be returned and reranked
                        default: 10
                        hints:
                          - advanced
                      unidecode:
                        type: boolean
                        title: Apply unicode decoding
                        description: >-
                          Use Unidecode library to transform Unicode input into
                          ASCII transliterations. Only utilized for custom
                          embeddings or for the default model base:
                          word_en_300d_2M
                        default: true
                      globalPoolType:
                        type: string
                        title: Global Pool Type
                        description: >-
                          Determines how token vectors should be aggregated to
                          obtain final content vector. Must be one of: [avg,
                          max].
                        enum:
                          - avg
                          - max
                        default: avg
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-qna-coldstart
                        default: argo-qna-coldstart
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - trainingFormat
                          - textColName
                          - deployModelName
                          - modelReplicas
                          - secretName
                          - testMode
                      - label: Data Preprocessing
                        properties:
                          - trainingDataFilterQuery
                          - trainingSampleFraction
                          - seed
                          - minTokensNum
                          - maxTokensNum
                          - lowerCases
                          - unidecode
                          - maxVocabSize
                      - label: Custom Embeddings Initialization
                        properties:
                          - w2vEpochs
                          - w2vVectorSize
                          - w2vWindowSize
                      - label: Model Tuning Parameters
                        properties:
                          - maxLen
                          - infBatch
                          - numClusters
                          - topKClusters
                          - globalPoolType
                  - type: object
                    title: Ranking Metrics
                    description: >-
                      use this job to calculate relevance metrics (nDCG etc..)
                      by replaying ground truth queries (see ground truth job)
                      against catalog data using variants from an experiment.
                    required:
                      - id
                      - groundTruthConfig
                      - rankingExperimentConfig
                      - outputCollection
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      groundTruthConfig:
                        type: object
                        title: Configure ground truth dataset
                        description: Configure properties for Ground truth dataset
                        required:
                          - inputCollection
                        properties:
                          inputCollection:
                            type: string
                            title: Ground Truth Input Collection
                            description: Input collection representing ground truth dataset
                            minLength: 1
                          filterQueries:
                            type: array
                            title: Filter queries
                            description: >-
                              Solr filter queries to apply against Ground truth
                              collection
                            default:
                              - type:ground_truth
                            hints:
                              - advanced
                            items:
                              type: string
                              default: '["type:ground_truth"]'
                          queryField:
                            type: string
                            title: Query field
                            description: Query field in the collection
                            default: query
                            hints:
                              - advanced
                          docIdField:
                            type: string
                            title: Doc ID field
                            description: Field containing ranked doc id's
                            default: docId
                            hints:
                              - advanced
                          weightField:
                            type: string
                            title: Weight Field
                            description: >-
                              Field representing the weight of document to the
                              query
                            default: weight
                            hints:
                              - advanced
                      rankingExperimentConfig:
                        type: object
                        title: Configure experiment
                        description: Configure properties for the experiment
                        properties:
                          inputCollection:
                            type: string
                            title: Input Collection
                            description: Collection to run the experiment on
                            hints:
                              - advanced
                            minLength: 1
                          queryPipelines:
                            type: array
                            title: Query pipelines
                            description: Pipeline variants for experiment
                            hints:
                              - advanced
                            items:
                              type: string
                          docIdField:
                            type: string
                            title: Doc Id Field
                            description: >-
                              Doc id field to retrieve values (Must return
                              values that match the ground truth data)
                            default: id
                            hints:
                              - advanced
                          experimentId:
                            type: string
                            title: Experiment ID
                            description: >-
                              Calculate ranking metrics using variants from
                              experiment
                            minLength: 1
                          experimentObjectiveName:
                            type: string
                            title: Experiment metric name
                            description: Experiment objective name
                            minLength: 1
                          defaultProfile:
                            type: string
                            title: Default Query Profile
                            description: >-
                              Default query profile to use if not specified in
                              experiment variants
                      outputCollection:
                        type: string
                        title: Output collection
                        description: Output collection to save the ranking metrics to
                        minLength: 1
                      rankingPositionK:
                        type: integer
                        title: Ranking Position @K
                        description: Ranking position at K for metrics calculation
                        default: 10
                        hints:
                          - advanced
                      metricsPerQuery:
                        type: boolean
                        title: Calculate metrics per query
                        description: >-
                          Calculate ranking metrics per each query in ground
                          truth set and save them to Solr collection
                        default: true
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - ranking_metrics
                        default: ranking_metrics
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Ground Truth Parameters
                        properties:
                          - groundTruthConfig
                      - label: Ranking Experiment Parameters
                        properties:
                          - rankingExperimentConfig
                  - type: object
                    title: Data Augmentation (deprecated)
                    description: Use this job to perform Text Augmentation
                    required:
                      - id
                      - trainingCollection
                      - trainingFormat
                      - outputCollection
                      - outputFormat
                      - includeOriginalData
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Input path
                        description: >-
                          Solr collection or cloud storage path where training
                          data is present.
                        minLength: 1
                      trainingFormat:
                        type: string
                        title: Input format
                        description: The format of the training data - solr, parquet etc.
                        minLength: 1
                      trainingDataFilterQuery:
                        type: string
                        title: Training Data Filter Query
                        description: >-
                          Solr or SQL query to filter training data. Use solr
                          query when solr collection is specified in Training
                          Path. Use SQL query when cloud storage location is
                          specified. The table name for SQL is `spark_input`
                        hints:
                          - code/sql
                          - advanced
                      randomSeed:
                        type: integer
                        title: Random Seed
                        description: >-
                          Pseudorandom determinism fixed by keeping this seed
                          constant
                        default: 12345
                        hints:
                          - advanced
                      trainingSampleFraction:
                        type: number
                        title: Training Data Sampling Fraction
                        description: Choose a fraction of the data for training.
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      batchSize:
                        type: string
                        title: Batch Size
                        description: >-
                          If writing to solr, this field defines the batch size
                          for documents to be pushed to solr.
                        default: '15000'
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output path
                        description: Output collection to store generated augmented data.
                        minLength: 1
                      outputFormat:
                        type: string
                        title: Output Format
                        description: The format of the output data - solr, parquet etc.
                        minLength: 1
                      partitionFields:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      secretName:
                        type: string
                        title: Cloud storage secret name
                        description: >-
                          Name of the secret used to access cloud storage as
                          defined in the K8s namespace
                        hints:
                          - advanced
                        minLength: 1
                      backTranslations:
                        type: array
                        title: Back Translation
                        description: >-
                          Augment data via translation to a different language
                          and then translating back to original language. Chain
                          of languages can be used for translation. Works at
                          sentence level for medium-long length text. GPU
                          recommended and will be used when available.
                        items:
                          type: object
                          required:
                            - fieldname
                            - inputLanguage
                          properties:
                            fieldname:
                              type: string
                              title: Field Name
                              description: Name of the input field to augment.
                              minLength: 1
                            inputLanguage:
                              type: string
                              title: Input data Language
                              description: Language of input data.
                              enum:
                                - English
                                - French
                                - German
                                - Italian
                                - Spanish
                                - Dutch
                                - Polish
                                - Hebrew
                                - Ukrainian
                                - Chinese
                                - Japanese
                                - Korean
                              minLength: 1
                            intermediateLanguage:
                              type: string
                              title: Intermediate Language
                              description: >-
                                Specify languages in order to be used in back
                                translation separated by comma. Only use
                                languages present in input data language
                                dropdown. Bigger chains will take more time to
                                augment. 
                              default: German
                              pattern: >-
                                ((?:English|German|French|Italian|Spanish|Dutch|Polish|Ukrainian|Hebrew|Chinese|Japanese|Korean)*(\s)*(,)*(\s)*){0,12}
                            batchSize:
                              type: integer
                              title: Batch Size
                              description: >-
                                Number of input data samples to back-translate
                                at a time. Important if Use GPU is checked to
                                avoid memory overflow.
                              default: 256
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            beamSize:
                              type: integer
                              title: Beam Size
                              description: >-
                                Number of beams to evaluate during translation.
                                Use higher number if translation is poor. Higher
                                number will increase execution time and memory
                                use.
                              default: 1
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            minSentenceLength:
                              type: integer
                              title: Min translation length (tokens)
                              description: >-
                                Do not back translate sentences shorter than
                                specified length in tokens. If the value is more
                                than max translation length, then max
                                translation length will be used.
                              default: 40
                              hints:
                                - advanced
                              maximum: 510
                              exclusiveMaximum: false
                              minimum: 0
                              exclusiveMinimum: false
                            maxSentenceLength:
                              type: integer
                              title: Max translation length (tokens)
                              description: >-
                                Do not back translate sentences longer than
                                specified length in tokens. If the value is less
                                than min translation length, hen min translation
                                length will be used.
                              default: 240
                              hints:
                                - advanced
                              maximum: 510
                              exclusiveMaximum: false
                              minimum: 0
                              exclusiveMinimum: false
                      keyStrokeMisspellings:
                        type: array
                        title: Keystroke Misspellings
                        description: >-
                          Augment data via insertion, substitution, swapping and
                          deletion of characters based on keyboard layout.
                          Useful for short text.
                        items:
                          type: object
                          required:
                            - fieldname
                            - inputLanguage
                          properties:
                            fieldname:
                              type: string
                              title: Field Name
                              description: Name of the input field to augment.
                              minLength: 1
                            inputLanguage:
                              type: string
                              title: Input data Language
                              description: Language of input data.
                              enum:
                                - English
                                - French
                                - German
                                - Italian
                                - Spanish
                                - Dutch
                                - Polish
                                - Hebrew
                                - Ukrainian
                              minLength: 1
                            minCharAugment:
                              type: integer
                              title: Minimum Chars to Augment
                              description: >-
                                Minimum number of characters to augment in each
                                word. If the value is more than Maximum Chars to
                                Augment, then Maximum Chars to Augment will be
                                used.
                              default: 1
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            maxCharAugment:
                              type: integer
                              title: Maximum Chars to Augment
                              description: >-
                                Maximum number of characters to augment in each
                                word. If the value is more than Minimum Chars to
                                Augment, then Minimum Chars to Augment will be
                                used.
                              default: 2
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            minWordsToAugment:
                              type: integer
                              title: Min words to Augment
                              description: >-
                                Minimum number of words to be augmented in input
                                text. It should be less than maximum words to
                                augment otherwise max value will be used.
                                Suggested value is 2.
                              default: 2
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            maxWordsToAugment:
                              type: integer
                              title: Max words to Augment
                              description: >-
                                Maximum number of words to be augmented in input
                                text.It should be less than minimum words to
                                augment otherwise min value will be
                                auto-adjusted. Suggested value is 10.
                              default: 10
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            wordPercentageToAugment:
                              type: number
                              title: Percentage words to Augment
                              description: >-
                                Percentage of words in input text to augment. If
                                specified this will be used instead if
                                minimum/maximum number of words to augment
                                value.
                              default: 0.2
                              hints:
                                - advanced
                              maximum: 1
                              exclusiveMaximum: false
                            keywordsBlobName:
                              type: string
                              title: Keystroke Mapping
                              description: >-
                                Keystroke Mapping for required language in JSON
                                format from blob store.
                              hints:
                                - advanced
                              reference: blob
                              blobType: file:spark
                      synonymSubstitutions:
                        type: array
                        title: Synonym Substitution
                        description: >-
                          Augment data via substituting words using synonyms
                          from wordnet or user supplied dictionary. Useful for
                          short, medium and long text. Faster and less resource
                          intensive than back translation.
                        items:
                          type: object
                          required:
                            - fieldname
                            - inputLanguage
                          properties:
                            fieldname:
                              type: string
                              title: Field Name
                              description: Name of the input field to augment.
                              minLength: 1
                            inputLanguage:
                              type: string
                              title: Input data Language
                              description: Language of input data.
                              enum:
                                - English
                                - French
                                - German
                                - Italian
                                - Spanish
                                - Dutch
                                - Polish
                                - Hebrew
                                - Chinese
                                - Japanese
                              minLength: 1
                            minWordsToAugment:
                              type: integer
                              title: Min words to Augment
                              description: >-
                                Minimum number of words to be augmented in input
                                text. It should be less than maximum words to
                                augment otherwise max value will be used.
                                Suggested value is 2.
                              default: 2
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            maxWordsToAugment:
                              type: integer
                              title: Max words to Augment
                              description: >-
                                Maximum number of words to be augmented in input
                                text.It should be less than minimum words to
                                augment otherwise min value will be
                                auto-adjusted. Suggested value is 10.
                              default: 10
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            wordPercentageToAugment:
                              type: number
                              title: Percentage of words to Augment
                              description: >-
                                Percentage of words in input text to augment. If
                                specified this will be used instead if
                                minimum/maximum number of words to augment
                                value.
                              default: 0.2
                              hints:
                                - advanced
                              maximum: 1
                              exclusiveMaximum: false
                            stopwordsBlobName:
                              type: string
                              title: Synonym Dictionary Name
                              description: Wordnet format dictionary to use from blob store
                              hints:
                                - advanced
                              reference: blob
                              blobType: file:spark
                      splitWords:
                        type: array
                        title: Split Words
                        description: >-
                          Augment data via splitting some words. Useful for
                          short, medium and long text.
                        items:
                          type: object
                          required:
                            - fieldname
                            - inputLanguage
                          properties:
                            fieldname:
                              type: string
                              title: Field Name
                              description: Name of the input field to augment.
                              minLength: 1
                            inputLanguage:
                              type: string
                              title: Input data Language
                              description: Language of input data.
                              enum:
                                - English
                                - French
                                - German
                                - Italian
                                - Spanish
                                - Dutch
                                - Polish
                              minLength: 1
                            minWordLength:
                              type: integer
                              title: Minimum Word Length
                              description: >-
                                Do not augment words less than this length (in
                                characters). If the value is more than maximum
                                word length, then maximum word length will be
                                used.
                              default: 4
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            minWordsToAugment:
                              type: integer
                              title: Min words to Augment
                              description: >-
                                Minimum number of words to be augmented in input
                                text. It should be less than maximum words to
                                augment otherwise max value will be used.
                                Suggested value is 2.
                              default: 2
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            maxWordsToAugment:
                              type: integer
                              title: Max words to Augment
                              description: >-
                                Maximum number of words to be augmented in input
                                text.It should be less than minimum words to
                                augment otherwise min value will be
                                auto-adjusted. Suggested value is 10.
                              default: 10
                              hints:
                                - advanced
                              minimum: 0
                              exclusiveMinimum: false
                            wordPercentageToAugment:
                              type: number
                              title: Percentage of words to Augment
                              description: >-
                                Percentage of words in input text to augment. If
                                specified this will be used instead if
                                minimum/maximum number of words to augment
                                value.
                              default: 0.2
                              hints:
                                - advanced
                              maximum: 1
                              exclusiveMaximum: false
                      includeOriginalData:
                        type: boolean
                        title: Include original data
                        description: >-
                          When checked original data will be included in the
                          augmented dataset
                        default: true
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-data-augmentation
                        default: argo-data-augmentation
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - trainingFormat
                          - trainingDataFilterQuery
                          - trainingSampleFraction
                          - randomSeed
                          - batchSize
                          - outputCollection
                          - outputFormat
                          - partitionFields
                          - secretName
                          - includeOriginalData
                      - label: Augmentation Parameters
                        properties:
                          - backTranslations
                          - keyStrokeMisspellings
                          - synonymSubstitutions
                          - splitWords
                  - type: object
                    title: Create Indexes in Milvus (deprecated)
                    description: Creates indexes for specified collections in Milvus
                    required:
                      - id
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      indexes-list:
                        type: array
                        title: Indexes
                        description: >-
                          List of the indexes that should be created with
                          corresponding params.
                        items:
                          type: object
                          required:
                            - milvusCollectionName
                            - indexType
                          properties:
                            milvusCollectionName:
                              type: string
                              title: Collection Name
                              description: >-
                                Name of the collection in Milvus in which index
                                should be created
                            indexType:
                              type: string
                              title: Index Type
                              description: >-
                                Index type which should be create for specified
                                collection
                              enum:
                                - FLAT
                                - IVFLAT
                                - IVF_SQ8
                                - RNSG
                                - IVF_SQ8H
                                - IVF_PQ
                                - HNSW
                                - ANNOY
                              default: HNSW
                            indexParams:
                              type: array
                              title: Index Parameters
                              description: >-
                                Parameters to be used to create index in Milvus.
                                Specific to the chosen IndexType. For example,
                                good starting values might be [M=36,
                                efConstruction=500] for HNSW index and
                                [nlist=4×sqrt(number of vectors)] for IVF
                                indexes.
                              items:
                                type: object
                                properties:
                                  key:
                                    type: string
                                    title: Milvus Index Param
                                    description: >-
                                      The name of the Milvus index params like 
                                      M / efConstruction for HNSW or nlist for
                                      IVF indexes.
                                  value:
                                    type: integer
                                    title: Value
                                    description: >-
                                      Index param value. For example, good
                                      starting values might be [M=36,
                                      efConstruction=500] for HNSW index and
                                      [nlist=4×sqrt(number of vectors)] for IVF
                                      indexes.
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-milvus-create-indexes
                        default: argo-milvus-create-indexes
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Custom Python Job
                    description: Use this job when you want to run a python/pyspark job
                    required:
                      - id
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      script:
                        type: string
                        title: Python Script
                        description: >-
                          Custom python/pyspark script to be submitted as a
                          Fusion job
                        hints:
                          - code/python
                          - lengthy
                        minLength: 1
                      resourceName:
                        type: string
                        title: Blob Resource (python file)
                        description: >-
                          Name of the resource uploaded to Blob store. This
                          should match with the Blob name
                        minLength: 1
                        reference: blob
                        blobType: file:spark
                      pythonFiles:
                        type: array
                        title: Python Files
                        description: >-
                          Blob resource (.zip, .egg, .py files) to place on the
                          PYTHONPATH for Python apps
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      submitArgs:
                        type: array
                        title: Spark args
                        description: >-
                          Additional options to pass to the Spark Submit when
                          running this job.
                        hints:
                          - advanced
                        items:
                          type: string
                      javaOptions:
                        type: array
                        title: Java options
                        description: Java options to pass to Spark driver/executor
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      verboseReporting:
                        type: boolean
                        title: Verbose reporting
                        description: Enables verbose reporting for SparkSubmit
                        default: true
                        hints:
                          - advanced
                      envOptions:
                        type: array
                        title: ENV properties
                        description: Set environment variables for driver
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - custom_python_job
                        default: custom_python_job
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Head/Tail Analysis (Deprecated)
                    description: >-
                      Use this job when you want to compare the head and tail of
                      your queries to find common misspellings and rewritings.
                      See the insights analytics pane for a review of the
                      results of the job. This job is deprecated.
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - countField
                      - mainType
                      - signalTypeField
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Input Collection
                        description: >-
                          Signals collection containing queries and event
                          counts. Raw signals or aggregation collection can be
                          used. If aggregation collection is being used, update
                          the filter query in advanced options
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Query Field Name
                        description: Field containing the queries
                        default: query
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Signals data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr (e.g. type:click OR type:response), Spark SQL
                          expression for all other data sources
                        default: '*:*'
                        hints:
                          - advanced
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: >-
                          Solr collection to store head tail analytics results.
                          Defaults to job reports collection
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - hidden
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      tailRewriteCollection:
                        type: string
                        title: Tail Rewrite Collection
                        description: Collection where tail rewrites are stored.
                        minLength: 1
                      analyzerConfigQuery:
                        type: string
                        title: Lucene Analyzer Schema
                        description: >-
                          LuceneTextAnalyzer schema for tokenization
                          (JSON-encoded)
                        default: >-
                          { "analyzers": [ { "name":
                          "StdTokLowerStem","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "standard"
                          },"filters": [{ "type": "lowercase" },{ "type":
                          "englishminimalstem" }] }],"fields": [{ "regex": ".+",
                          "analyzer": "StdTokLowerStem" } ]}
                        hints:
                          - lengthy
                          - advanced
                          - code/json
                        minLength: 1
                      countField:
                        type: string
                        title: Event Count Field Name
                        description: >-
                          Field containing the number of times an event (like a
                          click) occurs for a particular query; count_i in the
                          raw signal collection or aggr_count_i in the
                          aggregated signal collection.
                        default: count_i
                        minLength: 1
                      mainType:
                        type: string
                        title: Main Event Type
                        description: >-
                          The main signal event type (e.g. click) that head tail
                          analysis is based on. E.g., if main type is click,
                          then head and tail queries are defined by the number
                          of clicks.
                        default: click
                        minLength: 1
                      filterType:
                        type: string
                        title: Filtering Event Type
                        description: >-
                          The secondary event type (e.g. response) that can be
                          used for filtering out rare searches. Note: In order
                          to use the `response` default value, please make sure
                          you have type:response in the input collection. If
                          there is no need to filter on number of searches,
                          please leave this parameter blank.
                        default: response
                      signalTypeField:
                        type: string
                        title: Field Name of Signal Type
                        description: The field name of signal type in the input collection.
                        default: type
                      minCountMain:
                        type: integer
                        title: Minimum Main Event Count
                        description: >-
                          Minimum number of main events (e.g. clicks after
                          aggregation) necessary for the query to be considered.
                          The job will only analyze queries with clicks greater
                          or equal to this number.
                        default: 1
                      minCountFilter:
                        type: integer
                        title: Minimum Filtering Event Count
                        description: >-
                          Minimum number of filtering events (e.g. searches
                          after aggregation) necessary for the query to be
                          considered. The job will only analyze queries that
                          were issued greater or equal to this number of times.
                        default: 20
                      queryLenThreshold:
                        type: integer
                        title: 'Minimum Query Length '
                        description: >-
                          Minimum length of a query to be included for analysis.
                          The job will only analyze queries with length greater
                          than or equal to this value.
                        default: 2
                      userHead:
                        type: number
                        title: Head Count Threshold
                        description: >-
                          User defined threshold for head definition. value=-1.0
                          will allow the program to pick the number
                          automatically. value<1.0 denotes a percentage (e.g 0.1
                          means put the top 10% of queries into the head),
                          value=1.0 denotes 100% (e.g 1 means put all queries
                          into the head), value>1.0 denotes the exact number of
                          queries to put in the head (e.g 100 means the top 100
                          queries constitute the head)
                        default: -1
                        hints:
                          - advanced
                      userTail:
                        type: number
                        title: Tail Count Threshold
                        description: >-
                          User defined threshold for tail definition. value=-1.0
                          will allow the program to pick the number
                          automatically. value<1.0 denotes a percentage, (e.g
                          0.1 means put the bottom 10% of queries into the tail)
                          value=1.0 denotes 100% (e.g 1 means put all queries
                          into the tail), value>1.0 denotes the exact number of
                          queries to put into the tail (e.g 100 means the bottom
                          100 queries constitute the tail).
                        default: -1
                        hints:
                          - advanced
                      topQ:
                        type: array
                        title: Top X% Head Query Event Count
                        description: >-
                          Compute how many total events come from the top X head
                          queries (Either a number greater than or equal to 1.0
                          or a percentage of the total number of unique queries)
                        default:
                          - 100
                          - 0.01
                        hints:
                          - advanced
                        items:
                          type: number
                      trafficPerc:
                        type: array
                        title: Number of Queries that Constitute X% of Total Events
                        description: >-
                          Compute how many queries constitute each of the
                          specified event portions(E.g., 0.25, 0.50)
                        default:
                          - 0.25
                          - 0.5
                          - 0.75
                        hints:
                          - advanced
                        items:
                          type: number
                      lastTraffic:
                        type: array
                        title: Bottom X% Tail Query Event Count
                        description: >-
                          Compute the total number of queries that are spread
                          over each of the specified tail event portions (E.g.,
                          0.01)
                        default:
                          - 0.01
                        hints:
                          - advanced
                        items:
                          type: number
                      trafficCount:
                        type: array
                        title: Event Count Computation Threshold
                        description: >-
                          Compute how many queries have events less than each
                          value specified (E.g., a value of 5.0 would return the
                          number of queries that have less than 5 associated
                          events)
                        default:
                          - 5
                        hints:
                          - advanced
                        items:
                          type: number
                      keywordsBlobName:
                        type: string
                        title: Keywords blob name
                        description: >-
                          Name of the keywords blob resource. Typically, this
                          should be a csv file uploaded to blob store in a
                          specific format. Check documentation for more details
                          on format and uploading to blob store 
                        minLength: 1
                        reference: blob
                        blobType: file:spark
                      lenScale:
                        type: integer
                        title: Edit Distance vs String Length Scale
                        description: >-
                          A scaling factor used to normalize the length of the
                          query string. This filters head and tail string match
                          based on if edit_dist <= string_length/length_scale. A
                          large value for this factor leads to a shorter
                          spelling list. A smaller value leads to a longer
                          spelling list but may add lower quality corrections.
                        default: 6
                        hints:
                          - advanced
                      overlapThreshold:
                        type: integer
                        title: Head and tail Overlap threshold
                        description: >-
                          The threshold for the number of overlapping tokens
                          between the head and tail. When a head string and tail
                          string share more tokens than this threshold, they are
                          considered a good match.
                        default: 4
                        hints:
                          - advanced
                      overlapNumBoost:
                        type: number
                        title: Token Overlap Number Boost
                        description: >-
                          When there are multiple possible head matches for a
                          tail, we rank heads based on: overlapNumBoost *
                          overlapNum + headQueryCountBoost *
                          log(headQueryCount). A big number puts more weight on
                          how many tokens match between the head and tail query
                          strings instead of the number of times a head query
                          appears.
                        default: 10
                        hints:
                          - hidden
                          - advanced
                      headQueryCntBoost:
                        type: number
                        title: Head query count boost
                        description: >-
                          When there are multiple possible head matches for
                          tail, we rank heads based on: overlapNumBoost *
                          overlapNum + headQueryCountBoost *
                          log(headQueryCount). A big number puts more weight on
                          the count head query instead of the number of tokens
                          shared between the head and tail query strings
                        default: 1
                        hints:
                          - hidden
                          - advanced
                      tailRewrite:
                        type: boolean
                        title: Generate tail rewrite table
                        description: >-
                          If true, also generate tail rewrite table, o.w., only
                          get distributions. May need to set it to false in the
                          very first run to help customize head and tail
                          positions.
                        default: true
                        hints:
                          - advanced
                      sparkPartitions:
                        type: integer
                        title: Set minimum Spark partitions for input
                        description: >-
                          Spark will re-partition the input to have this number
                          of partitions. Increase for greater parallelism
                        default: 200
                        hints:
                          - advanced
                      stopwordsList:
                        type: array
                        title: List of stopwords
                        description: Stopwords defined in Lucene analyzer config
                        hints:
                          - readonly
                          - hidden
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      enableAutoPublish:
                        type: boolean
                        title: Enable auto-publishing
                        description: >-
                          If true, automatically publishes rewrites for rules.
                          Default is false to allow for initial human-aided
                          reviewing
                        default: false
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - headTailAnalysis
                        default: headTailAnalysis
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - signalTypeField
                          - mainType
                          - filterType
                          - countField
                      - label: Model Tuning Parameters
                        properties:
                          - minCountMain
                          - minCountFilter
                          - tailRewrite
                          - userHead
                          - userTail
                          - lenScale
                          - overlapThreshold
                          - topQ
                          - trafficCount
                          - trafficPerc
                          - lastTraffic
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfigQuery
                          - queryLenThreshold
                      - label: Misc. Parameters
                        properties:
                          - keywordsBlobName
                  - type: object
                    title: SQL Aggregation
                    description: >-
                      Use this job when you want to aggregate your data in some
                      way.
                    required:
                      - id
                      - inputCollection
                      - sql
                      - dataFormat
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      inputCollection:
                        type: string
                        title: Source Collection
                        description: Collection containing signals to be aggregated.
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: >-
                          The collection to write the aggregates to on output.
                          This property is required if the selected output /
                          rollup pipeline requires it (the default pipeline
                          does). A special value of '-' disables the output.
                        hints:
                          - advanced
                        minLength: 1
                      rows:
                        type: integer
                        title: Batch Size
                        description: >-
                          Number of rows to read from the source collection per
                          request.
                        default: 10000
                        hints:
                          - advanced
                      sql:
                        type: string
                        title: SQL
                        description: >-
                          Use SQL to perform the aggregation. You do not need to
                          include a time range filter in the WHERE clause as it
                          gets applied automatically before executing the SQL
                          statement.
                        hints:
                          - lengthy
                          - code/sql
                        minLength: 1
                      rollupSql:
                        type: string
                        title: Rollup SQL
                        description: >-
                          Use SQL to perform a rollup of previously aggregated
                          docs. If left blank, the aggregation framework will
                          supply a default SQL query to rollup aggregated
                          metrics.
                        hints:
                          - lengthy
                          - code/sql
                          - advanced
                        minLength: 1
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Additional configuration settings to fine-tune how
                          input records are read for this aggregation.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      sourceCatchup:
                        type: boolean
                        title: Aggregate New and Merge with Existing
                        description: >-
                          If checked, only aggregate new signals created since
                          the last time the job was successfully run. If there
                          is a record of such previous run then this overrides
                          the starting time of time range set in 'timeRange'
                          property. If unchecked, then all matching signals are
                          aggregated and any previously aggregated docs are
                          deleted to avoid double counting.
                        default: true
                        hints:
                          - advanced
                      sourceRemove:
                        type: boolean
                        title: Remove signals from source
                        description: >-
                          If checked, remove signals from source collection once
                          aggregation job has finished running.
                        default: false
                        hints:
                          - advanced
                      aggregationTime:
                        type: string
                        title: Aggregation Time
                        description: >-
                          Timestamp to use for the aggregation results. Defaults
                          to NOW.
                        hints:
                          - advanced
                        format: date-time
                      referenceTime:
                        type: string
                        title: Reference Time
                        description: >-
                          Timestamp to use for computing decays and to determine
                          the value of NOW.
                        hints:
                          - advanced
                        format: date-time
                      skipCheckEnabled:
                        type: boolean
                        title: Job Skip Check Enabled?
                        description: >-
                          If the catch-up flag is enabled and this field is
                          checked, the job framework will execute a fast Solr
                          query to determine if this run can be skipped.
                        default: true
                        hints:
                          - advanced
                      skipJobIfSignalsEmpty:
                        type: boolean
                        title: Skip Job run
                        description: Skip Job run if signals collection is empty
                        hints:
                          - advanced
                      parameters:
                        type: array
                        title: Parameters
                        description: >-
                          Other aggregation parameters (e.g. timestamp field
                          etc..).
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      signalTypes:
                        type: array
                        title: Signal Types
                        description: >-
                          The signal types. If not set then any signal type is
                          selected
                        items:
                          type: string
                      selectQuery:
                        type: string
                        title: Query
                        description: The query to select the desired input documents.
                        default: '*:*'
                        hints:
                          - advanced
                        minLength: 1
                      timeRange:
                        type: string
                        title: Time Range
                        description: The time range to select signals on.
                        hints:
                          - advanced
                        minLength: 1
                      useNaturalKey:
                        type: boolean
                        title: Use Natural Key?
                        description: >-
                          Use a natural key provided in the raw signals data for
                          aggregation, rather than relying on Solr UUIDs.
                          Migrated aggregations jobs from Fusion 4 will need
                          this set to false.
                        default: true
                        hints:
                          - advanced
                      optimizeSegments:
                        type: integer
                        title: Optimize Segments
                        description: >-
                          If set to a value above 0, the aggregator job will
                          optimize the resulting Solr collection into this many
                          segments
                        default: 0
                        hints:
                          - advanced
                        minimum: 0
                        exclusiveMinimum: false
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      sparkPartitions:
                        type: integer
                        title: Set minimum Spark partitions for input
                        description: >-
                          Spark will re-partition the input to have this number
                          of partitions. Increase for greater parallelism
                        default: 200
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - aggregation
                        default: aggregation
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Random Forest Classifier Training (deprecated)
                    description: >-
                      Use this job when you have training data and you want to
                      train a random forest model to classify text into groups.
                      Deprecated as of Fusion 5.2.0 and will be removed in a
                      future release; use the Classification job instead.
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - trainingLabelField
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Training Collection
                        description: Solr Collection containing labeled training data
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Field to Vectorize
                        description: >-
                          Solr field containing text training data. Data from
                          multiple fields with different weights can be combined
                          by specifying them as field1:weight1,field2:weight2
                          etc.
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Training data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr
                        default: '*:*'
                        hints:
                          - advanced
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: Solr Collection to store model-labeled data to
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - advanced
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      modelId:
                        type: string
                        title: Model ID
                        description: >-
                          Identifier for the model to be trained; uses the
                          supplied Spark Job ID if not provided.
                        hints:
                          - advanced
                        minLength: 1
                      analyzerConfig:
                        type: string
                        title: Lucene Analyzer Schema
                        description: >-
                          LuceneTextAnalyzer schema for tokenization
                          (JSON-encoded)
                        default: >-
                          { "analyzers": [{ "name":
                          "StdTokLowerStop","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "standard"
                          },"filters": [{ "type": "lowercase" },{ "type":
                          "KStem" },{ "type": "length", "min": "2", "max":
                          "32767" },{ "type": "fusionstop", "ignoreCase":
                          "true", "format": "snowball", "words":
                          "org/apache/lucene/analysis/snowball/english_stop.txt"
                          }] }],"fields": [{ "regex": ".+", "analyzer":
                          "StdTokLowerStop" } ]}
                        hints:
                          - advanced
                          - code/json
                          - lengthy
                      withIdf:
                        type: boolean
                        title: IDF Weighting
                        description: >-
                          Weight vector components based on inverse document
                          frequency
                        default: true
                        hints:
                          - advanced
                      w2vDimension:
                        type: integer
                        title: Word2Vec Dimension
                        description: >-
                          Word-vector dimensionality to represent text (choose >
                          0 to use)
                        default: 0
                        hints:
                          - advanced
                        minimum: 0
                        exclusiveMinimum: false
                      w2vWindowSize:
                        type: integer
                        title: Word2Vec Window Size
                        description: >-
                          The window size (context words from [-window, window])
                          for word2vec
                        default: 5
                        hints:
                          - advanced
                        minimum: 3
                        exclusiveMinimum: false
                      w2vMaxSentenceLength:
                        type: integer
                        title: Max Word2Vec Sentence Length
                        description: >-
                          Sets the maximum length (in words) of each sentence in
                          the input data. Any sentence longer than this
                          threshold will be divided into chunks of up to
                          `maxSentenceLength` size.
                        default: 1000
                        hints:
                          - advanced
                        minimum: 3
                        exclusiveMinimum: false
                      w2vMaxIter:
                        type: integer
                        title: Max Word2Vec Iterations
                        description: Maximum number of iterations of the word2vec training
                        default: 1
                        hints:
                          - advanced
                      w2vStepSize:
                        type: number
                        title: Word2Vec Step Size
                        description: >-
                          Training parameter for word2vec convergence (change at
                          your own peril)
                        default: 0.025
                        hints:
                          - advanced
                        minimum: 0.005
                        exclusiveMinimum: false
                      minDF:
                        type: number
                        title: Minimum Term Document Frequency
                        description: >-
                          To be kept, terms must occur in at least this number
                          of documents (if > 1.0), or at least this fraction of
                          documents (if <= 1.0)
                        default: 0
                        hints:
                          - advanced
                      maxDF:
                        type: number
                        title: Max Term Document Frequency
                        description: >-
                          To be kept, terms must occur in no more than this
                          number of documents (if > 1.0), or no more than this
                          fraction of documents (if <= 1.0)
                        default: 1
                        hints:
                          - advanced
                      norm:
                        type: integer
                        title: Vector normalization
                        description: >-
                          p-norm to normalize vectors with (choose -1 to turn
                          normalization off)
                        enum:
                          - -1
                          - 0
                          - 1
                          - 2
                        default: 2
                        hints:
                          - advanced
                      predictedLabelField:
                        type: string
                        title: Predicted Label Field
                        description: >-
                          Solr field which will contain labels when classifier
                          is applied to documents
                        default: labelPredictedByFusionModel
                        hints:
                          - advanced
                      serializeAsMleap:
                        type: boolean
                        title: Serialize as Mleap Bundle
                        description: Serialize the output model as Mleap Bundle
                        default: true
                        hints:
                          - hidden
                      minSparkPartitions:
                        type: integer
                        title: Minimum Number of Spark Partitions
                        description: Minimum number of Spark partitions for training job.
                        default: 200
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      stopwordsList:
                        type: array
                        title: List of stopwords
                        description: Stopwords defined in Lucene analyzer config
                        hints:
                          - readonly
                          - hidden
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      overwriteExistingModel:
                        type: boolean
                        title: Overwrite existing model
                        description: >-
                          If a model exists in the model store, overwrite when
                          this job runs
                        default: true
                        hints:
                          - advanced
                      trainingLabelField:
                        type: string
                        title: Label Field
                        description: >-
                          Solr field containing labels for training instances
                          (should be single-valued strings)
                      gridSearch:
                        type: boolean
                        title: Grid Search with Cross Validation
                        description: Perform grid search to optimize hyperparameters
                        default: false
                      evaluationMetricType:
                        type: string
                        title: Evaluation Metric Type
                        description: >-
                          Optimize hyperparameter search over one of [binary,
                          multiclass, regression] metrics, or 'none'
                        enum:
                          - binary
                          - multiclass
                          - regression
                          - none
                        default: none
                        hints:
                          - advanced
                      autoBalanceClasses:
                        type: boolean
                        title: Auto-balance training classes
                        description: >-
                          Ensure that all classes of training data have the same
                          size
                        default: true
                        hints:
                          - advanced
                      minTrainingSamplesPerClass:
                        type: integer
                        title: Minimum Labeled Class Size
                        description: >-
                          Ensure that all classes of training data have at least
                          this many examples
                        default: 100
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      makeOtherClass:
                        type: boolean
                        title: Make 'Other' Class
                        description: >-
                          Create a label class 'Other' which contains all
                          examples not in a class large enough to train on
                        default: true
                        hints:
                          - advanced
                      otherClassName:
                        type: string
                        title: '''Other'' class name'
                        description: Label class name for the catch-all 'Other' class
                        default: Other
                        hints:
                          - advanced
                        minLength: 1
                      maxDepth:
                        type: integer
                        title: Maximum tree depth
                        description: >-
                          Maximum depth of the tree (>= 0).  E.g., depth 0 means
                          1 leaf node; depth 1 means 1 internal node + 2 leaf
                          nodes.
                        default: 5
                        maximum: 20
                        exclusiveMaximum: false
                        minimum: 1
                        exclusiveMinimum: false
                      maxBins:
                        type: integer
                        title: Maximum number of discretizing bins
                        description: >-
                          Max number of bins for discretizing continuous
                          features. Must be >=2 and >= number of categories for
                          any categorical feature.
                        default: 32
                        maximum: 128
                        exclusiveMaximum: false
                        minimum: 0
                        exclusiveMinimum: false
                      numTrees:
                        type: integer
                        title: Number of trees
                        description: Number of trees to train (>= 1)
                        default: 20
                        maximum: 1000
                        exclusiveMaximum: false
                        minimum: 1
                        exclusiveMinimum: false
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - random_forests_classifier
                        default: random_forests_classifier
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - predictedLabelField
                          - trainingLabelField
                      - label: Model Tuning Parameters
                        properties:
                          - w2vDimension
                          - w2vWindowSize
                          - w2vMaxIter
                          - w2vMaxSentenceLength
                          - w2vStepSize
                          - withIdf
                          - maxDF
                          - minDF
                          - norm
                          - autoBalanceClasses
                          - evaluationMetricType
                          - minTrainingSamplesPerClass
                          - otherClassName
                          - makeOtherClass
                          - gridSearch
                          - maxBins
                          - numTrees
                          - maxDepth
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfig
                      - label: Misc. Parameters
                        properties:
                          - modelId
                  - type: object
                    title: Create Collections in Milvus (deprecated)
                    description: Creates collections with specified parameters in Milvus
                    required:
                      - id
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      collections-list:
                        type: array
                        title: Collections
                        description: >-
                          List of the collections that should be created with
                          corresponding params.
                        items:
                          type: object
                          required:
                            - milvusCollectionName
                            - dimension
                            - indexFileSize
                            - metric
                          properties:
                            milvusCollectionName:
                              type: string
                              title: Collection Name
                              description: Name of the collection to create in Milvus
                              pattern: ^[a-zA-Z0-9_]+$
                            dimension:
                              type: integer
                              title: Dimension
                              description: >-
                                Dimension size of vectors to be stored in the
                                collection
                            indexFileSize:
                              type: integer
                              title: Index File Size
                              description: >-
                                Files larger than this will trigger index
                                building for raw data files
                              default: 1024
                              minimum: 1
                              exclusiveMinimum: false
                            metric:
                              type: string
                              title: Metric
                              description: >-
                                Metric which should be used for vectors
                                similarity
                              enum:
                                - Euclidean
                                - Inner Product
                                - Hamming
                                - Jaccard
                                - Tanimoto
                                - Substructure
                                - Superstructure
                              default: Inner Product
                      allow-recreate:
                        type: boolean
                        title: Override collections
                        description: >-
                          If checked and there are existing collections with the
                          same names, they will be dropped and recreated. If
                          unchecked, the exception is thrown in such situation.
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-milvus-create-collections
                        default: argo-milvus-create-collections
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Word2Vec Model Training (deprecated)
                    description: >-
                      Trains a shallow neural model, and projects each document
                      onto this vector embedding space. Deprecated as of Fusion
                      5.2.0 and will be removed in a future release.
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - outputCollection
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Training Collection
                        description: Solr Collection containing labeled training data
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Field to Vectorize
                        description: >-
                          Solr field containing text training data. Data from
                          multiple fields with different weights can be combined
                          by specifying them as field1:weight1,field2:weight2
                          etc.
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Training data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr
                        default: '*:*'
                        hints:
                          - advanced
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: Solr Collection to store model-labeled data to
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - advanced
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      modelId:
                        type: string
                        title: Model ID
                        description: >-
                          Identifier for the model to be trained; uses the
                          supplied Spark Job ID if not provided.
                        hints:
                          - advanced
                        minLength: 1
                      analyzerConfig:
                        type: string
                        title: Lucene Analyzer Schema
                        description: >-
                          LuceneTextAnalyzer schema for tokenization
                          (JSON-encoded)
                        default: >-
                          { "analyzers": [{ "name":
                          "StdTokLowerStop","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "standard"
                          },"filters": [{ "type": "lowercase" },{ "type":
                          "KStem" },{ "type": "length", "min": "2", "max":
                          "32767" },{ "type": "fusionstop", "ignoreCase":
                          "true", "format": "snowball", "words":
                          "org/apache/lucene/analysis/snowball/english_stop.txt"
                          }] }],"fields": [{ "regex": ".+", "analyzer":
                          "StdTokLowerStop" } ]}
                        hints:
                          - advanced
                          - code/json
                          - lengthy
                      withIdf:
                        type: boolean
                        title: IDF Weighting
                        description: >-
                          Weight vector components based on inverse document
                          frequency
                        default: true
                        hints:
                          - advanced
                      w2vDimension:
                        type: integer
                        title: Embedding Dimension
                        description: Word-vector dimensionality to represent text
                        default: 50
                        hints:
                          - dummy
                        minimum: 0
                        exclusiveMinimum: false
                      w2vWindowSize:
                        type: integer
                        title: Window Size
                        description: >-
                          The window size (context words from [-window, window])
                          for word2vec
                        default: 5
                        hints:
                          - dummy
                        minimum: 3
                        exclusiveMinimum: false
                      w2vMaxSentenceLength:
                        type: integer
                        title: Max Sentence Length
                        description: >-
                          Sets the maximum length (in words) of each sentence in
                          the input data. Any sentence longer than this
                          threshold will be divided into chunks of up to
                          `maxSentenceLength` size.
                        default: 1000
                        hints:
                          - dummy
                        minimum: 3
                        exclusiveMinimum: false
                      w2vMaxIter:
                        type: integer
                        title: Max Iterations
                        description: Maximum number of iterations of the word2vec training
                        default: 1
                        hints:
                          - advanced
                      w2vStepSize:
                        type: number
                        title: Step Size
                        description: >-
                          Training parameter for word2vec convergence (change at
                          your own peril)
                        default: 0.025
                        hints:
                          - advanced
                        minimum: 0.005
                        exclusiveMinimum: false
                      minDF:
                        type: number
                        title: Minimum Term Document Frequency
                        description: >-
                          To be kept, terms must occur in at least this number
                          of documents (if > 1.0), or at least this fraction of
                          documents (if <= 1.0)
                        default: 0
                        hints:
                          - advanced
                      maxDF:
                        type: number
                        title: Max Term Document Frequency
                        description: >-
                          To be kept, terms must occur in no more than this
                          number of documents (if > 1.0), or no more than this
                          fraction of documents (if <= 1.0)
                        default: 1
                        hints:
                          - advanced
                      norm:
                        type: integer
                        title: Vector normalization
                        description: >-
                          p-norm to normalize vectors with (choose -1 to turn
                          normalization off)
                        enum:
                          - -1
                          - 0
                          - 1
                          - 2
                        default: 2
                        hints:
                          - advanced
                      predictedLabelField:
                        type: string
                        title: Word2Vec Feature Field
                        description: >-
                          Solr field which will contain vector features when the
                          word2vec model is applied to documents
                        default: w2vFeatures
                        hints:
                          - hidden
                      serializeAsMleap:
                        type: boolean
                        title: Serialize as Mleap Bundle
                        description: Serialize the output model as Mleap Bundle
                        default: true
                        hints:
                          - hidden
                      minSparkPartitions:
                        type: integer
                        title: Minimum Number of Spark Partitions
                        description: Minimum number of Spark partitions for training job.
                        default: 200
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      stopwordsList:
                        type: array
                        title: List of stopwords
                        description: Stopwords defined in Lucene analyzer config
                        hints:
                          - readonly
                          - hidden
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      overwriteExistingModel:
                        type: boolean
                        title: Overwrite existing model
                        description: >-
                          If a model exists in the model store, overwrite when
                          this job runs
                        default: true
                        hints:
                          - advanced
                      outputField:
                        type: string
                        title: Output Field
                        description: >-
                          Solr field which will contain terms which the word2vec
                          model considers are related to the input
                        default: related_terms_txt
                      uidField:
                        type: string
                        title: ID Field Name
                        description: Field containing the unique ID for each document
                        minLength: 1
                      numRelatedTerms:
                        type: integer
                        title: Number of Related Words
                        description: >-
                          For each collection of input words, find this many
                          word2vec-related words
                        default: 10
                        minimum: 1
                        exclusiveMinimum: false
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - word2vec
                        default: word2vec
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - predictedLabelField
                          - uidField
                          - outputField
                      - label: Model Tuning Parameters
                        properties:
                          - w2vDimension
                          - w2vWindowSize
                          - w2vMaxIter
                          - w2vMaxSentenceLength
                          - w2vStepSize
                          - withIdf
                          - maxDF
                          - minDF
                          - norm
                          - numRelatedTerms
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfig
                      - label: Misc. Parameters
                        properties:
                          - modelId
                  - type: object
                    title: Parameterized SQL Aggregation
                    description: >-
                      A SQL aggregation job where users provide parameters to be
                      injected into a built-in SQL template at runtime.
                    required:
                      - id
                      - inputCollection
                      - dataFormat
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      inputCollection:
                        type: string
                        title: Source Collection
                        description: Collection containing documents to be aggregated.
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: >-
                          The collection to write the aggregates to on output.
                          Defaults to the input collection if not specified.
                      notes:
                        type: string
                        title: Notes
                        description: A short description about this job.
                        hints:
                          - lengthy
                      parameters:
                        type: array
                        title: SQL Parameters
                        description: Parameters bound on the SQL template at runtime.
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      selectQuery:
                        type: string
                        title: Query
                        description: >-
                          The query to select the desired signals. If not set
                          then '*:*' will be used, or equivalent.
                        default: '*:*'
                        hints:
                          - advanced
                      timeRange:
                        type: string
                        title: Time Range
                        description: The time range to select signals on.
                        hints:
                          - advanced
                        minLength: 1
                      sourceCatchup:
                        type: boolean
                        title: Aggregate New and Merge with Existing
                        description: >-
                          If checked, only aggregate new signals created since
                          the last time the job was successfully run. If there
                          is a record of such previous run then this overrides
                          the starting time of time range set in 'timeRange'
                          property. If unchecked, then all matching signals are
                          aggregated and any previously aggregated docs are
                          deleted to avoid double counting.
                        default: true
                        hints:
                          - advanced
                      sql:
                        type: string
                        title: SQL
                        description: >-
                          Use SQL to perform the aggregation. You do not need to
                          include a time range filter in the WHERE clause as it
                          gets applied automatically before executing the SQL
                          statement.
                        hints:
                          - advanced
                          - code/sql
                        minLength: 1
                      rollupSql:
                        type: string
                        title: Rollup SQL
                        description: >-
                          Use SQL to perform a rollup of previously aggregated
                          docs. If left blank, the aggregation framework will
                          supply a default SQL query to rollup aggregated
                          metrics.
                        hints:
                          - advanced
                          - code/sql
                        minLength: 1
                      sourceRemove:
                        type: boolean
                        title: Remove Source
                        description: >-
                          If true, the processed source signals will be removed
                          after aggregation. Default is false.
                        default: false
                        hints:
                          - advanced
                      referenceTime:
                        type: string
                        title: Reference Time
                        description: >-
                          Timestamp to use for computing decays and to determine
                          the value of NOW.
                        hints:
                          - advanced
                        format: date-time
                      hiddenParameters:
                        type: array
                        title: Hidden Parameters
                        description: >-
                          Additional settings used to tune the underlying
                          aggregation job.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Additional configuration settings to fine-tune how
                          input records are read for this aggregation.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      skipCheckEnabled:
                        type: boolean
                        title: Job Skip Check Enabled?
                        description: >-
                          If the catch-up flag is enabled and this field is
                          checked, the job framework will execute a fast Solr
                          query to determine if this run can be skipped.
                        default: true
                        hints:
                          - advanced
                      useNaturalKey:
                        type: boolean
                        title: Use Natural Key?
                        description: >-
                          Use a natural key provided in the raw signals data for
                          aggregation, rather than relying on Solr UUIDs.
                          Migrated aggregations jobs from Fusion 4 will need
                          this set to false.
                        default: true
                        hints:
                          - advanced
                      optimizeSegments:
                        type: integer
                        title: Optimize Segments
                        description: >-
                          If set to a value above 0, the aggregator job will
                          optimize the resulting Solr collection into this many
                          segments
                        default: 0
                        hints:
                          - advanced
                        minimum: 0
                        exclusiveMinimum: false
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      sparkPartitions:
                        type: integer
                        title: Set minimum Spark partitions for input
                        description: >-
                          Spark will re-partition the input to have this number
                          of partitions. Increase for greater parallelism
                        default: 200
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - sql_template
                        default: sql_template
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Token and Phrase Spell Correction (Deprecated)
                    description: >-
                      Use this job to compute token and phrase level spell
                      correction which you can use in your synonym list. This
                      job is deprecated.
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Input Collection
                        description: >-
                          Collection containing search strings and event counts.
                          Should ideally be the signals collection.If an
                          aggregation collection is being used, update the
                          filter query in the advanced options
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Input Field
                        description: Field containing search strings.
                        default: query
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr (e.g. type:click OR type:response), Spark SQL
                          expression for all other data sources
                        default: '*:*'
                        hints:
                          - advanced
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: >-
                          Collection to store misspelling and correction pairs.
                          Defaults to the query_rewrite_staging collection for
                          the application.
                        hints:
                          - dummy
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - hidden
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      stopwordsBlobName:
                        type: string
                        title: Stopwords blob (Deprecated)
                        description: >-
                          Name of stopwords blob resource (.txt or .rtf file
                          uploaded to the blob store). This field is marked for
                          deprecation. Going forward, please specify the
                          stopwords blob name as a luceneSchema property.
                        hints:
                          - advanced
                        minLength: 1
                        reference: blob
                        blobType: file:spark
                      dictionaryCollection:
                        type: string
                        title: Dictionary Collection
                        description: >-
                          Solr Collection containing dictionary with correct
                          spellings. E.g., product catalog.
                      dictionaryField:
                        type: string
                        title: Dictionary Field
                        description: >-
                          Solr field containing dictionary text. Multiple fields
                          can be specified using the format: field1,field2 etc.
                      countField:
                        type: string
                        title: Count Field
                        description: Solr field containing query count
                        default: count_i
                      mainType:
                        type: string
                        title: Main Event Type
                        description: >-
                          The main signal event type (e.g. click) that the job
                          is based on if input is signal data. E.g., if main
                          type is click, then head and tail tokens/phrases are
                          defined by the number of clicks.
                        default: click
                      filterType:
                        type: string
                        title: Filtering Event Type
                        description: >-
                          The secondary event type (e.g. response) that can be
                          used for filtering out rare searches.Note: In order to
                          use this `response` default value, please make sure
                          you have type:response in the input collection.If
                          there is no need to filter on number of searches,
                          please leave this parameter blank.
                        default: response
                      signalTypeField:
                        type: string
                        title: Field Name of Signal Type
                        description: The field name of signal type in the input collection.
                        default: type
                        hints:
                          - advanced
                      minCountMain:
                        type: integer
                        title: Minimum Main Event Count
                        description: >-
                          Minimum number of main events (e.g. clicks after
                          aggregation) necessary for the query to be considered.
                          The job will only analyze queries with clicks greater
                          or equal to this number.
                        default: 1
                      minCountFilter:
                        type: integer
                        title: Minimum Filtering Event Count
                        description: >-
                          Minimum number of filtering events (e.g. searches
                          after aggregation) necessary for the query to be
                          considered. The job will only analyze queries that
                          were issued greater or equal to this number of times.
                        default: 10
                      dictionaryDataFilterQuery:
                        type: string
                        title: Dictionary Data Filter Query
                        description: Solr query to use when loading dictionary data
                        default: '*:*'
                        hints:
                          - advanced
                      minPrefix:
                        type: integer
                        title: Minimum Prefix Match
                        description: >-
                          The minimum number of matches on starting characters.
                          Note: Setting it to 0 may largely increase running
                          time. 
                        default: 1
                        minimum: 0
                        exclusiveMinimum: false
                      minMispellingLen:
                        type: integer
                        title: Minimum Length of Misspelling
                        description: >-
                          The minimum length of misspelling to check. Smaller
                          number may lead to problematic corrections. E.g., It
                          is hard to find the right correction for a two or
                          three character string. 
                        default: 5
                        minimum: 1
                        exclusiveMinimum: false
                      maxDistance:
                        type: integer
                        title: Maximum Edit Distance
                        description: >-
                          The maximum edit distance between related
                          token/phrases you are interested in. Large number
                          leads to longer correction list but may add lower
                          quality corrections. 
                        default: 2
                        minimum: 1
                        exclusiveMinimum: false
                      lastCharMatchBoost:
                        type: number
                        title: Last Character Match Boost
                        description: >-
                          When there are multiple possible corrections, we rank
                          corrections based on: editDistBoost / editDist +
                          correctionCountBoost * log(correctionCount) +
                          lastCharMatchBoost * lastCharMatch + soundMatchBoost *
                          soundexMatch. Big number puts more weight on last
                          character match between misspelling and correction
                          strings
                        default: 1
                        hints:
                          - advanced
                      soundMatchBoost:
                        type: number
                        title: Sound Match Boost
                        description: >-
                          When there are multiple possible corrections, we rank
                          corrections based on: editDistBoost / editDist +
                          correctionCountBoost * log(correctionCount) +
                          lastCharMatchBoost * lastCharMatch + soundMatchBoost *
                          soundexMatch. Big number puts more weight on soundex
                          match between misspelling and correction strings
                        default: 3
                        hints:
                          - advanced
                      correctCntBoost:
                        type: number
                        title: Correction Count Boost
                        description: >-
                          When there are multiple possible corrections, we rank
                          corrections based on: editDistBoost / editDist +
                          correctionCountBoost * log(correctionCount) +
                          lastCharMatchBoost * lastCharMatch + soundMatchBoost *
                          soundexMatch. Big number puts more weight on count of
                          correction string occurrences.
                        default: 2
                        hints:
                          - advanced
                      editDistBoost:
                        type: number
                        title: Edit Distance Boost
                        description: >-
                          When there are multiple possible corrections, we rank
                          corrections based on: editDistBoost / editDist +
                          correctionCountBoost * log(correctionCount) +
                          lastCharMatchBoost * lastCharMatch + soundMatchBoost *
                          soundexMatch. Big number puts more weight on shorter
                          edit distance.
                        default: 2
                        hints:
                          - advanced
                      signalDataIndicator:
                        type: boolean
                        title: Input is Signal Data
                        description: >-
                          The input dataset that the spell checker based on is
                          signal data. If the input data is content document
                          rather than signal, please uncheck.
                        default: true
                      analyzerConfigQuery:
                        type: string
                        title: Lucene Analyzer Schema for Processing Queries
                        description: >-
                          LuceneTextAnalyzer schema for tokenization
                          (JSON-encoded)
                        default: >-
                          { "analyzers": [ { "name":
                          "LetterTokLowerStem","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "letter"
                          },"filters": [{ "type": "lowercase" },{ "type":
                          "KStem" }] }],"fields": [{ "regex": ".+", "analyzer":
                          "LetterTokLowerStem" } ]}
                        hints:
                          - lengthy
                          - code/json
                        minLength: 1
                      analyzerConfigDictionary:
                        type: string
                        title: Lucene Analyzer Schema for Processing Dictionary
                        description: >-
                          LuceneTextAnalyzer schema for tokenization
                          (JSON-encoded)
                        default: >-
                          { "analyzers": [ { "name":
                          "LetterTokLowerStem","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "letter"
                          },"filters": [{ "type": "lowercase" },{ "type":
                          "KStem" }] }],"fields": [{ "regex": ".+", "analyzer":
                          "LetterTokLowerStem" } ]}
                        hints:
                          - lengthy
                          - code/json
                        minLength: 1
                      correctionThreshold:
                        type: number
                        title: Correct Spelling Threshold
                        description: >-
                          The count of occurrence ABOVE which the token/phrases
                          are likely to be corrected spellings. Note that this
                          number can be either fraction (<1.0) to denote a
                          quantile based on count number distribution (shown in
                          the log) or a number (>1.0) to denote the absolute
                          count. A big number may cause performance issues.
                        default: 0.8
                        hints:
                          - advanced
                      misspellingThreshold:
                        type: number
                        title: Misspelling Threshold
                        description: >-
                          The count of occurrence BELOW which the token/phrases
                          are likely to be misspellings. Note that this number
                          can be either fraction (<1.0) to denote a quantile
                          based on count number distribution (shown in the log)
                          or a number (>1.0) to denote the absolute count.
                        default: 0.8
                        hints:
                          - advanced
                      lenScale:
                        type: integer
                        title: Edit Dist vs String Length Scale
                        description: >-
                          A scaling factor used to normalize the length of query
                          string to compare against edit distances. The
                          filtering is based on if edit_dist <=
                          string_length/length_scale. A large value for this
                          factor leads to a shorter correction list. A small
                          value leads to a longer correction list but may add
                          lower quality corrections.
                        default: 5
                        hints:
                          - advanced
                      corMisRatio:
                        type: number
                        title: Correction and Misspelling Count Ratio
                        description: >-
                          Ratio between correction occurrence count and
                          misspelling occurrence count. Pairs with ratio less
                          than or equal to this number will be filtered. Big
                          number leads to shorter correction list and may have
                          higher quality corrections.
                        default: 3
                        hints:
                          - advanced
                      stopwordsList:
                        type: array
                        title: List of stopwords
                        description: Stopwords defined in Lucene analyzer config
                        hints:
                          - readonly
                          - hidden
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      enableAutoPublish:
                        type: boolean
                        title: Enable auto-publishing
                        description: >-
                          If true, automatically publishes rewrites for rules.
                          Default is false to allow for initial human-aided
                          reviewing
                        default: false
                        hints:
                          - advanced
                      sparkPartitions:
                        type: integer
                        title: Set minimum Spark partitions for input
                        description: >-
                          Spark will re-partition the input to have this number
                          of partitions. Increase for greater parallelism
                        default: 200
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - tokenPhraseSpellCorrection
                        default: tokenPhraseSpellCorrection
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                          - signalDataIndicator
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - signalTypeField
                          - mainType
                          - filterType
                          - countField
                      - label: Boost Parameters
                        properties:
                          - lastCharMatchBoost
                          - soundMatchBoost
                          - correctCntBoost
                          - editDistBoost
                      - label: Model Tuning Parameters
                        properties:
                          - minCountMain
                          - minCountFilter
                          - correctionThreshold
                          - misspellingThreshold
                          - lenScale
                          - corMisRatio
                          - maxDistance
                          - minMispellingLen
                          - minPrefix
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfigQuery
                      - label: Misc. Parameters
                        properties:
                          - stopwordsBlobName
                          - dictionaryCollection
                          - dictionaryField
                          - dictionaryDataFilterQuery
                          - analyzerConfigDictionary
                  - type: object
                    title: SQL-Based Experiment Metric
                    description: >-
                      This job is created by an experiment in order to calculate
                      an objective
                    required:
                      - id
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      experimentId:
                        type: string
                        title: Experiment ID
                        hints:
                          - readonly
                      metricName:
                        type: string
                        title: Objective name
                        hints:
                          - readonly
                      notes:
                        type: string
                        title: Notes
                        description: A short description about this job.
                        hints:
                          - lengthy
                      sql:
                        type: string
                        title: SQL
                        hints:
                          - readonly
                      experiment:
                        type: object
                        title: Experiment
                        required:
                          - id
                          - baseSignalsCollection
                          - metrics
                        properties:
                          id:
                            type: string
                            title: ID
                            maxLength: 128
                            pattern: ^[A-Za-z0-9_\-]+$
                          description:
                            type: string
                            title: Description
                          uniqueIdParameter:
                            type: string
                            title: Unique ID Parameter
                            description: >-
                              The name of the request parameter containing the
                              user ID
                            default: userId
                          baseSignalsCollection:
                            type: string
                            title: Base Collection for Signals
                            description: >-
                              Signals resulting from requests that flow through
                              this experiment will go into the signal collection
                              associated with this collection
                            minLength: 1
                            pattern: ^[A-Za-z0-9_\-]+$
                          variants:
                            type: array
                            title: Variants
                            description: >-
                              Specify what varies in this variant, and
                              optionally change the traffic weight
                            items:
                              type: object
                              properties:
                                id:
                                  type: string
                                  title: Variant id
                                  hints:
                                    - hidden
                                  maxLength: 128
                                  pattern: ^[A-Za-z0-9_\-]+$
                                name:
                                  type: string
                                  title: Name
                                queryPipeline:
                                  type: string
                                  title: Query Pipeline
                                params:
                                  type: array
                                  title: Query Params
                                  description: >-
                                    URL parameters to add to queries using this
                                    variant
                                  items:
                                    type: object
                                    required:
                                      - key
                                      - value
                                    properties:
                                      key:
                                        type: string
                                        title: Parameter Name
                                      value:
                                        type: string
                                        title: Parameter Value
                                      policy:
                                        type: string
                                        title: Update Policy
                                        enum:
                                          - replace
                                          - append
                                          - remove
                                          - default
                                        default: append
                                collection:
                                  type: string
                                  title: Collection
                                weight:
                                  type: number
                                  title: Weight
                                  description: >-
                                    Proportion of traffic to send to this
                                    variant. Higher values mean proportionally
                                    more traffic will be routed to this variant
                                  default: 1
                                  minimum: 0.01
                                  exclusiveMinimum: false
                          metrics:
                            type: array
                            title: Objectives
                            description: Metrics that will be used to evaluate the variants
                            minItems: 1
                            items:
                              type: object
                              required:
                                - name
                              properties:
                                name:
                                  type: string
                                  title: Name
                                description:
                                  type: string
                                  title: Description
                                primary:
                                  type: boolean
                                  title: Primary
                                  description: >-
                                    Whether this metric is the primary metric
                                    used for evaluating the variants (the
                                    'OEC').
                                jobId:
                                  type: string
                                  title: Associated Spark Job ID
                                  hints:
                                    - hidden
                                binary:
                                  type: boolean
                                  title: Binary-valued metric
                                  description: >-
                                    Whether this metric measures a Bernoulli
                                    trial (clicks, cart adds, etc) or a
                                    continuous-valued event.
                                  hints:
                                    - hidden
                          enabled:
                            type: boolean
                            title: Enabled
                            default: true
                            hints:
                              - readonly
                          startTimestamp:
                            type: string
                            title: Start Date
                            description: When the experiment last started
                            hints:
                              - readonly
                            format: date-time
                          runId:
                            type: string
                            title: Run Identifier
                            hints:
                              - readonly
                              - hidden
                          automaticallyAdjustTraffic:
                            type: boolean
                            title: Automatically Adjust Weights Between Variants
                            default: false
                        hints:
                          - hidden
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - experiment_sql
                        default: experiment_sql
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Custom Spark Job
                    description: Use this job when you want to run a custom JAR on Spark
                    required:
                      - id
                      - klassName
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      klassName:
                        type: string
                        title: Class name
                        description: Fully-qualified name of the Java/Scala class to invoke
                      submitArgs:
                        type: array
                        title: Script args
                        description: >-
                          Additional options to pass to the application when
                          running this job.
                        items:
                          type: string
                      script:
                        type: string
                        title: Scala Script
                        description: >-
                          Use this text field if you want to override the
                          default behaviour, which is to run
                          className.main(args)
                        hints:
                          - code/scala
                          - lengthy
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - custom_spark_scala_job
                        default: custom_spark_scala_job
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Delete Indexes in Milvus (deprecated)
                    description: Deletes specified indexes in Milvus collections
                    required:
                      - id
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      collections:
                        type: array
                        title: Collections
                        description: >-
                          List of collections in Milvus where indexes should be
                          dropped.
                        items:
                          type: string
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-milvus-delete-indexes
                        default: argo-milvus-delete-indexes
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Document Clustering
                    description: >-
                      Use this job when you want to cluster a set of documents
                      and attach cluster labels based on topics.
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - outputCollection
                      - uidField
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Training Collection
                        description: Solr Collection containing documents to be clustered
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Field to Vectorize
                        description: >-
                          Solr field containing text training data. Data from
                          multiple fields with different weights can be combined
                          by specifying them as field1:weight1,field2:weight2
                          etc.
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Training data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr
                        default: '*:*'
                        hints:
                          - advanced
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: Solr Collection to store model-labeled data to
                        minLength: 1
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - advanced
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      uidField:
                        type: string
                        title: ID Field Name
                        description: ' Field containing the unique ID for each document.'
                        default: id
                        minLength: 1
                      clusterIdField:
                        type: string
                        title: Output Field Name for Cluster Id
                        description: Output field name for unique cluster id.
                        default: cluster_id
                      clusterLabelField:
                        type: string
                        title: Detected Cluster Keywords Field Name
                        description: >-
                          Output field name for top frequent terms that are
                          (mostly) unique for each cluster.
                        default: cluster_label
                      freqTermField:
                        type: string
                        title: Top Frequent Terms Field Name
                        description: >-
                          Output field name for top frequent terms in each
                          cluster. These may overlap with other clusters.
                        default: freq_terms
                      distToCenterField:
                        type: string
                        title: >-
                          Output Field Name for doc distance to its cluster
                          center
                        description: >-
                          Output field name for doc distance to its
                          corresponding cluster center (measure how
                          representative the doc is).
                        default: dist_to_center
                      minDF:
                        type: number
                        title: Min Doc Support
                        description: >-
                          Min number of documents the term has to show up.
                          value<1.0 denotes a percentage, value=1.0 denotes
                          100%, value>1.0 denotes the exact number.
                        default: 5
                      maxDF:
                        type: number
                        title: Max Doc Support
                        description: >-
                          Max number of documents the term can show up.
                          value<1.0 denotes a percentage, value=1.0 denotes
                          100%, value>1.0 denotes the exact number.
                        default: 0.5
                      kExact:
                        type: integer
                        title: Number of Clusters
                        description: Exact number of clusters.
                        default: 0
                      kMax:
                        type: integer
                        title: Max Possible Number of Clusters
                        description: Max possible number of clusters.
                        default: 20
                      kMin:
                        type: integer
                        title: Min Possible Number of Clusters
                        description: Min possible number of clusters.
                        default: 2
                      docLenTrim:
                        type: boolean
                        title: Find Extreme Length Doc Flag
                        description: ' Whether to separate out docs with extreme lengths.'
                        default: true
                      outlierTrim:
                        type: boolean
                        title: Find Outliers Flag
                        description: ' Whether to perform outlier detection.'
                        default: true
                      shortLen:
                        type: number
                        title: Length Threshold for Short Doc
                        description: >-
                          Length threshold to define short document. value<1.0
                          denotes a percentage, value=1.0 denotes 100%,
                          value>1.0 denotes the exact number. 
                        default: 5
                      longLen:
                        type: number
                        title: Length Threshold for Long Doc
                        description: >-
                          Length threshold to define long document. value<1.0
                          denotes a percentage, value=1.0 denotes 100%,
                          value>1.0 denotes the exact number. 
                        default: 0.99
                      numKeywordsPerLabel:
                        type: integer
                        title: Number of Keywords for Each Cluster
                        description: Number of Keywords needed for labeling each cluster.
                        default: 5
                      modelId:
                        type: string
                        title: Model ID
                        description: >-
                          Identifier for the model to be trained; uses the
                          supplied Spark Job ID if not provided.
                        hints:
                          - advanced
                        minLength: 1
                      w2vDimension:
                        type: integer
                        title: Word2Vec Dimension
                        description: >-
                          Word-vector dimensionality to represent text (choose >
                          0 to use, suggested dimension ranges: 100~150)
                        default: 0
                        hints:
                          - advanced
                        minimum: 0
                        exclusiveMinimum: false
                      w2vWindowSize:
                        type: integer
                        title: Word2Vec Window Size
                        description: >-
                          The window size (context words from [-window, window])
                          for word2vec
                        default: 8
                        hints:
                          - advanced
                        minimum: 3
                        exclusiveMinimum: false
                      norm:
                        type: integer
                        title: Vector normalization
                        description: >-
                          p-norm to normalize vectors with (choose -1 to turn
                          normalization off)
                        enum:
                          - -1
                          - 0
                          - 1
                          - 2
                        default: 2
                        hints:
                          - advanced
                      analyzerConfig:
                        type: string
                        title: Lucene Analyzer Schema
                        description: >-
                          LuceneTextAnalyzer schema for tokenization
                          (JSON-encoded)
                        default: >-
                          { "analyzers": [{ "name":
                          "StdTokLowerStop","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "standard"
                          },"filters": [{ "type": "lowercase" },{ "type":
                          "KStem" },{ "type": "patternreplace", "pattern":
                          "^[\\d.]+$", "replacement": " ", "replace": "all" },{
                          "type": "length", "min": "2", "max": "32767" },{
                          "type": "fusionstop", "ignoreCase": "true", "format":
                          "snowball", "words":
                          "org/apache/lucene/analysis/snowball/english_stop.txt"
                          }] }],"fields": [{ "regex": ".+", "analyzer":
                          "StdTokLowerStop" } ]}
                        hints:
                          - code/json
                          - advanced
                          - lengthy
                        minLength: 1
                      clusteringMethod:
                        type: string
                        title: Clustering Method (hierarchical or kmeans)
                        description: Choose between hierarchical vs kmeans clustering.
                        default: hierarchical
                        hints:
                          - advanced
                      outlierK:
                        type: integer
                        title: Number of outlier groups
                        description: Number of clusters to help find outliers.
                        default: 10
                        hints:
                          - advanced
                      outlierThreshold:
                        type: number
                        title: Outlier cutoff
                        description: >-
                          Identify as outlier group if less than this percent of
                          total documents. value<1.0 denotes a percentage,
                          value=1.0 denotes 100%, value>1.0 denotes the exact
                          number. 
                        default: 0.01
                        hints:
                          - advanced
                      minDivisibleSize:
                        type: number
                        title: Minimum divisible cluster size
                        description: >-
                          Clusters must have at least this many documents to be
                          split further. value<1.0 denotes a percentage,
                          value=1.0 denotes 100%, value>1.0 denotes the exact
                          number. 
                        default: 0
                        hints:
                          - advanced
                      kDiscount:
                        type: number
                        title: Discount for K when choosing number of clusters
                        description: >-
                          Applies a discount to help favor large/small K (number
                          of clusters). A smaller value pushes K to assume a
                          higher value within the [min, max] K range.
                        default: 0.7
                        hints:
                          - advanced
                      stopwordsList:
                        type: array
                        title: List of stopwords
                        description: Stopwords defined in Lucene analyzer config
                        hints:
                          - readonly
                          - hidden
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - doc_clustering
                        default: doc_clustering
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - uidField
                          - clusterIdField
                          - freqTermField
                          - clusterLabelField
                          - distToCenterField
                      - label: Model Tuning Parameters
                        properties:
                          - clusteringMethod
                          - outlierTrim
                          - outlierK
                          - outlierThreshold
                          - kExact
                          - kMax
                          - kMin
                          - w2vDimension
                          - w2vWindowSize
                          - maxDF
                          - minDF
                          - norm
                          - numKeywordsPerLabel
                          - minDivisibleSize
                          - kDiscount
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfig
                          - docLenTrim
                          - longLen
                          - shortLen
                      - label: Misc. Parameters
                        properties:
                          - modelId
                  - type: object
                    title: Smart Answers Supervised Training
                    description: >-
                      Trains Smart Answers model on a supervised basis with
                      pre-trained or trained embeddings and deploys the trained
                      model to the ML Model Service
                    required:
                      - id
                      - trainingCollection
                      - trainingFormat
                      - questionColName
                      - answerColName
                      - deployModelName
                      - modelReplicas
                      - modelBase
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      useAutoML:
                        type: boolean
                        title: Perform auto hyperparameter tuning
                        description: >-
                          Automatically tune hyperparameters (will take longer
                          to train). Transformer models aren't used in this
                          regime
                        default: false
                      trainingCollection:
                        type: string
                        title: Training data path
                        description: >-
                          Solr collection or cloud storage path where training
                          data is present.
                        minLength: 1
                      trainingFormat:
                        type: string
                        title: Training data format
                        description: The format of the training data - solr, parquet etc.
                        default: solr
                        minLength: 1
                      trainingDataFilterQuery:
                        type: string
                        title: Training Data Filter Query
                        description: >-
                          Solr or SQL query to filter training data. Use solr
                          query when solr collection is specified in Training
                          Path. Use SQL query when cloud storage location is
                          specified. The table name for SQL is `spark_input`
                        hints:
                          - code/sql
                          - advanced
                      secretName:
                        type: string
                        title: Cloud storage secret name
                        description: >-
                          Name of the secret used to access cloud storage as
                          defined in the K8s namespace
                        hints:
                          - advanced
                        minLength: 1
                      questionColName:
                        type: string
                        title: Question Field
                        description: Name of the field containing questions
                        minLength: 1
                      answerColName:
                        type: string
                        title: Answer Field
                        description: Name of the field containing answers
                        minLength: 1
                      weightColName:
                        type: string
                        title: Weight Field
                        description: Name of the field to be used for weights
                        minLength: 1
                      deployModelName:
                        type: string
                        title: Model Deployment Name
                        description: >-
                          Name of the model to be used for deployment (must be a
                          valid lowercased DNS subdomain with no underscores)
                        maxLength: 30
                        pattern: >-
                          ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
                      testMode:
                        type: boolean
                        title: Test Mode
                        description: >-
                          If set to true, then the training will exit after the
                          first iteration. Useful for ensuring that the
                          end-to-end pipeline is working
                        default: false
                        hints:
                          - hidden
                      modelReplicas:
                        type: integer
                        title: Model replicas
                        description: >-
                          How many replicas of the model should be deployed by
                          Seldon Core
                        default: 1
                      modelBase:
                        type: string
                        title: Model base
                        description: >-
                          Specify one of these custom embeddings:
                          ['word_custom', 'bpe_custom'] or choose one of the
                          included pre-trained embeddings / models.
                        enum:
                          - word_custom
                          - bpe_custom
                          - word_en_300d_2M
                          - bpe_en_300d_10K
                          - bpe_en_300d_200K
                          - bpe_ja_300d_100K
                          - bpe_ko_300d_100K
                          - bpe_zh_300d_50K
                          - bpe_multi_300d_320K
                          - distilbert_en
                          - distilbert_multi
                          - biobert_v1.1
                        default: word_en_300d_2M
                      trainingSampleFraction:
                        type: number
                        title: Training Data Sampling Fraction
                        description: >-
                          The proportion of data to be sampled from the full
                          dataset. Use a value between 0 and 1 for a proportion
                          (e.g. 0.5 for 50%), or for a specific number of
                          examples, use an integer larger than 1. Leave blank
                          for no sampling
                        hints:
                          - advanced
                      seed:
                        type: integer
                        title: Seed
                        description: Random seed for sampling
                        default: 12345
                        hints:
                          - hidden
                      minTokensNum:
                        type: integer
                        title: Minimum number of words in doc
                        description: >-
                          Drop document if the total words is lower than this
                          value
                        default: 1
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      maxTokensNum:
                        type: integer
                        title: Maximum number of words in doc
                        description: >-
                          Drop document if the total words is greater than this
                          value
                        default: 5000
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      lowerCases:
                        type: boolean
                        title: Lower case all words
                        description: >-
                          Whether to lower case all words in training, i.e.
                          whether to treat upper case and lower case words
                          equally. Only utilized for custom embeddings or for
                          the default model base: word_en_300d_2M.
                        default: false
                      maxVocabSize:
                        type: integer
                        title: Maximum vocabulary size
                        description: >-
                          Maximum number of words in vocabulary, words will be
                          trimmed if frequency is too low. Only utilized for
                          custom embeddings or for the default model base:
                          word_en_300d_2M.
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      w2vEpochs:
                        type: integer
                        title: Word2Vec training epochs
                        description: Number of epochs to train custom word2vec embeddings
                        default: 15
                        hints:
                          - advanced
                      w2vTextsCollection:
                        type: string
                        title: Texts data path
                        description: >-
                          Solr collection or cloud storage path which contains
                          extra documents that will be used to get better
                          vocabulary coverage as well as to train custom word
                          embeddings if custom Model Base is specified.
                      w2vTextColumns:
                        type: string
                        title: Texts collection fields
                        description: >-
                          Which fields in the text collection to use. If
                          multiple fields, please separate them by comma, e.g.
                          description_t,title_t.
                      textsFormat:
                        type: string
                        title: Texts format
                        description: >-
                          The format of the texts training data - solr, parquet
                          etc.
                      w2vVectorSize:
                        type: integer
                        title: Size of word vectors
                        description: >-
                          Word-vector dimensionality to represent text
                          (suggested dimension ranges: 100~300)
                        default: 150
                        hints:
                          - advanced
                      w2vWindowSize:
                        type: integer
                        title: Word2Vec window size
                        description: >-
                          The window size (context words from [-window, window])
                          for Word2Vec
                        default: 8
                        hints:
                          - advanced
                      valSize:
                        type: number
                        title: Validation sample size
                        description: >-
                          Proportion of the unique questions that should be used
                          as validation samples. When val_size > 1, then that
                          specific number of unique questions will be sampled
                          rather than a proportion.
                        default: 0.1
                        minimum: 0.001
                        exclusiveMinimum: false
                      maxLen:
                        type: integer
                        title: Max length
                        description: >-
                          Maximum length of text processed by the model. Texts
                          longer than this value will be trimmed. This parameter
                          is especially important for Transformer-based models
                          as it affects training and inference time. Note that
                          the maximum supported length for Transformer models is
                          512, so you can specify any value up to that. The
                          default value is the max value between three times the
                          STD of question lengths and two times the STD of
                          answer lengths.
                        hints:
                          - advanced
                      embSPDP:
                        type: number
                        title: Dropout ratio
                        description: >-
                          Fraction of input to drop with Dropout layer (from
                          0-1)
                        default: 0.3
                      trainBatch:
                        type: integer
                        title: Training batch size
                        description: >-
                          Batch size during training. If left blank, this will
                          be set automatically based on the input data
                      infBatch:
                        type: integer
                        title: Inference batch size used in validation
                        description: >-
                          Batch size during validation. If left blank, this will
                          be set automatically based on the input data
                        hints:
                          - advanced
                      rnnNamesList:
                        type: string
                        title: RNN function list
                        description: >-
                          List of layers of RNNs to be used, with possible
                          values of lstm, gru. E.g. ["lstm", "lstm"]. This value
                          will be automatically decided based on data if left
                          blank
                      rnnUnitsList:
                        type: string
                        title: RNN function units list
                        description: >-
                          List of RNN layer units numbers, corresponding to RNN
                          function list. E.g. 150, 150. This value will be
                          automatically decided based on data if left blank
                      epochs:
                        type: integer
                        title: Number of epochs to be used in training
                      weightDecay:
                        type: number
                        title: Weight decay
                        description: >-
                          L2 penalty used in AdamW optimizer. Bigger values will
                          provide stronger regularization. Default values are
                          0.0003 for RNN models and 0.01 for Transformer models.
                      monitorPatience:
                        type: integer
                        title: Monitor patience
                        description: >-
                          Stop training if no improvement in metrics by this
                          number of epochs
                      baseLR:
                        type: number
                        title: Base learning rate
                        description: >-
                          Base learning rate that should be used during
                          training. Reasonable values are from 0.0001 to 0.003
                          depending on model base. It's better to use lower LR
                          with Transformer models.
                      minLR:
                        type: number
                        title: Minimum learning rate
                        description: >-
                          Minimum learning rate used during training. Reasonable
                          values are from 0.00001 to 0.00003.
                        hints:
                          - advanced
                      numWarmUpEpochs:
                        type: integer
                        title: Number of warm-up epochs
                        description: >-
                          Number of epochs used for the warm-up stage for
                          learning rates. Reasonable values are from 0-4 epochs,
                          usually 1-2 are used.
                      numFlatEpochs:
                        type: integer
                        title: Number of flat epochs
                        description: >-
                          Number of epochs used in flat stage for learning
                          rates. Reasonable value would be one-half of the
                          epochs, so the other half will be with Cosine
                          Annealing learning rate.
                      extraTrainingArgs:
                        type: string
                        title: Extra training args for Python scripts
                        description: >-
                          Add any additional arguments for the Python training
                          scripts in this field
                        hints:
                          - hidden
                      monitorMetric:
                        type: string
                        title: Monitor metric
                        description: >-
                          The main metric at k that should be monitored to
                          decide when to stop training. Possible metrics are:
                          ["map", "mrr", "recall", "precision"]
                        default: mrr@3
                      monitorMetricsList:
                        type: string
                        title: Metrics list
                        description: >-
                          List of evaluation metrics on validation data that
                          will be printed in the log at the end of each epoch.
                          Possible metrics are: ["map", "mrr", "recall",
                          "precision"]
                        default: '["map", "mrr", "recall"]'
                      kList:
                        type: string
                        title: Metrics@k list
                        description: >-
                          The k retrieval position that will be used to compute
                          for each metric
                        default: '[1,3,5]'
                      numClusters:
                        type: integer
                        title: Number of clusters
                        description: >-
                          DEPRECATED: please, consider using Milvus for fast
                          dense vector similarity search. Number of clusters to
                          be used for fast dense vector retrieval. Note no
                          clustering will be applied if this is set to 0. If
                          left blank, cluster count will be inferred by the job
                          depending on the data
                        default: 0
                        hints:
                          - advanced
                      topKClusters:
                        type: integer
                        title: Top k of clusters to return
                        description: >-
                          How many closest clusters the model can find for each
                          query. At retrieval time, all answers in top k nearest
                          clusters will be returned and reranked
                        default: 10
                        hints:
                          - advanced
                      unidecode:
                        type: boolean
                        title: Apply unicode decoding
                        description: >-
                          Use Unidecode library to transform Unicode input into
                          ASCII transliterations. Only utilized for custom
                          embeddings or for the default model base:
                          word_en_300d_2M
                        default: true
                      useMixedPrecision:
                        type: string
                        title: Use Mixed Precision
                        description: >-
                          Check this option to train a model with mixed
                          precision support.This will only work if the node has
                          a GPU. You'll only see a speed up on newer NVidia GPUs
                          (Turing and later) with Transformer models.
                        enum:
                          - auto
                          - 'true'
                          - 'false'
                        default: auto
                        hints:
                          - advanced
                      useLabelingResolution:
                        type: boolean
                        title: Use Labeling Resolution
                        description: >-
                          Check this to determine similar questions and similar
                          answers via labeling resolution and graph connected
                          components. Does not work well with noisy data like
                          eCommerce queries. But helps with FAQ / QnA data.
                        default: false
                      useLayerNorm:
                        type: boolean
                        title: Use Layer Norm
                        description: Check this to use layer norm for pooling.
                        default: false
                        hints:
                          - advanced
                      globalPoolType:
                        type: string
                        title: Global Pool Type
                        description: >-
                          Determines how token vectors should be aggregated to
                          obtain final content vector. Must be one of: [avg,
                          max, self_attention].
                        enum:
                          - avg
                          - max
                          - self_attention
                        default: self_attention
                        hints:
                          - advanced
                      embTrainable:
                        type: boolean
                        title: Fine-tune Token Embeddings
                        description: >-
                          Choose this to fine-tune token embeddings during model
                          training. Tends to work well with eCommerce data.
                        default: false
                        hints:
                          - advanced
                      eps:
                        type: number
                        title: Eps
                        description: >-
                          Epsilon is the AdamW optimizer. By default 1e-8 is
                          used for RNN models and 1e-6 is used for Transformer
                          models.
                        hints:
                          - advanced
                      maxGradNorm:
                        type: number
                        title: Max Grad Norm
                        description: >-
                          Max norm used for gradients clipping. By default it’s
                          not used for RNN models but 1.0 value is used for
                          Transformer models.
                        hints:
                          - advanced
                      useXbm:
                        type: string
                        title: Use Cross-batch memory
                        description: >-
                          Stores encoded representations of previous batches in
                          memory for better negative examples sampling. Works
                          well for Transformer models. Leave this at 'auto' to
                          let the training module determine this.
                        enum:
                          - auto
                          - 'true'
                          - 'false'
                        default: auto
                        hints:
                          - advanced
                      xbmMemorySize:
                        type: integer
                        title: Cross-batch memory size
                        description: >-
                          Number of examples from the previous batches that are
                          stored in memory. The default size for Transformer
                          models is 256.
                        hints:
                          - advanced
                      xbmEpochActivation:
                        type: integer
                        title: Cross-batch epoch activation
                        description: >-
                          After which epoch cross-batch memory should be
                          activated. By default it’s activated after the first
                          epoch for Transformer models.
                        hints:
                          - advanced
                      evalAnnIndex:
                        type: string
                        title: Eval ANN index
                        description: >-
                          Choose this to use Approximate Nearest Neighbor search
                          during evaluation. For big datasets it can speed up
                          the evaluation time with minimum loss in accuracy, for
                          small datasets it will most likely make it slower.
                        enum:
                          - auto
                          - 'true'
                          - 'false'
                        default: auto
                        hints:
                          - advanced
                      distance:
                        type: string
                        title: Distance
                        description: >-
                          Vectors distance/similarity that should be used during
                          training and in the pipelines. Choose one of:
                          ['cosine_similarity', 'dot_product_similarity',
                          'euclidean_distance'].
                        enum:
                          - cosine_similarity
                          - dot_product_similarity
                          - euclidean_distance
                        default: cosine_similarity
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-qna-supervised
                        default: argo-qna-supervised
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - trainingFormat
                          - trainingDataFilterQuery
                          - seed
                          - trainingSampleFraction
                          - questionColName
                          - answerColName
                          - weightColName
                          - w2vTextsCollection
                          - w2vTextColumns
                          - textsFormat
                          - deployModelName
                          - modelReplicas
                          - secretName
                      - label: Data Preprocessing
                        properties:
                          - useLabelingResolution
                          - unidecode
                          - lowerCases
                          - minTokensNum
                          - maxTokensNum
                          - maxVocabSize
                      - label: Custom Embeddings Initialization
                        properties:
                          - w2vEpochs
                          - w2vVectorSize
                          - w2vWindowSize
                      - label: Evaluation Parameters
                        properties:
                          - valSize
                          - monitorMetric
                          - monitorPatience
                          - monitorMetricsList
                          - kList
                          - evalAnnIndex
                      - label: General Encoder Parameters
                        properties:
                          - embTrainable
                          - maxLen
                          - globalPoolType
                          - useLayerNorm
                          - numClusters
                          - topKClusters
                      - label: RNN Encoder Parameters
                        properties:
                          - embSPDP
                          - rnnNamesList
                          - rnnUnitsList
                      - label: Training Parameters
                        properties:
                          - epochs
                          - trainBatch
                          - infBatch
                          - baseLR
                          - numWarmUpEpochs
                          - numFlatEpochs
                          - minLR
                          - weightDecay
                          - distance
                          - eps
                          - maxGradNorm
                          - useMixedPrecision
                          - useXbm
                          - xbmMemorySize
                          - xbmEpochActivation
                  - type: object
                    title: Phrase Extraction (Deprecated)
                    description: >-
                      Use this job when you want to identify statistically
                      significant phrases in your content. This job is
                      deprecated.
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - analyzerConfig
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Training Collection
                        description: Solr Collection containing labeled training data
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Field to Vectorize
                        description: >-
                          Solr field containing text training data. Data from
                          multiple fields with different weights can be combined
                          by specifying them as field1:weight1,field2:weight2
                          etc.
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Training data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr
                        default: '*:*'
                        hints:
                          - advanced
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 8180
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: >-
                          Solr Collection to store extracted phrases; defaults
                          to the query_rewrite_staging collection for the
                          associated app.
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - advanced
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      ngramSize:
                        type: integer
                        title: Ngram Size
                        description: >-
                          The number of words in the ngram you want to consider
                          for the sips.
                        default: 3
                        maximum: 5
                        exclusiveMaximum: false
                        minimum: 2
                        exclusiveMinimum: false
                      minmatch:
                        type: integer
                        title: Minimum Count
                        description: >-
                          The number of times a phrase must exist to be
                          considered. NOTE: if input is non signal data, please
                          reduce the number to e.g. 5.
                        default: 100
                        minimum: 1
                        exclusiveMinimum: false
                      analyzerConfig:
                        type: string
                        title: Lucene Text Analyzer
                        description: The style of text analyzer you would like to use.
                        default: >-
                          { "analyzers": [{ "name":
                          "StdTokLowerStop","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "standard"
                          },"filters": [{ "type": "lowercase" }] }],"fields": [{
                          "regex": ".+", "analyzer": "StdTokLowerStop" } ]}
                        hints:
                          - lengthy
                          - code/json
                      attachPhrases:
                        type: boolean
                        title: Extract Key Phrases from Input Text
                        description: >-
                          Checking this will cause the job to associate
                          extracted phrases from each source doc. and write them
                          back to the output collection. If input data is
                          signals, it is suggested to turn this option off.
                          Also, currently it is not allowed to check this option
                          while attempting to write to a _query_rewrite_staging
                          collection.
                        default: false
                        hints:
                          - advanced
                      stopwordsList:
                        type: array
                        title: List of stopwords
                        description: Stopwords defined in Lucene analyzer config
                        hints:
                          - readonly
                          - hidden
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      minLikelihood:
                        type: number
                        title: Minimum Likelihood Score
                        description: >-
                          Phrases below this threshold will not be written in
                          the output of this job.
                        hints:
                          - advanced
                      enableAutoPublish:
                        type: boolean
                        title: Enable auto-publishing
                        description: >-
                          If true, automatically publishes rewrites for rules.
                          Default is false to allow for initial human-aided
                          reviewing
                        default: false
                        hints:
                          - advanced
                      sparkPartitions:
                        type: integer
                        title: Set minimum Spark partitions for input
                        description: >-
                          Spark will re-partition the input to have this number
                          of partitions. Increase for greater parallelism
                        default: 200
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - sip
                        default: sip
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                      - label: Model Tuning Parameters
                        properties:
                          - minmatch
                          - ngramSize
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfig
                  - type: object
                    title: Parallel Bulk Loader
                    description: >-
                      Use this job when you want to load data into Fusion from a
                      SparkSQL compliant datasource, and send this data to any
                      Spark supported datasource (Solr/Index
                      Pipeline/S3/GCS/...).
                    required:
                      - id
                      - format
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      format:
                        type: string
                        title: Format
                        description: >-
                          Specifies the input data source format; common
                          examples include: parquet, json, textinputformat
                      path:
                        type: string
                        title: Path
                        description: >-
                          Path to load; for data sources that support multiple
                          paths, separate by commas
                      streaming:
                        type: object
                        title: Streaming
                        required:
                          - enableStreaming
                        properties:
                          enableStreaming:
                            type: boolean
                            title: Enable Streaming
                            description: >-
                              Stream data from input source to output Solr
                              collection
                          outputMode:
                            type: string
                            title: Output mode
                            description: >-
                              Specifies the output mode for streaming. E.g.,
                              append (default), complete, update
                            enum:
                              - append
                              - complete
                              - update
                            default: append
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options passed to the data source to configure the
                          read operation; options differ for every data source
                          so refer to the documentation for more information.
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: >-
                          Solr Collection to send the documents loaded from the
                          input data source.
                      outputIndexPipeline:
                        type: string
                        title: Send to Index Pipeline
                        description: >-
                          Send the documents loaded from the input data source
                          to an index pipeline instead of going directly to
                          Solr.
                      outputParser:
                        type: string
                        title: Send to Parser
                        description: >-
                          Parser to send the documents to while sending to index
                          pipeline. (Defaults to same as index pipeline)
                        hints:
                          - advanced
                      defineFieldsUsingInputSchema:
                        type: boolean
                        title: Define Fields in Solr?
                        description: >-
                          If true, define fields in Solr using the input schema;
                          if a SQL transform is defined, the fields to define
                          are based on the transformed DataFrame schema instead
                          of the input.
                        default: true
                        hints:
                          - advanced
                      atomicUpdates:
                        type: boolean
                        title: Send as Atomic Updates?
                        description: >-
                          Send documents to Solr as atomic updates; only applies
                          if sending directly to Solr and not an index pipeline.
                        default: false
                        hints:
                          - advanced
                      timestampFieldName:
                        type: string
                        title: Timestamp Field Name
                        description: >-
                          Name of the field that holds a timestamp for each
                          document; only required if using timestamps to filter
                          new rows from the input source.
                        hints:
                          - advanced
                      clearDatasource:
                        type: boolean
                        title: Clear Existing Documents
                        description: >-
                          If true, delete any documents indexed in Solr by
                          previous runs of this job. Default is false.
                        default: false
                        hints:
                          - advanced
                      outputPartitions:
                        type: integer
                        title: Output Partitions
                        description: >-
                          Partition the input DataFrame into partitions before
                          writing out to Solr or Fusion
                        hints:
                          - advanced
                      optimizeOutput:
                        type: integer
                        title: Optimize
                        description: >-
                          Optimize the Solr collection down to the specified
                          number of segments after writing to Solr.
                        hints:
                          - advanced
                      cacheAfterRead:
                        type: boolean
                        title: Cache After Read
                        description: >-
                          Cache input data in memory (and disk as needed) after
                          reading; default is false, setting to true may help
                          stability of the job by reading all data from the
                          input source first before transforming or writing to
                          Solr. This could make the job run slower as it adds an
                          intermediate write operation.
                        default: false
                        hints:
                          - hidden
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output. For output formats
                          other than solr or index-pipeline, format and path
                          options can be specified here
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      transformScala:
                        type: string
                        title: Transform Scala
                        description: >-
                          Optional Scala script used to transform the results
                          returned by the data source before indexing. You must
                          define your transform script in a method with
                          signature: def transform(inputDF: Dataset[Row]) :
                          Dataset[Row]
                        hints:
                          - advanced
                          - lengthy
                          - code/scala
                      mlModelId:
                        type: string
                        title: Spark ML PipelineModel ID
                        description: >-
                          The ID of the Spark ML PipelineModel stored in the
                          Fusion blob store.
                        hints:
                          - advanced
                        reference: blob
                        blobType: model:ml-model
                      transformSql:
                        type: string
                        title: Transform SQL
                        description: >-
                          Optional SQL used to transform the results returned by
                          the data source before indexing. The input DataFrame
                          returned from the data source will be registered as a
                          temp table named '_input'. The Scala transform is
                          applied before the SQL transform if both are provided,
                          which allows you to define custom UDFs in the Scala
                          script for use in your transformation SQL.
                        hints:
                          - advanced
                          - lengthy
                          - code/sql
                      shellOptions:
                        type: array
                        title: Spark Shell Options
                        description: >-
                          Additional options to pass to the Spark shell when
                          running this job.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      templateParams:
                        type: array
                        title: Interpreter Params
                        description: Bind the key/values to the script interpreter
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      continueAfterFailure:
                        type: boolean
                        title: Continue after index failure
                        description: >-
                          If set to true, when a failure occurs when sending a
                          document through an index pipeline, the job will
                          continue onto the next document instead of failing
                        default: false
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - parallel-bulk-loader
                        default: parallel-bulk-loader
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Outlier Detection
                    description: >-
                      Use this job when you want to find outliers from a set of
                      documents and attach labels for each outlier group.
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - uidField
                      - outputCollection
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Training Collection
                        description: Solr Collection containing documents to be clustered
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Field to Vectorize
                        description: >-
                          Solr field containing text training data. Data from
                          multiple fields with different weights can be combined
                          by specifying them as field1:weight1,field2:weight2
                          etc.
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Training data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr
                        default: '*:*'
                        hints:
                          - advanced
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: Solr Collection to store model-labeled data to
                        minLength: 1
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - advanced
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      modelId:
                        type: string
                        title: Model ID
                        description: >-
                          Identifier for the model to be trained; uses the
                          supplied Spark Job ID if not provided.
                        hints:
                          - advanced
                        minLength: 1
                      outlierGroupIdField:
                        type: string
                        title: Output Field Name for Outlier Group Id
                        description: Output field name for unique outlier group id.
                        default: outlier_group_id
                      outlierGroupLabelField:
                        type: string
                        title: Top Unique Terms Field Name
                        description: >-
                          Output field name for top frequent terms that are
                          (mostly) unique for each outlier group as computed
                          based on TF-IDF and group Id.
                        default: outlier_group_label
                      outputOutliersOnly:
                        type: boolean
                        title: Only save outliers?
                        description: >-
                          If true, only outliers are saved in the output
                          collection, otherwise, the whole dataset is saved.
                        default: false
                      uidField:
                        type: string
                        title: ID Field Name
                        description: ' Field containing the unique ID for each document.'
                        default: id
                        minLength: 1
                      analyzerConfig:
                        type: string
                        title: Lucene Analyzer Schema
                        description: >-
                          LuceneTextAnalyzer schema for tokenization
                          (JSON-encoded)
                        default: >-
                          { "analyzers": [{ "name":
                          "StdTokLowerStop","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "standard"
                          },"filters": [{ "type": "lowercase" },{ "type":
                          "KStem" },{ "type": "length", "min": "2", "max":
                          "32767" },{ "type": "fusionstop", "ignoreCase":
                          "true", "format": "snowball", "words":
                          "org/apache/lucene/analysis/snowball/english_stop.txt"
                          }] }],"fields": [{ "regex": ".+", "analyzer":
                          "StdTokLowerStop" } ]}
                        hints:
                          - lengthy
                          - code/json
                        minLength: 1
                      freqTermField:
                        type: string
                        title: Top Frequent Terms Field Name
                        description: >-
                          Output field name for top frequent terms in each
                          cluster. These may overlap with other clusters.
                        default: freq_terms
                      distToCenterField:
                        type: string
                        title: >-
                          Output Field Name for doc distance to its cluster
                          center
                        description: >-
                          Output field name for doc distance to its
                          corresponding cluster center (measure how
                          representative the doc is).
                        default: dist_to_center
                      norm:
                        type: integer
                        title: Vector normalization
                        description: >-
                          p-norm to normalize vectors with (choose -1 to turn
                          normalization off)
                        enum:
                          - -1
                          - 0
                          - 1
                          - 2
                        default: 2
                        hints:
                          - advanced
                      minDF:
                        type: number
                        title: Min Doc Support
                        description: >-
                          Min number of documents the term has to show up.
                          value<1.0 denotes a percentage, value=1.0 denotes
                          100%, value>1.0 denotes the exact number.
                        default: 5
                      maxDF:
                        type: number
                        title: Max Doc Support
                        description: >-
                          Max number of documents the term can show up.
                          value<1.0 denotes a percentage, value=1.0 denotes
                          100%, value>1.0 denotes the exact number.
                        default: 0.75
                      numKeywordsPerLabel:
                        type: integer
                        title: Number of Keywords for Each Cluster
                        description: Number of Keywords needed for labeling each cluster.
                        default: 5
                      outlierK:
                        type: integer
                        title: Number of outlier groups
                        description: Number of clusters to help find outliers.
                        default: 10
                        hints:
                          - advanced
                      outlierThreshold:
                        type: number
                        title: Outlier cutoff
                        description: >-
                          Identify as outlier group if less than this percent of
                          total documents. value<1.0 denotes a percentage,
                          value=1.0 denotes 100%, value>1.0 denotes the exact
                          number.
                        default: 0.01
                        hints:
                          - advanced
                      stopwordsList:
                        type: array
                        title: List of stopwords
                        description: Stopwords defined in Lucene analyzer config
                        hints:
                          - readonly
                          - hidden
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - outlier_detection
                        default: outlier_detection
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                          - outputOutliersOnly
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - uidField
                          - outlierGroupIdField
                          - outlierGroupLabelField
                          - freqTermField
                          - distToCenterField
                      - label: Model Tuning Parameters
                        properties:
                          - outlierK
                          - outlierThreshold
                          - maxDF
                          - minDF
                          - norm
                          - numKeywordsPerLabel
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfig
                      - label: Misc. Parameters
                        properties:
                          - modelId
                  - type: object
                    title: ALS Recommender (deprecated)
                    description: >-
                      Use this job when you want to compute user recommendations
                      or item similarities using a collaborative filtering
                      recommender. You can also implement a user-to-item
                      recommender in the advanced section of this job’s
                      configuration UI. Deprecated as of Fusion 5.2.0 and will
                      be removed in a future release; use the BPR Recommender
                      instead.
                    required:
                      - id
                      - trainingCollection
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Recommender Training Collection
                        description: >-
                          User/Item preference collection (often a signals
                          collection or signals aggregation collection)
                      outputCollection:
                        type: string
                        title: Items-for-users Recommendation Collection
                        description: >-
                          Collection to store batch-predicted user/item
                          recommendations (if absent, none computed)
                      outputItemSimCollection:
                        type: string
                        title: Item-to-item Similarity Collection
                        description: >-
                          Collection to store batch-computed item/item
                          similarities (if absent, none computed)
                      numRecs:
                        type: integer
                        title: Number of User Recommendations to Compute
                        description: >-
                          Batch compute and store this many item recommendations
                          per user
                        default: 10
                      numSims:
                        type: integer
                        title: Number of Item Similarites to Compute
                        description: >-
                          Batch compute and store this many item similarities
                          per item
                        default: 10
                      implicitRatings:
                        type: boolean
                        title: Implicit Preferences
                        description: >-
                          Treat training preferences as implicit signals of
                          interest (i.e. clicks or other actions) as opposed to
                          explicit item ratings
                        default: true
                      deleteOldRecs:
                        type: boolean
                        title: Delete Old Recommendations
                        description: >-
                          Delete old recommendations after generating new
                          recommendations.
                        default: true
                      excludeFromDeleteFilter:
                        type: string
                        title: Exclude from Delete Filter
                        description: >-
                          If the 'Delete Old Recommendations' flag is enabled,
                          then use this query filter to identify existing
                          recommendation docs to exclude from delete. The filter
                          should identify recommendation docs you want to keep.
                        hints:
                          - advanced
                      outputUserRecsCollection:
                        type: string
                        title: Users-for-items Recommendation Collection
                        description: >-
                          Collection to store batch-predicted item/user
                          recommendations (if absent, none computed)
                        hints:
                          - advanced
                      numUserRecsPerItem:
                        type: integer
                        title: Number of Users to Recommend to each Item
                        description: >-
                          Batch compute and store this many user recommendations
                          per item
                        default: 10
                        hints:
                          - advanced
                      modelId:
                        type: string
                        title: Recommender Model ID
                        description: >-
                          Identifier for the recommender model. Will be used as
                          the unique key when storing the model in Solr. If
                          absent, it will default to the job ID.
                        hints:
                          - advanced
                      saveModel:
                        type: boolean
                        title: Save Model in Solr
                        description: Whether we should save the computed ALS model in Solr
                        default: false
                        hints:
                          - advanced
                      modelCollection:
                        type: string
                        title: Model Collection
                        description: >-
                          Collection to load and store the computed model, if
                          "Save Model" is true. Defaults to "[app
                          name]_recommender_models"
                        hints:
                          - advanced
                        minLength: 1
                      alwaysTrain:
                        type: boolean
                        title: Force model re-training
                        description: >-
                          Even if a model with this modelId exists, re-train if
                          set true
                        default: true
                        hints:
                          - advanced
                      maxTrainingIterations:
                        type: integer
                        title: Maximum Training Iterations
                        description: >-
                          Maximum number of iterations to use when learning the
                          matrix decomposition
                        default: 10
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Training Data Filter Query
                        description: >-
                          Solr query to filter training data (e.g. downsampling
                          or selecting based on min. pref values)
                        default: '*:*'
                        hints:
                          - advanced
                      popularItemMin:
                        type: integer
                        title: Training Data Filter By Popular Items
                        description: >-
                          Items must have at least this # of unique users
                          interacting with it to go into the sample
                        default: 2
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      trainingSampleFraction:
                        type: number
                        title: Training Data Sampling Fraction
                        description: >-
                          Downsample preferences for items (bounded to at least
                          2) by this fraction
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      userIdField:
                        type: string
                        title: Training Collection User Id Field
                        description: Solr field name containing stored user ids
                        default: user_id_s
                        hints:
                          - advanced
                      itemIdField:
                        type: string
                        title: Training Collection Item Id Field
                        description: Solr field name containing stored item ids
                        default: item_id_s
                        hints:
                          - advanced
                      weightField:
                        type: string
                        title: Training Collection Weight Field
                        description: >-
                          Solr field name containing stored weights or
                          preferences the user has for that item
                        default: weight_d
                        hints:
                          - advanced
                      initialBlocks:
                        type: integer
                        title: Training Block Size
                        description: >-
                          Number of sub-matrix blocks to break the training data
                          into (default: -1, for auto-sizing)
                        default: -1
                        hints:
                          - hidden
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Training DataFrame Config Options
                        description: >-
                          Additional Spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      initialRank:
                        type: integer
                        title: Recommender Rank
                        description: >-
                          Number of user/item factors in the recommender
                          decomposition (or starting guess for it, if doing
                          parameter grid search)
                        default: 100
                        hints:
                          - advanced
                      initialAlpha:
                        type: number
                        title: Implicit Preference Confidence
                        description: >-
                          Confidence weight to give the implicit preferences (or
                          starting guess, if doing parameter grid search)
                        default: 50
                        hints:
                          - advanced
                      initialLambda:
                        type: number
                        title: Initial Lambda
                        description: >-
                          Smoothing parameter to avoid overfitting (or starting
                          guess, if doing parameter grid search). Slightly
                          larger value needed for small data sets
                        default: 0.01
                        hints:
                          - advanced
                      gridSearchWidth:
                        type: integer
                        title: Grid Search Width
                        description: >-
                          Parameter grid search to be done centered around
                          initial parameter guesses, exponential step size, this
                          number of steps (if <= 0, no grid search). 1 is a
                          reasonable number to start with.
                        default: 0
                        hints:
                          - advanced
                      randomSeed:
                        type: integer
                        title: Random Seed
                        description: >-
                          Pseudorandom determinism fixed by keeping this seed
                          constant
                        default: 13
                        hints:
                          - advanced
                      itemMetadataFields:
                        type: array
                        title: Item Metadata Fields
                        description: >-
                          List of item metadata fields to include in the
                          recommendation output documents.
                        hints:
                          - advanced
                        items:
                          type: string
                      itemMetadataCollection:
                        type: string
                        title: Item Metadata Collection
                        description: >-
                          Fusion collection or catalog asset ID containing item
                          metadata fields you want to add to the recommendation
                          output documents.
                        hints:
                          - advanced
                      itemMetadataJoinField:
                        type: string
                        title: Item Metadata Join Field
                        description: >-
                          Name of field in the item metadata collection to join
                          on; defaults to the item id field configured for this
                          job.
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: Options used when writing output to Solr.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format which training data comes in
                          (like 'solr', 'hdfs', 'file', 'parquet' etc)
                        default: solr
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - als_recommender
                        default: als_recommender
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - outputUserRecsCollection
                          - outputItemSimCollection
                          - writeOptions
                      - label: Model Tuning Parameters
                        properties:
                          - numSims
                          - implicitRatings
                          - deleteOldRecs
                      - label: Training Data Settings
                        properties:
                          - trainingDataFilterQuery
                          - popularItemMin
                          - trainingSampleFraction
                          - userIdField
                          - itemIdField
                          - weightField
                          - maxIters
                          - trainingDataFrameConfigOptions
                          - initialBlocks
                      - label: Model Settings
                        properties:
                          - modelId
                          - saveModel
                          - modelCollection
                          - alwaysTrain
                      - label: Grid Search Settings
                        properties:
                          - initialRank
                          - gridSearchWidth
                          - initialAlpha
                          - initialLambda
                          - randomSeed
                      - label: Item Metadata Settings
                        properties:
                          - itemMetadataCollection
                          - itemMetadataJoinField
                          - itemMetadataFields
                  - type: object
                    title: Upload Model Parameters To Cloud
                    description: Upload a trained model's parameters to cloud storage
                    required:
                      - id
                      - modelName
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      modelName:
                        type: string
                        title: Model name
                        description: >-
                          The model name of the Seldon Core deployment to upload
                          (must be a valid lowercased DNS subdomain with no
                          underscores).
                        maxLength: 30
                        pattern: >-
                          ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
                      cloudPath:
                        type: string
                        title: Cloud Path
                        description: >-
                          Path to cloud storage location that will contain the
                          saved parameters for this model - the model version
                          will be appended to the filename at the end of the
                          path string. Supports S3, GCS, or Azure Blob Storage
                          URIs
                      cloudSecret:
                        type: string
                        title: Kubernetes secret name for cloud storage access
                        description: >-
                          Defines the Kubernetes secret that will be used to
                          access cloud storage
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-upload-model
                        default: argo-upload-model
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Classification
                    description: >-
                      Trains a classification model to classify text documents
                      by assigning a label to them.
                    required:
                      - id
                      - trainingCollection
                      - trainingFormat
                      - textField
                      - labelField
                      - deployModelName
                      - workflowType
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      stopwordsBlobName:
                        type: string
                        title: Stopwords Blob Store
                        description: >-
                          Name of the stopwords blob resource. This is a .txt
                          file with one stopword per line. By default the file
                          is called stopwords/stopwords_en.txt however a custom
                          file can also be used. Check documentation for more
                          details on format and uploading to blob store.
                        default: stopwords/stopwords_en.txt
                        reference: blob
                        blobType: file:spark
                      trainingCollection:
                        type: string
                        title: Training data path
                        description: >-
                          Solr collection or cloud storage path where training
                          data is present.
                        minLength: 1
                      trainingFormat:
                        type: string
                        title: Training data format
                        description: The format of the training data - solr, parquet etc.
                        default: solr
                        minLength: 1
                      secretName:
                        type: string
                        title: Cloud storage secret name
                        description: >-
                          Name of the secret used to access cloud storage as
                          defined in the K8s namespace
                        hints:
                          - advanced
                        minLength: 1
                      textField:
                        type: string
                        title: Training collection content field
                        description: Solr field name containing the text to be classified
                        minLength: 1
                      labelField:
                        type: string
                        title: Training collection class field
                        description: >-
                          Solr field name containing the classes/labels for the
                          text
                        minLength: 1
                      trainingDataFilterQuery:
                        type: string
                        title: Training Data Filter Query
                        description: >-
                          Solr or SQL query to filter training data. Use solr
                          query when solr collection is specified in Training
                          Path. Use SQL query when cloud storage location is
                          specified. The table name for SQL is `spark_input`.
                        hints:
                          - code/sql
                          - advanced
                      randomSeed:
                        type: integer
                        title: Random Seed
                        description: >-
                          Pseudorandom determinism fixed by keeping this seed
                          constant
                        default: 12345
                        hints:
                          - advanced
                      trainingSampleFraction:
                        type: number
                        title: Training Data Sampling Fraction
                        description: Choose a fraction of the data for training.
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      deployModelName:
                        type: string
                        title: Model Deployment Name
                        description: >-
                          Name of the model to be used for deployment (must be a
                          valid lowercased DNS subdomain with no underscores).
                        maxLength: 30
                        pattern: >-
                          ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
                      workflowType:
                        type: string
                        title: Method
                        description: Method to be used for classification.
                        enum:
                          - Logistic Regression
                          - Starspace
                        default: Logistic Regression
                      minCharLen:
                        type: integer
                        title: Minimum No. of Characters
                        description: >-
                          Minimum length, in characters, for the text to be
                          included into training.
                        default: 2
                        minimum: 1
                        exclusiveMinimum: false
                      maxCharLen:
                        type: integer
                        title: Maximum No. of Characters
                        description: >-
                          Maximum length, in characters, of the training text.
                          Texts longer than this value will be truncated.
                        default: 100000
                        minimum: 1
                        exclusiveMinimum: false
                      lowercaseTexts:
                        type: boolean
                        title: Lowercase Text
                        description: Select if you want the text to be lowercased
                        default: true
                      unidecodeTexts:
                        type: boolean
                        title: Unidecode Text
                        description: Select if you want the text to be unidecoded
                        default: true
                      minClassSize:
                        type: integer
                        title: Minimum no. of examples per class
                        description: >-
                          Minimum number of samples that class should have to be
                          included into training. Otherwise the class and all
                          its samples are dropped.
                        default: 5
                        minimum: 2
                        exclusiveMinimum: false
                      valSize:
                        type: number
                        title: Validation set size
                        description: >-
                          Size of the validation dataset. Provide a float (0, 1)
                          if you want to sample as a fraction, or an integer >=
                          1 if you want to sample exact number of records.
                        default: 0.1
                      topK:
                        type: integer
                        title: Number of Output classes
                        description: >-
                          Number of most probable output classes to assign to
                          each sample along with their scores.
                        default: 1
                        minimum: 1
                        exclusiveMinimum: false
                      featurizerType:
                        type: string
                        title: Featurizer
                        description: >-
                          The type of featurizer to use. TFIDF will compute both
                          term-frequency and inverse document-frequency, whereas
                          Count will use only term-frequency
                        enum:
                          - tfidf
                          - count
                        default: tfidf
                        hints:
                          - advanced
                      useCharacters:
                        type: boolean
                        title: Use Characters
                        description: >-
                          Whether to use the characters or word analyzer. Use
                          words if the text is long. Using characters on long
                          text can significantly increase vectorization time and
                          memory requirements.
                        default: true
                      tokenPattern:
                        type: string
                        title: Token filtering pattern
                        description: Regex pattern for filtering tokens.
                        default: (?u)\b\w\w+\b
                        hints:
                          - hidden
                      minDf:
                        type: number
                        title: Min Document Frequency
                        description: >-
                          Minimum Df for token to be considered. Provide a float
                          (0,1) if you want to specify as a fraction, otherwise
                          integer >= 1 to specify the exact number of documents
                          in which a token should occur.
                        default: 1
                        hints:
                          - advanced
                      maxDf:
                        type: number
                        title: Max Document Frequency
                        description: >-
                          Maximum Df for token to be considered. Provide a float
                          (0,1) if you want to specify as a fraction, otherwise
                          integer >= 1 to specify the exact number of documents
                          in which a token should occur
                        default: 0.8
                        hints:
                          - advanced
                      minNgram:
                        type: integer
                        title: Min Ngram size
                        description: Minimum word or character ngram size to be used.
                        minimum: 1
                        exclusiveMinimum: false
                      maxNgram:
                        type: integer
                        title: Max Ngram size
                        description: Maximum word or character ngram size to be used.
                        minimum: 1
                        exclusiveMinimum: false
                      maxFeatures:
                        type: integer
                        title: Maximum Vocab Size
                        description: >-
                          Maximum number of tokens (including word or character
                          ngrams) to consider for the vocabulary. Less frequent
                          tokens will be omitted.
                        default: 250000
                        minimum: 1
                        exclusiveMinimum: false
                      norm:
                        type: string
                        title: Use Norm
                        description: Select the norm method to use.
                        enum:
                          - None
                          - L1
                          - L2
                        default: None
                        hints:
                          - advanced
                      smoothIdf:
                        type: boolean
                        title: Smooth IDF
                        description: >-
                          Smooth IDF weights by adding one to document
                          frequencies. Prevents zero divisions.
                        default: true
                        hints:
                          - advanced
                      sublinearTf:
                        type: boolean
                        title: Sublinear TF
                        description: >-
                          Whether to apply sublinear scaling to TF, i.e. replace
                          tf with 1 + log(tf). It usually helps when characters
                          are used. 
                        default: true
                        hints:
                          - advanced
                      scaling:
                        type: boolean
                        title: Scale Features
                        description: >-
                          Whether to apply Standard Scaling (X - mean(X)) /
                          std(X) for the features. If the feature vector is
                          sparse (no dimensionality reduction is used), then
                          only division on standard deviation will be applied.
                        default: true
                      dimReduction:
                        type: boolean
                        title: Perform Dimensionality Reduction
                        description: >-
                          Whether to perform dimensionality reduction or not.
                          Truncated SVD is used to reduce dimensionality.
                          Reduces overfitting and training time. Note that
                          sparse vectors will become dense.
                        default: false
                      dimReductionSize:
                        type: integer
                        title: Reduced Dimension Size
                        description: >-
                          The target dimension size of the features after
                          dimensionality reduction.
                        default: 256
                        minimum: 1
                        exclusiveMinimum: false
                      penalty:
                        type: string
                        title: Penalty
                        description: >-
                          Specify the norm used in the penalization. l2 is
                          supported only by the ‘newton-cg’, ‘sag’ and ‘lbfgs’
                          solvers. ‘elasticnet’ is only supported by the ‘saga’
                          solver. Select none, if you don't want to regularize
                          (this is not supported by the `liblinear` solver).
                        enum:
                          - l1
                          - l2
                          - elsaticnet
                          - none
                        default: l2
                        hints:
                          - advanced
                      l1Ratio:
                        type: number
                        title: L1 penalty ratio
                        description: >-
                          Only used with the `elasticnet` penalty. If its value
                          = 0, l2 penalty will be used. If it's value = 1, l1
                          penalty will be used. A value in between will use the
                          appropirate ratio of l1 and l2 penalties.
                        default: 0.5
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      tol:
                        type: number
                        title: Stopping tolerance
                        description: Tolerance for stopping criteria.
                        default: 0.0001
                      reg:
                        type: number
                        title: Regularization term
                        description: >-
                          This is the inverse of regularization strength.
                          Smaller values result in stronger regularization.
                        default: 1
                      useClassWeights:
                        type: boolean
                        title: Use class weights
                        description: >-
                          If true, a weight is applied to each class inversely
                          proportional to its frequency.
                        default: false
                      solver:
                        type: string
                        title: Optimization Algorithm
                        description: >-
                          The optimization algorithm to use to fit to the data.
                          LBFGS and SAGA are good initial choices.
                        enum:
                          - lbfgs
                          - newton-cg
                          - liblinear
                          - sag
                          - saga
                        default: lbfgs
                        hints:
                          - advanced
                      multiClass:
                        type: string
                        title: Loss Method
                        description: >-
                          Whether to train a binary classifier for each class or
                          use a multinomial loss. ‘auto’ selects ‘ovr’ if the
                          data is binary, or if algorithm=’liblinear’, and
                          otherwise selects ‘multinomial’.
                        enum:
                          - auto
                          - ovr
                          - multinomial
                        default: auto
                        hints:
                          - advanced
                      maxIter:
                        type: integer
                        title: Maximum iterations for algorithm
                        description: >-
                          Maximum number of iterations taken for the
                          optimization algorithm to converge.
                        default: 200
                        minimum: 1
                        exclusiveMinimum: false
                      textLayersSizes:
                        type: string
                        title: Hidden sizes before text embedding
                        description: >-
                          Sizes of hidden layers before the embedding layer for
                          text. Specify as a list of numbers for multiple layers
                          or a single number for 1 layer. Leave blank if no
                          hidden layers are required.
                        default: '[256, 128]'
                        pattern: ^(\[(((\d)*,\s*)*(\d+)+)?\])?$
                      labelLayersSizes:
                        type: string
                        title: Hidden sizes before class embedding
                        description: >-
                          Sizes of hidden layers before the embedding layer for
                          classes. Specify as a list of numbers for multiple
                          layers or a single number for 1 layer. Leave blank if
                          no hidden layers are required.
                        default: '[]'
                        pattern: ^(\[(((\d)*,\s*)*(\d+)+)?\])?$
                      embeddingsSize:
                        type: integer
                        title: Embedding size
                        description: >-
                          Dimension size of final embedding vectors for text and
                          class.
                        default: 100
                        minimum: 1
                        exclusiveMinimum: false
                      regTerm:
                        type: number
                        title: Regularization Term
                        description: Scale of L2 regularization
                        default: 0.002
                      dropout:
                        type: number
                        title: Dropout
                        description: Probability for applying dropout regularization.
                        default: 0.2
                      embeddingReg:
                        type: number
                        title: Embedding regularization
                        description: >-
                          The scale of how critical the algorithm should be of
                          minimizing the maximum similarity between embeddings
                          of different classes
                        default: 0.8
                        hints:
                          - advanced
                      minBatchSize:
                        type: integer
                        title: Minimum Batch Size
                        description: >-
                          The smallest batch size with which to start training.
                          Batch size will be increased linearly every epoch,
                          upto the maximum batch size specified.
                        default: 64
                        minimum: 1
                        exclusiveMinimum: false
                      maxBatchSize:
                        type: integer
                        title: Maximum Batch Size
                        description: >-
                          The largest batch size to use during training. Batch
                          size will be increased linearly every epoch, upto the
                          maximum batch size specified.
                        default: 128
                        minimum: 1
                        exclusiveMinimum: false
                      numEpochs:
                        type: integer
                        title: Number of training epochs
                        description: Number of epochs for which to train the model.
                        default: 40
                        minimum: 1
                        exclusiveMinimum: false
                      muPos:
                        type: number
                        title: Maximum correct class similarity
                        description: >-
                          How similar algorithm should try to make embedding
                          vectors for correct classes.  The algorithm will try
                          to maximize similarities so that it's higher than the
                          value specified here.
                        default: 0.8
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      muNeg:
                        type: number
                        title: Maximum negative class similarity
                        description: >-
                          How similar algorithm should try to make embedding
                          vectors for negative classes.  The algorithm will try
                          to minimize similarities so that it's lower than the
                          value specified here.
                        default: -0.4
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      similarityType:
                        type: string
                        title: Similarity type
                        description: >-
                          Type of similarity to use to compare the embedded
                          vectors.
                        enum:
                          - cosine
                          - inner
                        default: cosine
                        hints:
                          - advanced
                      numNeg:
                        type: integer
                        title: Number of negative classes for training
                        description: >-
                          Number of negative classes to use during training to
                          minimize their similarity to the input text. Should be
                          less than the total number of classes.
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      useMaxNegSim:
                        type: boolean
                        title: Only minimize max. negative similarity
                        description: >-
                          If true, only the maximum similarity for negative
                          classes will be minimized. If unchecked, all negative
                          similarities will be used.
                        default: true
                        hints:
                          - advanced
                      modelReplicas:
                        type: integer
                        title: Model replicas
                        description: >-
                          How many replicas of the model should be deployed by
                          Seldon Core
                        default: 1
                        minimum: 1
                        exclusiveMinimum: false
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-classification
                        default: argo-classification
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - deployModelName
                          - trainingCollection
                          - trainingFormat
                          - modelReplicas
                          - secretName
                      - label: Training Data Settings
                        properties:
                          - trainingDataFilterQuery
                          - trainingSampleFraction
                          - randomSeed
                          - textField
                          - labelField
                      - label: Preprocessing Parameters
                        properties:
                          - minCharLen
                          - maxCharLen
                          - minClassSize
                          - lowercaseTexts
                          - unidecodeTexts
                      - label: Eval and Output Parameters
                        properties:
                          - valSize
                          - topK
                      - label: Vectorization Parameters
                        properties:
                          - featurizerType
                          - useCharacters
                          - stopwordsBlobName
                          - minDf
                          - maxDf
                          - minNgram
                          - maxNgram
                          - maxFeatures
                          - norm
                          - smoothIdf
                          - sublinearTf
                          - scaling
                          - dimReduction
                          - dimReductionSize
                      - label: Logistic Regression Parameters
                        properties:
                          - penalty
                          - l1Ratio
                          - tol
                          - reg
                          - useClassWeights
                          - solver
                          - multiClass
                          - maxIter
                      - label: Starspace Parameters
                        properties:
                          - textLayersSizes
                          - labelLayersSizes
                          - embeddingsSize
                          - regTerm
                          - dropout
                          - embeddingReg
                          - minBatchSize
                          - maxBatchSize
                          - numEpochs
                          - muPos
                          - muNeg
                          - similarityType
                          - numNeg
                          - useMaxNegSim
                  - type: object
                    title: Delete Ray Model Deployment
                    description: Removes a Ray model deployment from the cluster
                    required:
                      - id
                      - modelName
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      modelName:
                        type: string
                        title: Model name
                        description: The model name of the Ray deployment to delete
                        maxLength: 30
                        pattern: >-
                          ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-delete-ray-model
                        default: argo-delete-ray-model
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Delete Collections in Milvus (deprecated)
                    description: Deletes specified collections in Milvus
                    required:
                      - id
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      collections-list:
                        type: array
                        title: Collections
                        description: List of collections in Milvus that should be deleted.
                        items:
                          type: string
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-milvus-delete-collections
                        default: argo-milvus-delete-collections
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Ground Truth
                    description: >-
                      Use this job when you want to estimate ground truth
                      queries using click and query signals with document
                      relevance per query determined using a click/skip formula.
                      Pair this job with ranking metrics job to calculate
                      relevance metrics, such as nDCG
                    required:
                      - id
                      - signalsCollection
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      signalsCollection:
                        type: string
                        title: Signals collection
                        description: >-
                          Collection containing click signals and the associated
                          search log identifier
                        minLength: 1
                      searchLogsAddOpts:
                        type: object
                        title: Search Logs and Options
                        description: >-
                          Additional options to use while loading search logs
                          collection
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      signalsAddOpts:
                        type: object
                        title: Additional Signals Options
                        description: >-
                          Additional options to use while loading signals
                          collection
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      searchLogsPipeline:
                        type: string
                        title: Search Logs Pipeline
                        description: Pipeline id associated with search log entries
                        hints:
                          - advanced
                        minLength: 1
                      joinKeySearchLogs:
                        type: string
                        title: Join Key (Query Signals)
                        description: Join key of query signals in the signals collection
                        default: id
                        hints:
                          - advanced
                      joinKeySignals:
                        type: string
                        title: Join Key (Click Signals)
                        description: Join key of click signals in the signals collection
                        default: fusion_query_id
                        hints:
                          - advanced
                      filterQueries:
                        type: array
                        title: Filter Queries
                        description: >-
                          Filter queries to apply while choosing top queries
                          from query signals in signals collection
                        hints:
                          - advanced
                        items:
                          type: string
                      topQueriesLimit:
                        type: integer
                        title: Top Queries Limit
                        description: >-
                          Total number of queries to pick for Ground truth
                          calculations
                        default: 100
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - ground_truth
                        default: ground_truth
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - signalsCollection
                      - label: Additional Options
                        properties:
                          - searchLogsPipeline
                          - joinKeySearchLogs
                          - joinKeySignals
                          - searchLogsAddOpts
                          - signalsAddOpts
                          - filterQueries
                          - topQueriesLimit
                  - type: object
                    title: Create Seldon Core Model Deployment
                    description: Deploys a Seldon Core Model into the Fusion cluster
                    required:
                      - id
                      - deployModelName
                      - modelDockerRepo
                      - modelDockerImage
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      deployModelName:
                        type: string
                        title: Model name
                        description: >-
                          The model name of the Seldon Core deployment to deploy
                          (must be a valid lowercased DNS subdomain with no
                          underscores).
                        maxLength: 30
                        pattern: >-
                          ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
                      modelReplicas:
                        type: integer
                        title: Model replicas
                        description: >-
                          How many replicas of the model should be deployed by
                          Seldon Core
                        default: 1
                      modelDockerRepo:
                        type: string
                        title: Docker repository
                        description: >-
                          Defines the Docker repository where the model image is
                          located.
                      modelDockerImage:
                        type: string
                        title: Image name
                        description: Name of the model's docker image
                      modelDockerSecret:
                        type: string
                        title: Kubernetes secret name for model repo
                        description: >-
                          Defines the Kubernetes secret to be used with the
                          Docker repository
                      columnNames:
                        type: string
                        title: Output column names for model
                        description: >-
                          A list of column names that the model generates which
                          the ML Service will return after inference.
                        default: '[output1, output2]'
                      cloudPath:
                        type: string
                        title: Cloud Path
                        description: >-
                          Path to cloud storage location that contains the saved
                          parameters for this model. Supports S3, GCS, or Azure
                          Blob Storage URIs
                        hints:
                          - advanced
                      cloudSecret:
                        type: string
                        title: Kubernetes secret name for cloud storage access
                        description: >-
                          Defines the Kubernetes secret that will be used to
                          access cloud storage
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-deploy-model
                        default: argo-deploy-model
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Trending Recommender
                    description: Trending Recommender
                    required:
                      - id
                      - trainingCollection
                      - dataFormat
                      - refTimeRange
                      - targetTimeRange
                      - countField
                      - typeField
                      - timeField
                      - docIdField
                      - types
                      - recsCount
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Training Collection
                        description: Solr Collection containing labeled training data
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Solr Fields to Read
                        description: >-
                          Fields to extract from Solr (not used for other
                          formats)
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Training data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr
                        default: '*:*'
                        hints:
                          - advanced
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: Solr Collection to store model-labeled data to
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - advanced
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      refTimeRange:
                        type: integer
                        title: Reference Time Days
                        description: >-
                          Number of reference days: number of days to use as
                          baseline to find trends (calculated from today)
                      targetTimeRange:
                        type: integer
                        title: Target Time Days
                        description: >-
                          Number of target days: number of days to use as target
                          to find trends (calculated from today)
                      numWeeksRef:
                        type: number
                        title: Num Weeks Reference
                        description: >-
                          If using filter queries for reference and target time
                          ranges, enter the value of (reference days / target
                          days) here (if not using filter queries, this will be
                          calculated automatically)
                        hints:
                          - advanced
                      sparkPartitions:
                        type: integer
                        title: Set minimum Spark partitions for input
                        description: >-
                          Spark will re-partition the input to have this number
                          of partitions. Increase for greater parallelism
                        default: 200
                        hints:
                          - advanced
                      countField:
                        type: string
                        title: Event Count Field Name
                        description: >-
                          Field containing the number of times an event (e.g.
                          click) occurs for a particular query; count_i in the
                          raw signal collection or aggr_count_i in the
                          aggregated signal collection.
                        default: aggr_count_i
                        minLength: 1
                      referenceTimeFilterQuery:
                        type: string
                        title: Reference Filter Time Query
                        description: >-
                          Add a Spark SQL filter query here for greater control
                          of time filtering
                        hints:
                          - advanced
                      targetFilterTimeQuery:
                        type: string
                        title: Target Filter Time Query
                        description: >-
                          Add a Spark SQL filter query here for greater control
                          of time filtering
                        hints:
                          - advanced
                      typeField:
                        type: string
                        title: Type field
                        description: Enter type field (default is type)
                        default: aggr_type_s
                      timeField:
                        type: string
                        title: Time field
                        description: Enter time field (default is timestamp_tdt)
                        default: timestamp_tdt
                      docIdField:
                        type: string
                        title: Document ID field
                        description: Enter document id field (default is doc_id)
                        default: doc_id_s
                      types:
                        type: string
                        title: Event types
                        description: >-
                          Enter a comma-separated list of event types to filter
                          on
                        default: click,add
                      recsCount:
                        type: integer
                        title: Recommendation Count
                        description: >-
                          Maximum number of recs to generate (or -1 for no
                          limit)
                        default: 500
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - trending-recommender
                        default: trending-recommender
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - countField
                  - type: object
                    title: Smart Answers Evaluate Pipeline
                    description: Evaluates performance of a configured pipeline
                    required:
                      - id
                      - inputEvaluationCollection
                      - trainingFormat
                      - outputEvaluationCollection
                      - outputFormat
                      - appName
                      - queryPipelineName
                      - collectionName
                      - returnFields
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      inputEvaluationCollection:
                        type: string
                        title: Input Evaluation Data Path
                        description: >-
                          Cloud storage path or Solr collection to pull labeled
                          data for use in evaluation
                        minLength: 1
                      trainingFormat:
                        type: string
                        title: Input data format
                        description: The format of the input data - solr, parquet etc.
                        default: solr
                        minLength: 1
                      outputEvaluationCollection:
                        type: string
                        title: Output Evaluation Data Path
                        description: >-
                          Cloud storage path or Solr collection to store
                          evaluation results (recommended collection is
                          job_reports)
                        minLength: 1
                      partitionFields:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      batchSize:
                        type: string
                        title: Output Batch Size
                        description: >-
                          If writing to solr, this field defines the batch size
                          for documents to be pushed to solr.
                        hints:
                          - advanced
                      outputFormat:
                        type: string
                        title: Output format
                        description: The format of the output data - solr, parquet etc.
                        default: solr
                        minLength: 1
                      secretName:
                        type: string
                        title: Cloud storage secret name
                        description: >-
                          Name of the secret used to access cloud storage as
                          defined in the K8s namespace
                        hints:
                          - advanced
                        minLength: 1
                      trainingDataFilterQuery:
                        type: string
                        title: Training Data Filter Query
                        description: >-
                          Solr or SQL query to filter training data. Use solr
                          query when solr collection is specified in Training
                          Path. Use SQL query when cloud storage location is
                          specified. The table name for SQL is `spark_input`
                        hints:
                          - code/sql
                          - advanced
                      trainingSampleFraction:
                        type: number
                        title: Sampling proportion
                        description: >-
                          The proportion of data to be sampled from the full
                          dataset. Use a value between 0 and 1 for a proportion
                          (e.g. 0.5 for 50%), or for a specific number of
                          examples, use an integer larger than 1. Leave blank
                          for no sampling
                        hints:
                          - advanced
                      seed:
                        type: integer
                        title: Sampling Seed
                        description: Random seed for sampling
                        default: 12345
                        hints:
                          - advanced
                      testQuestionFieldInFile:
                        type: string
                        title: Test Question Field
                        description: >-
                          Defines the field in the collection containing the
                          test question
                        default: question
                      matchFieldInFile:
                        type: string
                        title: Ground Truth Field
                        description: >-
                          Field which contains id or text of the ground truth
                          answer in the evaluation collection
                        default: answer_id
                      matchFieldInFusion:
                        type: string
                        title: Answer or id Field in Fusion
                        description: >-
                          Field name in Fusion which contains answer id or text
                          for matching ground truth answer id or text in the
                          evaluation collection
                        default: doc_id
                      appName:
                        type: string
                        title: App name
                        description: Fusion app where indexed documents or QA pairs live.
                      queryPipelineName:
                        type: string
                        title: Fusion Query Pipeline
                        description: >-
                          Configured query pipeline name that should be used for
                          evaluation
                      collectionName:
                        type: string
                        title: Main Collection
                        description: >-
                          Fusion collection where indexed documents or QA pairs
                          live
                      additionalParams:
                        type: string
                        title: Additional query parameters
                        description: >-
                          Additional query parameters to pass to return
                          resultsfrom Fusion. Please specify in dictionary
                          format: e.g. { "rowsFromSolrToRerank": 20,"fq":
                          "type:answer" }"
                        hints:
                          - advanced
                      returnFields:
                        type: string
                        title: Return fields
                        description: >-
                          Fields (comma-separated) that should be returned from
                          the main collection (e.g. question, answer). The job
                          will add them to the output evaluation
                      rankingScoreField:
                        type: string
                        title: Ranking score
                        description: Score to be used for ranking and evaluation
                        default: ensemble_score
                        hints:
                          - advanced
                      metricsList:
                        type: string
                        title: Metrics list
                        description: >-
                          List of metrics that should be computed during
                          evaluation. e.g.["recall","precision","map","mrr"]
                        default: '["recall","map","mrr"]'
                        hints:
                          - advanced
                      kList:
                        type: string
                        title: Metrics@k list
                        description: >-
                          The k retrieval position that will be used to compute
                          for each metric
                        default: '[1,3,5]'
                        hints:
                          - advanced
                      doWeightsSelection:
                        type: boolean
                        title: Perform weights selection
                        description: >-
                          Whether to perform grid search to find the best
                          weights combination for ranking scores for query
                          pipeline's Compute Mathematical Expression stage"
                        default: false
                        hints:
                          - advanced
                      solrScaleFunc:
                        type: string
                        title: Solr scale function
                        description: >-
                          Function used in the pipeline to scale Solr scores.
                          E.g., scale by max Solr score retrieved (max), scale
                          by log with base 10 (log10) or take squre root of
                          score (pow0.5)
                        default: max
                      scoreListForWeights:
                        type: string
                        title: List of ranking scores for ensemble
                        description: >-
                          Ranking scores (comma-separated) used for ensemble in
                          the query pipeline's Compute Mathematical Expression
                          stage. The job will perform weights selection for the
                          listed scores
                        default: score,vectors_distance
                      targetRankingMetric:
                        type: string
                        title: Target metric to use for weight selection
                        description: >-
                          Target ranking metric to optimize during weights
                          selection
                        default: mrr@3
                      fetcherType:
                        type: string
                        title: Fetcher Type to use with query evaluation
                        default: query-service
                        hints:
                          - hidden
                      useLabelingResolution:
                        type: boolean
                        title: Use Labeling Resolution
                        description: >-
                          Check this to determine similar questions and similar
                          answers via labeling resolution and graph connected
                          components. Does not work well with signals data.
                        default: false
                        hints:
                          - advanced
                      useConcurrentQuerying:
                        type: boolean
                        title: Use Concurrent Querying
                        description: >-
                          Check this option if you want to make concurrent
                          queries to Fusion. It will greatly speed up the job at
                          the cost of increased load on Fusion. Use with
                          caution.
                        default: false
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-qna-evaluate
                        default: argo-qna-evaluate
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input / Output Parameters
                        properties:
                          - inputEvaluationCollection
                          - trainingFormat
                          - outputEvaluationCollection
                          - outputFormat
                          - trainingDataFilterQuery
                          - testQuestionFieldInFile
                          - matchFieldInFile
                          - trainingSampleFraction
                          - seed
                          - useLabelingResolution
                          - partitionFields
                          - batchSize
                          - secretName
                      - label: Query Pipeline Input / Output Parameters
                        properties:
                          - appName
                          - collectionName
                          - queryPipelineName
                          - matchFieldInFusion
                          - additionalParams
                          - returnFields
                          - useConcurrentQuerying
                      - label: Metrics
                        properties:
                          - rankingScoreField
                          - metricsList
                          - kList
                          - doWeightsSelection
                          - solrScaleFunc
                          - scoreListForWeights
                          - targetRankingMetric
                  - type: object
                    title: Script
                    description: Run a custom Scala script as a Fusion Job.
                    required:
                      - id
                      - script
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      script:
                        type: string
                        title: Scala Script
                        description: >-
                          Custom script written in Scala to be executed in
                          Fusion as a Spark job.
                        hints:
                          - lengthy
                          - code/scala
                        minLength: 1
                      shellOptions:
                        type: array
                        title: Spark Shell Options
                        description: >-
                          Additional options to pass to the Spark shell when
                          running this job.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      interpreterParams:
                        type: array
                        title: Interpreter Params
                        description: Bind the key/values to the Scala interpreter
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - script
                        default: script
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Synonym Detection (Deprecated)
                    description: >-
                      Use this job to generate synonym and similar query pairs.
                      This job is deprecated.
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - misspellingSQLDataFormat
                      - phraseSQLDataFormat
                      - countField
                      - docIdField
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Input Collection
                        description: >-
                          Collection containing queries, document id and event
                          counts. Can be either signal aggregation collection or
                          raw signals collection.
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Query Field Name
                        description: >-
                          Field containing queries. Change to query to use
                          against raw signals
                        default: query_s
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr, Spark SQL expression for all other data sources
                        default: '*:*'
                        hints:
                          - dummy
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: Collection to store synonym and similar query pairs.
                        hints:
                          - dummy
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - hidden
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      misspellingCollection:
                        type: string
                        title: Misspelling Job Result Collection
                        description: >-
                          Solr collection containing reviewed result of Token
                          and phrase spell correction job. Defaults to the
                          query_rewrite_staging collection for the app.
                      misspellingsFilterQuery:
                        type: string
                        title: Misspelling Job Result Filter Query
                        description: >-
                          Solr query to additionally filter the misspelling
                          results. Defaults to reading all approved spell
                          corrections.
                        default: type:spell
                      keyPhraseCollection:
                        type: string
                        title: Phrase Extraction Job Result Collection
                        description: >-
                          Solr collection containing reviewed result of Phrase
                          extraction job. Defaults to the query_rewrite_staging
                          collection for the app.
                      keyPhraseFilterQuery:
                        type: string
                        title: Phrase Extraction Job Result Filter Query
                        description: >-
                          Solr query to additionally filter the phrase
                          extraction results. Defaults to reading all approved
                          phrases.
                        default: type:phrase
                      misspellingSQL:
                        type: string
                        title: Spark SQL filter query for misspelling data
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spell_input
                        default: >-
                          SELECT surface_form AS misspelling_s, output AS
                          correction_s FROM spell_input WHERE doc_type  =
                          'query_rewrite' AND type = 'spell' AND review IN
                          ('approved' OR 'auto')
                        hints:
                          - code/sql
                          - advanced
                      misspellingSQLDataFormat:
                        type: string
                        title: Misspelling Data format
                        description: >-
                          Spark-compatible format that contains spelling data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      phraseSQL:
                        type: string
                        title: Spark SQL filter query for phrase data
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as phrase_input
                        default: >-
                          SELECT surface_form AS phrases_s, coalesce(confidence,
                          lit(1d)) AS likelihood_d, coalesce(word_count,
                          lit(1d)) AS word_num_i FROM phrase_input WHERE
                          doc_type  = 'query_rewrite' AND type = 'phrase' AND
                          review IN ('approved' OR 'auto')
                        hints:
                          - code/sql
                          - advanced
                      phraseSQLDataFormat:
                        type: string
                        title: Phrase Data format
                        description: >-
                          Spark-compatible format that contains phrase data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      countField:
                        type: string
                        title: Event Count Field Name
                        description: >-
                          Solr field containing number of events (e.g., number
                          of clicks). Change to count_i when running against raw
                          signals
                        default: aggr_count_i
                      docIdField:
                        type: string
                        title: Document id Field Name
                        description: >-
                          Solr field containing document id that user clicked.
                          Change to doc_id for raw signal collection
                        default: 'doc_id_s '
                      overlapThreshold:
                        type: number
                        title: Query Similarity Threshold
                        description: >-
                          The threshold above which query pairs are consider
                          similar. We can get more synonym pairs if increase
                          this value but quality may get reduced.
                        default: 0.5
                        hints:
                          - advanced
                      similarityThreshold:
                        type: number
                        title: Synonym Similarity Threshold
                        description: >-
                          The threshold above which synonym pairs are consider
                          similar. We can get more synonym pairs if increase
                          this value but quality may get reduced.
                        default: 0.01
                        hints:
                          - advanced
                      minQueryCount:
                        type: integer
                        title: Query Clicks Threshold
                        description: >-
                          The min number of clicked documents needed for
                          comparing queries.
                        default: 5
                        hints:
                          - advanced
                      keywordsBlobName:
                        type: string
                        title: Keywords Blob Store
                        description: >-
                          Name of the keywords blob resource. Typically, this
                          should be a csv file uploaded to blob store in a
                          specific format. Check documentation for more details
                          on format and uploading to blob store.
                        reference: blob
                        blobType: file:spark
                      synonymBlobName:
                        type: string
                        title: Custom Synonym Blob Store
                        description: >-
                          Name of the custom synonym blob resource. This is a
                          Solr synonym file that will be used in the synonym
                          detection job and will override any generated synonyms
                          (indicated by a 'supplied' field in the Rules UI).
                        hints:
                          - advanced
                        reference: blob
                        blobType: file:spark
                      analyzerConfigQuery:
                        type: string
                        title: Lucene Analyzer Schema
                        description: >-
                          LuceneTextAnalyzer schema for tokenizing queries
                          (JSON-encoded)
                        default: >-
                          { "analyzers": [ { "name":
                          "LetterTokLowerStem","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "letter"
                          },"filters": [{ "type": "lowercase" },{ "type":
                          "length", "min": "2", "max": "32767" },{ "type":
                          "KStem" }] }],"fields": [{ "regex": ".+", "analyzer":
                          "LetterTokLowerStem" } ]}
                        hints:
                          - lengthy
                          - advanced
                          - code/json
                        minLength: 1
                      stopwordsList:
                        type: array
                        title: List of stopwords
                        description: Stopwords defined in Lucene analyzer config
                        hints:
                          - readonly
                          - hidden
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      enableAutoPublish:
                        type: boolean
                        title: Enable auto-publishing
                        description: >-
                          If true, automatically publishes rewrites for rules.
                          Default is false to allow for initial human-aided
                          reviewing
                        default: false
                        hints:
                          - advanced
                      sparkPartitions:
                        type: integer
                        title: Set minimum Spark partitions for input
                        description: >-
                          Spark will re-partition the input to have this number
                          of partitions. Increase for greater parallelism
                        default: 200
                        hints:
                          - advanced
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - synonymDetection
                        default: synonymDetection
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - countField
                      - label: Model Tuning Parameters
                        properties:
                          - overlapThreshold
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfigQuery
                      - label: Misc. Parameters
                        properties:
                          - keywordsBlobName
                  - type: object
                    title: Cluster Labeling
                    description: >-
                      Use this job when you already have clusters or
                      well-defined document categories, and you want to discover
                      and attach keywords to see representative words within
                      those existing clusters. (If you want to create new
                      clusters, use the Document Clustering job.)
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - clusterIdField
                      - outputCollection
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Training Collection
                        description: >-
                          Solr Collection containing documents with defined
                          categories or clusters
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Field to detect keywords from
                        description: >-
                          Field containing data from which to discover keywords
                          for the cluster
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Training data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr
                        default: '*:*'
                        hints:
                          - advanced
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: Solr Collection to store output data to
                        minLength: 1
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - advanced
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      modelId:
                        type: string
                        title: Model ID
                        description: >-
                          Identifier for the model to be trained; uses the
                          supplied Spark Job ID if not provided.
                        hints:
                          - advanced
                        minLength: 1
                      clusterIdField:
                        type: string
                        title: Existing Document Category Field
                        description: >-
                          Field that contains your existing cluster IDs or
                          document categories.
                        minLength: 1
                      analyzerConfig:
                        type: string
                        title: Lucene Analyzer Schema
                        description: >-
                          LuceneTextAnalyzer schema for tokenization
                          (JSON-encoded)
                        default: >-
                          { "analyzers": [{ "name":
                          "StdTokLowerStop","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "standard"
                          },"filters": [{ "type": "lowercase" },{ "type":
                          "KStem" },{ "type": "length", "min": "2", "max":
                          "32767" },{ "type": "fusionstop", "ignoreCase":
                          "true", "format": "snowball", "words":
                          "org/apache/lucene/analysis/snowball/english_stop.txt"
                          }] }],"fields": [{ "regex": ".+", "analyzer":
                          "StdTokLowerStop" } ]}
                        hints:
                          - lengthy
                          - code/json
                        minLength: 1
                      clusterLabelField:
                        type: string
                        title: Top Unique Terms Field Name
                        description: >-
                          Output field name for top frequent terms that are
                          (mostly) unique for each cluster.
                        default: cluster_label
                      freqTermField:
                        type: string
                        title: Top Frequent Terms Field Name
                        description: >-
                          Output field name for top frequent terms in each
                          cluster. These may overlap with other clusters.
                        default: freq_terms
                      minDF:
                        type: number
                        title: Min Doc Support
                        description: >-
                          Min number of documents the term has to show up.
                          value<1.0 denotes a percentage, value=1.0 denotes
                          100%, value>1.0 denotes the exact number.
                        default: 5
                      maxDF:
                        type: number
                        title: Max Doc Support
                        description: >-
                          Max number of documents the term can show up.
                          value<1.0 denotes a percentage, value=1.0 denotes
                          100%, value>1.0 denotes the exact number.
                        default: 0.75
                      norm:
                        type: integer
                        title: Vector normalization
                        description: >-
                          p-norm to normalize vectors with (choose -1 to turn
                          normalization off)
                        enum:
                          - -1
                          - 0
                          - 1
                          - 2
                        default: 2
                        hints:
                          - advanced
                      numKeywordsPerLabel:
                        type: integer
                        title: Number of Keywords for Each Cluster
                        description: Number of Keywords needed for labeling each cluster.
                        default: 5
                      stopwordsList:
                        type: array
                        title: List of stopwords
                        description: Stopwords defined in Lucene analyzer config
                        hints:
                          - readonly
                          - hidden
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - cluster_labeling
                        default: cluster_labeling
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - clusterIdField
                          - freqTermField
                          - clusterLabelField
                      - label: Model Tuning Parameters
                        properties:
                          - maxDF
                          - minDF
                          - norm
                          - numKeywordsPerLabel
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfig
                      - label: Misc. Parameters
                        properties:
                          - modelId
                  - type: object
                    title: Build Training Data
                    description: >-
                      Use this job to build training data for query
                      classification by joining signals with catalog.
                    required:
                      - id
                      - fieldToVectorize
                      - catalogPath
                      - catalogFormat
                      - signalsPath
                      - outputPath
                      - categoryField
                      - catalogIdField
                      - itemIdField
                      - countField
                      - analyzerConfig
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Training Collection
                        description: Solr Collection containing labeled training data
                        hints:
                          - dummy
                          - hidden
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Query Field
                        description: Field containing query strings.
                        default: query_s
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Signals Format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        hints:
                          - dummy
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Signal Data Filter Query
                        description: >-
                          Solr query to additionally filter signals. For
                          non-solr data source use SPARK SQL FILTER QUERY under
                          Advanced to filter results
                        default: '*:*'
                        hints:
                          - dummy
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: Solr Collection to store model-labeled data to
                        hints:
                          - dummy
                          - hidden
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - dummy
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - dummy
                          - hidden
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      catalogPath:
                        type: string
                        title: Catalog Path
                        description: >-
                          Catalog collection or cloud storage path which
                          contains item categories.
                      catalogFormat:
                        type: string
                        title: Catalog Format
                        description: >-
                          Spark-compatible format that contains catalog data
                          (like 'solr', 'parquet', 'orc' etc)
                      signalsPath:
                        type: string
                        title: Signals Path
                        description: >-
                          Signals collection or cloud storage path which
                          contains item categories.
                      outputPath:
                        type: string
                        title: Output Path
                        description: >-
                          Output collection or cloud storage path which contains
                          item categories.
                      categoryField:
                        type: string
                        title: Category Field in Catalog
                        description: Item category field in catalog.
                      catalogIdField:
                        type: string
                        title: Item Id Field in Catalog
                        description: >-
                          Item Id field in catalog, which will be used to join
                          with signals
                      itemIdField:
                        type: string
                        title: Item Id Field in Signals
                        description: >-
                          Item Id field in signals, which will be used to join
                          with catalog.
                        default: doc_id_s
                      countField:
                        type: string
                        title: Count Field in Signals
                        description: Count Field in raw or aggregated signals.
                        default: aggr_count_i
                      topCategoryProportion:
                        type: number
                        title: Top Category Proportion
                        description: >-
                          Proportion of the top category has to be among all
                          categories.
                        default: 0.5
                      topCategoryThreshold:
                        type: integer
                        title: Minimum Count
                        description: Minimum number of query,category pair counts.
                        default: 1
                        minimum: 1
                        exclusiveMinimum: false
                      analyzerConfig:
                        type: string
                        title: Lucene Text Analyzer
                        description: The style of text analyzer you would like to use.
                        default: >-
                          { "analyzers": [{ "name":
                          "StdTokLowerStop","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "standard"
                          },"filters": [{ "type": "lowercase" }] }],"fields": [{
                          "regex": ".+", "analyzer": "StdTokLowerStop" } ]}
                        hints:
                          - lengthy
                          - code/json
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - build-training
                        default: build-training
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                          - catalogPath
                          - catalogFormat
                          - signalsPath
                          - outputPath
                          - dataOutputFormat
                          - partitionCols
                          - sparkSQL
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - categoryField
                          - catalogIdField
                          - itemIdField
                          - countField
                      - label: Training Parameters
                        properties:
                          - topCategoryProportion
                          - topCategoryThreshold
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfig
                  - type: object
                    title: Transfer Collection To Cloud
                    description: >-
                      Transfer Collection to Cloud Storage, for collections that
                      need to be migrated or copied to cloud storage
                    required:
                      - id
                      - inputCollection
                      - outputLocation
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      inputCollection:
                        type: string
                        title: Collection
                        description: Solr collection to copy
                        minLength: 1
                      outputLocation:
                        type: string
                        title: Output Location
                        description: >-
                          URI of output location (e.g. s3a://..., gs://...,
                          wasb://...)
                        minLength: 1
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                      outputFormat:
                        type: string
                        title: Output format
                        description: Format for cloud output (e.g. parquet, json, csv)
                        default: parquet
                      sparkPartitions:
                        type: integer
                        title: Set minimum Spark partitions for input
                        description: >-
                          Spark will re-partition the input to have this number
                          of partitions. Increase for greater parallelism
                        default: 200
                        hints:
                          - advanced
                      readOptions:
                        type: array
                        title: Read Options
                        description: Options used when reading input from Solr
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - transfer
                        default: transfer
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: BPR Recommender
                    description: >-
                      Use this job when you want to compute user recommendations
                      or item similarities using a Bayesian Personalized Ranking
                      recommender. You can also implement a user-to-item
                      recommender in the advanced section of this job’s
                      configuration UI.
                    required:
                      - id
                      - trainingCollection
                      - trainingFormat
                      - outputFormat
                      - userIdField
                      - itemIdField
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      outputBatchSize:
                        type: string
                        title: Output Batch Size
                        description: Batch size of documents when pushing results to solr
                        default: '15000'
                        hints:
                          - advanced
                      jobRunName:
                        type: string
                        title: Job Run Name
                        description: >-
                          Identifier for this job run. Use it to filter
                          recommendations from particular runs.
                        hints:
                          - advanced
                      trainingCollection:
                        type: string
                        title: Training data path
                        description: >-
                          Solr collection or cloud storage path where training
                          data is present.
                        minLength: 1
                      trainingFormat:
                        type: string
                        title: Training data format
                        description: The format of the training data - solr, parquet etc.
                        default: solr
                        minLength: 1
                      secretName:
                        type: string
                        title: Cloud storage secret name
                        description: >-
                          Name of the secret used to access cloud storage as
                          defined in the K8s namespace
                        hints:
                          - advanced
                        minLength: 1
                      outputUserRecsCollection:
                        type: string
                        title: Items-Users Output Path
                        description: >-
                          Solr collection or cloud storage path to store
                          batch-predicted user/item recommendations (if absent,
                          none computed). Specify at least one of Items-Users
                          Output Collection or Items-Items Output Collection.
                        minLength: 1
                      outputItemSimCollection:
                        type: string
                        title: Item-Items Output Path
                        description: >-
                          Solr collection or cloud storage path to store
                          batch-computed item/item similarities (if absent, none
                          computed). Specify at least one of Items-Users Output
                          Collection or Items-Items Output Collection.
                        minLength: 1
                      outputFormat:
                        type: string
                        title: Output data format
                        description: The format of the output data - solr, parquet etc.
                        default: solr
                        minLength: 1
                      partitionFields:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output.
                        hints:
                          - advanced
                      numRecsPerUser:
                        type: integer
                        title: No. of recs per user
                        description: Number of recommendations that will be saved per user.
                        default: 10
                        minimum: 0
                        exclusiveMinimum: false
                      userTopkAnn:
                        type: integer
                        title: No. of User Recs to Compute for Filtering
                        description: >-
                          Applies only when Filter Already Clicked Items is
                          enabled. This is used to fetch additional
                          recommendations so that the value specified for the
                          Number of Recommendations Per User is most likely
                          satisfied with filtering turned on.
                        hints:
                          - advanced
                        minimum: 0
                        exclusiveMinimum: false
                      numSimsPerItem:
                        type: integer
                        title: No. of recs per item
                        description: Number of recommendations that will be saved per item.
                        default: 10
                        minimum: 0
                        exclusiveMinimum: false
                      deleteOldRecs:
                        type: boolean
                        title: Delete Old Recommendations
                        description: >-
                          Should previous recommendations be deleted. If this
                          box is unchecked, then old recommendations will not be
                          deleted but new recommendations will be appended with
                          a different Job ID. Both sets of recommendations will
                          be contained within the same collection. Will only
                          work when output path is solr.
                        default: true
                      excludeFromDeleteFilter:
                        type: string
                        title: Exclude from Delete Filter
                        description: >-
                          If the 'Delete Old Recommendations' flag is enabled,
                          then use this query filter to identify existing
                          recommendation docs to exclude from delete. The filter
                          should identify recommendation docs you want to keep.
                        hints:
                          - advanced
                      filterClicked:
                        type: boolean
                        title: Filter already clicked items
                        description: >-
                          Whether to filter out already clicked items in item
                          recommendations for user. Takes more time but
                          drastically improves quality.
                        default: true
                        hints:
                          - advanced
                      weightField:
                        type: string
                        title: Training Collection Counts/Weights Field
                        description: >-
                          Solr field name containing stored counts/weights the
                          user has for that item. This field is used as weight
                          during training
                        default: aggr_count_i
                      trainingDataFilterQuery:
                        type: string
                        title: Training Data Filter Query
                        description: >-
                          Solr or SQL query to filter training data. Use solr
                          query when solr collection is specified in Training
                          Path. Use SQL query when cloud storage location is
                          specified. The table name for SQL is `spark_input`.
                        hints:
                          - code/sql
                          - advanced
                      trainingSampleFraction:
                        type: number
                        title: Training Data Sampling Fraction
                        description: Choose a fraction of the data for training.
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      userIdField:
                        type: string
                        title: Training Collection User Id Field
                        description: >-
                          Solr field name in the training collection that
                          contains stored User ID.
                        default: user_id_s
                        minLength: 1
                      itemIdField:
                        type: string
                        title: Training Collection Item Id Field
                        description: >-
                          Solr field name in the training collection that
                          contains stored Item ID.
                        default: item_id_s
                        minLength: 1
                      randomSeed:
                        type: integer
                        title: Random Seed
                        description: >-
                          Pseudorandom determinism fixed by keeping this seed
                          constant
                        default: 12345
                        hints:
                          - advanced
                      itemMetadataFields:
                        type: array
                        title: Item Metadata Fields
                        description: >-
                          List of item metadata fields to include in the
                          recommendation output documents. WARNING: Adding many
                          fields can lead to huge output sizes or OOM issues.
                        hints:
                          - advanced
                        items:
                          type: string
                      itemMetadataCollection:
                        type: string
                        title: Item Metadata Path
                        description: >-
                          Cloud storage path or Solr collection containing item
                          metadata fields you want to add to the recommendation
                          output documents. Leave blank and fill in the metadata
                          fields if you want to fetch data from the training
                          collection. Join field needs to be specified.
                        hints:
                          - advanced
                      itemMetadataFormat:
                        type: string
                        title: Metdata format
                        description: The format of the metadata - solr, parquet etc.
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      itemMetadataJoinField:
                        type: string
                        title: Item Metadata Join Field
                        description: >-
                          Name of field in the item metadata collection to join
                          on.
                        hints:
                          - advanced
                      performANN:
                        type: boolean
                        title: Perform approximate nearest neighbor search
                        description: >-
                          Whether to perform approximate nearest neighbor search
                          (ANN). ANN will drastically reduce training time, but
                          accuracy will drop a little. Disable only if training
                          dataset is very small.
                        default: true
                      maxNeighbors:
                        type: integer
                        title: Max neighbors for indexing
                        description: >-
                          If perform ANN, size of the potential neighbors for
                          the indexing phase. Higher value leads to better
                          recall and shorter retrieval times (at the expense of
                          longer indexing time).Reasonable range: 5~100
                        hints:
                          - advanced
                        maximum: 2000
                        exclusiveMaximum: false
                        minimum: 100
                        exclusiveMinimum: false
                      searchNN:
                        type: integer
                        title: Search Depth
                        description: >-
                          If perform ANN, the depth of search used to find
                          neighbors. Higher value improves recall at the expense
                          of longer retrieval time.Reasonable range: 100~2000
                        hints:
                          - advanced
                        maximum: 2000
                        exclusiveMaximum: false
                        minimum: 100
                        exclusiveMinimum: false
                      indexNN:
                        type: integer
                        title: Indexing Depth
                        description: >-
                          If perform ANN, the depth of constructed index. Higher
                          value improves recall at the expense of longer
                          indexing time.Reasonable range: 100~2000
                        hints:
                          - advanced
                        maximum: 2000
                        exclusiveMaximum: false
                        minimum: 100
                        exclusiveMinimum: false
                      factors:
                        type: integer
                        title: Dimension of latent factors
                        description: >-
                          Latent factor dimension used for matrix decomposition.
                          Bigger values require more time and memory but usually
                          provide better results.
                        default: 100
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      epochs:
                        type: integer
                        title: Training iterations
                        description: >-
                          Number of model training iterations. Model will
                          converge better with larger number at the expense of
                          increased training time. For bigger datasets use
                          smaller values.
                        default: 30
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      learningRate:
                        type: number
                        title: Learning rate
                        description: Model learning rate.
                        default: 0.05
                        hints:
                          - advanced
                      metadataCategoryFields:
                        type: array
                        title: Metadata fields for item-item evaluation
                        description: >-
                          These fields will be used for item-item evaluation and
                          for determining if the recommendation pair belong to
                          the same category.
                        hints:
                          - advanced
                        items:
                          type: string
                      minNumItemUniqueClicks:
                        type: integer
                        title: Training Data Filtered By Popular Items
                        description: >-
                          Items must have at least this no. of unique user
                          interactions to be included for training and
                          recommendations. The higher this value, the more
                          popular items selected but the amount of training data
                          will reduce.
                        default: 2
                        minimum: 1
                        exclusiveMinimum: false
                      minNumUserUniqueClicks:
                        type: integer
                        title: Training Data Filtered By User clicks
                        description: >-
                          Users must have at least this no. of unique item
                          interactions to be included for training and
                          recommendations. The higher this value, the more
                          active users are selected but the amount of training
                          data will reduce.
                        default: 2
                        minimum: 1
                        exclusiveMinimum: false
                      minNumClickedProducts:
                        type: integer
                        title: Minimum Clicked Products
                        description: >-
                          Minimum number of clicked products the user should
                          have to be a candidate for the test set.
                        default: 3
                        minimum: 2
                        exclusiveMinimum: false
                      maxNumTestUsers:
                        type: integer
                        title: Maximum Test Users
                        description: >-
                          Maximum number of test users to choose. If more users
                          satisfying the Minimum Clicked Products criterion are
                          present, the number will be capped to what is
                          specified here.
                        default: 10000
                        minimum: 0
                        exclusiveMinimum: false
                      numTestUserClicks:
                        type: integer
                        title: Number of User Clicks for Test
                        description: >-
                          How many test user clicks to use for testing. Should
                          be less than the value for Minimum Clicked Products.
                        default: 1
                        minimum: 1
                        exclusiveMinimum: false
                      doEvaluation:
                        type: boolean
                        title: Evaluate on test data
                        description: >-
                          Evaluate how well the trained model predicts user
                          clicks. Test data will be sampled from original
                          dataset.
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-item-recommender-user
                        default: argo-item-recommender-user
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - trainingFormat
                          - outputUserRecsCollection
                          - outputItemSimCollection
                          - outputFormat
                          - outputBatchSize
                          - secretName
                          - partitionFields
                      - label: Training Data Settings
                        properties:
                          - trainingDataFilterQuery
                          - trainingSampleFraction
                          - userIdField
                          - itemIdField
                          - weightField
                          - trainingDataFrameConfigOptions
                      - label: Model Tuning Parameters
                        properties:
                          - numRecsPerUser
                          - numSimsPerItem
                          - filterClicked
                          - userTopkAnn
                          - minNumItemUniqueClicks
                          - minNumUserUniqueClicks
                          - maxIters
                          - deleteOldRecs
                          - excludeFromDeleteFilter
                          - performANN
                          - maxNeighbors
                          - searchNN
                          - indexNN
                          - factors
                          - epochs
                          - learningRate
                          - randomSeed
                      - label: Evaluation Parameters
                        properties:
                          - doEvaluation
                          - minNumClickedProducts
                          - numTestUserClicks
                          - maxNumTestUsers
                      - label: Item Metadata Settings
                        properties:
                          - itemMetadataCollection
                          - itemMetadataFormat
                          - itemMetadataJoinField
                          - itemMetadataFields
                          - metadataCategoryFields
                  - type: object
                    title: Query-to-Query Collaborative Similarity (deprecated)
                    description: >-
                      Use this job to to batch compute query-query similarities
                      using ALS. Deprecated as of Fusion 5.2.0 and will be
                      removed in a future release; use the Query-to-Query
                      Session Based Similarity job instead.
                    required:
                      - id
                      - trainingCollection
                      - outputQuerySimCollection
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      modelId:
                        type: string
                        title: Recommender Model ID
                        description: >-
                          Identifier for the recommender model. Will be used as
                          the unique key when storing the model in Solr.
                        hints:
                          - advanced
                      modelCollection:
                        type: string
                        title: Model Collection
                        description: >-
                          Collection to load and store the computed model (if
                          absent, it won't be loaded or saved)
                        hints:
                          - advanced
                      saveModel:
                        type: boolean
                        title: Save Model in Solr
                        description: Whether we should save the computed ALS model in Solr
                        default: false
                        hints:
                          - advanced
                      trainingCollection:
                        type: string
                        title: Recommender Training Collection
                        description: >-
                          Item/Query preference collection (often a signals
                          collection or signals aggregation collection)
                      trainingDataFilterQuery:
                        type: string
                        title: Training Data Filter Query
                        description: >-
                          Solr query to filter training data (e.g. downsampling
                          or selecting based on min. pref values)
                        default: '*:*'
                        hints:
                          - advanced
                      popularQueryMin:
                        type: integer
                        title: Training Data Filter By Popular Items
                        description: >-
                          Items must have at least this # of unique users
                          interacting with it to go into the sample
                        default: 2
                        hints:
                          - advanced
                      trainingSampleFraction:
                        type: number
                        title: Training Data Sampling Fraction
                        description: >-
                          Downsample preferences for items (bounded to at least
                          2) by this fraction
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      outputQuerySimCollection:
                        type: string
                        title: Query-to-query Similarity Collection
                        description: >-
                          Collection to store batch-computed query/query
                          similarities (if absent, none computed)
                      outputItemsForQueriesCollection:
                        type: string
                        title: Items-for-query Boosting Collection
                        description: >-
                          Collection to store batch-computed items-for-queries
                          recommendations (if absent, none computed)
                      queryField:
                        type: string
                        title: Training Collection Query Field
                        description: Solr field name containing stored queries
                        default: query
                        hints:
                          - advanced
                      itemIdField:
                        type: string
                        title: Training Collection Item Id Field
                        description: Solr field name containing stored item ids
                        default: item_id_s
                        hints:
                          - advanced
                      weightField:
                        type: string
                        title: Training Collection Weight Field
                        description: >-
                          Solr field name containing stored weights (i.e. time
                          decayed / position weighted counts) the item has for
                          that query
                        default: weight_d
                        hints:
                          - advanced
                      numSims:
                        type: integer
                        title: Number of Query Similarities to Compute
                        description: >-
                          Batch compute and store this many query similarities
                          per query
                        default: 10
                        hints:
                          - advanced
                      numItemsPerQuery:
                        type: integer
                        title: Number of Items per Query to Recommend
                        description: >-
                          Batch compute and store this many item recommendations
                          per query
                        default: 10
                        hints:
                          - advanced
                      initialRank:
                        type: integer
                        title: Recommender Rank
                        description: >-
                          Number of user/item factors in the recommender
                          decomposition (or starting guess for it, if doing
                          parameter grid search)
                        default: 100
                        hints:
                          - advanced
                      initialBlocks:
                        type: integer
                        title: Training Block Size
                        description: >-
                          Number of sub-matrix blocks to break the training data
                          into (default: -1, for auto-sizing)
                        default: -1
                        hints:
                          - hidden
                      maxTrainingIterations:
                        type: integer
                        title: Maximum Training Iterations
                        description: >-
                          Maximum number of iterations to use when learning the
                          matrix decomposition
                        default: 10
                        hints:
                          - advanced
                      initialAlpha:
                        type: number
                        title: Implicit Preference Confidence
                        description: >-
                          Confidence weight (between 0 and 1) to give the
                          implicit preferences (or starting guess, if doing
                          parameter grid search)
                        default: 0.5
                        hints:
                          - advanced
                      initialLambda:
                        type: number
                        title: Smoothing
                        description: >-
                          Smoothing parameter to avoid overfitting (or starting
                          guess, if doing parameter grid search). Slightly
                          larger value needed for small data sets
                        default: 0.01
                        hints:
                          - advanced
                      gridSearchWidth:
                        type: integer
                        title: Grid Search Width
                        description: >-
                          Parameter grid search to be done centered around
                          initial parameter guesses, exponential step size, this
                          number of steps (if <= 0, no grid search)
                        default: 1
                        hints:
                          - advanced
                      randomSeed:
                        type: integer
                        title: Random Seed
                        description: >-
                          Pseudorandom determinism fixed by keeping this seed
                          constant
                        default: 13
                        hints:
                          - advanced
                      implicitRatings:
                        type: boolean
                        title: Implicit Preferences
                        description: >-
                          Treat training preferences as implicit signals of
                          interest (i.e. clicks or other actions) as opposed to
                          explicit query ratings
                        default: true
                      alwaysTrain:
                        type: boolean
                        title: Force model re-training
                        description: >-
                          Even if a model with this modelId exists, re-train if
                          set true
                        default: true
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: Options used when writing output to Solr.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - query_similarity
                        default: query_similarity
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - trainingDataFilterQuery
                          - modelCollection
                          - outputItemsForQueriesCollection
                          - outputQuerySimCollection
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingSampleFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - queryField
                          - itemIdField
                          - weightField
                      - label: Model Tuning Parameters
                        properties:
                          - alwaysTrain
                          - saveModel
                          - gridSearchWidth
                          - implicitRatings
                          - initialAlpha
                          - initialLambda
                          - initialRank
                          - maxTrainingIterations
                          - numItemsPerQuery
                          - numSims
                          - popularQueryMin
                      - label: Misc. Parameters
                        properties:
                          - modelId
                  - type: object
                    title: Content based Recommender
                    description: >-
                      Use this job when you want to compute item similarities
                      based on their content such as product descriptions. 
                    required:
                      - id
                      - trainingCollection
                      - trainingFormat
                      - outputCollection
                      - outputFormat
                      - itemIdField
                      - contentField
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      outputBatchSize:
                        type: string
                        title: Output Batch Size
                        description: Batch size of documents when pushing results to solr
                        default: '15000'
                        hints:
                          - advanced
                      unidecodeText:
                        type: boolean
                        title: Unidecode Text
                        description: Select if you want the text to be unidecoded.
                        default: true
                      lowercaseText:
                        type: boolean
                        title: Lowercase Text
                        description: Select if you want the text to be lowercased.
                        default: true
                      vectorizationUseDl:
                        type: boolean
                        title: Use Deep Learning for vectorization
                        description: >-
                          Select if you want to use deep learning as the method
                          for vectorization. You can choose the other methods
                          too in which case an ensemble will be used.
                        default: true
                      vectorizationUseFasttext:
                        type: boolean
                        title: Use Word2Vec for vectorization
                        description: >-
                          Select if you want to use word2vec as the method for
                          vectorization. You can choose the other methods too in
                          which case an ensemble will be used. Custom embeddings
                          will be learned. Useful for jargon.
                      vectorizationUseTfidf:
                        type: boolean
                        title: Use Tf-Idf for vectorization
                        description: >-
                          Select if you want to use Tf-idf as the method for
                          vectorization. You can choose the other methods too in
                          which case an ensemble will be used.
                      vectorizationDlEnsembleWeight:
                        type: number
                        title: Deep learning vectorization ensemble weight
                        description: >-
                          Ensemble weight for deep learning based vectorization
                          if more than one method of vectorization is selected.
                        default: 1
                      vectorizationFasttextVectorsSize:
                        type: integer
                        title: Size of word vectors
                        description: Word vector dimensions for Word2Vec vectorizer.
                        default: 150
                        minimum: 1
                        exclusiveMinimum: false
                      vectorizationFasttextWindowSize:
                        type: integer
                        title: Word2Vec window size
                        description: >-
                          The window size (context words from [-window, window])
                          for Word2Vec.
                        default: 5
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      vectorizationFasttextEpochs:
                        type: integer
                        title: Word2Vec training epochs
                        description: Number of epochs to train custom Word2Vec embeddings.
                        default: 15
                        minimum: 1
                        exclusiveMinimum: false
                      vectorizationFasttextSkipGram:
                        type: boolean
                        title: Use SkipGram model
                        description: >-
                          Whether to use skip gram for training. If unchecked,
                          CBOW will be used.
                        default: true
                        hints:
                          - hidden
                      vectorizationFasttextMinCount:
                        type: integer
                        title: Min count of words
                        description: >-
                          Minimum times a token needs to occur in the text to be
                          considered for the vocab.
                        default: 1
                        hints:
                          - hidden
                        minimum: 1
                        exclusiveMinimum: false
                      vectorizationFasttextMaxVocabSize:
                        type: integer
                        title: Max vocab size
                        description: >-
                          Maximum number of tokens to consider for the vocab.
                          Less frequent tokens will be omitted.
                        minimum: 1
                        exclusiveMinimum: false
                      vectorizationFasttextUseSubwordNgram:
                        type: boolean
                        title: Use subword ngrams
                        description: Whether to use subword (character) ngrams.
                        default: true
                        hints:
                          - hidden
                      vectorizationFasttextMinNgram:
                        type: integer
                        title: Min Ngram size
                        description: Minimum size for ngrams generated.
                        default: 3
                        hints:
                          - hidden
                        minimum: 1
                        exclusiveMinimum: false
                      vectorizationFasttextMaxNgram:
                        type: integer
                        title: Max Ngram size
                        description: Maximum size for ngrams generated.
                        default: 6
                        hints:
                          - hidden
                        minimum: 1
                        exclusiveMinimum: false
                      vectorizationFasttextEnsembleWeight:
                        type: number
                        title: Word2Vec vectorization ensemble weight
                        description: >-
                          Ensemble weight for Fasttext based vectorization if
                          more than one method of vectorization is selected.
                        default: 1
                      vectorizationTfidfUseCharacters:
                        type: boolean
                        title: Use characters ngrams
                        description: Whether to use characters. By default words are used.
                      vectorizationTfidfFilterStopwords:
                        type: boolean
                        title: Filter stopwords
                        description: >-
                          Whether to filter out stopwords before generating
                          Tf-Idf weights.
                        default: true
                      vectorizationTfidfMinDf:
                        type: number
                        title: Min Document Frequency
                        description: Minimum Df for token to be considered.
                        hints:
                          - hidden
                      vectorizationTfidfMaxDf:
                        type: number
                        title: Max Document Frequency
                        description: Maximum Df for token to be considered.
                        default: 1
                        hints:
                          - hidden
                      vectorizationTfidfMinNgram:
                        type: integer
                        title: Min Ngram size
                        description: Minimum Ngram size to be used.
                        default: 1
                        minimum: 1
                        exclusiveMinimum: false
                      vectorizationTfidfMaxNgram:
                        type: integer
                        title: Max Ngram size
                        description: Maximum Ngram size to be used.
                        default: 3
                        minimum: 1
                        exclusiveMinimum: false
                      vectorizationTfIdfMaxVocabSize:
                        type: integer
                        title: Max vocab size
                        description: >-
                          Maximum number of tokens to consider for the vocab.
                          Less frequent tokens will be omitted.
                        minimum: 1
                        exclusiveMinimum: false
                      vectorizationTfidfEnsembleWeight:
                        type: number
                        title: Tf-Idf vectorization ensemble weight
                        description: >-
                          Ensemble weight for Tf-Idf based vectorization if more
                          than one method of vectorization is selected.
                        default: 1
                      topKAnn:
                        type: integer
                        title: No. of Item Recs to  compute for ensemble
                        description: >-
                          This is used to fetch additional recommendations so
                          that the value specified for the Number of User
                          Recommendations to Compute is most likely satisfied
                          after filtering. This is normally set to 10 * (No. of
                          item recommendations to compute)
                        default: 100
                        minimum: 1
                        exclusiveMinimum: false
                      jobRunName:
                        type: string
                        title: Job Run Name
                        description: >-
                          Identifier for this job run. Use it to filter
                          recommendations from particular runs
                        hints:
                          - advanced
                      trainingCollection:
                        type: string
                        title: Training data path
                        description: >-
                          Solr collection or cloud storage path where training
                          data is present.
                        minLength: 1
                      trainingFormat:
                        type: string
                        title: Training data format
                        description: The format of the training data - solr, parquet etc.
                        default: solr
                        minLength: 1
                      secretName:
                        type: string
                        title: Cloud storage secret name
                        description: >-
                          Name of the secret used to access cloud storage as
                          defined in the K8s namespace
                        hints:
                          - advanced
                        minLength: 1
                      outputCollection:
                        type: string
                        title: Output data path
                        description: >-
                          Solr collection or cloud storage path where output
                          data is to be written.
                      outputFormat:
                        type: string
                        title: Output data format
                        description: The format of the output data - solr, parquet etc.
                        default: solr
                        minLength: 1
                      partitionFields:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      numSimsPerItem:
                        type: integer
                        title: No. of Item Recs to Compute
                        description: Number of recommendations that will be saved per item.
                        default: 10
                        minimum: 1
                        exclusiveMinimum: false
                      deleteOldRecs:
                        type: boolean
                        title: Delete Old Recommendations
                        description: >-
                          Should previous recommendations be deleted. If this
                          box is unchecked, then old recommendations will not be
                          deleted but new recommendations will be appended with
                          a different Job ID. Both sets of recommendations will
                          be contained within the same collection. Will only
                          work when output path is solr.
                        default: true
                      excludeFromDeleteFilter:
                        type: string
                        title: Exclude from Delete Filter
                        description: >-
                          If the 'Delete Old Recommendations' flag is enabled,
                          then use this query filter to identify existing
                          recommendation docs to exclude from delete. The filter
                          should identify recommendation docs you want to keep.
                        hints:
                          - advanced
                      metadataCategoryFields:
                        type: array
                        title: Metadata fields for item-item evaluation
                        description: >-
                          These fields will be used for item-item evaluation and
                          for determining if the recommendation pair belongs to
                          the same category.
                        hints:
                          - advanced
                        items:
                          type: string
                      trainingDataFilterQuery:
                        type: string
                        title: Training Data Filter Query
                        description: >-
                          Solr or SQL query to filter training data. Use solr
                          query when solr collection is specified in Training
                          Path. Use SQL query when cloud storage location is
                          specified. The table name for SQL is `spark_input`.
                        hints:
                          - code/sql
                          - advanced
                      trainingSampleFraction:
                        type: number
                        title: Training Data Sampling Fraction
                        description: Choose a fraction of the data for training.
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      itemIdField:
                        type: string
                        title: Training Item Id Field
                        description: Field name containing stored item ids
                        default: item_id_s
                        minLength: 1
                      contentField:
                        type: array
                        title: Training Content Field
                        description: >-
                          Field name containing item content such as product
                          description
                        items:
                          type: string
                      randomSeed:
                        type: integer
                        title: Random Seed
                        description: >-
                          Pseudorandom determinism fixed by keeping this seed
                          constant
                        default: 12345
                        hints:
                          - advanced
                      itemMetadataFields:
                        type: array
                        title: Item Metadata Fields
                        description: >-
                          List of item metadata fields to include in the
                          recommendation output documents.
                        hints:
                          - advanced
                        items:
                          type: string
                      vectorizationDlBatchSize:
                        type: integer
                        title: Batch size to compute encodings
                        description: >-
                          Compute encodings in batches in case hardware out of
                          memory.
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      performANN:
                        type: boolean
                        title: Perform approximate nearest neighbor search
                        description: >-
                          Whether to perform approximate nearest neighbor search
                          (ANN). ANN will drastically reduce training time, but
                          accuracy will drop a little. Disable only if dataset
                          is very small.
                        default: true
                      maxNeighbors:
                        type: integer
                        title: Max neighbors for indexing
                        description: >-
                          If perform ANN, size of the potential neighbors for
                          the indexing phase. Higher value leads to better
                          recall and shorter retrieval times (at the expense of
                          longer indexing time).Reasonable range: 5~100
                        hints:
                          - advanced
                        maximum: 100
                        exclusiveMaximum: false
                        minimum: 5
                        exclusiveMinimum: false
                      searchNN:
                        type: integer
                        title: Search Depth
                        description: >-
                          If perform ANN, the depth of search used to find
                          neighbors. Higher value improves recall at the expense
                          of longer retrieval time.Reasonable range: 100~2000
                        hints:
                          - advanced
                        maximum: 2000
                        exclusiveMaximum: false
                        minimum: 100
                        exclusiveMinimum: false
                      indexNN:
                        type: integer
                        title: Indexing Depth
                        description: >-
                          If perform ANN, the depth of constructed index. Higher
                          value improves recall at the expense of longer
                          indexing time.Reasonable range: 100~2000
                        hints:
                          - advanced
                        maximum: 2000
                        exclusiveMaximum: false
                        minimum: 100
                        exclusiveMinimum: false
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-item-recommender-content
                        default: argo-item-recommender-content
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - trainingFormat
                          - outputCollection
                          - outputFormat
                          - outputBatchSize
                          - secretName
                          - partitionFields
                      - label: Training Data Settings
                        properties:
                          - trainingDataFilterQuery
                          - trainingSampleFraction
                          - randomSeed
                          - itemIdField
                          - contentField
                      - label: Model Tuning Parameters
                        properties:
                          - numSimsPerItem
                          - topKAnn
                          - performANN
                          - maxNeighbors
                          - searchNN
                          - indexNN
                          - unidecodeText
                          - lowercaseText
                          - deleteOldRecs
                          - excludeFromDeleteFilter
                      - label: Vectorization Parameters
                        properties:
                          - vectorizationUseDl
                          - vectorizationUseFasttext
                          - vectorizationUseTfidf
                      - label: Deep Learning Vectorization Parameters
                        properties:
                          - vectorizationDlBatchSize
                          - vectorizationDlEnsembleWeight
                      - label: Word2Vec Vectorization Parameters
                        properties:
                          - vectorizationFasttextVectorsSize
                          - vectorizationFasttextWindowSize
                          - vectorizationFasttextEpochs
                          - vectorizationFasttextMinNgram
                          - vectorizationFasttextEnsembleWeight
                          - vectorizationFasttextMaxVocabSize
                      - label: Tf-Idf Vectorization Parameters
                        properties:
                          - vectorizationTfidfUseCharacters
                          - vectorizationTfidfFilterStopwords
                          - vectorizationTfidfMinNgram
                          - vectorizationTfidfMaxNgram
                          - vectorizationTfIdfMaxVocabSize
                          - vectorizationTfidfEnsembleWeight
                      - label: Item Metadata Settings
                        properties:
                          - itemMetadataFields
                          - metadataCategoryFields
                  - type: object
                    title: Delete Seldon Core Model Deployment
                    description: Removes a Seldon Core deployment from the cluster
                    required:
                      - id
                      - modelName
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      modelName:
                        type: string
                        title: Model name
                        description: The model name of the Seldon Core deployment to delete
                        maxLength: 30
                        pattern: >-
                          ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-delete-model
                        default: argo-delete-model
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                  - type: object
                    title: Logistic Regression Classifier Training (deprecated)
                    description: >-
                      Use this job when you have training data and you want to
                      train a logistic regression model to classify text into
                      groups. Deprecated as of Fusion 5.2.0 and will be removed
                      in a future release; use the Classification job instead.
                    required:
                      - id
                      - trainingCollection
                      - fieldToVectorize
                      - dataFormat
                      - trainingLabelField
                      - type
                    properties:
                      id:
                        type: string
                        title: Spark Job ID
                        description: >-
                          The ID for this Spark job. Used in the API to
                          reference this job. Allowed characters: a-z, A-Z, dash
                          (-) and underscore (_). Maximum length: 63 characters.
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Spark Settings
                        description: Spark configuration settings.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      trainingCollection:
                        type: string
                        title: Training Collection
                        description: Solr Collection containing labeled training data
                        minLength: 1
                      fieldToVectorize:
                        type: string
                        title: Field to Vectorize
                        description: >-
                          Solr field containing text training data. Data from
                          multiple fields with different weights can be combined
                          by specifying them as field1:weight1,field2:weight2
                          etc.
                        minLength: 1
                      dataFormat:
                        type: string
                        title: Data format
                        description: >-
                          Spark-compatible format that contains training data
                          (like 'solr', 'parquet', 'orc' etc)
                        default: solr
                        minLength: 1
                      trainingDataFrameConfigOptions:
                        type: object
                        title: Dataframe Config Options
                        description: >-
                          Additional spark dataframe loading configuration
                          options
                        properties: {}
                        additionalProperties:
                          type: string
                        hints:
                          - advanced
                      trainingDataFilterQuery:
                        type: string
                        title: Training data filter query
                        description: >-
                          Solr query to use when loading training data if using
                          Solr
                        default: '*:*'
                        hints:
                          - advanced
                      sparkSQL:
                        type: string
                        title: Spark SQL filter query
                        description: >-
                          Use this field to create a Spark SQL query for
                          filtering your input data. The input data will be
                          registered as spark_input
                        default: SELECT * from spark_input
                        hints:
                          - code/sql
                          - advanced
                      trainingDataSamplingFraction:
                        type: number
                        title: Training data sampling fraction
                        description: Fraction of the training data to use
                        default: 1
                        hints:
                          - advanced
                        maximum: 1
                        exclusiveMaximum: false
                      randomSeed:
                        type: integer
                        title: Random seed
                        description: For any deterministic pseudorandom number generation
                        default: 1234
                        hints:
                          - advanced
                      outputCollection:
                        type: string
                        title: Output Collection
                        description: Solr Collection to store model-labeled data to
                      overwriteOutput:
                        type: boolean
                        title: Overwrite Output
                        description: Overwrite output collection
                        default: true
                        hints:
                          - hidden
                          - advanced
                      dataOutputFormat:
                        type: string
                        title: Data output format
                        description: >-
                          Spark-compatible output format (like 'solr',
                          'parquet', etc)
                        default: solr
                        hints:
                          - advanced
                        minLength: 1
                      sourceFields:
                        type: string
                        title: Fields to Load
                        description: >-
                          Solr fields to load (comma-delimited). Leave empty to
                          allow the job to select the required fields to load at
                          runtime.
                        hints:
                          - advanced
                      partitionCols:
                        type: string
                        title: Partition fields
                        description: >-
                          If writing to non-Solr sources, this field will accept
                          a comma-delimited list of column names for
                          partitioning the dataframe before writing to the
                          external output 
                        hints:
                          - advanced
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      modelId:
                        type: string
                        title: Model ID
                        description: >-
                          Identifier for the model to be trained; uses the
                          supplied Spark Job ID if not provided.
                        hints:
                          - advanced
                        minLength: 1
                      analyzerConfig:
                        type: string
                        title: Lucene Analyzer Schema
                        description: >-
                          LuceneTextAnalyzer schema for tokenization
                          (JSON-encoded)
                        default: >-
                          { "analyzers": [{ "name":
                          "StdTokLowerStop","charFilters": [ { "type":
                          "htmlstrip" } ],"tokenizer": { "type": "standard"
                          },"filters": [{ "type": "lowercase" },{ "type":
                          "KStem" },{ "type": "length", "min": "2", "max":
                          "32767" },{ "type": "fusionstop", "ignoreCase":
                          "true", "format": "snowball", "words":
                          "org/apache/lucene/analysis/snowball/english_stop.txt"
                          }] }],"fields": [{ "regex": ".+", "analyzer":
                          "StdTokLowerStop" } ]}
                        hints:
                          - advanced
                          - code/json
                          - lengthy
                      withIdf:
                        type: boolean
                        title: IDF Weighting
                        description: >-
                          Weight vector components based on inverse document
                          frequency
                        default: true
                        hints:
                          - advanced
                      w2vDimension:
                        type: integer
                        title: Word2Vec Dimension
                        description: >-
                          Word-vector dimensionality to represent text (choose >
                          0 to use)
                        default: 0
                        hints:
                          - advanced
                        minimum: 0
                        exclusiveMinimum: false
                      w2vWindowSize:
                        type: integer
                        title: Word2Vec Window Size
                        description: >-
                          The window size (context words from [-window, window])
                          for word2vec
                        default: 5
                        hints:
                          - advanced
                        minimum: 3
                        exclusiveMinimum: false
                      w2vMaxSentenceLength:
                        type: integer
                        title: Max Word2Vec Sentence Length
                        description: >-
                          Sets the maximum length (in words) of each sentence in
                          the input data. Any sentence longer than this
                          threshold will be divided into chunks of up to
                          `maxSentenceLength` size.
                        default: 1000
                        hints:
                          - advanced
                        minimum: 3
                        exclusiveMinimum: false
                      w2vMaxIter:
                        type: integer
                        title: Max Word2Vec Iterations
                        description: Maximum number of iterations of the word2vec training
                        default: 1
                        hints:
                          - advanced
                      w2vStepSize:
                        type: number
                        title: Word2Vec Step Size
                        description: >-
                          Training parameter for word2vec convergence (change at
                          your own peril)
                        default: 0.025
                        hints:
                          - advanced
                        minimum: 0.005
                        exclusiveMinimum: false
                      minDF:
                        type: number
                        title: Minimum Term Document Frequency
                        description: >-
                          To be kept, terms must occur in at least this number
                          of documents (if > 1.0), or at least this fraction of
                          documents (if <= 1.0)
                        default: 0
                        hints:
                          - advanced
                      maxDF:
                        type: number
                        title: Max Term Document Frequency
                        description: >-
                          To be kept, terms must occur in no more than this
                          number of documents (if > 1.0), or no more than this
                          fraction of documents (if <= 1.0)
                        default: 1
                        hints:
                          - advanced
                      norm:
                        type: integer
                        title: Vector normalization
                        description: >-
                          p-norm to normalize vectors with (choose -1 to turn
                          normalization off)
                        enum:
                          - -1
                          - 0
                          - 1
                          - 2
                        default: 2
                        hints:
                          - advanced
                      predictedLabelField:
                        type: string
                        title: Predicted Label Field
                        description: >-
                          Solr field which will contain labels when classifier
                          is applied to documents
                        default: labelPredictedByFusionModel
                        hints:
                          - advanced
                      serializeAsMleap:
                        type: boolean
                        title: Serialize as Mleap Bundle
                        description: Serialize the output model as Mleap Bundle
                        default: true
                        hints:
                          - hidden
                      minSparkPartitions:
                        type: integer
                        title: Minimum Number of Spark Partitions
                        description: Minimum number of Spark partitions for training job.
                        default: 200
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      stopwordsList:
                        type: array
                        title: List of stopwords
                        description: Stopwords defined in Lucene analyzer config
                        hints:
                          - readonly
                          - hidden
                        items:
                          type: string
                          minLength: 1
                          reference: blob
                          blobType: file:spark
                      overwriteExistingModel:
                        type: boolean
                        title: Overwrite existing model
                        description: >-
                          If a model exists in the model store, overwrite when
                          this job runs
                        default: true
                        hints:
                          - advanced
                      trainingLabelField:
                        type: string
                        title: Label Field
                        description: >-
                          Solr field containing labels for training instances
                          (should be single-valued strings)
                      gridSearch:
                        type: boolean
                        title: Grid Search with Cross Validation
                        description: Perform grid search to optimize hyperparameters
                        default: false
                      evaluationMetricType:
                        type: string
                        title: Evaluation Metric Type
                        description: >-
                          Optimize hyperparameter search over one of [binary,
                          multiclass, regression] metrics, or 'none'
                        enum:
                          - binary
                          - multiclass
                          - regression
                          - none
                        default: none
                        hints:
                          - advanced
                      autoBalanceClasses:
                        type: boolean
                        title: Auto-balance training classes
                        description: >-
                          Ensure that all classes of training data have the same
                          size
                        default: true
                        hints:
                          - advanced
                      minTrainingSamplesPerClass:
                        type: integer
                        title: Minimum Labeled Class Size
                        description: >-
                          Ensure that all classes of training data have at least
                          this many examples
                        default: 100
                        hints:
                          - advanced
                        minimum: 1
                        exclusiveMinimum: false
                      makeOtherClass:
                        type: boolean
                        title: Make 'Other' Class
                        description: >-
                          Create a label class 'Other' which contains all
                          examples not in a class large enough to train on
                        default: true
                        hints:
                          - advanced
                      otherClassName:
                        type: string
                        title: '''Other'' class name'
                        description: Label class name for the catch-all 'Other' class
                        default: Other
                        hints:
                          - advanced
                        minLength: 1
                      regularizationWeight:
                        type: number
                        title: Regularization weight
                        description: >-
                          Degree of regularization to use when training (L2
                          lambda parameter if elasticNetWeight = 0)
                        default: 0.01
                        maximum: 1
                        exclusiveMaximum: false
                        minimum: 0.000001
                        exclusiveMinimum: false
                      elasticNetWeight:
                        type: number
                        title: Elastic net weight
                        description: >-
                          Value between 0 and 1 to interpolate between ridge
                          (0.0) and lasso (1.0) regression
                        default: 0
                        maximum: 1
                        exclusiveMaximum: false
                      maxIters:
                        type: integer
                        title: Maximum number of iterations
                        description: >-
                          Maximum number of iterations to perform before
                          halting, even if the convergence criterion has not
                          been met.
                        default: 10
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - logistic_regression_classifier_trainer
                        default: logistic_regression_classifier_trainer
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
                    propertyGroups:
                      - label: Input/Output Parameters
                        properties:
                          - trainingCollection
                          - outputCollection
                          - dataFormat
                          - trainingDataFilterQuery
                          - readOptions
                          - writeOptions
                          - trainingDataFrameConfigOptions
                          - trainingDataSamplingFraction
                          - randomSeed
                      - label: Field Parameters
                        properties:
                          - fieldToVectorize
                          - sourceFields
                          - predictedLabelField
                          - trainingLabelField
                      - label: Model Tuning Parameters
                        properties:
                          - w2vDimension
                          - w2vWindowSize
                          - w2vMaxIter
                          - w2vMaxSentenceLength
                          - w2vStepSize
                          - withIdf
                          - maxDF
                          - minDF
                          - norm
                          - autoBalanceClasses
                          - evaluationMetricType
                          - minTrainingSamplesPerClass
                          - otherClassName
                          - makeOtherClass
                          - gridSearch
                          - elasticNetWeight
                          - maxIters
                          - regularizationWeight
                      - label: Featurization Parameters
                        properties:
                          - analyzerConfig
                      - label: Misc. Parameters
                        properties:
                          - modelId
                  - type: object
                    title: Create Ray Model Deployment
                    description: Deploys a Ray Model into the Fusion cluster
                    required:
                      - id
                      - deployModelName
                      - modelCpuLimit
                      - modelMemoryLimit
                      - modelDockerRepo
                      - modelDockerImage
                      - type
                    properties:
                      id:
                        type: string
                        title: Job ID
                        description: >-
                          The ID for this job. Used in the API to reference this
                          job. Allowed characters: a-z, A-Z, dash (-) and
                          underscore (_)
                        maxLength: 63
                        pattern: '[a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?'
                      sparkConfig:
                        type: array
                        title: Additional parameters
                        description: >-
                          Provide additional key/value pairs to be injected into
                          the training JSON map at runtime. Values will be
                          inserted as-is, so use " to surround string values
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      writeOptions:
                        type: array
                        title: Write Options
                        description: >-
                          Options used when writing output to Solr or other
                          sources
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      readOptions:
                        type: array
                        title: Read Options
                        description: >-
                          Options used when reading input from Solr or other
                          sources.
                        hints:
                          - advanced
                        items:
                          type: object
                          required:
                            - key
                          properties:
                            key:
                              type: string
                              title: Parameter Name
                            value:
                              type: string
                              title: Parameter Value
                      deployModelName:
                        type: string
                        title: Model name
                        description: >-
                          The model name of the Ray deployment to deploy (must
                          be a valid lowercased DNS subdomain with no
                          underscores).
                        maxLength: 30
                        pattern: >-
                          ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
                      modelMinReplicas:
                        type: integer
                        title: Model min replicas
                        description: Minimum number of replicas of the model to be deployed
                        default: 1
                      modelMaxReplicas:
                        type: integer
                        title: Model max replicas
                        description: Maximum number of replicas of the model to be deployed
                        default: 1
                      modelCpuLimit:
                        type: number
                        title: Model CPU limit
                        description: >-
                          Maximum number of CPUs that can be allocated to a
                          single model replica
                        default: 1
                      modelMemoryLimit:
                        type: string
                        title: Model memory limit
                        description: >-
                          Maximum amount of memory that can be allocated to a
                          single model replica
                        default: 1Gi
                        pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$
                      modelImportPath:
                        type: string
                        title: Ray deployment import path
                        description: >-
                          The path to your top-level Ray Serve deployment (or
                          the same path passed to `serve run`)
                        default: deployment:app
                      modelDockerRepo:
                        type: string
                        title: Docker repository
                        description: >-
                          Defines the Docker repository where the model image is
                          located.
                      modelDockerImage:
                        type: string
                        title: Image name
                        description: Name of the model's docker image
                      modelDockerSecret:
                        type: string
                        title: Kubernetes secret name for model repo
                        description: >-
                          Defines the Kubernetes secret to be used with the
                          Docker repository
                      type:
                        type: string
                        title: Spark Job Type
                        enum:
                          - argo-deploy-ray-model
                        default: argo-deploy-ray-model
                        hints:
                          - readonly
                    additionalProperties: true
                    category: Other
                    categoryPriority: 1
components:
  schemas:
    ObjectType:
      type: object
      properties:
        type:
          type: string
          enum:
            - string
            - number
            - integer
            - boolean
            - object
            - array
            - 'null'
            - ref
        title:
          type: string
        description:
          type: string
        required:
          uniqueItems: true
          type: array
          items:
            type: string
        properties:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/AnyType'
        additionalProperties:
          type: object
        minProperties:
          type: integer
          format: int32
        maxProperties:
          type: integer
          format: int32
        definitions:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/AnyType'
        category:
          type: string
        categoryPriority:
          type: integer
          format: int32
        enum:
          uniqueItems: true
          type: array
          items:
            type: object
        hints:
          uniqueItems: true
          type: array
          items:
            type: string
        default:
          type: object
        oneOf:
          type: array
          items:
            $ref: '#/components/schemas/AnyTypeObjectObject'
        propertyGroups:
          type: array
          items:
            $ref: '#/components/schemas/PropertyGroup'
    AnyType:
      type: object
      properties:
        type:
          type: string
          enum:
            - string
            - number
            - integer
            - boolean
            - object
            - array
            - 'null'
            - ref
        title:
          type: string
        description:
          type: string
        enum:
          uniqueItems: true
          type: array
          items:
            type: object
        default:
          type: object
        category:
          type: string
        categoryPriority:
          type: integer
          format: int32
        hints:
          uniqueItems: true
          type: array
          items:
            type: string
        oneOf:
          type: array
          items:
            $ref: '#/components/schemas/AnyTypeObjectObject'
    AnyTypeObjectObject:
      type: object
      properties:
        type:
          type: string
          enum:
            - string
            - number
            - integer
            - boolean
            - object
            - array
            - 'null'
            - ref
        title:
          type: string
        description:
          type: string
        enum:
          uniqueItems: true
          type: array
          items:
            type: object
        default:
          type: object
        category:
          type: string
        categoryPriority:
          type: integer
          format: int32
        hints:
          uniqueItems: true
          type: array
          items:
            type: string
    PropertyGroup:
      type: object
      properties:
        label:
          type: string
        properties:
          type: array
          items:
            type: string
  securitySchemes:
    Basic auth:
      type: http
      scheme: basic
    API key:
      name: x-api-key
      type: apiKey
      in: header

````