aMaxLen
Answer length
|
Average length of answer by number of tokens
type: integer
|
answerColName
Answer Field
required
|
Name of the field containing answers
type: string
minLength: 1
|
baseLR
Base learning rate
|
Base learning rate used in cyclical training
type: number
|
cudnn
GPU available
|
Use GPU for training if available (an NVIDIA GPU with 8 GB or more of memory is recommended)
type: boolean
default value: 'false '
|
deployModelName
Model Deployment Name
required
|
Name of the model to be used for deployment (must be a valid DNS subdomain with no underscores)
type: string
maxLength: 30
pattern: [a-zA-Z][\-a-zA-Z0-9]*[a-zA-Z0-9]?
|
embSPDP
Dropout ratio
|
Fraction of input to drop with Dropout layer (from 0-1)
type: number
default value: '0.15 '
|
epochs
Number of epochs to be used in training
|
type: integer
|
extraTrainingArgs
Extra training args for Python scripts
|
Add any additional arguments for the Python training scripts in this field
type: string
|
id
Job ID
required
|
The ID for this job. Used in the API to reference this job. Allowed characters: a-z, A-Z, 0-9, dash (-) and underscore (_). Must begin with a letter
type: string
maxLength: 63
pattern: [a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?
|
infBatch
Inference batch size used in validation
|
Batch size during validation. If left blank, this will be set automatically based on the input data
type: integer
|
kList
Metrics@k list
|
The k retrieval positions at which each metric will be computed
type: string
default value: '[1,3,5] '
|
lowerCases
Lower case all words
|
Whether to lower case all words in training, i.e. whether to treat upper case and lower case words equally.
type: boolean
default value: 'false '
|
maxLR
Maximum learning rate
|
Maximum learning rate used in cyclical training
type: number
|
maxTokensNum
Maximum number of words in doc
|
Drop document if the total number of words is greater than this value
type: integer
default value: '5000 '
exclusiveMinimum: false
minimum: 1
|
maxVocabSize
Maximum vocabulary size
|
Maximum number of words in vocabulary; words will be trimmed if their frequency is too low
type: integer
default value: '100000 '
exclusiveMinimum: false
minimum: 1
|
minTokensNum
Minimum number of words in doc
|
Drop document if the total number of words is lower than this value
type: integer
default value: '1 '
exclusiveMinimum: false
minimum: 1
|
modelReplicas
Model replicas
|
How many replicas of the model should be deployed by Seldon Core
type: integer
default value: '1 '
|
monitorMetric
Monitor metric
|
The metric that is chosen among all possible metrics at k to be used to decide when to stop training
type: string
default value: 'mrr@3 '
|
monitorMetricsList
Metrics list
|
List of evaluation metrics on validation data that will be printed in the log at the end of each epoch
type: string
default value: '["map", "mrr", "precision", "recall", "roc_auc"] '
|
monitorPatience
Monitor patience
|
Stop training if no improvement in metrics by this number of epochs
type: integer
|
numClusters
Number of clusters
|
Number of clusters to be used for fast dense vector retrieval. Note no clustering will be applied if this is set to 0. If left blank, cluster count will be inferred by the job depending on the data
type: integer
|
numNeg
Number of negative QA pairs
|
Number of non-matching answers randomly sampled for each question to be used as negative examples when constructing validation data
type: integer
default value: '15 '
|
numPos
Number of positive QA pairs
|
Number of answers to be used for each question when constructing validation data
type: integer
default value: '5 '
|
qMaxLen
Question length
|
Average length of question by number of tokens
type: integer
|
questionColName
Question Field
required
|
Name of the field containing questions
type: string
minLength: 1
|
rnnNamesList
RNN function list
|
List of RNN layers to be used, with possible values of lstm, gru. E.g. ["lstm", "lstm"]. This value will be automatically decided based on data if left blank
type: string
|
rnnUnitsList
RNN function units list
|
List of RNN layer units numbers, corresponding to RNN function list. E.g. 150, 150. This value will be automatically decided based on data if left blank
type: string
|
samplingProportion
Sampling proportion
|
The proportion of data to be sampled from the full dataset. Use a value between 0 and 1 for a proportion (e.g. 0.5 for 50%), or for a specific number of examples, use an integer larger than 1. Leave blank for no sampling
type: number
|
scaleOnly
Scale Seldon Core Replica Deployment Only
|
Run job but only adjust Seldon Core replica count (no training)
type: boolean
default value: 'false '
|
seed
Seed
|
Random seed for sampling
type: integer
default value: '12345 '
|
sparkConfig
Additional parameters
|
Provide additional key/value pairs to be injected into the training JSON map at runtime. Values will be inserted as-is, so use " to surround string values
type: array of object
object attributes: {
key
(required)
: {
display name: Parameter Name
type: string
}
value
: {
display name: Parameter Value
type: string
}
}
|
testMode
Test Mode
|
If set to true, then the training will exit after the first iteration. Useful for ensuring that the end-to-end pipeline is working
type: boolean
default value: 'false '
|
topKClusters
Top k of clusters to return
|
How many closest clusters the model can find for each query. At retrieval time, all answers in top k nearest clusters will be returned and reranked
type: integer
default value: '10 '
|
trainBatch
Training batch size
|
Batch size during training. If left blank, this will be set automatically based on the input data
type: integer
|
trainingCollection
Training Collection
required
|
Solr Collection containing question and answer pairs
type: string
minLength: 1
|
type
Spark Job Type
required
|
type: string
default value: 'argo-qna-supervised '
enum: {
argo-qna-supervised
}
|
unidecode
Apply unicode decoding
|
Use Unidecode library to transform Unicode input into ASCII transliterations
type: boolean
default value: 'true '
|
useAutoML
Perform auto hyperparameter tuning
|
Automatically tune hyperparameters (will take longer to train)
type: boolean
default value: 'false '
|
useCustomEmbeddings
Use custom embeddings
|
Choose this option when the data contains many uncommon words or jargon. NOTE: check the log for the warning about the percentage of covered vocabulary words; if this proportion is less than 80%, set this parameter to true and do not use the pre-trained embeddings shipped with our package
type: boolean
default value: 'false '
|
valSize
Validation sample size
|
Proportion of the original data to be used as validation sample
type: number
default value: '0.1 '
exclusiveMinimum: false
minimum: 0.001
|
w2vEpochs
Word2Vec training epochs
|
Number of epochs to train custom word2vec embeddings
type: integer
default value: '15 '
|
w2vTextColumns
Word2Vec training fields
|
Which fields in the Word2Vec training collection to use in Word2Vec vocabulary embedding training. If multiple fields, please separate them by comma, e.g. description_t,title_t.
type: string
|
w2vTextsCollection
Word2Vec training collection
|
Name of the collection which contains the documents that will be used to train Word2Vec if pre-trained word2vec embeddings won't be used.
type: string
|
w2vVectorSize
Size of word vectors
|
Word-vector dimensionality to represent text (suggested dimension range: 100~150)
type: integer
default value: '150 '
|
w2vWindowSize
Word2Vec window size
|
The window size (context words from [-window, window]) for Word2Vec
type: integer
default value: '8 '
|
weightDecay
Weight decay
|
L2 penalty used in Adam optimization. Bigger values will provide stronger regularization
type: number
default value: '0.0001 '
|