deployModelName
Model Deployment Name
required
|
Name of the model to be used for deployment (must be a valid DNS subdomain with no underscores).
type: string
maxLength: 30
pattern: [a-zA-Z][\-a-zA-Z0-9]*[a-zA-Z0-9]?
|
dimReduction
Perform Dimensionality Reduction
|
Whether to perform dimensionality reduction. Truncated SVD is used, which reduces overfitting and training time. Note that sparse vectors will become dense.
type: boolean
default value: 'false '
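The job's internal code is not shown here, but the described behaviour matches scikit-learn's TruncatedSVD; a minimal, illustrative sketch of the equivalent transformation:

  from sklearn.feature_extraction.text import TfidfVectorizer
  from sklearn.decomposition import TruncatedSVD

  texts = ["the quick brown fox", "jumped over the lazy dog", "the fox and the dog"]
  sparse_features = TfidfVectorizer().fit_transform(texts)  # sparse TF-IDF matrix
  svd = TruncatedSVD(n_components=2)                        # cf. dimReductionSize
  dense_features = svd.fit_transform(sparse_features)       # dense output, lower dimension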
|
dimReductionSize
Reduced Dimension Size
|
The target dimension size of the features after dimensionality reduction.
type: integer
default value: '256 '
exclusiveMinimum: false
minimum: 1
|
dropout
Dropout
|
Probability for applying dropout regularization.
type: number
default value: '0.2 '
|
embeddingReg
Embedding regularization
|
How strongly the algorithm should weight minimizing the maximum similarity between embeddings of different classes.
type: number
default value: '0.8 '
|
embeddingsSize
Embedding size
|
Dimension size of final embedding vectors for text and class.
type: integer
default value: '100 '
exclusiveMinimum: false
minimum: 1
|
featurizerType
Featurizer
|
The type of featurizer to use. TFIDF computes both term frequency and inverse document frequency, whereas Count uses only term frequency.
type: string
default value: 'tfidf '
enum: {
tfidf
count
}
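For reference, the two options correspond to scikit-learn's standard vectorizers (a sketch, assuming an sklearn-style featurizer, not necessarily the job's exact code):

  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

  texts = ["solr search engine", "search engine tuning"]
  counts = CountVectorizer().fit_transform(texts)  # featurizerType: count (raw term frequency)
  tfidf = TfidfVectorizer().fit_transform(texts)   # featurizerType: tfidf (TF weighted by IDF)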
|
id
Job ID
required
|
The ID for this job. Used in the API to reference this job. Allowed characters: a-z, A-Z, 0-9, dash (-), and underscore (_); must start with a letter.
type: string
maxLength: 63
pattern: [a-zA-Z][_\-a-zA-Z0-9]*[a-zA-Z0-9]?
|
l1Ratio
L1 penalty ratio
|
Only used with the `elasticnet` penalty. A value of 0 uses the l2 penalty, a value of 1 uses the l1 penalty, and a value in between mixes the two penalties in that proportion.
type: number
default value: '0.5 '
exclusiveMaximum: false
maximum: 1.0
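In other words, the elastic net penalty is a convex blend of the two norms; a minimal sketch of the weighting (scaling constants omitted, w stands for the model coefficients):

  import numpy as np

  def elastic_net_penalty(w, l1_ratio=0.5):
      # l1_ratio = 1.0 -> pure L1, l1_ratio = 0.0 -> pure L2
      return l1_ratio * np.abs(w).sum() + (1.0 - l1_ratio) * (w ** 2).sum()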
|
labelField
Training collection class field
required
|
Solr field name containing the classes/labels for the text
type: string
minLength: 1
|
labelLayersSizes
Hidden sizes before class embedding
|
Sizes of hidden layers before the embedding layer for classes. Specify as a list of numbers for multiple layers or a single number for 1 layer. Leave blank if no hidden layers are required.
type: string
default value: '[] '
pattern: ^(\[(((\d)*,\s*)*(\d+)+)?\])?$
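For example, '[64]' configures one hidden layer of size 64, '[256, 128]' configures two layers, and an empty value (or '[]') configures none.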
|
lowercaseTexts
Lowercase Text
|
Select if you want the text to be lowercased
type: boolean
default value: 'true '
|
maxBatchSize
Maximum Batch Size
|
The largest batch size to use during training. Batch size will be increased linearly every epoch, up to the maximum batch size specified.
type: integer
default value: '128 '
exclusiveMinimum: false
minimum: 1
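The exact schedule is not documented here, but a linear ramp from minBatchSize to maxBatchSize over the training epochs would look roughly like this (illustrative sketch, not the job's internal code):

  def batch_size_for_epoch(epoch, num_epochs=40, min_batch=64, max_batch=128):
      # Linear interpolation from min_batch (first epoch) to max_batch (last epoch).
      if num_epochs <= 1:
          return max_batch
      step = (max_batch - min_batch) / (num_epochs - 1)
      return int(round(min_batch + step * epoch))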
|
maxCharLen
Maximum No. of Characters
|
Maximum length, in characters, of the training text. Texts longer than this value will be truncated.
type: integer
default value: '100000 '
exclusiveMinimum: false
minimum: 1
|
maxDf
Max Document Frequency
|
Maximum document frequency (DF) for a token to be considered. Provide a float in (0, 1) to specify a fraction of documents, or an integer >= 1 to specify an absolute number of documents in which a token may occur.
type: number
default value: '0.8 '
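The same convention applies to minDf: a value strictly between 0 and 1 is read as a fraction of the corpus, while a value of 1 or greater is read as an absolute document count (this mirrors scikit-learn's max_df/min_df behaviour; shown here only as a sketch):

  def df_threshold(value, num_docs):
      # 0 < value < 1 -> fraction of the corpus; value >= 1 -> absolute count
      return value * num_docs if 0 < value < 1 else int(value)

  df_threshold(0.8, 10000)  # 8000 documents
  df_threshold(25, 10000)   # 25 documents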
|
maxFeatures
Maximum Vocab Size
|
Maximum number of tokens (including word or character ngrams) to consider for the vocabulary. Less frequent tokens will be omitted.
type: integer
default value: '250000 '
exclusiveMinimum: false
minimum: 1
|
maxIter
Maximum iterations for algorithm
|
Maximum number of iterations taken for the optimization algorithm to converge.
type: integer
default value: '200 '
exclusiveMinimum: false
minimum: 1
|
maxNgram
Max Ngram size
|
Maximum word or character ngram size to be used.
type: integer
exclusiveMinimum: false
minimum: 1
|
minBatchSize
Minimum Batch Size
|
The smallest batch size with which to start training. Batch size will be increased linearly every epoch, up to the maximum batch size specified.
type: integer
default value: '64 '
exclusiveMinimum: false
minimum: 1
|
minCharLen
Minimum No. of Characters
|
Minimum length, in characters, for the text to be included in training.
type: integer
default value: '2 '
exclusiveMinimum: false
minimum: 1
|
minClassSize
Minimum no. of examples per class
|
Minimum number of samples a class must have to be included in training. Classes with fewer samples are dropped along with all their samples.
type: integer
default value: '5 '
exclusiveMinimum: false
minimum: 2
|
minDf
Min Document Frequency
|
Minimum document frequency (DF) for a token to be considered. Provide a float in (0, 1) to specify a fraction of documents, or an integer >= 1 to specify an absolute number of documents in which a token must occur.
type: number
default value: '1.0 '
|
minNgram
Min Ngram size
|
Minimum word or character ngram size to be used.
type: integer
exclusiveMinimum: false
minimum: 1
|
modelReplicas
Model replicas
|
How many replicas of the model should be deployed by Seldon Core
type: integer
default value: '1 '
exclusiveMinimum: false
minimum: 1
|
muNeg
Maximum negative class similarity
|
How similar the algorithm should allow embedding vectors for negative classes to be. The algorithm will try to push these similarities below the value specified here.
type: number
default value: '-0.4 '
exclusiveMaximum: false
maximum: 1.0
|
muPos
Maximum correct class similarity
|
How similar the algorithm should try to make embedding vectors for correct classes. The algorithm will try to push these similarities above the value specified here.
type: number
default value: '0.8 '
exclusiveMaximum: false
maximum: 1.0
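muPos, muNeg, and the similarity type together define a StarSpace-style margin objective; a heavily simplified sketch of the assumed per-example loss (illustrative only, not the job's exact implementation):

  import numpy as np

  def starspace_like_loss(text_emb, pos_emb, neg_embs, mu_pos=0.8, mu_neg=-0.4):
      # Cosine similarity, cf. similarityType
      sim = lambda a, b: np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
      pos_term = max(0.0, mu_pos - sim(text_emb, pos_emb))  # push correct-class similarity above muPos
      neg_sims = [sim(text_emb, n) for n in neg_embs]
      neg_term = max(0.0, max(neg_sims) - mu_neg)           # push max. negative similarity below muNeg (cf. useMaxNegSim)
      return pos_term + neg_term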
|
multiClass
Loss Method
|
Whether to train a binary classifier for each class or use a multinomial loss. `auto` selects `ovr` if the data is binary or if the optimization algorithm is `liblinear`, and otherwise selects `multinomial`.
type: string
default value: 'auto '
enum: {
auto
ovr
multinomial
}
|
norm
Use Norm
|
Select the norm method to use.
type: string
default value: 'None '
enum: {
None
L1
L2
}
|
numEpochs
Number of training epochs
|
Number of epochs for which to train the model.
type: integer
default value: '40 '
exclusiveMinimum: false
minimum: 1
|
numNeg
Number of negative classes for training
|
Number of negative classes to use during training to minimize their similarity to the input text. Should be less than the total number of classes.
type: integer
exclusiveMinimum: false
minimum: 1
|
penalty
Penalty
|
Specify the norm used in the penalization. The `newton-cg`, `sag`, and `lbfgs` solvers support only the `l2` penalty. `elasticnet` is supported only by the `saga` solver. Select `none` if you don't want to regularize (not supported by the `liblinear` solver).
type: string
default value: 'l2 '
enum: {
l1
l2
elasticnet
none
}
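These options map directly onto scikit-learn's LogisticRegression arguments; a minimal configuration sketch under that assumption (the penalty and solver must be compatible, as described above):

  from sklearn.linear_model import LogisticRegression

  clf = LogisticRegression(
      penalty="elasticnet",  # requires the saga solver
      solver="saga",
      l1_ratio=0.5,          # cf. l1Ratio
      C=1.0,                 # cf. reg (inverse regularization strength)
      max_iter=200,          # cf. maxIter
      tol=1e-4,              # cf. tol
  )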
|
randomSeed
Random Seed
|
Keeping this seed constant makes pseudorandom operations deterministic across runs.
type: integer
default value: '12345 '
|
reg
Regularization term
|
This is the inverse of regularization strength. Smaller values result in stronger regularization.
type: number
default value: '1.0 '
|
regTerm
Regularization Term
|
Scale of L2 regularization
type: number
default value: '0.002 '
|
scaling
Scale Features
|
Whether to apply standard scaling, (X - mean(X)) / std(X), to the features. If the feature vector is sparse (no dimensionality reduction is used), only division by the standard deviation is applied.
type: boolean
default value: 'true '
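This corresponds to standard scaling with the centering step skipped for sparse input, so sparsity is preserved; a sketch using scikit-learn's StandardScaler, assuming that is the underlying behaviour:

  import scipy.sparse as sp
  from sklearn.preprocessing import StandardScaler

  def make_scaler(features):
      # Centering a sparse matrix would densify it, so only scale by std in that case.
      return StandardScaler(with_mean=not sp.issparse(features))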
|
similarityType
Similarity type
|
Type of similarity to use to compare the embedded vectors.
type: string
default value: 'cosine '
enum: {
cosine
inner
}
|
smoothIdf
Smooth IDF
|
Smooth IDF weights by adding one to document frequencies. Prevents zero divisions.
type: boolean
default value: 'true '
|
solver
Optimization Algorithm
|
The optimization algorithm to use to fit to the data. LBFGS and SAGA are good initial choices.
type: string
default value: 'lbfgs '
enum: {
lbfgs
newton-cg
liblinear
sag
saga
}
|
sparkConfig
Additional parameters
|
Provide additional key/value pairs to be injected into the training JSON map at runtime. Values are inserted as-is, so surround string values with double quotes (").
type: array of object
object attributes: {
key
(required)
: {
display name: Parameter Name
type: string
}
value
: {
display name: Parameter Value
type: string
}
}
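For example, a hypothetical pair of entries (parameter names here are illustrative only) might look like:

  [
    { "key": "someTrainingParameter", "value": "\"a string value\"" },
    { "key": "someNumericParameter", "value": "42" }
  ]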
|
stopwordsBlobName
Stopwords Blob Store
|
Name of the stopwords blob resource. This is a .txt file with one stopword per line. By default the file is called stopwords/stopwords_en.txt, but a custom file can also be used. Check the documentation for details on the format and on uploading to the blob store.
type: string
default value: 'stopwords/stopwords_en.txt '
blobType: file:spark
reference: blob
|
sublinearTf
Sublinear TF
|
Whether to apply sublinear scaling to TF, i.e. replace tf with 1 + log(tf). It usually helps when characters are used.
type: boolean
default value: 'true '
|
textField
Training collection content field
required
|
Solr field name containing the text to be classified
type: string
minLength: 1
|
textLayersSizes
Hidden sizes before text embedding
|
Sizes of hidden layers before the embedding layer for text. Specify as a list of numbers for multiple layers or a single number for 1 layer. Leave blank if no hidden layers are required.
type: string
default value: '[256, 128] '
pattern: ^(\[(((\d)*,\s*)*(\d+)+)?\])?$
|
tokenPattern
Token filtering pattern
|
Regex pattern for filtering tokens.
type: string
default value: '(?u)\b\w\w+\b '
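The default pattern keeps Unicode word tokens of two or more characters and drops single-character tokens and punctuation; a quick illustration with Python's re module:

  import re

  re.findall(r"(?u)\b\w\w+\b", "It's a 2-step TF-IDF test!")
  # -> ['It', 'step', 'TF', 'IDF', 'test']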
|
tol
Stopping tolerance
|
Tolerance for stopping criteria.
type: number
default value: '0.0001 '
|
topK
Number of Output classes
|
Number of most probable output classes to assign to each sample along with their scores.
type: integer
default value: '1 '
exclusiveMinimum: false
minimum: 1
|
trainingCollection
Training Collection
required
|
Solr Collection containing documents for classification.
type: string
minLength: 1
|
trainingDataFilterQuery
Training Data Filter Query
|
Solr query to filter training data (e.g. downsampling or selecting based on min. pref values)
type: string
default value: '*:* '
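For example, a hypothetical filter such as `category:support AND lang:en` would restrict training to matching documents; the default `*:*` keeps everything.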
|
trainingSampleFraction
Training Data Sampling Fraction
|
Choose a fraction of the data for training.
type: number
default value: '1.0 '
exclusiveMaximum: false
maximum: 1.0
|
type
Spark Job Type
required
|
type: string
default value: 'argo-classification '
enum: {
argo-classification
}
|
unidecodeTexts
Unidecode Text
|
Select if you want the text to be unidecoded (transliterated to plain ASCII)
type: boolean
default value: 'true '
|
useCharacters
Use Characters
|
Whether to use the character or word analyzer. Use words if the text is long; using characters on long text can significantly increase vectorization time and memory requirements.
type: boolean
default value: 'true '
|
useClassWeights
Use class weights
|
If true, a weight is applied to each class inversely proportional to its frequency.
type: boolean
default value: 'false '
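One common weighting scheme of this kind is scikit-learn's 'balanced' heuristic, assumed here only for illustration:

  import numpy as np

  def balanced_class_weights(labels):
      classes, counts = np.unique(labels, return_counts=True)
      weights = len(labels) / (len(classes) * counts)  # rarer classes get larger weights
      return dict(zip(classes, weights))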
|
useMaxNegSim
Only minimize max. negative similarity
|
If true, only the maximum similarity for negative classes will be minimized. If unchecked, all negative similarities will be used.
type: boolean
default value: 'true '
|
valSize
Validation set size
|
Size of the validation dataset. Provide a float in (0, 1) to sample a fraction of the records, or an integer >= 1 to sample an exact number of records.
type: number
default value: '0.1 '
|
workflowType
Method
required
|
Method to be used for classification.
type: string
default value: 'Logistic Regression '
enum: {
Logistic Regression
Starspace
}
|