collection
Collection
|
The collection that documents will be indexed to.
type: string
pattern: ^[a-zA-Z0-9_-]+$
|
fusion_batchsize
Batch Size
|
Fusion Client Batch Size
type: integer
default value: '500 '
exclusiveMinimum: true
minimum: 1
|
fusion_buffer_timeoutms
Timeout (ms)
|
Fusion Client Timeout (ms).
type: integer
default value: '1000 '
exclusiveMinimum: true
minimum: 1
|
fusion_endpoints
List of Fusion Endpoints
required
|
type: array of string
minimum number of items (minItems ): 1
default value: [
http://localhost:8764 ,
]
|
fusion_fail_on_error
Fail on Error
|
Fusion Client Fail on Error
type: boolean
default value: 'false '
|
fusion_login_app_name
Config App Name
|
Login Config App Name; 'FusionClient' by default.
type: string
default value: 'FusionClient '
|
fusion_login_config
Login Config
|
The file path of the Login Configuration for a Kerberized Fusion; it must be placed on every mapper/reducer node.
type: string
|
fusion_password
Password
|
Fusion client user's password; leave empty if Kerberos is used.
type: string
|
fusion_realm
Fusion client's Authentication
|
Fusion's realm. If 'native' is selected, the password is mandatory. If 'kerberos' is selected, the Login Configuration is mandatory.
type: string
default value: 'NATIVE '
enum: {
NATIVE
KERBEROS
}
|
fusion_user
User/Principal
required
|
Fusion client's user, or principal if Kerberos is chosen.
type: string
|
hadoop_home
Hadoop home
required
|
Path to the Hadoop home directory where $HADOOP_HOME/bin/hadoop can be found. The connector requires access to either a full Hadoop installation, or a Hadoop client provided by your Hadoop distribution that has been configured to access the Hadoop installation.
type: string
minLength: 1
|
hadoop_input
Input source
required
|
Hadoop input source file/directory
type: string
minLength: 1
|
hadoop_mapper
Mapper
required
|
Hadoop Ingest Mapper
type: string
default value: 'CSV '
enum: {
CSV
DIRECTORY
GROK
REGEX
SEQUENCE_FILE
SOLR_XML
WARC
ZIP
}
|
job_jar
Job Jar
required
|
Path and name of the Hadoop job jar. Unless you are using a custom job jar, the default provided by Fusion is preferred.
type: string
default value: 'lucidworks-hadoop-job-2.2.7.jar '
minLength: 1
|
job_jar_path
job_jar_path
|
The Hadoop job path added by the connector.
type: string
|
kinit_cache
'kerberos' cache
|
Full path of 'kerberos' cache. If this path does not exist, it will be created.
type: string
|
kinit_cmd
'kinit' command
|
Full path to the 'kinit' binary.
type: string
default value: 'kinit '
|
kinit_keytab
'kerberos' keytab
|
Full path to the Kerberos keytab file.
type: string
|
kinit_principal
'kerberos' principal
|
Kerberos principal name, e.g., username@YOUR-REALM.COM
type: string
|
mapper_args
Job Jar arguments
|
Parameters for the Hadoop job.
type: array of object
object attributes: {
arg_name
: {
display name: name
type: string
enum: {
csvFieldMapping
csvDelimiter
csvFirstLineComment
csvStrategy
idField
add.subdirectories
grok.uri
grok.config.path
grok.additional.patterns
com.lucidworks.hadoop.ingest.RegexIngestMapper.regex
com.lucidworks.hadoop.ingest.RegexIngestMapper.groups_to_fields
com.lucidworks.hadoop.ingest.RegexIngestMapper.match
}
}
arg_value
: {
display name: value
type: string
}
}
|
reducers
Number of Reducers
|
(Expert) Depending on the OutputFormat and your system resources, you may wish to have Hadoop do a reduce step first so that it does not open too many connections to the output resource.
type: integer
default value: '0 '
exclusiveMinimum: false
minimum: 0
|
run_kinit
Run 'kinit'
|
If your Hadoop installation requires job requests to authenticate with Kerberos, this option will allow Fusion to run 'kinit' to get a valid ticket.
type: boolean
default value: 'false '
|