> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# CSV Parser Stage

export const schema = {
  // Configuration schema for the CSV parser stage. It is passed to
  // <SchemaParamFields> at the bottom of this page, which renders one
  // parameter entry per key under "properties" (title, type, default,
  // description) and marks the names listed in "required".
  "type": "object",
  "title": "CSV",
  "description": "Parse CSV content",
  "required": ["charset", "ignoreBOM", "type"],
  "properties": {
    "id": {
      "type": "string",
      "title": "Parser ID",
      "default": "2150fab3-495b-4582-b8ad-5985cca3aea5"
    },
    "label": {
      "type": "string",
      "title": "Label",
      "description": "A label for this Parser Stage",
      "maxLength": 255
    },
    "enabled": {
      "type": "boolean",
      "title": "Enable this Parser Stage",
      "default": true
    },
    "mediaTypes": {
      "type": "array",
      "title": "Media Types to match",
      "description": "Documents with a media type on this list will be matched by this parser stage. See inheritMediaTypes / use default media types for more.",
      "items": {
        "type": "string",
        "pattern": "^[^\\/]+\\/[^\\/]+$",
        "format": "rfc2646"
      }
    },
    "inheritMediaTypes": {
      "type": "boolean",
      "title": "Match default media types in this Parser Stage",
      "description": "Each parser stage has a built-in list of media types it handles by default. If this setting is true, that list will be used along with any optional additional types provided in the mediaTypes list. If this setting is false, this stage will only be selected for media types in the mediaTypes list, and the mediaTypes list becomes a mandatory property which must have at least one valid media type.",
      "default": true
    },
    "ignoredMediaTypes": {
      "type": "array",
      "title": "Media Types to ignore",
      "description": "Documents with a media type on this list will not be processed by this parser stage.",
      "items": {
        "type": "string",
        "pattern": "^[^\\/]+\\/[^\\/]+$",
        "format": "rfc2646"
      }
    },
    "pathPatterns": {
      "type": "array",
      "title": "File names to parse",
      "description": "Specify a file name or pattern that must be matched for this parser stage to run. Forward slashes (\"/\") are used to join names of files inside archives with the archive name.",
      "items": {
        "type": "object",
        "properties": {
          "syntax": {
            "type": "string",
            "title": "Pattern type",
            "description": "glob uses bash shell-style wildcards; regex uses Java (PCRE-style) regex",
            "enum": ["glob", "regex"],
            "default": "glob"
          },
          "pattern": {
            "type": "string",
            "title": "File name or pattern",
            "description": "e.g.: \"z.txt\" or \"*.md\" or \"/a/*/b/f.txt\" for glob; \"z.txt$\" or \".*\\.txt$\" or \"^/a/[^\\/]*/b/f.txt$\" for regex"
          }
        }
      }
    },
    "errorHandling": {
      "type": "string",
      "title": "Error Handling",
      "enum": ["ignore", "log", "fail", "mark"],
      "default": "mark"
    },
    "outputFieldPrefix": {
      "type": "string",
      "title": "Prefix parsed fields with",
      "description": "Fields extracted by this parser will be prefixed with this string. The remainder of the field name will be as detected in the stream",
      "maxLength": 20,
      "pattern": "^$|^[A-Za-z_][A-Za-z0-9_\\-\\.]+$"
    },
    "charset": {
      "type": "string",
      "title": "Character Set",
      "description": "Example: \"UTF-8\"",
      "default": "detect"
    },
    "ignoreBOM": {
      "type": "boolean",
      "title": "Ignore BOM",
      "description": "Ignore Byte-Order Mark (BOM) if present and always use the configured character set. When set to false a valid BOM character set overrides the configured default character set.",
      "default": false
    },
    "delimiter": {
      "type": "string",
      "title": "Delimiter",
      "description": "Delimiter character between fields. Any single character, including an escaped character, is valid, e.g. , (comma), \\t (tab), or | (pipe). Default is comma if auto-detection is disabled",
      "minLength": 1
    },
    "quote": {
      "type": "string",
      "title": "Quote",
      "description": "Quote character, default is a double quote (\") if auto-detection is disabled",
      "maxLength": 1
    },
    "quoteEscape": {
      "type": "string",
      "title": "Quote escape",
      "description": "Quote escape character, default is a double quote (\") if auto-detection is disabled",
      "maxLength": 1
    },
    "autoDetect": {
      "type": "boolean",
      "title": "Auto-detect CSV Format",
      "description": "Attempt to guess the delimiter, quote, quote escape, and comment characters",
      "default": true
    },
    "trimWhitespace": {
      "type": "boolean",
      "title": "Trim whitespace",
      "description": "Trim off leading and trailing whitespace from columns, default true",
      "default": true
    },
    "hasHeaders": {
      "type": "boolean",
      "title": "Headers in file",
      "description": "Treat the first row as column headers, default true",
      "default": true
    },
    "headers": {
      "type": "array",
      "title": "Header list",
      "description": "List of column headers, overrides file headers if present",
      "items": {
        "type": "string"
      }
    },
    "skipEmptyLines": {
      "type": "boolean",
      "title": "Skip empty lines",
      "description": "Skip any empty lines encountered, default true",
      "default": true
    },
    "lineSeparator": {
      "type": "string",
      "title": "Line Separator",
      "description": "Line separator character",
      "minLength": 1
    },
    "nullValue": {
      "type": "string",
      "title": "Null value",
      "description": "A string value to replace nulls with, no default"
    },
    "emptyValue": {
      "type": "string",
      "title": "Empty string replacement",
      "description": "A string value to replace empty strings with, no default"
    },
    "includeRowNumber": {
      "type": "boolean",
      "title": "Include row number",
      "description": "Include the row number (line number) in the emitted documents, default true",
      "default": true
    },
    "comment": {
      "type": "string",
      "title": "Comment character",
      "description": "Character at start of row to indicate a comment, default is hash (#) if auto-detection is disabled",
      "maxLength": 1
    },
    "commentHandling": {
      "type": "string",
      "title": "Comment Handling",
      "description": "How to handle comments: ignore, add as field to next document, or add as a separate document, default ignore",
      "enum": ["ignore", "as_field", "as_document"],
      "default": "ignore"
    },
    "maxRowLength": {
      "type": "integer",
      "title": "Maximum line length",
      "description": "Maximum number of characters to allow for a single read line, default 10MB",
      "default": 10485760,
      "maximum": 2147483647,
      "exclusiveMaximum": false,
      "minimum": 0,
      "exclusiveMinimum": false
    },
    "maxNumColumns": {
      "type": "integer",
      "title": "Maximum number of columns",
      "description": "Maximum number of columns to allow for a single row, default 1000",
      "default": 1000,
      "maximum": 2147483647,
      "exclusiveMaximum": false,
      "minimum": 0,
      "exclusiveMinimum": false
    },
    "maxColumnChars": {
      "type": "integer",
      // NOTE(review): "number or characters" looks like a typo for "number of
      // characters". Left unchanged because the page prose refers to
      // parameters by their titles; confirm against the product UI label
      // before correcting.
      "title": "Maximum number or characters per column",
      "description": "Maximum number of characters a single column value can have, default 10MB",
      "default": 10485760,
      "maximum": 2147483647,
      "exclusiveMaximum": false,
      "minimum": 0,
      "exclusiveMinimum": false
    },
    "columnHandling": {
      "type": "string",
      "title": "Column mismatch handling",
      "description": "What to do when a row has too many or too few columns: Can throw an error, align the column, or do nothing special (default)",
      "enum": ["error", "align", "default"],
      "default": "default"
    },
    "fillValue": {
      "type": "string",
      "title": "Column fill value",
      "description": "A string value to use when aligning the columns (when Column Mismatch Handling is \"align\")",
      "default": "<FILL>"
    },
    "type": {
      "type": "string",
      "enum": ["csv"],
      "default": "csv"
    }
  },
  "additionalProperties": false,
  "category": "Other",
  "categoryPriority": 1,
  "unsafe": false
};

export const SchemaParamFields = ({schema}) => {
  const sanitize = str => {
    if (typeof str !== "string") return str;
    return str.replace(/^"(.*)"$/s, "$1").replace(/\\/g, "").replace(/"/g, "'");
  };
  const formatDescription = str => {
    const s = sanitize(str);
    return (/[.!?]\)*$/).test(s) ? s : `${s}.`;
  };
  const {description, properties = {}, required: requiredProps = []} = schema;
  const visibleProps = useMemo(() => Object.entries(properties).filter(([, prop]) => !prop.hints?.includes("hidden")), [properties]);
  return <div>
      {description && <p>{formatDescription(description)}</p>}

      {visibleProps.map(([name, prop]) => {
    const isRequired = requiredProps.includes(name);
    const hasDefault = prop.default !== undefined;
    const rawDefault = prop.default;
    const isComplexDefault = hasDefault && (typeof rawDefault === "object" || typeof rawDefault === "string" && (rawDefault.length > 20 || rawDefault.includes('"')));
    const fieldProps = {
      key: name,
      body: prop.title || name,
      type: prop.type,
      ...prop.title && ({
        post: [<><span className="text-stone-400 dark:text-stone-500">API property: </span>{name}</>]
      }),
      ...isRequired && ({
        required: true
      }),
      ...!isComplexDefault && hasDefault ? {
        default: sanitize(String(rawDefault))
      } : {}
    };
    const isObject = prop.type === "object" && prop.properties;
    const isArrayOfObjects = prop.type === "array" && prop.items?.type === "object" && prop.items.properties;
    return <ParamField {...fieldProps}>
            {prop.description && <p>{formatDescription(prop.description)}</p>}

            {isComplexDefault && <div className="flex">
                <p>
                  <strong>Default:</strong>
                </p>
                <pre className="!my-0">
                  <code>
                    {JSON.stringify(rawDefault, null, 2)}
                  </code>
                </pre>
              </div>}

            {isArrayOfObjects && <div className="flex">
              <p>
                <strong>Object attributes:</strong>
              </p>
              <pre className="!my-0">
                <code>
                  {'{\n'}
                  {Object.entries(prop.items.properties).map(([iname, iprop]) => <>
                      {`  ${iname}`}
                      {prop.items?.required?.includes(iname) && <span style={{
      color: 'red'
    }}> required</span>}
                      {`: {\n    display name: ${sanitize(iprop.title || '')}\n    type: ${iprop.type}\n  }\n`}
                    </>)}
                  {'}'}
                </code>
              </pre>
              </div>}

            {isObject && <Expandable title="properties">
                <SchemaParamFields schema={{
      properties: prop.properties,
      required: prop.required
    }} />
              </Expandable>}
          </ParamField>;
  })}
    </div>;
};

export const LwTemplate = ({title = "Key questions to get you started", icon = "sparkles", cta = "Powered by Agent Studio", linkHref = "https://lucidworks.com/demo/?utm_source=docs&utm_medium=referral&utm_campaign=docs_cta_ai"}) => {
  const [isLoaded, setIsLoaded] = useState(false);
  useEffect(() => {
    const timer = setTimeout(() => {
      setIsLoaded(true);
    }, 500);
    return () => clearTimeout(timer);
  }, []);
  return <div className="lw-template-container">
      <Card title={title} icon={icon}>
        {isLoaded && <span dangerouslySetInnerHTML={{
    __html: `<lw-template id="a029c1a9-28be-427e-b0e1-5d918920246a"></lw-template
            >`
  }} />}
        <Link href={linkHref} className="agent-studio-link text-left text-gray-600 gap-2 dark:text-gray-400 text-sm font-medium flex flex-row items-center hover:text-primary dark:hover:text-primary-light group-hover:text-primary group-hover:dark:text-primary-light">Powered by Lucidworks Agent Studio</Link>
      </Card>
    </div>;
};

[localhost link]: http://localhost:3000/docs/5/fusion/reference/config-ref/parser-stages/csv-parser

[mintlify link]: https://doc.lucidworks.com/docs/5/fusion/reference/config-ref/parser-stages/csv-parser

[old doc.lw link]: https://doc.lucidworks.com/fusion/5.9/375

This parser breaks down incoming CSV files into the most efficient components for Fusion to index.
It produces one new document per row from the CSV input, excluding comment rows and header rows.

<LwTemplate />

If your CSV file contains a column named `id`, this column is consumed to populate the document's unique identifier (Solr's `uniqueKey`) and is not available as a stored field.

This behavior occurs because the default value of the parser's **Document ID Source Field** parameter is also `id`. When a CSV column matches this parameter:

* The column's value is used to generate the document ID.
* The column does not appear in the indexed document as a field.

If you need to preserve your `id` column data as a regular field, use one of these options:

* Change the column header from `id` to another name such as `record_id` or `item_id`. This is the simplest solution.
* In the CSV parser stage configuration, set the **Prefix parsed fields with** parameter to a value such as `csv_`. This makes the `id` column appear as `csv_id` in your indexed documents.
* In the Index Workbench's parser configuration, set the **Document ID Source Field** to a different column name. This allows `id` to be treated as a normal field, but you must specify a different column to use as the document identifier.

See [Parsers Overview](/docs/5/fusion/reference/config-ref/parser-stages/overview#configure-a-parser-in-index-parsers) for information about configuring the **Document ID Source Field** parameter.

<Tip>
  When entering configuration values in the UI, use *unescaped* characters, such as `\t` for the tab character. When entering configuration values in the API, use *escaped* characters, such as `\\t` for the tab character.
</Tip>

<SchemaParamFields schema={schema} />
