> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Date Parsing Index Stage

export const schema = {
  // Configuration schema for the Date Parsing index stage.
  // SchemaParamFields iterates `properties` with Object.entries, so the
  // declaration order below is the order in which fields are rendered.
  type: "object",
  title: "Date Parsing",
  description: "This stage converts dates from any arbitrary input format into Solr's date format",
  properties: {
    // --- Settings shared in shape with other stages (skip / label / condition) ---
    skip: {
      type: "boolean",
      title: "Skip This Stage",
      description: "Set to true to skip this stage.",
      default: false,
      hints: ["advanced"]
    },
    label: {
      type: "string",
      title: "Label",
      description: "A unique label for this stage.",
      hints: ["advanced"],
      maxLength: 255
    },
    condition: {
      type: "string",
      title: "Condition",
      description: "Define a conditional script that must result in true or false. This can be used to determine if the stage should process or not.",
      hints: ["code", "code/javascript", "advanced"]
    },

    // --- Parsing strictness ---
    requireTimezone: {
      type: "boolean",
      title: "Require Timezone",
      description: "Accept only formats that explicitly specify the timezone",
      default: false
    },
    ignoreInvalid: {
      type: "boolean",
      title: "Ignore Invalid",
      description: "When false invalid date strings will cause the whole document to be rejected. When true invalid values are silently discarded",
      default: false
    },

    // --- Timestamp splitting ---
    splitLocal: {
      type: "boolean",
      title: "Split Local Date Into Parts",
      description: "Split local date (in the local timezone) into parts and store in <part>.local.<sourceField> fields",
      default: false,
      hints: ["advanced"]
    },
    splitUTC: {
      type: "boolean",
      title: "Split UTC Date Into Parts",
      description: "Split UTC date (in the UTC timezone) into parts and store in <part>.utc.<sourceField> fields",
      default: false,
      hints: ["advanced"]
    },

    // --- Inputs and formats ---
    sourceFields: {
      type: "array",
      title: "Source Fields",
      items: {type: "string"}
    },
    dateFormats: {
      type: "array",
      title: "Date Formats",
      description: "Custom date formats, or empty for default formats",
      items: {type: "string"}
    },

    // --- Fallbacks used when the incoming value does not carry the information ---
    defaultTimezone: {
      type: "string",
      title: "Default Timezone",
      description: "Timezone to assume if one is not present in the incoming date"
    },
    defaultLocale: {
      type: "string",
      title: "Default Locale",
      description: "Locale to assume if different from Locale.ENGLISH. This uses IETF BCP 47 codes."
    }
  },
  category: "Field Transformation",
  categoryPriority: 7,
  unsafe: false
};

export const SchemaParamFields = ({schema}) => {
  const sanitize = str => {
    if (typeof str !== "string") return str;
    return str.replace(/^"(.*)"$/s, "$1").replace(/\\/g, "").replace(/"/g, "'");
  };
  const formatDescription = str => {
    const s = sanitize(str);
    return (/[.!?]\)*$/).test(s) ? s : `${s}.`;
  };
  const {description, properties = {}, required: requiredProps = []} = schema;
  const visibleProps = useMemo(() => Object.entries(properties).filter(([, prop]) => !prop.hints?.includes("hidden")), [properties]);
  return <div>
      {description && <p>{formatDescription(description)}</p>}

      {visibleProps.map(([name, prop]) => {
    const isRequired = requiredProps.includes(name);
    const hasDefault = prop.default !== undefined;
    const rawDefault = prop.default;
    const isComplexDefault = hasDefault && (typeof rawDefault === "object" || typeof rawDefault === "string" && (rawDefault.length > 20 || rawDefault.includes('"')));
    const fieldProps = {
      key: name,
      body: prop.title || name,
      type: prop.type,
      ...prop.title && ({
        post: [<><span className="text-stone-400 dark:text-stone-500">API property: </span>{name}</>]
      }),
      ...isRequired && ({
        required: true
      }),
      ...!isComplexDefault && hasDefault ? {
        default: sanitize(String(rawDefault))
      } : {}
    };
    const isObject = prop.type === "object" && prop.properties;
    const isArrayOfObjects = prop.type === "array" && prop.items?.type === "object" && prop.items.properties;
    return <ParamField {...fieldProps}>
            {prop.description && <p>{formatDescription(prop.description)}</p>}

            {isComplexDefault && <div className="flex">
                <p>
                  <strong>Default:</strong>
                </p>
                <pre className="!my-0">
                  <code>
                    {JSON.stringify(rawDefault, null, 2)}
                  </code>
                </pre>
              </div>}

            {isArrayOfObjects && <div className="flex">
              <p>
                <strong>Object attributes:</strong>
              </p>
              <pre className="!my-0">
                <code>
                  {'{\n'}
                  {Object.entries(prop.items.properties).map(([iname, iprop]) => <>
                      {`  ${iname}`}
                      {prop.items?.required?.includes(iname) && <span style={{
      color: 'red'
    }}> required</span>}
                      {`: {\n    display name: ${sanitize(iprop.title || '')}\n    type: ${iprop.type}\n  }\n`}
                    </>)}
                  {'}'}
                </code>
              </pre>
              </div>}

            {isObject && <Expandable title="properties">
                <SchemaParamFields schema={{
      properties: prop.properties,
      required: prop.required
    }} />
              </Expandable>}
          </ParamField>;
  })}
    </div>;
};

export const LwTemplate = ({title = "Key questions to get you started", icon = "sparkles", cta = "Powered by Agent Studio", linkHref = "https://lucidworks.com/demo/?utm_source=docs&utm_medium=referral&utm_campaign=docs_cta_ai"}) => {
  const [isLoaded, setIsLoaded] = useState(false);
  useEffect(() => {
    const timer = setTimeout(() => {
      setIsLoaded(true);
    }, 500);
    return () => clearTimeout(timer);
  }, []);
  return <div className="lw-template-container">
      <Card title={title} icon={icon}>
        {isLoaded && <span dangerouslySetInnerHTML={{
    __html: `<lw-template id="a029c1a9-28be-427e-b0e1-5d918920246a"></lw-template
            >`
  }} />}
        <Link href={linkHref} className="agent-studio-link text-left text-gray-600 gap-2 dark:text-gray-400 text-sm font-medium flex flex-row items-center hover:text-primary dark:hover:text-primary-light group-hover:text-primary group-hover:dark:text-primary-light">Powered by Lucidworks Agent Studio</Link>
      </Card>
    </div>;
};

[localhost link]: http://localhost:3000/docs/5/fusion/reference/config-ref/pipeline-stages/index-stages/date-parsing-index-stage

[mintlify link]: https://doc.lucidworks.com/docs/5/fusion/reference/config-ref/pipeline-stages/index-stages/date-parsing-index-stage

[old doc.lw link]: https://doc.lucidworks.com/fusion/5.9/241

The Date Parsing Stage (previously called the Date Parser stage) is an index pipeline stage that uses the Fusion DateUtils library to parse and normalize date/time data in document fields. The resulting date/time information is available both as a timestamp in the UTC time zone and as a local date/time in the original local time zone.

The time zone name, offset and the epoch time are stored in separate fields, too. Additionally the formatted dates can be split into their components, and each component added to separate document fields.

Note that this stage works only with data that consists solely of the date/time information, i.e. it will not work correctly if dates are a part of a larger piece of text.

<LwTemplate />

## Timestamp splitting options

Splitting options help in processing timestamp information without resorting to scripting - e.g. in order to index day of week information it is more convenient and faster to split the timestamp in this stage, and then just discard other components that are not needed (using a field mapping stage), rather than using a JavaScript stage to parse and split the timestamp manually.

Please note that time zone name and time zone offset, as well as epoch time, are always added as separate fields regardless of the splitting options.
E.g. for a field named `test` these values will be added as fields `tz.test`, `tz_offset.test`, and `epoch.test`.

The option `splitLocal` splits the timestamp in its original timezone, while the option `splitUTC` first converts the timestamp to UTC and then splits it. The resulting date and time components are stored in fields that follow patterns \<part>.local.\<fieldName> and \<part>.utc.\<fieldName> respectively.

The following parts are extracted and added to the document:

* **year.** year component
* **month.** month in year, from 1 to 12
* **day.** day in month, from 1 to 31
* **yday.** day in year, from 1 to 366
* **weekday.** day of week, 1 being Monday and 7 being Sunday
* **week.** week in year, from 1 to 53. Note: in the standard ISO 8601 week algorithm, the first week of the year is the one in which at least 4 days are in the year. As a result of this definition, day 1 of the first week may be in the previous year, which will be indicated by weekyear. The opposite is also true - the last day of the last week may be in the next year, and weekyear will show the next year.
* **weekyear.** year corresponding to the week value. This can be either the current year or previous one, or the next one.
* **hour.** hour in day, from 0 to 23
* **min.** minute in hour, from 0 to 59
* **sec.** second in minute, from 0 to 59
* **ms.** millisecond in second, from 0 to 999

Example: given this normalized timestamp in the original timezone `2015-01-01 00:00:00.000 Europe/Warsaw` in a field `test`, the corresponding normalized UTC timestamp will be `2014-12-31T23:00:00.00Z`.

### Example: `splitLocal` parsing

The following table shows the additional fields added to a document as the result of applying `splitLocal` parsing to the contents of a field named `test` which contains the value `2015-01-01 00:00:00.000 Europe/Warsaw`:

| Field name      | value         |
| --------------- | ------------- |
| tz.test         | Europe/Warsaw |
| tz\_offset.test | +01:00        |
| epoch.test      | 1420066800000 |

### Example: `splitUTC` parsing

The following table shows the additional fields added to a document as the result of applying `splitUTC` parsing to the contents of a field named `test` which contains the value `2015-01-01 00:00:00.000 Europe/Warsaw`:

| Field name          | value         |
| ------------------- | ------------- |
| tz.test             | Europe/Warsaw |
| tz\_offset.test     | +01:00        |
| epoch.test          | 1420066800000 |
| year.utc.test       | 2014          |
| year.local.test     | 2015          |
| month.utc.test      | 12            |
| month.local.test    | 1             |
| day.utc.test        | 31            |
| day.local.test      | 1             |
| yday.utc.test       | 365           |
| yday.local.test     | 1             |
| weekday.utc.test    | 3             |
| weekday.local.test  | 4             |
| week.utc.test       | 1             |
| week.local.test     | 1             |
| weekyear.utc.test   | 2015          |
| weekyear.local.test | 2015          |
| hour.utc.test       | 23            |
| hour.local.test     | 0             |
| min.utc.test        | 0             |
| min.local.test      | 0             |
| sec.utc.test        | 0             |
| sec.local.test      | 0             |
| ms.utc.test         | 0             |
| ms.local.test       | 0             |

Note the following:

* weekday is different. UTC day of week is Wednesday, and local day of week is already Thursday.
* yday in UTC points to the last day of the year, while it is the first day of the year in local time zone, similarly with day.
* week and weekyear are the same in both cases, because according to the ISO 8601 definition all days of this week belong to year 2015, so it does not matter whether it is Wednesday or Thursday.

## Configuration

### Adding a New Format

If no new format is added, a default list of formats will be used. The default formats are listed in the table below:

| Pattern                           | Notes                                        |
| --------------------------------- | -------------------------------------------- |
| `EEE yyyy-MM-dd HH:mm:ss.SSS zzz` |                                              |
| `yyyy-MM-dd'T'HH:mm:ss.SSSZ`      | With numeric +-HHmm timezone at the end      |
| `yyyy-MM-dd'T'HH:mm:ss.SSSZZ`     | With numeric +-HH:mm timezone at the end     |
| `yyyy-MM-dd'T'HH:mm:ss.SSSz`      | With symbolic XXX timezone at the end        |
| `yyyy-MM-dd'T'HH:mm:ssz`          | With symbolic XXX timezone at the end        |
| `yyyy-MM-dd'T'HH:mm:ssZ`          | With offset                                  |
| `EEE MMM d HH:mm:ss z yyyy`       |                                              |
| `EEE MMM d HH:mm:ss Z yyyy`       |                                              |
| `EEE MMM d HH:mm:ss z yyyy`       |                                              |
| `EEE MMM d HH:mm:ss.SSS z yyyy`   |                                              |
| `EEE, dd MMM yyyy HH:mm:ss zzz`   |                                              |
| `EEEE, dd-MMM-yy HH:mm:ss zzz`    |                                              |
| `yyyy-MM-dd HH:mm:ss Z`           |                                              |
| `yyyy-MM-dd HH:mm:ss ZZ`          |                                              |
| `yyyy-MM-dd HH:mm:ss z`           |                                              |
| `yyyy-MM-dd HH:mm:ss.SSS Z`       |                                              |
| `yyyy-MM-dd HH:mm:ss.SSS ZZ`      |                                              |
| `yyyy-MM-dd HH:mm:ss.SSS z`       |                                              |
| `yyyy-MM-dd HH:mm:ss zzz`         | With full time zone (e.g. America/New\_York) |
| `yyyy-MM-dd'T'HH:mm:ss'GMT'Z`     | With literal "GMT" and offset                |
| `yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'Z` | With literal "GMT" and offset                |
| `yyyy-MM-dd'T'HH:mm:ss'UTC'Z`     | With literal "UTC" and offset                |
| `yyyy-MM-dd'T'HH:mm:ss.SSS'UTC'Z` | With literal "UTC" and offset                |
| `yyyy-MM-dd HH:mm:ss 'UTC'Z`      |                                              |
| `yyyy-MM-dd HH:mm:ss.SSS 'UTC'Z`  |                                              |
| `yyyy-MM-dd HH:mm:ss 'GMT'Z`      |                                              |
| `yyyy-MM-dd HH:mm:ss.SSS 'GMT'Z`  |                                              |
| `dd-MM-yyyy HH:mm:ss zzz`         |                                              |

The accepted symbols for constructing a new format are listed below:

| Symbol | Meaning                      | Example                                |
| ------ | ---------------------------- | -------------------------------------- |
| `G`    | era                          | `AD`                                   |
| `C`    | century of era (>=0)         | `20`                                   |
| `Y`    | year of era (>=0)            | `1996`                                 |
| `x`    | weekyear                     | `1996`                                 |
| `w`    | week of weekyear             | `27`                                   |
| `e`    | day of week                  | `2`                                    |
| `E`    | day of week                  | `Tuesday` `Tue`                        |
| `y`    | year                         | `1996`                                 |
| `D`    | day of year                  | `189`                                  |
| `M`    | month of year                | `July` `Jul` `07`                      |
| `d`    | day of month                 | `10`                                   |
| `a`    | halfday of day               | `PM`                                   |
| `K`    | hour of halfday (0\~11)      | `0`                                    |
| `h`    | clockhour of halfday (1\~12) | `12`                                   |
| `H`    | hour of day (0\~23)          | `0`                                    |
| `k`    | clockhour of day (1\~24)     | `24`                                   |
| `m`    | minute of hour               | `30`                                   |
| `s`    | second of minute             | `55`                                   |
| `S`    | fraction of second           | `978`                                  |
| `z`    | time zone                    | `Pacific Standard Time`; `PST`         |
| `Z`    | time zone offset/id          | `-0800` `-08:00` `America/Los_Angeles` |
| `'`    | escape for text              |                                        |
| `''`   | single quote                 | `'`                                    |

The number of symbols used determines the format of the output.

For text output, if 4 or more symbols are used, the full form is used. `EEEE` would output `Tuesday`, while `E` outputs `Tue`.

For numeric outputs, the number of symbols used is the same as the minimum number of digits used for representation.

If the number is smaller, then it will be padded with zeroes. `HH` would be `09`, while `H` is `9`.

For the year, the representation for year and weekyear is different. For example, if the count of `y` is two, the year would be the two digit, zero-based year of the century.

For the month, if there are three or more symbols, then the text is used. For example, `MM` would output `03`, `MMM` outputs `Mar`, and `MMMM` outputs `March`.

For the zone, `Z` outputs the offset without a colon, `ZZ` outputs the offset with a colon, and `ZZZ` or more outputs the zone id.

For the zone name, time zone names `'z'` cannot be parsed.

Any characters in the pattern that are not in the ranges `['a'..'z']` and `['A'..'Z']` will be treated like quoted text. So, characters like `','` or `'-'` will appear in the output time text even if they are not wrapped in single quotes.

A new configuration can be made using a combination of the symbols and regular characters. For example, the pattern `E MM/dd/yyyy HH:mm:ss.SSS` would output the formatted date `Mon 01/16/2017 10:53:32.939`.

<Tip>
  When entering configuration values in the UI, use *unescaped* characters, such as `\t` for the tab character. When entering configuration values in the API, use *escaped* characters, such as `\\t` for the tab character.
</Tip>

<SchemaParamFields schema={schema} />
