> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Text Processing Index Stage

export const schema = {
  // JSON-Schema-style description of the Text Processing index stage configuration.
  // It is passed to <SchemaParamFields /> further down this page, which renders one
  // parameter entry per key under "properties".
  "type": "object",
  "title": "Text Processing",
  "description": "This stage processes text according to the transformations applied per field",
  "properties": {
    // Settings carrying the "advanced" hint: skip, label, and condition.
    "skip": {
      "type": "boolean",
      "title": "Skip This Stage",
      "description": "Set to true to skip this stage.",
      "default": false,
      "hints": ["advanced"]
    },
    "label": {
      "type": "string",
      "title": "Label",
      "description": "A unique label for this stage.",
      "hints": ["advanced"],
      "maxLength": 255
    },
    "condition": {
      "type": "string",
      "title": "Condition",
      "description": "Define a conditional script that must result in true or false. This can be used to determine if the stage should process or not.",
      "hints": ["code", "code/javascript", "advanced"]
    },
    // Stage-specific setting: a list of source -> transformation -> target rules.
    "rules": {
      "type": "array",
      "title": "Transformation Rules",
      "description": "The rules follow this pattern :(source as a regexp field matcher - the pattern can match multiple fields, the results will be concatenated) -> Transformation -> (target as a single field name); Some transformations can accept arguments. If multiple rules point to the same target, then each subsequent rule will use the output from the preceding one instead of using its configured source.",
      "items": {
        "type": "object",
        "required": ["source", "target", "transformation"],
        "properties": {
          "source": {
            "type": "string",
            "title": "Source Field",
            "format": "regex"
          },
          "target": {
            "type": "string",
            "title": "Target Field"
          },
          "transformation": {
            "type": "string",
            "title": "Transformation",
            "description": "Choose transformation to apply. Available options: FIRST_N_CHARACTERS - leave only the first N characters of the text. Requires one argument - the N as an integer, LAST_N_CHARACTERS - similar, but leaves the last N characters instead of the first, COLLAPSE_WHITESPACE - replace groups of whitespace characters (space, newline, etc.) with just a single one, LOWERCASE - lowercase the text, UPPERCASE - uppercase the text, FILTER_NON_ASCII - replace characters that are not present in ASCII set with their ASCII equivalent or remove them if they do not have an equivalent, URL_DECODE - decode URL encoded characters to plain text.",
            "enum": ["first_n_characters", "last_n_characters", "collapse_whitespace", "lowercase", "uppercase", "filter_non_ascii", "url_decode"],
            "default": "lowercase",
            "hints": ["advanced"]
          },
          "transformationConfig": {
            "type": "string",
            "title": "Transformation config",
            // The example names the transformation exactly as the "transformation"
            // description above does (FIRST_N_CHARACTERS), so readers can match them up.
            "description": "Add necessary parameters for transformation. For example, FIRST_N_CHARACTERS needs to have the number of characters configured",
            "hints": ["advanced"]
          }
        }
      }
    }
  },
  "category": "Field Transformation",
  "categoryPriority": 7,
  "unsafe": false
};

export const SchemaParamFields = ({schema}) => {
  const sanitize = str => {
    if (typeof str !== "string") return str;
    return str.replace(/^"(.*)"$/s, "$1").replace(/\\/g, "").replace(/"/g, "'");
  };
  const formatDescription = str => {
    const s = sanitize(str);
    return (/[.!?]\)*$/).test(s) ? s : `${s}.`;
  };
  const {description, properties = {}, required: requiredProps = []} = schema;
  const visibleProps = useMemo(() => Object.entries(properties).filter(([, prop]) => !prop.hints?.includes("hidden")), [properties]);
  return <div>
      {description && <p>{formatDescription(description)}</p>}

      {visibleProps.map(([name, prop]) => {
    const isRequired = requiredProps.includes(name);
    const hasDefault = prop.default !== undefined;
    const rawDefault = prop.default;
    const isComplexDefault = hasDefault && (typeof rawDefault === "object" || typeof rawDefault === "string" && (rawDefault.length > 20 || rawDefault.includes('"')));
    const fieldProps = {
      key: name,
      body: prop.title || name,
      type: prop.type,
      ...prop.title && ({
        post: [<><span className="text-stone-400 dark:text-stone-500">API property: </span>{name}</>]
      }),
      ...isRequired && ({
        required: true
      }),
      ...!isComplexDefault && hasDefault ? {
        default: sanitize(String(rawDefault))
      } : {}
    };
    const isObject = prop.type === "object" && prop.properties;
    const isArrayOfObjects = prop.type === "array" && prop.items?.type === "object" && prop.items.properties;
    return <ParamField {...fieldProps}>
            {prop.description && <p>{formatDescription(prop.description)}</p>}

            {isComplexDefault && <div className="flex">
                <p>
                  <strong>Default:</strong>
                </p>
                <pre className="!my-0">
                  <code>
                    {JSON.stringify(rawDefault, null, 2)}
                  </code>
                </pre>
              </div>}

            {isArrayOfObjects && <div className="flex">
              <p>
                <strong>Object attributes:</strong>
              </p>
              <pre className="!my-0">
                <code>
                  {'{\n'}
                  {Object.entries(prop.items.properties).map(([iname, iprop]) => <>
                      {`  ${iname}`}
                      {prop.items?.required?.includes(iname) && <span style={{
      color: 'red'
    }}> required</span>}
                      {`: {\n    display name: ${sanitize(iprop.title || '')}\n    type: ${iprop.type}\n  }\n`}
                    </>)}
                  {'}'}
                </code>
              </pre>
              </div>}

            {isObject && <Expandable title="properties">
                <SchemaParamFields schema={{
      properties: prop.properties,
      required: prop.required
    }} />
              </Expandable>}
          </ParamField>;
  })}
    </div>;
};

export const LwTemplate = ({title = "Key questions to get you started", icon = "sparkles", cta = "Powered by Agent Studio", linkHref = "https://lucidworks.com/demo/?utm_source=docs&utm_medium=referral&utm_campaign=docs_cta_ai"}) => {
  const [isLoaded, setIsLoaded] = useState(false);
  useEffect(() => {
    const timer = setTimeout(() => {
      setIsLoaded(true);
    }, 500);
    return () => clearTimeout(timer);
  }, []);
  return <div className="lw-template-container">
      <Card title={title} icon={icon}>
        {isLoaded && <span dangerouslySetInnerHTML={{
    __html: `<lw-template id="a029c1a9-28be-427e-b0e1-5d918920246a"></lw-template
            >`
  }} />}
        <Link href={linkHref} className="agent-studio-link text-left text-gray-600 gap-2 dark:text-gray-400 text-sm font-medium flex flex-row items-center hover:text-primary dark:hover:text-primary-light group-hover:text-primary group-hover:dark:text-primary-light">Powered by Lucidworks Agent Studio</Link>
      </Card>
    </div>;
};

[localhost link]: http://localhost:3000/docs/5/fusion/reference/config-ref/pipeline-stages/index-stages/text-processing-index-stage

[mintlify link]: https://doc.lucidworks.com/docs/5/fusion/reference/config-ref/pipeline-stages/index-stages/text-processing-index-stage

[old doc.lw link]: https://doc.lucidworks.com/fusion/5.9/uyh1d1

The Text Processing index stage applies simple text transformations to document fields, according to the transformation rules configured for each field.

<LwTemplate />

<Note>
  This stage requires the Tika service in order to be deployed inside the cluster. It will only be deployed inside the cluster *by default* in environments where **Transport Layer Security** has been enabled.
</Note>

<SchemaParamFields schema={schema} />
