> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# NLP Annotator Query Stage

export const schema = {
  // JSON-schema-style description of the NLP Annotator query stage configuration.
  // Passed to <SchemaParamFields /> at the bottom of this page to render the parameter list.
  type: "object",
  title: "NLP Annotator",
  description: "Annotate a query using NLP",
  required: ["annotatorType"],
  properties: {
    // --- Stage-level settings carrying the "advanced" hint ---
    skip: {
      type: "boolean",
      title: "Skip This Stage",
      description: "Set to true to skip this stage.",
      default: false,
      hints: ["advanced"]
    },
    label: {
      type: "string",
      title: "Label",
      description: "A unique label for this stage.",
      hints: ["advanced"],
      maxLength: 255
    },
    condition: {
      type: "string",
      title: "Condition",
      description: "Define a conditional script that must result in true or false. This can be used to determine if the stage should process or not.",
      hints: ["code", "code/javascript", "advanced"]
    },
    // --- Annotator selection and model locations ---
    annotatorType: {
      type: "string",
      title: "Annotator Type",
      enum: ["sparknlp", "opennlp"],
      default: "sparknlp"
    },
    sparknlpNERModel: {
      type: "string",
      title: "Spark NLP NER Model",
      description: "If Spark NLP annotator is used, specify the blobstore location of the NER model",
      minLength: 1,
      reference: "blob",
      blobType: "model:spark-nlp"
    },
    sparknlpPOSModel: {
      type: "string",
      title: "Spark NLP POS Model",
      description: "If Spark NLP annotator is used, specify the blobstore location of the POS model",
      minLength: 1,
      reference: "blob",
      blobType: "model:spark-nlp"
    },
    failOnError: {
      type: "boolean",
      title: "Fail on Error",
      description: "Flag to indicate if this stage should throw an exception if an error occurs while generating a prediction for a document.",
      default: false
    },
    // The default is a JSON document serialized as a string; it carries the "hidden" hint.
    config: {
      type: "string",
      title: "Model Configuration",
      description: "Advanced configuration for NLP implementations",
      default: "{\n  \"opennlp\": {\n    \"posModelLocation\": \"nlp/models/en-pos-maxent.bin\",\n    \"chunkerModelLocation\": \"nlp/models/en-chunker.bin\",\n    \"tokenizerModelLocation\": \"nlp/models/en-token.bin\",\n    \"sentenceModelLocation\": \"nlp/models/en-sent.bin\",\n    \"tokenNameFinderModelLocations\": {\n        \"MONEY\": \"nlp/models/en-ner-money.bin\",\n        \"PERCENT\": \"nlp/models/en-ner-percentage.bin\",\n        \"PERSON\": \"nlp/models/en-ner-person.bin\",\n        \"LOCATION\": \"nlp/models/en-ner-location.bin\",\n        \"TIME\": \"nlp/models/en-ner-time.bin\",\n        \"DATE\": \"nlp/models/en-ner-date.bin\",\n        \"ORG\": \"nlp/models/en-ner-organization.bin\"\n    }\n  }\n}\n",
      hints: ["code", "hidden"]
    },
    // --- Input parameter and extraction rules ---
    inputParam: {
      type: "string",
      title: "Input parameter to annotate",
      description: "Name of the request parameter to annotate using NLP",
      default: "q",
      minLength: 1
    },
    extractorRules: {
      type: "array",
      title: "Extractor Rules",
      description: "Define rules to extract annotated text into separate parameters",
      items: {
        type: "object",
        required: ["extractedAnnotationType", "labelPattern", "targetParamName"],
        properties: {
          extractedAnnotationType: {
            type: "string",
            title: "Annotation Type to Extract",
            enum: ["sentence", "named_entity", "part_of_speech", "noun_chunk"],
            default: "named_entity"
          },
          labelPattern: {
            type: "string",
            title: "Label Pattern",
            description: "Extract all annotations with labels that match this regular expression",
            default: ".*",
            format: "regex"
          },
          targetParamName: {
            type: "string",
            title: "Target Parameter Name"
          },
          flattenList: {
            type: "boolean",
            title: "Flatten Multiple",
            description: "Flatten multiple values into a single value by joining on space"
          }
        }
      }
    }
  },
  category: "Advanced",
  categoryPriority: 2,
  unsafe: false
};

export const SchemaParamFields = ({schema}) => {
  const sanitize = str => {
    if (typeof str !== "string") return str;
    return str.replace(/^"(.*)"$/s, "$1").replace(/\\/g, "").replace(/"/g, "'");
  };
  const formatDescription = str => {
    const s = sanitize(str);
    return (/[.!?]\)*$/).test(s) ? s : `${s}.`;
  };
  const {description, properties = {}, required: requiredProps = []} = schema;
  const visibleProps = useMemo(() => Object.entries(properties).filter(([, prop]) => !prop.hints?.includes("hidden")), [properties]);
  return <div>
      {description && <p>{formatDescription(description)}</p>}

      {visibleProps.map(([name, prop]) => {
    const isRequired = requiredProps.includes(name);
    const hasDefault = prop.default !== undefined;
    const rawDefault = prop.default;
    const isComplexDefault = hasDefault && (typeof rawDefault === "object" || typeof rawDefault === "string" && (rawDefault.length > 20 || rawDefault.includes('"')));
    const fieldProps = {
      key: name,
      body: prop.title || name,
      type: prop.type,
      ...prop.title && ({
        post: [<><span className="text-stone-400 dark:text-stone-500">API property: </span>{name}</>]
      }),
      ...isRequired && ({
        required: true
      }),
      ...!isComplexDefault && hasDefault ? {
        default: sanitize(String(rawDefault))
      } : {}
    };
    const isObject = prop.type === "object" && prop.properties;
    const isArrayOfObjects = prop.type === "array" && prop.items?.type === "object" && prop.items.properties;
    return <ParamField {...fieldProps}>
            {prop.description && <p>{formatDescription(prop.description)}</p>}

            {isComplexDefault && <div className="flex">
                <p>
                  <strong>Default:</strong>
                </p>
                <pre className="!my-0">
                  <code>
                    {JSON.stringify(rawDefault, null, 2)}
                  </code>
                </pre>
              </div>}

            {isArrayOfObjects && <div className="flex">
              <p>
                <strong>Object attributes:</strong>
              </p>
              <pre className="!my-0">
                <code>
                  {'{\n'}
                  {Object.entries(prop.items.properties).map(([iname, iprop]) => <>
                      {`  ${iname}`}
                      {prop.items?.required?.includes(iname) && <span style={{
      color: 'red'
    }}> required</span>}
                      {`: {\n    display name: ${sanitize(iprop.title || '')}\n    type: ${iprop.type}\n  }\n`}
                    </>)}
                  {'}'}
                </code>
              </pre>
              </div>}

            {isObject && <Expandable title="properties">
                <SchemaParamFields schema={{
      properties: prop.properties,
      required: prop.required
    }} />
              </Expandable>}
          </ParamField>;
  })}
    </div>;
};

export const LwTemplate = ({title = "Key questions to get you started", icon = "sparkles", cta = "Powered by Agent Studio", linkHref = "https://lucidworks.com/demo/?utm_source=docs&utm_medium=referral&utm_campaign=docs_cta_ai"}) => {
  const [isLoaded, setIsLoaded] = useState(false);
  useEffect(() => {
    const timer = setTimeout(() => {
      setIsLoaded(true);
    }, 500);
    return () => clearTimeout(timer);
  }, []);
  return <div className="lw-template-container">
      <Card title={title} icon={icon}>
        {isLoaded && <span dangerouslySetInnerHTML={{
    __html: `<lw-template id="a029c1a9-28be-427e-b0e1-5d918920246a"></lw-template
            >`
  }} />}
        <Link href={linkHref} className="agent-studio-link text-left text-gray-600 gap-2 dark:text-gray-400 text-sm font-medium flex flex-row items-center hover:text-primary dark:hover:text-primary-light group-hover:text-primary group-hover:dark:text-primary-light">Powered by Lucidworks Agent Studio</Link>
      </Card>
    </div>;
};

[localhost link]: http://localhost:3000/docs/4/fusion-ai/reference/query-pipeline-stages/nlp-annotator-query-stage

[mintlify link]: https://doc.lucidworks.com/docs/4/fusion-ai/reference/query-pipeline-stages/nlp-annotator-query-stage

[old doc.lw link]: https://doc.lucidworks.com/fusion/5.9/523

Like the [NLP Annotator index stage](/docs/4/fusion-ai/reference/index-pipeline-stages/nlp-annotator-index-stage), the NLP Annotator query stage can be included in a query pipeline to perform [Natural Language Processing](/docs/4/fusion-ai/concepts/nlp) tasks.

The following example shows how to use the NLP Annotator query stage:

1. Add the NLP Annotator query stage to the query pipeline.
2. Configure the query pipeline stage:
   1. Specify the model to use by entering the model's ID as stored in the blob store.
   2. Specify the input parameter, label pattern and target parameter fields:
      <Frame>
        <img src="https://mintcdn.com/lucidworks/1R8QVvJzt46cZDT6/assets/images/4.2/nlp_annotator_query_config.png?fit=max&auto=format&n=1R8QVvJzt46cZDT6&q=85&s=b21a7c6711bc46ab17f915a303c59d6a" alt="query pipeline configuration" width="1512" height="570" data-path="assets/images/4.2/nlp_annotator_query_config.png" />
      </Frame>
      1. **input parameter field:** the Fusion query parameter text, normally `q` since we want to annotate the raw query string to understand the intent.
      2. **label pattern:** a regular expression that matches the NER/POS labels. For example, `PER.` will match extracted named entities with the label `PERSON`, while `NN.` will match tagged nouns.
      3. **target parameter field:** the name of the query parameter that receives the extracted or tagged text.
      4. For the query stage, the result is placed in a new query parameter:
         <img src="https://mintcdn.com/lucidworks/1R8QVvJzt46cZDT6/assets/images/4.2/nlp_annotator_query_result.png?fit=max&auto=format&n=1R8QVvJzt46cZDT6&q=85&s=de07f316a8b421fe6150ef0adb59e20d" alt="NLP annotator query result" width="674" height="356" data-path="assets/images/4.2/nlp_annotator_query_result.png" />

<LwTemplate />

## Configuration

<Tip>
  When entering configuration values in the UI, use *unescaped* characters, such as `\t` for the tab character. When entering configuration values in the API, use *escaped* characters, such as `\\t` for the tab character.
</Tip>

<SchemaParamFields schema={schema} />
