> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Chunking Neural Hybrid Query Stage

export const schema = {
  // JSON-Schema-style description of the Chunking Neural Hybrid Query stage
  // configuration. It is rendered on this page by <SchemaParamFields schema={schema} />.
  "type": "object",
  "title": "Chunking Neural Hybrid Query",
  "description": "Hybrid of Multi-Vector and Lexical Search. Will be skipped if query string is blank or wildcard (* or *:*).  Note this will not work well if the incoming q parameter is a Solr query parser string (e.g. field_t:foo) rather than a raw user query string.  Note: The resulting query will ALWAYS be written to <request.params.q>. ",
  "required": ["lexicalQuery", "lexicalWeight", "lexicalSquash", "vectorQueryField", "vector", "vectorWeight", "minReturnSim", "minTraverseSim"],
  "properties": {
    // Settings tagged with the "advanced" hint.
    "skip": {
      "type": "boolean",
      "title": "Skip This Stage",
      "description": "Set to true to skip this stage.",
      "default": false,
      "hints": ["advanced"]
    },
    "label": {
      "type": "string",
      "title": "Label",
      "description": "A unique label for this stage.",
      "hints": ["advanced"],
      "maxLength": 255
    },
    "condition": {
      "type": "string",
      "title": "Condition",
      "description": "Define a conditional script that must result in true or false. This can be used to determine if the stage should process or not.",
      "hints": ["code", "code/javascript", "advanced"]
    },
    // Carries the "hidden" hint, so SchemaParamFields filters it out of the rendered list.
    "legacy": {
      "type": "boolean",
      "title": "Legacy",
      "description": "True if this stage only supports legacy mode",
      "hints": ["readonly", "hidden"]
    },
    // Lexical side of the hybrid query.
    "lexicalQuery": {
      "type": "string",
      "title": "Lexical Query Input",
      "description": "The lexical query itself is retrieved from here.  This field supports Template Expressions such as '<request.params.q>' to evaluate the original user query.",
      "default": "<request.params.q>"
    },
    "lexicalWeight": {
      "type": "number",
      "title": "Lexical Query Weight",
      "description": "Relative weight of the lexical query. If this value is 0, no re-ranking will be applied using the lexical query scores.",
      "default": 0.1,
      "maximum": 10,
      "exclusiveMaximum": false
    },
    "lexicalSquash": {
      "type": "number",
      "title": "Lexical Query Squash Factor",
      "description": "The squash factor for the lexical query.  This value is used to squash the lexical query scores from 0..inf to 0..1, which can help to prevent the lexical query from dominating the final score.  NOTE: A good value here would be the inverse of the lexical maximum score across all queries for the given collection. NOTE: If this value is 0.0, the resulting squashed lexical scores will ALL be 0.0, resulting in lexical recall but vector scoring.",
      "default": 0.1,
      "maximum": 10,
      "exclusiveMaximum": false
    },
    // Vector side of the hybrid query.
    "vectorQueryField": {
      "type": "string",
      "title": "Vector Query Field",
      "description": "The name of the Solr field for knn vector search."
    },
    "vector": {
      "type": "string",
      "title": "Vector Input",
      "description": "The Vector itself is retrieved from here.  This field supports Template Expressions such as '<ctx.vector>' to evaluate the context variable resulting from a previous stage, such as the Vectorize Query via Lucidworks AI stage.",
      "default": "<ctx.vector>"
    },
    "vectorWeight": {
      "type": "number",
      "title": "Vector Query Weight",
      "description": "Relative weight of the vector query.",
      "default": 0.9,
      "maximum": 10,
      "exclusiveMaximum": false,
      "minimum": 0.001,
      "exclusiveMinimum": false
    },
    "minReturnSim": {
      "type": "number",
      "title": "Min Return Vector Similarity",
      "description": "The minimum vector similarity value to qualify as a match from the Vector portion of the hybrid query.",
      "default": 0.5,
      "maximum": 1,
      "exclusiveMaximum": false
    },
    "minTraverseSim": {
      "type": "number",
      "title": "Min Traversal Vector Similarity",
      "description": "The minimum vector similarity value to use when walking the graph during the Vector portion of the hybrid query. Must be lower than, or equal to, the Min Return Vector Similarity",
      "default": 0.5,
      "maximum": 1,
      "exclusiveMaximum": false
    },
    "vecSimForLexOnly": {
      "type": "boolean",
      "title": "Compute Vector Similarity for Lexical-Only Matches",
      "description": "Compute the vector similarity score for documents that are not in the  vector result set but are in the lexical result set. ",
      "default": true
    },
    // Defaults to true, i.e. pre-filtering is blocked unless this is unchecked.
    "vecPreFilterBoolean": {
      "type": "boolean",
      "title": "Block pre-filtering.",
      "description": "Checkbox to indicate whether or not to prevent pre-filtering. If checked, preFilter=\"\" will be added to the vector query, which will prevent pre-filtering from being applied to the query.",
      "default": true
    }
  },
  "category": "AI",
  "categoryPriority": 10,
  "unsafe": false
};

export const SchemaParamFields = ({schema}) => {
  const sanitize = str => {
    if (typeof str !== "string") return str;
    return str.replace(/^"(.*)"$/s, "$1").replace(/\\/g, "").replace(/"/g, "'");
  };
  const formatDescription = str => {
    const s = sanitize(str);
    return (/[.!?]\)*$/).test(s) ? s : `${s}.`;
  };
  const {description, properties = {}, required: requiredProps = []} = schema;
  const visibleProps = useMemo(() => Object.entries(properties).filter(([, prop]) => !prop.hints?.includes("hidden")), [properties]);
  return <div>
      {description && <p>{formatDescription(description)}</p>}

      {visibleProps.map(([name, prop]) => {
    const isRequired = requiredProps.includes(name);
    const hasDefault = prop.default !== undefined;
    const rawDefault = prop.default;
    const isComplexDefault = hasDefault && (typeof rawDefault === "object" || typeof rawDefault === "string" && (rawDefault.length > 20 || rawDefault.includes('"')));
    const fieldProps = {
      key: name,
      body: prop.title || name,
      type: prop.type,
      ...prop.title && ({
        post: [<><span className="text-stone-400 dark:text-stone-500">API property: </span>{name}</>]
      }),
      ...isRequired && ({
        required: true
      }),
      ...!isComplexDefault && hasDefault ? {
        default: sanitize(String(rawDefault))
      } : {}
    };
    const isObject = prop.type === "object" && prop.properties;
    const isArrayOfObjects = prop.type === "array" && prop.items?.type === "object" && prop.items.properties;
    return <ParamField {...fieldProps}>
            {prop.description && <p>{formatDescription(prop.description)}</p>}

            {isComplexDefault && <div className="flex">
                <p>
                  <strong>Default:</strong>
                </p>
                <pre className="!my-0">
                  <code>
                    {JSON.stringify(rawDefault, null, 2)}
                  </code>
                </pre>
              </div>}

            {isArrayOfObjects && <div className="flex">
              <p>
                <strong>Object attributes:</strong>
              </p>
              <pre className="!my-0">
                <code>
                  {'{\n'}
                  {Object.entries(prop.items.properties).map(([iname, iprop]) => <>
                      {`  ${iname}`}
                      {prop.items?.required?.includes(iname) && <span style={{
      color: 'red'
    }}> required</span>}
                      {`: {\n    display name: ${sanitize(iprop.title || '')}\n    type: ${iprop.type}\n  }\n`}
                    </>)}
                  {'}'}
                </code>
              </pre>
              </div>}

            {isObject && <Expandable title="properties">
                <SchemaParamFields schema={{
      properties: prop.properties,
      required: prop.required
    }} />
              </Expandable>}
          </ParamField>;
  })}
    </div>;
};

export const LwTemplate = ({title = "Key questions to get you started", icon = "sparkles", cta = "Powered by Agent Studio", linkHref = "https://lucidworks.com/demo/?utm_source=docs&utm_medium=referral&utm_campaign=docs_cta_ai"}) => {
  const [isLoaded, setIsLoaded] = useState(false);
  useEffect(() => {
    const timer = setTimeout(() => {
      setIsLoaded(true);
    }, 500);
    return () => clearTimeout(timer);
  }, []);
  return <div className="lw-template-container">
      <Card title={title} icon={icon}>
        {isLoaded && <span dangerouslySetInnerHTML={{
    __html: `<lw-template id="a029c1a9-28be-427e-b0e1-5d918920246a"></lw-template
            >`
  }} />}
        <Link href={linkHref} className="agent-studio-link text-left text-gray-600 gap-2 dark:text-gray-400 text-sm font-medium flex flex-row items-center hover:text-primary dark:hover:text-primary-light group-hover:text-primary group-hover:dark:text-primary-light">Powered by Lucidworks Agent Studio</Link>
      </Card>
    </div>;
};

[localhost link]: http://localhost:3000/docs/lucidworks-search/09-developer-documentation/config-specs/query-pipeline-stages/chunking-neural-hybrid-query-stage

[mintlify link]: https://doc.lucidworks.com/docs/lucidworks-search/09-developer-documentation/config-specs/query-pipeline-stages/chunking-neural-hybrid-query-stage

[old doc.lw link]: https://doc.lucidworks.com/managed-fusion/5.9/ylgrnx

Lucidworks Search 5.9.12 and later releases use index and query stages to split large documents into smaller, more manageable segments called chunks. For more information about chunking, chunking strategies and setting up chunking, see [Chunking](/docs/lucidworks-search/11-vector-search/chunking).

The Chunking Neural Hybrid Query stage performs hybrid lexical-semantic search that combines BM25-type lexical search with KNN dense vector search via Solr. This stage differs from the [Neural Hybrid Query stage](/docs/lucidworks-search/09-developer-documentation/config-specs/query-pipeline-stages/neural-hybrid-query-stage) because it supports chunking.

Not sure which hybrid query stage is right for you? Read about the
[differences between the hybrid query stages](/docs/lucidworks-search/11-vector-search/hybrid-stage-differences).

<Note>
  This feature is available starting in Lucidworks Search 5.9.12 and in all subsequent Lucidworks Search 5.9 releases.
</Note>

Click **Get Started** below to see how to enable chunking in Lucidworks Search:

<iframe src="https://app.supademo.com/embed/cmfzg6uw4009oxx0i1ptmac82?embed_v=2&utm_source=embed" loading="lazy" title="Enable chunking in Fusion" allow="clipboard-write" frameborder="0" webkitallowfullscreen="true" mozallowfullscreen="true" allowfullscreen style={{  width: '100%', height: '500px' }} />

<LwTemplate />

## About the Lexical Query Squash Factor

The **Lexical Query Squash Factor** field lets you input a value that squashes the lexical query scores from `0..inf` to `0..1`.
This setting helps prevent the lexical query from dominating the final score, and normalizes the score into a range that works well with vector similarity scores.
Additionally, it helps prevent the [vanishing gradient problem](https://en.wikipedia.org/wiki/Vanishing_gradient_problem), which occurs when very high lexical scores are mapped to values extremely close to `1`, such as `0.99999999`.
During the hybrid search calculation, these near-1 values can cause the system to lose sensitivity to subtle differences in lexical relevance, effectively 'squashing' the gradient and reducing the impact of lexical scoring.

Lucidworks recommends setting the **Lexical Query Squash Factor** to the inverse of the maximum lexical score observed across your queries.
This helps balance the impact of lexical and vector scores, leading to more accurate and nuanced search results.

## Prefiltering

Prefiltering is a technique that can improve performance and accuracy by filtering documents before applying the algorithm, reducing the number of documents that need to be processed.
This is especially effective with the KNN algorithm.

Prefiltering is disabled by default.
To enable it, uncheck **Block pre-filtering** in this stage.

When prefiltering is enabled, you can configure the filters using one or more of these methods:

* **Security filters**\
  You can use security filters as prefilters by placing the [Graph Security Trimming Stage](/docs/5/fusion/reference/config-ref/pipeline-stages/query-stages/security-trimming-graph-query-stage) *after* this one in the pipeline.\
  Then Fusion uses the security trimming filter as a prefilter.
* **JavaScript**\
  When prefiltering is enabled, this stage adds a `preFilterKey` object to the JavaScript `ctx` object.\
  You can place a [JavaScript stage](/docs/5/fusion/reference/config-ref/pipeline-stages/query-stages/javascript-query-stage) after this one and use it to access the `preFilterKey` object, as in this example:

  ```js theme={"dark"}
  if(ctx.hasProperty("preFilterKey")) {
    var preFilter = ctx.getProperty("preFilterKey");
    preFilter.addFilter(filterQuery)
  }
  ```
* **Additional Query Parameters stage**\
  If you do not want to create a JavaScript stage, you can create additional query parameters to prefilter the documents to be processed by using what the previous JavaScript example adds to the request. The following example uses a single prefilter:

  ```js theme={"dark"}
  "fq" = "{!bool filter=$vectorPreFilter}"
  "vectorPreFilter" = "EXAMPLE_FILTER"
  ```

  The following example uses multiple prefilters:

  ```js theme={"dark"}
  "fq": "{!bool filter=$filterClauses}",
  "vectorPreFilter": "{!bool should=$filterClauses}",
  "filterClauses": ["id:EXAMPLE_FILTER1","id:EXAMPLE_FILTER2"]
  ```

## Configuration

<Tip>
  When entering configuration values in the UI, use *unescaped* characters, such as `\t` for the tab character. When entering configuration values in the API, use *escaped* characters, such as `\\t` for the tab character.
</Tip>

<SchemaParamFields schema={schema} />
