> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Include Documents Index Stage

export const schema = {
  // Stage-level metadata.
  type: "object",
  title: "Include Documents",
  description: "This stage passes a document through if any of the specified rules match; drops the document otherwise",
  required: ["matchRules"],
  properties: {
    // Three string/boolean settings, each carrying the "advanced" hint.
    skip: {
      type: "boolean",
      title: "Skip This Stage",
      description: "Set to true to skip this stage.",
      default: false,
      hints: ["advanced"]
    },
    label: {
      type: "string",
      title: "Label",
      description: "A unique label for this stage.",
      hints: ["advanced"],
      maxLength: 255
    },
    condition: {
      type: "string",
      title: "Condition",
      description: "Define a conditional script that must result in true or false. This can be used to determine if the stage should process or not.",
      hints: ["code", "code/javascript", "advanced"]
    },
    // The only required property: a non-empty list of { field, pattern } rules.
    matchRules: {
      type: "array",
      title: "Fields and Patterns",
      minItems: 1,
      items: {
        type: "object",
        required: ["field", "pattern"],
        properties: {
          field: {type: "string", title: "Field", description: "The name of the field to match"},
          pattern: {
            type: "string",
            title: "Regex Pattern",
            description: "Pattern to match the field value against. The value may be a regex pattern.",
            format: "regex"
          }
        }
      }
    }
  },
  category: "Document Filtering and Enrichment",
  categoryPriority: 8,
  unsafe: false
};

export const SchemaParamFields = ({schema}) => {
  const sanitize = str => {
    if (typeof str !== "string") return str;
    return str.replace(/^"(.*)"$/s, "$1").replace(/\\/g, "").replace(/"/g, "'");
  };
  const formatDescription = str => {
    const s = sanitize(str);
    return (/[.!?]\)*$/).test(s) ? s : `${s}.`;
  };
  const {description, properties = {}, required: requiredProps = []} = schema;
  const visibleProps = useMemo(() => Object.entries(properties).filter(([, prop]) => !prop.hints?.includes("hidden")), [properties]);
  return <div>
      {description && <p>{formatDescription(description)}</p>}

      {visibleProps.map(([name, prop]) => {
    const isRequired = requiredProps.includes(name);
    const hasDefault = prop.default !== undefined;
    const rawDefault = prop.default;
    const isComplexDefault = hasDefault && (typeof rawDefault === "object" || typeof rawDefault === "string" && (rawDefault.length > 20 || rawDefault.includes('"')));
    const fieldProps = {
      key: name,
      body: prop.title || name,
      type: prop.type,
      ...prop.title && ({
        post: [<><span className="text-stone-400 dark:text-stone-500">API property: </span>{name}</>]
      }),
      ...isRequired && ({
        required: true
      }),
      ...!isComplexDefault && hasDefault ? {
        default: sanitize(String(rawDefault))
      } : {}
    };
    const isObject = prop.type === "object" && prop.properties;
    const isArrayOfObjects = prop.type === "array" && prop.items?.type === "object" && prop.items.properties;
    return <ParamField {...fieldProps}>
            {prop.description && <p>{formatDescription(prop.description)}</p>}

            {isComplexDefault && <div className="flex">
                <p>
                  <strong>Default:</strong>
                </p>
                <pre className="!my-0">
                  <code>
                    {JSON.stringify(rawDefault, null, 2)}
                  </code>
                </pre>
              </div>}

            {isArrayOfObjects && <div className="flex">
              <p>
                <strong>Object attributes:</strong>
              </p>
              <pre className="!my-0">
                <code>
                  {'{\n'}
                  {Object.entries(prop.items.properties).map(([iname, iprop]) => <>
                      {`  ${iname}`}
                      {prop.items?.required?.includes(iname) && <span style={{
      color: 'red'
    }}> required</span>}
                      {`: {\n    display name: ${sanitize(iprop.title || '')}\n    type: ${iprop.type}\n  }\n`}
                    </>)}
                  {'}'}
                </code>
              </pre>
              </div>}

            {isObject && <Expandable title="properties">
                <SchemaParamFields schema={{
      properties: prop.properties,
      required: prop.required
    }} />
              </Expandable>}
          </ParamField>;
  })}
    </div>;
};

export const LwTemplate = ({title = "Key questions to get you started", icon = "sparkles", cta = "Powered by Agent Studio", linkHref = "https://lucidworks.com/demo/?utm_source=docs&utm_medium=referral&utm_campaign=docs_cta_ai"}) => {
  const [isLoaded, setIsLoaded] = useState(false);
  useEffect(() => {
    const timer = setTimeout(() => {
      setIsLoaded(true);
    }, 500);
    return () => clearTimeout(timer);
  }, []);
  return <div className="lw-template-container">
      <Card title={title} icon={icon}>
        {isLoaded && <span dangerouslySetInnerHTML={{
    __html: `<lw-template id="a029c1a9-28be-427e-b0e1-5d918920246a"></lw-template
            >`
  }} />}
        <Link href={linkHref} className="agent-studio-link text-left text-gray-600 gap-2 dark:text-gray-400 text-sm font-medium flex flex-row items-center hover:text-primary dark:hover:text-primary-light group-hover:text-primary group-hover:dark:text-primary-light">Powered by Lucidworks Agent Studio</Link>
      </Card>
    </div>;
};

[localhost link]: http://localhost:3000/docs/5/fusion/reference/config-ref/pipeline-stages/index-stages/include-documents-index-stage

[mintlify link]: https://doc.lucidworks.com/docs/5/fusion/reference/config-ref/pipeline-stages/index-stages/include-documents-index-stage

[old doc.lw link]: https://doc.lucidworks.com/fusion/5.9/244

This stage passes documents to the next stage in the pipeline if they match one or more of the specified rules (Boolean OR). If a field has multiple values, at least one value must match the specified pattern. All non-matching documents are dropped. Rules are defined using regular expression field matching.

<LwTemplate />

## Examples

**Give the "simple-include" pipeline a stage that includes only certain document types**

```bash wrap  theme={"dark"}
curl -u USERNAME:PASSWORD -X POST -H "Content-type: application/json" 'http://FUSION_HOST:FUSION_PORT/api/index-pipelines' -d '
{
  "id" : "simple-include",
  "stages" : [ {
    "type" : "include-doc",
    "matchRules" : [ {
        "field" : "document_type",
        "pattern" : "(xls|xlsx|xlst|doc|docx)"
    }]
  }]
}'
```

Response:

```json wrap  theme={"dark"}
{
  "id" : "simple-include",
  "stages" : [ {
    "type" : "include-doc",
    "id" : "f701f96b-780e-4355-9dd3-6e53a89afe3e",
    "matchRules" : [ {
      "field" : "document_type",
      "pattern" : "(xls|xlsx|xlst|doc|docx)"
    } ],
    "skip" : false,
    "label" : "include-doc"
  } ],
  "properties" : { }
}
```

**Send a text document through the "simple-include" pipeline**

```bash wrap  theme={"dark"}
curl -u USERNAME:PASSWORD 'http://FUSION_HOST:FUSION_PORT/api/index-pipelines/simple-include/collections/logs/index?simulate=true&echo=true' -H 'Content-type: application/json' -d '
{
  "document_type": "txt"
}'
```

The empty response indicates the document was dropped:

```json wrap  theme={"dark"}
[ ]
```

**Send an XLS document through the pipeline**

```bash wrap  theme={"dark"}
curl -u USERNAME:PASSWORD 'http://FUSION_HOST:FUSION_PORT/api/index-pipelines/simple-include/collections/logs/index?simulate=true&echo=true' -H 'Content-type: application/json' -d '
{
  "document_type": "xls"
}'
```

The response is document metadata, indicating the document passed the stage:

```json wrap  theme={"dark"}
[ {
  "id" : "9e7d1c2e-343a-49de-bc6a-1d1fc25fa93f",
  "fields" : [ {
    "name" : "document_type",
    "value" : "xls",
    "metadata" : { },
    "annotations" : [ ]
  } ]
} ]
```

## Configuration

<Tip>
  When entering configuration values in the UI, use *unescaped* characters, such as `\t` for the tab character. When entering configuration values in the API, use *escaped* characters, such as `\\t` for the tab character.
</Tip>

<SchemaParamFields schema={schema} />
