> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Ray/Seldon Vectorize Query Stage

export const schema = {
  // Configuration schema for the "Ray / Seldon Vectorize Query" stage.
  // It is passed to <SchemaParamFields /> at the bottom of this page, which
  // renders one entry per property (properties hinted "hidden" are omitted).
  type: "object",
  title: "Ray / Seldon Vectorize Query",
  description: "Generate a vector based on the current query string (q parameter).  Uses a machine learning model to encode the raw query string (q parameter) to a vector representation. Will be skipped if query string is blank or wildcard (* or *:*). Note this will not work well if the incoming q parameter is a Solr query parser string (e.g. field_t:foo) rather than a raw user query string.  Note that the Output Context Variable must  match the Hybrid Query stage Vector Context Variable.",
  // Property names listed here are flagged as required by the renderer.
  required: [
    "modelId",
    "queryInput",
    "modelInputFieldName",
    "modelOutputVectorFieldName",
    "vectorContextKey",
  ],
  properties: {
    // --- Properties hinted "advanced", "readonly" or "hidden" ---
    skip: {
      type: "boolean",
      title: "Skip This Stage",
      description: "Set to true to skip this stage.",
      default: false,
      hints: ["advanced"],
    },
    label: {
      type: "string",
      title: "Label",
      description: "A unique label for this stage.",
      hints: ["advanced"],
      maxLength: 255,
    },
    condition: {
      type: "string",
      title: "Condition",
      description: "Define a conditional script that must result in true or false. This can be used to determine if the stage should process or not.",
      hints: ["code", "code/javascript", "advanced"],
    },
    legacy: {
      type: "boolean",
      title: "Legacy",
      description: "True if this stage only supports legacy mode",
      hints: ["readonly", "hidden"],
    },
    // --- Model and query settings ---
    modelId: {
      type: "string",
      title: "Model ID",
      description: "Model ID of the model to use for encoding. Only models which accept a single string parameter and return a single dense vector value per input are supported.",
    },
    queryInput: {
      type: "string",
      title: "Query Input",
      description: "The query itself is retrieved from here.  This field supports Template Expressions such as '<request.params.q>' to evaluate the original user query.",
      default: "<request.params.q>",
    },
    modelInputFieldName: {
      type: "string",
      title: "Model Input Field",
      description: "Name to specify for the input parameter when sending the query string to encode to the chosen ML model",
      default: "text",
    },
    modelOutputVectorFieldName: {
      type: "string",
      title: "Model Output Vector Field",
      description: "The name of the field in the ML model response that contains the vector encoding.",
      default: "vector",
    },
    vectorContextKey: {
      type: "string",
      title: "Vector Context Key",
      description: "The key (string) in which to put the resulting vector as a string context variable.",
      default: "vector",
    },
    // Array of key/value objects; only "key" is required on each item.
    modelConfig: {
      type: "array",
      title: "Model Configuration",
      description: "Additional model parameters to pass to the model. These will be included in the model request.",
      items: {
        type: "object",
        required: ["key"],
        properties: {
          key: {type: "string", title: "Parameter Name"},
          value: {type: "string", title: "Parameter Value"},
        },
      },
    },
    failOnError: {
      type: "boolean",
      title: "Fail on Error",
      description: "Flag to indicate if this stage should throw an exception if an error occurs while generating an encoding.",
      default: false,
    },
  },
  category: "AI",
  categoryPriority: 10,
  unsafe: false,
};

export const SchemaParamFields = ({schema}) => {
  const sanitize = str => {
    if (typeof str !== "string") return str;
    return str.replace(/^"(.*)"$/s, "$1").replace(/\\/g, "").replace(/"/g, "'");
  };
  const formatDescription = str => {
    const s = sanitize(str);
    return (/[.!?]\)*$/).test(s) ? s : `${s}.`;
  };
  const {description, properties = {}, required: requiredProps = []} = schema;
  const visibleProps = useMemo(() => Object.entries(properties).filter(([, prop]) => !prop.hints?.includes("hidden")), [properties]);
  return <div>
      {description && <p>{formatDescription(description)}</p>}

      {visibleProps.map(([name, prop]) => {
    const isRequired = requiredProps.includes(name);
    const hasDefault = prop.default !== undefined;
    const rawDefault = prop.default;
    const isComplexDefault = hasDefault && (typeof rawDefault === "object" || typeof rawDefault === "string" && (rawDefault.length > 20 || rawDefault.includes('"')));
    const fieldProps = {
      key: name,
      body: prop.title || name,
      type: prop.type,
      ...prop.title && ({
        post: [<><span className="text-stone-400 dark:text-stone-500">API property: </span>{name}</>]
      }),
      ...isRequired && ({
        required: true
      }),
      ...!isComplexDefault && hasDefault ? {
        default: sanitize(String(rawDefault))
      } : {}
    };
    const isObject = prop.type === "object" && prop.properties;
    const isArrayOfObjects = prop.type === "array" && prop.items?.type === "object" && prop.items.properties;
    return <ParamField {...fieldProps}>
            {prop.description && <p>{formatDescription(prop.description)}</p>}

            {isComplexDefault && <div className="flex">
                <p>
                  <strong>Default:</strong>
                </p>
                <pre className="!my-0">
                  <code>
                    {JSON.stringify(rawDefault, null, 2)}
                  </code>
                </pre>
              </div>}

            {isArrayOfObjects && <div className="flex">
              <p>
                <strong>Object attributes:</strong>
              </p>
              <pre className="!my-0">
                <code>
                  {'{\n'}
                  {Object.entries(prop.items.properties).map(([iname, iprop]) => <>
                      {`  ${iname}`}
                      {prop.items?.required?.includes(iname) && <span style={{
      color: 'red'
    }}> required</span>}
                      {`: {\n    display name: ${sanitize(iprop.title || '')}\n    type: ${iprop.type}\n  }\n`}
                    </>)}
                  {'}'}
                </code>
              </pre>
              </div>}

            {isObject && <Expandable title="properties">
                <SchemaParamFields schema={{
      properties: prop.properties,
      required: prop.required
    }} />
              </Expandable>}
          </ParamField>;
  })}
    </div>;
};

export const LwTemplate = ({title = "Key questions to get you started", icon = "sparkles", cta = "Powered by Agent Studio", linkHref = "https://lucidworks.com/demo/?utm_source=docs&utm_medium=referral&utm_campaign=docs_cta_ai"}) => {
  const [isLoaded, setIsLoaded] = useState(false);
  useEffect(() => {
    const timer = setTimeout(() => {
      setIsLoaded(true);
    }, 500);
    return () => clearTimeout(timer);
  }, []);
  return <div className="lw-template-container">
      <Card title={title} icon={icon}>
        {isLoaded && <span dangerouslySetInnerHTML={{
    __html: `<lw-template id="a029c1a9-28be-427e-b0e1-5d918920246a"></lw-template
            >`
  }} />}
        <Link href={linkHref} className="agent-studio-link text-left text-gray-600 gap-2 dark:text-gray-400 text-sm font-medium flex flex-row items-center hover:text-primary dark:hover:text-primary-light group-hover:text-primary group-hover:dark:text-primary-light">Powered by Lucidworks Agent Studio</Link>
      </Card>
    </div>;
};

[localhost link]: http://localhost:3000/docs/lucidworks-search/09-developer-documentation/config-specs/query-pipeline-stages/ray-seldon-vectorize-query-stage

[mintlify link]: https://doc.lucidworks.com/docs/lucidworks-search/09-developer-documentation/config-specs/query-pipeline-stages/ray-seldon-vectorize-query-stage

[old doc.lw link]: https://doc.lucidworks.com/managed-fusion/5.9/4engao

The Ray/Seldon Vectorize Query stage generates a vector based on the current query string (q parameter). In Lucidworks Search 5.9.11 and earlier, this stage is called Seldon Vectorize Query.

<Note>
  This feature is only available in Lucidworks Search 5.9.x for versions 5.9.6+.
</Note>

For more information on setting up vector search with Ray or Seldon, see **Configure Ray/Seldon vector search**.

<AccordionGroup>
  <Accordion title="Configure Ray/Seldon vector search">
    {/* // tag::intro[] */}

    You can use Seldon or Ray models to vectorize text for [Neural Hybrid Search](/docs/lucidworks-search/11-vector-search/overview).

    {/* // end::intro[] */}

    <Note>This feature is only available in Lucidworks Search 5.9.x for versions 5.9.6+.</Note>

    <LwTemplate />

    ## Prerequisites

    ### Seldon

    To vectorize text with Seldon, you’ll first need to develop and deploy a machine learning model.

    ### Ray

    <Note>This feature is available starting in Lucidworks Search 5.9.12 and in all subsequent Lucidworks Search 5.9 releases.</Note>

    To vectorize text with Ray, you first need to [develop and deploy a machine learning model with Ray](#develop-and-deploy-a-machine-learning-model-with-ray).

    ## Configure index pipeline

    1. Sign into Lucidworks Search, go to **Indexing** > **Index Pipelines**, then select an existing pipeline or create a new one.
    2. Click **Add a new pipeline stage**, then select **Ray/Seldon Vectorize Field**. In Lucidworks Search 5.9.11 and earlier, this stage is called **Seldon Vectorize Field**.
    3. Fill in the required fields:
       1. Enter a **Model ID**. This is the name of the model you developed and deployed.
       2. Enter the **Model Input Field**. For example, `text`.
       3. Enter the **Model Output Vector Field**. For example, `vector`.
       4. Enter the **Source Field**. For example, `body_t`.
       5. Enter the **Destination Field**. For example, `body_512_v`.
    4. Click **Save**.
    5. Make sure the **Ray/Seldon Vectorize Field** stage is ordered before the **Solr Indexer** stage.

    Index data using the new pipeline. Confirm that your vector field is indexed by checking to see that the field is present in documents.

    ## Configure query pipeline

    1. Go to **Querying** > **Query Pipelines**, then select an existing pipeline.
    2. Click **Add a new pipeline stage**, then select **Ray/Seldon Vectorize Query**. In Lucidworks Search 5.9.11 and earlier, this stage is called **Seldon Vectorize Query**.
    3. Fill in the required fields, making sure to search against the field into which you indexed the vectors:
       1. Enter a **Model ID**. This is the name of the model you developed and deployed.
       2. Enter the **Query Input**.
       3. Enter the **Model Input Field**. For example, `text`.
       4. Enter the **Model Output Vector Field**. For example, `vector`.
       5. Enter the **Vector Context Key**. For example, `vector`.
    4. Click **Save**.
    5. Make sure the **Ray/Seldon Vectorize Query** stage is ordered before the **Solr Query** stage.

    ## Perform vector searches

    After setting up the stages, you can perform vector searches via the [`knn` query parser](https://solr.apache.org/guide/solr/latest/query-guide/dense-vector-search.html#knn-query-parser) as you would with Solr. Specify the search vector and include it in the query. For example, change the `q` parameter to a `knn` query parser string.

    The **Ray/Seldon Vectorize Query** stage will encode user queries using the specified model and modify the `q` parameter to use the `knn` query parser, turning the query into a vector search.
  </Accordion>

  <Accordion title="Develop and deploy a machine learning model with Ray">
    This tutorial walks you through deploying your own model to Fusion with Ray.

    <Note>This feature is only available in Fusion 5.9.x for versions 5.9.12 and later.</Note>

    ## Prerequisites

    * A Fusion instance with an app and indexed data.
    * An understanding of Python and the ability to write Python code.
    * [Docker](https://docs.docker.com/get-docker/) installed locally, plus a private or public Docker repository.
    * Ray installed locally: `pip install ray[serve]` using the version of ray\[serve] found in the release notes for your version of Lucidworks Search.
    * Code editor; you can use any editor, but Visual Studio Code is used in this example.
    * Model: [intfloat/e5-small-v2](https://huggingface.co/intfloat/e5-small-v2)
    * Docker image: [e5-small-v2-ray](https://hub.docker.com/r/jstrmec/e5-small-v2-ray/)

    ## Tips

    * Always test your Python code locally before uploading to Docker and then Fusion.
      This simplifies troubleshooting significantly.
    * Once you’ve built your Docker image, you can also test it locally by running `docker run` with a specified port, like 9000, which you can then `curl` to confirm functionality in Fusion.
      See the testing example below.
    * If you previously deployed a model with Seldon, you can deploy the same model with Ray after making a few changes to your Docker image as explained in this topic.
      To avoid conflicts, deploy the model with a different name.
      When you have verified that the Ray model is working after deployment with Ray, you can delete the Seldon model using the [Delete Seldon Core Model Deployment job](/docs/5/fusion/reference/config-ref/jobs/delete-seldon-core-model-deployment).
    * If you run into an issue with the model not deploying and you’re using the 'real' example, there is a very good chance you haven’t allocated enough memory or CPU in your job spec or in the Ray-Argo config.
      It’s easy to increase the resources. To edit the ConfigMap, run `kubectl edit configmap argo-deploy-ray-model-workflow -n <namespace>` and then find the `ray-head` container in the artisanal escaped YAML and change the memory limit.
      Exercise caution when editing because it can break the YAML.
      Just delete and replace a single character at a time without changing any formatting.
      * For additional guidance, see the [testing locally e5-model example](https://colab.research.google.com/drive/1ef-NTJBtP8p77aAa8Pp9SPXtQyozJEas?usp=sharing).

    <Card title="Intro to Machine Learning in Fusion" class="note-image" href="https://academy.lucidworks.com/intro-to-machine-learning-in-fusion" cta="Take this course on the LucidAcademy." icon="graduation-cap" iconType="duotone">
      The course for **Intro to Machine Learning in Fusion** focuses on using machine learning to infer the goals of customers and users in order to deliver a more sophisticated search experience.
    </Card>

    ## Local testing example

    1. Docker command:
       ```bash theme={"dark"}
       docker run -p 127.0.0.1:8000:8000 DOCKER_IMAGE
       ```
    2. Curl to hit Docker:
       ```bash theme={"dark"}
       curl -i -X POST http://127.0.0.1:8000 -H 'Content-Type: application/json' -d '{"text": "The quick brown fox jumps over the lazy dog."}'
       ```
    3. Curl model in Fusion:
       ```bash theme={"dark"}
       curl -u $FUSION_USER:$FUSION_PASSWORD -X POST -H 'Content-Type: application/json' -d '{"text": "i love fusion"}' https://FUSION_HOST.com:6764/api/ai/ml-models/MODEL_NAME/prediction
       ```
    4. See all your deployed models:
       ```bash theme={"dark"}
       curl -u USERNAME:PASSWORD http://FUSION_HOST:FUSION_PORT/api/ai/ml-models
       ```
    5. Check the Ray UI to see Replica State, Resources, and Logs.\
       If you are getting an internal model error, the best way to see what is going on is to query via port-forwarding the model.\
       The `MODEL_DEPLOYMENT` in the command below can be found with `kubectl get svc -n NAMESPACE`. It will have the same name as set in the model name in the **Create Ray Model Deployment** job.
       ```bash theme={"dark"}
       kubectl -n NAMESPACE port-forward svc/MODEL_DEPLOYMENT-head-svc 8000:8000
       ```
       Once port-forwarding is successful, you can use the below cURL command to see the issue.
       At that point your worker logs should show helpful error messages.
       ```bash theme={"dark"}
       curl --location 'http://127.0.0.1:8000/' \
       --header 'charset: utf-8' \
       --header 'Content-Type: application/json' \
       --data '{"text": "i love fusion"}'
       ```

    ## Download the model

    This tutorial uses the [`e5-small-v2`](https://huggingface.co/intfloat/e5-small-v2) model from Hugging Face, but any pre-trained model from [https://huggingface.co](https://huggingface.co) will work with this tutorial.

    If you want to use your own model instead, you can do so, but your model must have been trained and then saved through a function similar to PyTorch’s `torch.save(model, PATH)` function.
    See [Saving and Loading Models](https://pytorch.org/tutorials/beginner/saving_loading_models.html) in the PyTorch documentation.

    ## Format a Python class

    The next step is to format a Python class which will be invoked by Fusion to get the results from your model.
    The skeleton below represents the format that you should follow.
    See also [Getting Started](https://docs.ray.io/en/latest/serve/getting_started.html) in the Ray Serve documentation.

    ```python wrap expandable theme={"dark"}
    from typing import Any, Dict

    from ray import serve
    from starlette.requests import Request

    # These defaults are for the ray serve deployment
    # when running simply from docker. The 'Create Ray Model Deployment'
    # job can override these replicas and resources if needed.
    @serve.deployment(num_replicas=1, ray_actor_options={"num_cpus": 1})
    class Deployment(object):
        def __init__(self):
            """
            Add any initialization parameters. Generally this is where you would load
            your model. This method will be called once when the deployment is created.
            """
            print("Initializing")
            self.model = load_model() #faux code

        # This can be named as any method which takes a dictionary as input and returns a dictionary
        # as output. In this example, we are using the encode method to encode the
        # input text into a vector.
        def encode(self, input_dict: Dict[str, Any]) -> Dict[str, Any]:
            """
            This method will be called when the deployment is queried. It will receive
            the input data and should return the output data.
            """
            text = input_dict["text"]
            embeddings = self.model.encode #faux code
            return { "vector": embeddings } # To use the 'Ray / Seldon Vectorize Field' stage, the output key should be `vector`, if using the 'Machine Learning' stage you must ensure the output key matches the output key in the 'Machine Learning' stage

        async def __call__(self, http_request: Request) -> Dict[str, Any]:
            input_dict: Dict[str, Any] = await http_request.json()
            return self.encode(input_dict=input_dict) # This will be the function you defined above, in this case encode


    app = Deployment.bind()

    ```

    A real instance of this class with the `e5-small-v2` model is as follows:

    <Note>
      This code pulls from Hugging Face. To have the model load in the image without pulling from Hugging Face or other external sources, download the model weights into a folder name and change the model name to the folder name preceded by `./`.
    </Note>

    ```python wrap expandable theme={"dark"}
    import json
    import sys
    from time import time
    from typing import Any, Dict

    import torch
    import torch.nn.functional as F
    from ray import serve
    from starlette.requests import Request
    from starlette.responses import JSONResponse
    from torch import Tensor
    from transformers import AutoModel, AutoTokenizer

    HUB_MODEL_NAME = "intfloat/e5-small-v2"


    @serve.deployment(num_replicas=1, ray_actor_options={"num_cpus": 1})
    class Deployment(object):
        def __init__(self):
            from loguru import logger

            self.logger = logger
            # Initializing logger
            self.logger.remove()
            self.logger.add(sys.stdout, level="INFO", serialize=False, colorize=True)

            # Initializing model
            self.logger.info("Loading model...")
            self.tokenizer = AutoTokenizer.from_pretrained(HUB_MODEL_NAME)
            self.model = AutoModel.from_pretrained(HUB_MODEL_NAME)
            self.model.eval()
            self.logger.info("Model initialization finished!")

        def encode(self, input_dict: Dict[str, Any]) -> Dict[str, Any]:
            _start_time = time()

            # Extracting text from input
            text = input_dict["text"]

            # Tokenization
            tokenized_texts = self.tokenizer(
                text,
                max_length=512,
                padding=True,
                truncation=True,
                return_tensors="pt",
            )

            # Encoding
            with torch.inference_mode():
                # Forward pass of the model
                outputs = self.model(**tokenized_texts)

                # Average pooling the last hidden states
                embeddings = self.average_pool(
                    outputs.last_hidden_state, tokenized_texts["attention_mask"]
                )

                # Normalizing embeddings
                embeddings = F.normalize(embeddings, p=2, dim=1)

                # Converting into output format
                output_dict = {"vector": embeddings.squeeze().tolist()}

            prediction_time = (time() - _start_time) * 1000
            self.logger.info(f"Time taken to make a prediction: {prediction_time:.0f}ms")
            return output_dict

        async def __call__(self, http_request: Request) -> Dict[str, Any]:
            try:
                input_dict: Dict[str, Any] = await http_request.json()
            except UnicodeDecodeError:
                body_bytes = await http_request.body()
                try:
                    decoded = body_bytes.decode("utf-8", errors="replace")
                    input_dict = json.loads(decoded)
                except json.JSONDecodeError:
                    return JSONResponse({"error": "Invalid JSON"}, status_code=400)
            return self.encode(input_dict=input_dict)

        @staticmethod
        def average_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
            last_hidden = last_hidden_states.masked_fill(
                ~attention_mask[..., None].bool(), 0.0
            )
            return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]


    app = Deployment.bind()
    ```

    In the preceding code, logging has been added for debugging purposes.

    The preceding code example contains the following functions:

    * `__call__`: This function is non-negotiable.
    * `__init__`: The `__init__` function is where models, tokenizers, vectorizers, and the like should be set to self for invoking.
      It is recommended that you include your model’s trained parameters directly into the Docker container rather than reaching out to external storage inside `__init__`.
    * `encode`: The `encode` function is where the field or query that is passed to the model from Fusion is processed.
      Alternatively, you can process it all in the `__call__` function, but it is cleaner not to.
      The `encode` function can handle any text processing needed for the model to accept input invoked in its `model.predict()` or equivalent function which gets the expected model result.

    If the output needs additional manipulation, that should be done before the result is returned.
    For embedding models, the return value must have the shape of (1, DIM), where DIM (vector dimension) is a consistent integer, to enable Fusion to handle the vector encoding into Ray.

    <Note>Use the *exact* name of the class when naming this file.</Note>

    In the preceding example, the Python file is named `deployment.py` and the class name is `Deployment()`.

    ## Create a Dockerfile

    The next step is to create a Dockerfile. The Dockerfile should follow this general outline; read the comments for additional details:

    ```dockerfile theme={"dark"}
    #It is important that python version is 3.x-slim
    FROM python:3.10-slim

    # Install dependencies
    RUN apt-get update && apt-get install -y wget

    # Create working app directory
    RUN mkdir -p /app
    WORKDIR /app

    # Copy the requirements file and install the dependencies
    COPY requirements.txt /app
    RUN pip install -r requirements.txt --no-cache-dir

    # Copy source code
    COPY deployment.py /app

    # Expose serving port for HTTP communication with Fusion
    EXPOSE 8000

    # The end of the command follows module:application and the below value should be set in the RAY DEPLOYMENT IMPORT PATH field in 'Create Ray Model Deployment' job
    CMD exec serve run deployment:app

    ```

    ## Create a requirements file

    The `requirements.txt` file is a list of installs for the `Dockerfile` to run to ensure the Docker container has the right resources to run the model.
    For the `e5-small-v2` model, the requirements are as follows:

    ```text theme={"dark"}
    torch -f https://download.pytorch.org/whl/torch_stable.html # Make sure that we download CPU version of PyTorch
    transformers
    loguru
    ray[serve]==2.42.1
    ```

    Any recent ray\[serve] version should work, but the tested value and known supported version is 2.42.1.
    In general, if an item was used in an `import` statement in your Python file, it should be included in the requirements file.

    To populate the requirements, use the following command in the terminal, inside the directory that contains your code:

    ```bash theme={"dark"}
    pip freeze > requirements.txt
    ```

    ## Build and push the Docker image

    After creating the `MODEL_NAME.py`, `Dockerfile`, and `requirements.txt` files, you need to run a few Docker commands.
    Run the following commands in order:

    ```bash theme={"dark"}
    DOCKER_DEFAULT_PLATFORM=linux/amd64 docker build . -t [DOCKERHUB-USERNAME]/[REPOSITORY]:[VERSION-TAG]
    ```

    ```bash theme={"dark"}
    docker push [DOCKERHUB-USERNAME]/[REPOSITORY]:[VERSION-TAG]
    ```

    Using the example model, the terminal commands would be as follows:

    ```bash theme={"dark"}
    DOCKER_DEFAULT_PLATFORM=linux/amd64 docker build . -t jstrmec/e5-small-v2-ray:0.1
    ```

    ```bash theme={"dark"}
    docker push jstrmec/e5-small-v2-ray:0.1
    ```

    This repository is public and you can visit it here: [e5-small-v2-ray](https://hub.docker.com/r/jstrmec/e5-small-v2-ray/)

    ## Deploy the model in Fusion

    Now you can go to Fusion to deploy your model.

    When deploying your Ray model, you have two options for handling traffic:

    * Use a single deployment. Deploy one model job that handles both indexing and query traffic. This is simpler to manage and requires only one deployment.
    * Use separate deployments for indexing and querying. Deploy two separate model jobs: one dedicated to indexing and another for query traffic. This approach eliminates the risk of indexing workloads impacting query response times, providing better performance isolation and independent scaling control.

    Choose separate deployments if query performance is critical and you want to ensure indexing operations don't compete with user-facing queries for resources.

    To use separate deployments, follow the deployment steps to create two Ray model deployment jobs with different job IDs (for example, `EXAMPLE_MODEL_INDEX` and `EXAMPLE_MODEL_QUERY`). Use the index-specific model in your index pipeline stages and the query-specific model in your query pipeline stages. To keep both deployments in sync, ensure both jobs use the exact same model name, Ray deployment import path, Docker repository, and image name.

    1. In Fusion, navigate to **Collections** > **Jobs**.

    2. Add a job by clicking the **Add+** button and selecting **Create Ray Model Deployment**.

    3. Fill in each of the text fields:

           <img src="https://mintcdn.com/lucidworks/sBy1WWIeb2aVbL1d/assets/images/5.9/5.9.12/ray/create-job.png?fit=max&auto=format&n=sBy1WWIeb2aVbL1d&q=85&s=98319923887c5d88d6fd525299bfb0a8" alt="Create a Ray model deployment job" width="1792" height="938" data-path="assets/images/5.9/5.9.12/ray/create-job.png" />

       | Parameter                  | Description                                                                                                                                                                             |
       | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
       | Job ID                     | A string used by the Fusion API to reference the job after its creation.                                                                                                                |
       | Model name                 | A name for the deployed model. This is used to generate the deployment name in Ray. It is also the name that you reference as a `model-id` when making predictions with the ML Service. |
       | Model min replicas         | The minimum number of load-balanced replicas of the model to deploy.                                                                                                                    |
       | Model max replicas         | The maximum number of load-balanced replicas of the model to deploy. Specify multiple replicas for a higher-volume intake.                                                              |
       | Model CPU limit            | The number of CPUs to allocate to a single model replica.                                                                                                                               |
       | Model memory limit         | The maximum amount of memory to allocate to a single model replica.                                                                                                                     |
       | Ray Deployment Import Path | The path to your top-level Ray Serve deployment (or the same path passed to `serve run`). For example, `deployment:app`                                                                 |
       | Docker Repository          | The public or private repository where the Docker image is located. If you’re using Docker Hub, fill in the Docker Hub username here.                                                   |
       | Image name                 | The name of the image. For example, `e5-small-v2-ray:0.1`.                                                                                                                              |
       | Kubernetes secret          | If you’re using a private repository, supply the name of the Kubernetes secret used for access.                                                                                         |

    4. Click **Advanced** to view and configure advanced details:

       | Parameter              | Description |
       | ---------------------- | ----------- |
       | Additional parameters. | This section lets you enter `parameter name:parameter value` options to be injected into the training JSON map at runtime. The values are inserted as they are entered, so you must surround string values with `"`. This is the sparkConfig field in the configuration file. |
       | Write Options.         | This section lets you enter `parameter name:parameter value` options to use when writing output to Solr or other sources. This is the writeOptions field in the configuration file. |
       | Read Options.          | This section lets you enter `parameter name:parameter value` options to use when reading input from Solr or other sources. This is the readOptions field in the configuration file. |

    5. Click **Save**, then **Run** and **Start**.
           <img src="https://mintcdn.com/lucidworks/sBy1WWIeb2aVbL1d/assets/images/5.9/5.9.12/ray/start-job.png?fit=max&auto=format&n=sBy1WWIeb2aVbL1d&q=85&s=58ab2120fb33d38f2c97d87fedcf39f5" alt="Start a Ray model deployment job" width="1180" height="514" data-path="assets/images/5.9/5.9.12/ray/start-job.png" />
       When the job finishes successfully, you can proceed to the next section.

    Now that the model is in Fusion, you can use it in the Machine Learning or Ray / Seldon Vectorize index and query stages.

    ## Configure the Fusion pipelines

    Your real-world pipeline configuration depends on your use case and model, but for our example we will configure the index pipeline and then the query pipeline.

    **Configure the index pipeline**

    1. Create a new index pipeline or load an existing one for editing.
    2. Click **Add a Stage** and then **Machine Learning**.
    3. In the new stage, fill in these fields:
       * The model ID
       * The model input
       * The model output
    4. Save the stage in the pipeline and index your data with it.

    **Configure the query pipeline**

    1. Create a new query pipeline or load an existing one for editing.
    2. Click **Add a Stage** and then **Machine Learning**.
    3. In the new stage, fill in these fields:
       * The model ID
       * The model input
       * The model output
    4. Save the stage and then run a query by typing a search term.
    5. To verify the Ray results are correct, use the **Compare+** button to see another pipeline without the model implementation and compare the number of results.

    You have now successfully uploaded a Ray model to Fusion and deployed it.
  </Accordion>
</AccordionGroup>

<Note>
  **Important**

  This query stage must be placed *before* the **[Solr Query stage](/docs/lucidworks-search/09-developer-documentation/config-specs/query-pipeline-stages/solr-query)**.
</Note>

## Configuration

<Tip>
  When entering configuration values in the UI, use *unescaped* characters, such as `\t` for the tab character. When entering configuration values in the API, use *escaped* characters, such as `\\t` for the tab character.
</Tip>

<SchemaParamFields schema={schema} />