> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Transfer Collection to Cloud

> Job configuration specifications

export const schema = (() => {
  // Job configuration schema for the "Transfer Collection To Cloud" Spark job.
  // Property order is significant: SchemaParamFields renders the properties in
  // insertion order, so the factories below are only used to remove
  // duplication, not to reorder anything.

  // Advanced list of { key, value } string pairs (used for Spark settings and
  // Solr read options). Returns a fresh object on every call.
  const keyValueList = (title, description) => ({
    type: "array",
    title,
    description,
    hints: ["advanced"],
    items: {
      type: "object",
      required: ["key"],
      properties: {
        key: {type: "string", title: "Parameter Name"},
        value: {type: "string", title: "Parameter Value"},
      },
    },
  });

  // String field that must contain at least one character.
  const nonEmptyString = (title, description) => ({
    type: "string",
    title,
    description,
    minLength: 1,
  });

  return {
    type: "object",
    title: "Transfer Collection To Cloud",
    description: "Transfer Collection to Cloud Storage, for collections that need to be migrated or copied to cloud storage",
    required: ["id", "inputCollection", "outputLocation", "type"],
    properties: {
      id: {
        type: "string",
        title: "Spark Job ID",
        description: "The ID for this Spark job. Used in the API to reference this job. Allowed characters: a-z, A-Z, dash (-) and underscore (_). Maximum length: 63 characters.",
        maxLength: 63,
        pattern: "[a-zA-Z][_\\-a-zA-Z0-9]*[a-zA-Z0-9]?",
      },
      sparkConfig: keyValueList("Spark Settings", "Spark configuration settings."),
      inputCollection: nonEmptyString("Collection", "Solr collection to copy"),
      outputLocation: nonEmptyString("Output Location", "URI of output location (e.g. s3a://..., gs://..., wasb://...)"),
      overwriteOutput: {
        type: "boolean",
        title: "Overwrite Output",
        description: "Overwrite output collection",
        default: true,
      },
      outputFormat: {
        type: "string",
        title: "Output format",
        description: "Format for cloud output (e.g. parquet, json, csv)",
        default: "parquet",
      },
      sparkPartitions: {
        type: "integer",
        title: "Set minimum Spark partitions for input",
        description: "Spark will re-partition the input to have this number of partitions. Increase for greater parallelism",
        default: 200,
        hints: ["advanced"],
      },
      readOptions: keyValueList("Read Options", "Options used when reading input from Solr"),
      // The job type is fixed; the UI shows it as a read-only field.
      type: {
        type: "string",
        title: "Spark Job Type",
        enum: ["transfer"],
        default: "transfer",
        hints: ["readonly"],
      },
    },
    additionalProperties: true,
    category: "Other",
    categoryPriority: 1,
  };
})();

export const SchemaParamFields = ({schema}) => {
  const sanitize = str => {
    if (typeof str !== "string") return str;
    return str.replace(/^"(.*)"$/s, "$1").replace(/\\/g, "").replace(/"/g, "'");
  };
  const formatDescription = str => {
    const s = sanitize(str);
    return (/[.!?]\)*$/).test(s) ? s : `${s}.`;
  };
  const {description, properties = {}, required: requiredProps = []} = schema;
  const visibleProps = useMemo(() => Object.entries(properties).filter(([, prop]) => !prop.hints?.includes("hidden")), [properties]);
  return <div>
      {description && <p>{formatDescription(description)}</p>}

      {visibleProps.map(([name, prop]) => {
    const isRequired = requiredProps.includes(name);
    const hasDefault = prop.default !== undefined;
    const rawDefault = prop.default;
    const isComplexDefault = hasDefault && (typeof rawDefault === "object" || typeof rawDefault === "string" && (rawDefault.length > 20 || rawDefault.includes('"')));
    const fieldProps = {
      key: name,
      body: prop.title || name,
      type: prop.type,
      ...prop.title && ({
        post: [<><span className="text-stone-400 dark:text-stone-500">API property: </span>{name}</>]
      }),
      ...isRequired && ({
        required: true
      }),
      ...!isComplexDefault && hasDefault ? {
        default: sanitize(String(rawDefault))
      } : {}
    };
    const isObject = prop.type === "object" && prop.properties;
    const isArrayOfObjects = prop.type === "array" && prop.items?.type === "object" && prop.items.properties;
    return <ParamField {...fieldProps}>
            {prop.description && <p>{formatDescription(prop.description)}</p>}

            {isComplexDefault && <div className="flex">
                <p>
                  <strong>Default:</strong>
                </p>
                <pre className="!my-0">
                  <code>
                    {JSON.stringify(rawDefault, null, 2)}
                  </code>
                </pre>
              </div>}

            {isArrayOfObjects && <div className="flex">
              <p>
                <strong>Object attributes:</strong>
              </p>
              <pre className="!my-0">
                <code>
                  {'{\n'}
                  {Object.entries(prop.items.properties).map(([iname, iprop]) => <>
                      {`  ${iname}`}
                      {prop.items?.required?.includes(iname) && <span style={{
      color: 'red'
    }}> required</span>}
                      {`: {\n    display name: ${sanitize(iprop.title || '')}\n    type: ${iprop.type}\n  }\n`}
                    </>)}
                  {'}'}
                </code>
              </pre>
              </div>}

            {isObject && <Expandable title="properties">
                <SchemaParamFields schema={{
      properties: prop.properties,
      required: prop.required
    }} />
              </Expandable>}
          </ParamField>;
  })}
    </div>;
};

export const LwTemplate = ({title = "Key questions to get you started", icon = "sparkles", cta = "Powered by Agent Studio", linkHref = "https://lucidworks.com/demo/?utm_source=docs&utm_medium=referral&utm_campaign=docs_cta_ai"}) => {
  const [isLoaded, setIsLoaded] = useState(false);
  useEffect(() => {
    const timer = setTimeout(() => {
      setIsLoaded(true);
    }, 500);
    return () => clearTimeout(timer);
  }, []);
  return <div className="lw-template-container">
      <Card title={title} icon={icon}>
        {isLoaded && <span dangerouslySetInnerHTML={{
    __html: `<lw-template id="a029c1a9-28be-427e-b0e1-5d918920246a"></lw-template
            >`
  }} />}
        <Link href={linkHref} className="agent-studio-link text-left text-gray-600 gap-2 dark:text-gray-400 text-sm font-medium flex flex-row items-center hover:text-primary dark:hover:text-primary-light group-hover:text-primary group-hover:dark:text-primary-light">Powered by Lucidworks Agent Studio</Link>
      </Card>
    </div>;
};

[localhost link]: http://localhost:3000/docs/lucidworks-search/09-developer-documentation/config-specs/jobs/transfer-collection-to-cloud

[mintlify link]: https://doc.lucidworks.com/docs/lucidworks-search/09-developer-documentation/config-specs/jobs/transfer-collection-to-cloud

[old doc.lw link]: https://doc.lucidworks.com/managed-fusion/5.9/m6c66h

The Transfer Collection to Cloud job lets you migrate or copy your Solr collection to cloud storage.

To create a Transfer Collection to Cloud job, sign in to Lucidworks Search and click **Collections > Jobs**. Then click **Add+** and in the Custom and Others Jobs section, select **Transfer Collection To Cloud**. You can enter basic and advanced parameters to configure the job. If the field has a default value, it is populated when you click to add the job.

<LwTemplate />

## Basic parameters

<Note>
  To enter advanced parameters in the UI, click **Advanced**. Those parameters are described in [the advanced parameters section](#advanced-parameters).
</Note>

* **Spark job ID.** The unique ID for this Spark job, used to reference the job in the API. This is the `id` field in the configuration file. Required field.
* **Collection.** The Solr collection to transfer or copy to cloud storage. This is the `inputCollection` field in the configuration file. Required field.
* **Output location.** The name or location (URI) where the Solr collection is being transferred or copied. This is the `outputLocation` field in the configuration file. Required field.
* **Overwrite output.** If this checkbox is selected (set to `true`), overwrite any information that currently exists in the **Output location** with the data in the **Collection** being transferred or copied. If this checkbox is not selected and data exists in the output location, the collection is not copied to the output location and the system generates an error. If this checkbox is not selected and data does not exist in the output location, the collection is copied to the output location. This is the `overwriteOutput` field in the configuration file. Optional field.
* **Output format.** The format for the output transferred or copied to the cloud. Values include `parquet`, `json`, and `csv`. This is the `outputFormat` field in the configuration file. Optional field.

## Advanced parameters

If you click the **Advanced** toggle, the following optional fields are displayed in the UI.

* **Spark Settings.** This section lets you enter `parameter name:parameter value` options to use for Spark configuration. This is the `sparkConfig` field in the configuration file.
* **Set minimum Spark partitions for input.** The number of partitions that Spark sets for the input. For greater parallelism, increase the value in this field. This is the `sparkPartitions` field in the configuration file.
* **Read Options.** This section lets you enter `parameter name:parameter value` options to use when reading input from Solr. This is the `readOptions` field in the configuration file.

<SchemaParamFields schema={schema} />
