PySparkBatch
import type { PySparkBatch } from "https://googleapis.deno.dev/v1/dataproc:v1.ts";
A configuration for running an Apache PySpark (https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html) batch workload.
/**
 * Configuration for running an Apache PySpark batch workload on
 * Dataproc (mirrors the Dataproc v1 `PySparkBatch` message).
 */
interface PySparkBatch {
/**
 * Optional. HCFS URIs of archives to be extracted into the working
 * directory of each executor. Supported file types: .jar, .tar,
 * .tar.gz, .tgz, and .zip.
 */
archiveUris?: string[];
/**
 * Optional. Arguments to pass to the driver. Do not include arguments
 * that can be set as batch properties (e.g. --conf); a collision can
 * cause an incorrect batch submission.
 */
args?: string[];
/**
 * Optional. HCFS URIs of files to be placed in the working directory
 * of each executor.
 */
fileUris?: string[];
/**
 * Optional. HCFS URIs of jar files to add to the classpath of the
 * Spark driver and tasks.
 */
jarFileUris?: string[];
/**
 * NOTE(review): the description for this field is truncated in this
 * view — presumably the HCFS URI of the main Python driver file;
 * confirm against the Dataproc v1 API reference.
 */
mainPythonFileUri?: string;
/**
 * NOTE(review): the description for this field is truncated in this
 * view — presumably HCFS URIs of Python files to pass to the PySpark
 * framework; confirm against the Dataproc v1 API reference.
 */
pythonFileUris?: string[];
}§Properties
§
archiveUris?: string[]
[src]Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
§
args?: string[]
[src]Optional. The arguments to pass to the driver. Do not include arguments that can be set as batch properties, such as --conf, since a collision can occur that causes an incorrect batch submission.
§
fileUris?: string[]
[src]Optional. HCFS URIs of files to be placed in the working directory of each executor.
§
jarFileUris?: string[]
[src]Optional. HCFS URIs of jar files to add to the classpath of the Spark driver and tasks.