PySparkBatch
import type { PySparkBatch } from "https://googleapis.deno.dev/v1/dataproc:v1.ts";
A configuration for running an Apache PySpark (https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html) batch workload.
/**
 * Configuration for running an Apache PySpark batch workload on
 * Dataproc (mirrors the Dataproc v1 `PySparkBatch` message).
 */
interface PySparkBatch {
/**
 * Optional. HCFS URIs of archives to be extracted into the working
 * directory of each executor. Supported file types: .jar, .tar,
 * .tar.gz, .tgz, and .zip.
 */
archiveUris?: string[];
/**
 * Optional. Arguments to pass to the driver. Do not include arguments
 * that can be set as batch properties (e.g. --conf); a collision can
 * cause an incorrect batch submission.
 */
args?: string[];
/**
 * Optional. HCFS URIs of files to be placed in the working directory
 * of each executor.
 */
fileUris?: string[];
/**
 * Optional. HCFS URIs of jar files to add to the classpath of the
 * Spark driver and tasks.
 */
jarFileUris?: string[];
/**
 * NOTE(review): the description for this field is truncated in this
 * view — presumably the HCFS URI of the main Python driver file;
 * confirm against the Dataproc v1 API reference.
 */
mainPythonFileUri?: string;
/**
 * NOTE(review): the description for this field is truncated in this
 * view — presumably HCFS URIs of Python files to pass to the PySpark
 * framework; confirm against the Dataproc v1 API reference.
 */
pythonFileUris?: string[];
}§Properties
§
archiveUris?: string[]
[src]Optional. HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
§
args?: string[]
[src]Optional. The arguments to pass to the driver. Do not include arguments that can be set as batch properties, such as --conf, since a collision can occur that causes an incorrect batch submission.
§
fileUris?: string[]
[src]Optional. HCFS URIs of files to be placed in the working directory of each executor.
§
jarFileUris?: string[]
[src]Optional. HCFS URIs of jar files to add to the classpath of the Spark driver and tasks.