public void CreateSparkBatchJob()
{
    // Storage account associated with the Synapse workspace endpoint (from environment).
    string storageAccount = TestEnvironment.StorageAccountName;
    // File system (container) of that storage account (from environment).
    string fileSystem = TestEnvironment.StorageFileSystemName;

    #region Snippet:CreateBatchJob
    string name = $"batchSample";
    string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/wordcount.jar", fileSystem, storageAccount);

    // Input text and output folder passed to the word-count job.
    var jobArguments = new List<string>
    {
        string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/shakespeare.txt", fileSystem, storageAccount),
        string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/result/", fileSystem, storageAccount),
    };

    SparkBatchJobOptions options = new SparkBatchJobOptions(name: name, file: file)
    {
        ClassName = "WordCount",
        Arguments = jobArguments,
        DriverMemory = "28g",
        DriverCores = 4,
        ExecutorMemory = "28g",
        ExecutorCores = 4,
        ExecutorCount = 2
    };

    SparkBatchJob jobCreated = batchClient.CreateSparkBatchJob(options);
    #endregion
}
public async Task TestSparkBatchJob()
{
    SparkBatchClient client = CreateClient();

    // Submit the Spark job and wait for the long-running create operation to finish.
    SparkBatchJobOptions submitOptions = SparkTestUtilities.CreateSparkJobRequestParameters(Recording, TestEnvironment);
    SparkBatchOperation operation = await client.StartCreateSparkBatchJobAsync(submitOptions);
    SparkBatchJob job = await operation.WaitForCompletionAsync();

    // The job must report both a terminal "success" state and a Succeeded result.
    bool succeeded = "success".Equals(job.State, StringComparison.OrdinalIgnoreCase)
        && job.Result == SparkBatchJobResultType.Succeeded;
    Assert.True(succeeded,
        string.Format(
            "Job: {0} did not return success. Current job state: {1}. Actual result: {2}. Error (if any): {3}",
            job.Id,
            job.State,
            job.Result,
            string.Join(", ", job.Errors ?? new List<SparkServiceError>())));

    // The submitted job must show up in the batch job listing.
    List<SparkBatchJob> jobs = await SparkTestUtilities.ListSparkBatchJobsAsync(client);
    Assert.NotNull(jobs);
    Assert.IsTrue(jobs.Any(j => j.Id == job.Id));
}
public async Task TestSparkBatchJobCompletesWhenJobStarts()
{
    SparkBatchClient client = CreateClient();

    // Submit the Spark job; the operation completes once the job has started.
    SparkBatchJobOptions submitOptions = SparkTestUtilities.CreateSparkJobRequestParameters(Recording, TestEnvironment);
    SparkBatchOperation operation = await client.StartCreateSparkBatchJobAsync(submitOptions);
    SparkBatchJob job = await operation.WaitForCompletionAsync();

    // Submission counts as successful once the job has at least started:
    // Starting, Running, or an already-finished Success state are all acceptable.
    bool started = LivyStates.Starting == job.State
        || LivyStates.Running == job.State
        || LivyStates.Success == job.State;
    Assert.True(started,
        string.Format(
            "Job: {0} did not return success. Current job state: {1}. Error (if any): {2}",
            job.Id,
            job.State,
            string.Join(", ", job.Errors ?? new List<SparkServiceError>())));

    // The submitted job must show up in the batch job listing.
    List<SparkBatchJob> jobs = await SparkTestUtilities.ListSparkBatchJobsAsync(client);
    Assert.NotNull(jobs);
    Assert.IsTrue(jobs.Any(j => j.Id == job.Id));
}
public async Task TestSparkBatchJob()
{
    // Submit the Spark job.
    SparkBatchJobOptions submitOptions = this.CreateSparkJobRequestParameters();
    SparkBatchJob submitted = (await SparkBatchClient.CreateSparkBatchJobAsync(submitOptions)).Value;

    // Poll the job until it reaches a terminal state.
    SparkBatchJob polled = await this.PollSparkBatchJobSubmissionAsync(submitted);

    // The job must report both a terminal "success" state and a Succeeded result.
    bool succeeded = "success".Equals(polled.State, StringComparison.OrdinalIgnoreCase)
        && polled.Result == SparkBatchJobResultType.Succeeded;
    Assert.True(succeeded,
        string.Format(
            "Job: {0} did not return success. Current job state: {1}. Actual result: {2}. Error (if any): {3}",
            polled.Id,
            polled.State,
            polled.Result,
            string.Join(", ", polled.Errors ?? new List<SparkServiceError>())));

    // The submitted job must show up in the batch job listing.
    List<SparkBatchJob> jobs = await this.ListSparkBatchJobsAsync();
    Assert.NotNull(jobs);
    Assert.IsTrue(jobs.Any(j => j.Id == polled.Id));
}
public void SubmitSparkJobSync()
{
    // Synapse workspace endpoint, Spark pool, and the associated ADLS Gen2
    // storage account/file system all come from environment variables.
    string endpoint = TestEnvironment.EndpointUrl;
    string sparkPoolName = TestEnvironment.SparkPoolName;
    string storageAccount = TestEnvironment.StorageAccountName;
    string fileSystem = TestEnvironment.StorageFileSystemName;

    #region Snippet:CreateSparkBatchClient
    SparkBatchClient client = new SparkBatchClient(new Uri(endpoint), sparkPoolName, new DefaultAzureCredential());
    #endregion

    #region Snippet:SubmitSparkBatchJob
    string name = $"batch-{Guid.NewGuid()}";
    string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/wordcount.zip", fileSystem, storageAccount);
    SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
    {
        ClassName = "WordCount",
        Arguments =
        {
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/shakespeare.txt", fileSystem, storageAccount),
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/result/", fileSystem, storageAccount),
        },
        DriverMemory = "28g",
        DriverCores = 4,
        ExecutorMemory = "28g",
        ExecutorCores = 4,
        ExecutorCount = 2
    };

    SparkBatchJob jobCreated = client.CreateSparkBatchJob(request);
    #endregion

    #region Snippet:ListSparkBatchJobs
    Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
    foreach (SparkBatchJob job in jobs.Value.Sessions)
    {
        Console.WriteLine(job.Name);
    }
    #endregion

    #region Snippet:GetSparkBatchJob
    SparkBatchJob retrievedJob = client.GetSparkBatchJob(jobCreated.Id);
    Debug.WriteLine($"Job is returned with name {retrievedJob.Name} and state {retrievedJob.State}");
    #endregion

    #region Snippet:DeleteSparkBatchJob
    Response operation = client.CancelSparkBatchJob(jobCreated.Id);
    #endregion
}
public SparkBatchJob SubmitSparkBatchJob(SparkBatchJobOptions sparkBatchJobOptions, bool waitForCompletion)
{
    // Submit the batch job, requesting the detailed response payload.
    SparkBatchJob submitted = _sparkBatchClient.CreateSparkBatchJob(sparkBatchJobOptions, detailed: true);

    // Either return the initial submission immediately, or block polling until it finishes.
    return waitForCompletion ? PollSparkBatchJobSubmission(submitted) : submitted;
}
public SparkBatchJob SubmitSparkBatchJob(SparkBatchJobOptions sparkBatchJobOptions, bool waitForCompletion)
{
    // Start the long-running create operation, requesting the detailed response payload.
    var operation = _sparkBatchClient.StartCreateSparkBatchJob(sparkBatchJobOptions, detailed: true);

    if (waitForCompletion)
    {
        // Block until the operation reaches a terminal state and return the final job.
        return operation.Poll().Value;
    }

    // Otherwise fetch the job's current snapshot by its (numeric) operation id.
    return GetSparkBatchJob(int.Parse(operation.Id));
}
public void SparkSample()
{
    #region Snippet:CreateBatchClient
    // Replace the string below with your actual endpoint url.
    string endpoint = "<my-endpoint-url>";
    /*@@*/ endpoint = TestEnvironment.EndpointUrl;

    string sparkPoolName = TestEnvironment.SparkPoolName;

    SparkBatchClient client = new SparkBatchClient(endpoint: new Uri(endpoint), sparkPoolName: sparkPoolName, credential: new DefaultAzureCredential());
    #endregion

    // Storage account and file system associated with the Synapse workspace (from environment).
    string storageAccount = TestEnvironment.StorageAccountName;
    string fileSystem = TestEnvironment.StorageFileSystemName;

    #region Snippet:CreateBatchJob
    string name = $"batchSample";
    string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/wordcount.jar", fileSystem, storageAccount);
    SparkBatchJobOptions options = new SparkBatchJobOptions(name: name, file: file)
    {
        ClassName = "WordCount",
        Arguments =
        {
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/shakespeare.txt", fileSystem, storageAccount),
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/result/", fileSystem, storageAccount),
        },
        DriverMemory = "28g",
        DriverCores = 4,
        ExecutorMemory = "28g",
        ExecutorCores = 4,
        ExecutorCount = 2
    };

    SparkBatchJob jobCreated = client.CreateSparkBatchJob(options);
    #endregion

    #region Snippet:ListSparkBatchJobs
    Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
    foreach (SparkBatchJob job in jobs.Value.Sessions)
    {
        Console.WriteLine(job.Name);
    }
    #endregion

    #region Snippet:DeleteSparkBatchJob
    /*@@*/ int jobId = jobs.Value.Sessions.First().Id;
    // Replace the integer below with your actual job ID.
    //@@ string jobId = 0;
    Response operation = client.CancelSparkBatchJob(jobId);
    #endregion
}
public virtual Response<SparkBatchJob> CreateSparkBatchJob(SparkBatchJobOptions sparkBatchJobOptions, bool? detailed = null, CancellationToken cancellationToken = default)
{
    // Wrap the REST call in a diagnostic scope so failures are surfaced to tracing.
    using var scope = _clientDiagnostics.CreateScope("SparkBatchClient.CreateSparkBatchJob");
    scope.Start();
    try
    {
        Response<SparkBatchJob> response = RestClient.CreateSparkBatchJob(sparkBatchJobOptions, detailed, cancellationToken);
        return response;
    }
    catch (Exception e)
    {
        // Record the failure on the scope, then rethrow preserving the stack trace.
        scope.Failed(e);
        throw;
    }
}
public void SubmitSparkJobSync()
{
    // Synapse workspace endpoint, Spark pool, and the associated ADLS Gen2
    // storage account/file system all come from environment variables.
    string workspaceUrl = TestEnvironment.WorkspaceUrl;
    string sparkPoolName = TestEnvironment.SparkPoolName;
    string storageAccount = TestEnvironment.StorageAccountName;
    string fileSystem = TestEnvironment.StorageFileSystemName;

    #region Snippet:SparkBatchSample1SparkBatchClient
    SparkBatchClient client = new SparkBatchClient(new Uri(workspaceUrl), sparkPoolName, new DefaultAzureCredential());
    #endregion

    #region Snippet:SparkBatchSample1SubmitSparkJob
    string name = $"batch-{Guid.NewGuid()}";
    string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/wordcount.jar", fileSystem, storageAccount);
    SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
    {
        ClassName = "WordCount",
        Arguments = new List<string>
        {
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/shakespeare.txt", fileSystem, storageAccount),
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/result/", fileSystem, storageAccount),
        },
        DriverMemory = "28g",
        DriverCores = 4,
        ExecutorMemory = "28g",
        ExecutorCores = 4,
        ExecutorCount = 2
    };

    SparkBatchJob jobCreated = client.CreateSparkBatchJob(request);
    #endregion

    #region Snippet:SparkBatchSample1GetSparkJob
    SparkBatchJob job = client.GetSparkBatchJob(jobCreated.Id);
    Debug.WriteLine($"Job is returned with name {job.Name} and state {job.State}");
    #endregion

    #region Snippet:SparkBatchSample1CancelSparkJob
    Response operation = client.CancelSparkBatchJob(jobCreated.Id);
    #endregion
}
public void SubmitSparkJobSync()
{
    #region Snippet:CreateSparkBatchClient
#if SNIPPET
    // Replace the strings below with the spark, endpoint, and file system information
    string sparkPoolName = "<my-spark-pool-name>";
    string endpoint = "<my-endpoint-url>";
    string storageAccount = "<my-storage-account-name>";
    string fileSystem = "<my-storage-filesystem-name>";
#else
    string sparkPoolName = TestEnvironment.SparkPoolName;
    string endpoint = TestEnvironment.EndpointUrl;
    string storageAccount = TestEnvironment.StorageAccountName;
    string fileSystem = TestEnvironment.StorageFileSystemName;
#endif
    SparkBatchClient client = new SparkBatchClient(new Uri(endpoint), sparkPoolName, new DefaultAzureCredential());
    #endregion

    #region Snippet:SubmitSparkBatchJob
    string name = $"batch-{Guid.NewGuid()}";
    string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/wordcount.zip", fileSystem, storageAccount);
    SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
    {
        ClassName = "WordCount",
        Arguments =
        {
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/shakespeare.txt", fileSystem, storageAccount),
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/result/", fileSystem, storageAccount),
        },
        DriverMemory = "28g",
        DriverCores = 4,
        ExecutorMemory = "28g",
        ExecutorCores = 4,
        ExecutorCount = 2
    };

    // Start the long-running submission and poll every two seconds until it completes.
    SparkBatchOperation createOperation = client.StartCreateSparkBatchJob(request);
    while (!createOperation.HasCompleted)
    {
        System.Threading.Thread.Sleep(2000);
        createOperation.UpdateStatus();
    }
    SparkBatchJob jobCreated = createOperation.Value;
    #endregion

    #region Snippet:ListSparkBatchJobs
    Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
    foreach (SparkBatchJob job in jobs.Value.Sessions)
    {
        Console.WriteLine(job.Name);
    }
    #endregion

    #region Snippet:GetSparkBatchJob
    SparkBatchJob retrievedJob = client.GetSparkBatchJob(jobCreated.Id);
    Debug.WriteLine($"Job is returned with name {retrievedJob.Name} and state {retrievedJob.State}");
    #endregion

    #region Snippet:CancelSparkBatchJob
    Response operation = client.CancelSparkBatchJob(jobCreated.Id);
    #endregion
}
// Submits a Spark batch job to a Synapse Spark pool. Builds a SparkBatchJobOptions
// payload from cmdlet parameters, with special handling for Spark .NET jobs, then
// submits without waiting for completion and writes the job back to the pipeline.
public override void ExecuteCmdlet()
{
    this.Language = LanguageType.Parse(this.Language);
    // Spark (Scala/Java) and Spark .NET jobs require an entry-point class name.
    if (string.IsNullOrEmpty(this.MainClassName))
    {
        if (LanguageType.SparkDotNet == this.Language || LanguageType.Spark == this.Language)
        {
            throw new SynapseException(Resources.MissingMainClassName);
        }
    }

    // When a Spark pool object is piped in, derive the workspace and pool names
    // from its ARM resource id instead of explicit parameters.
    if (this.IsParameterBound(c => c.SparkPoolObject))
    {
        var resourceIdentifier = new ResourceIdentifier(this.SparkPoolObject.Id);
        this.WorkspaceName = resourceIdentifier.ParentResource;
        // ParentResource is a path segment ("workspaces/<name>"); keep only the trailing name.
        this.WorkspaceName = this.WorkspaceName.Substring(this.WorkspaceName.LastIndexOf('/') + 1);
        this.SparkPoolName = resourceIdentifier.ResourceName;
    }

    // Normalize all user-supplied URLs in place (main file, arguments, reference files).
    this.MainDefinitionFile = Utils.NormalizeUrl(this.MainDefinitionFile);
    if (this.CommandLineArgument != null)
    {
        for (int i = 0; i < this.CommandLineArgument.Length; i++)
        {
            this.CommandLineArgument[i] = Utils.NormalizeUrl(this.CommandLineArgument[i]);
        }
    }
    if (this.ReferenceFile != null)
    {
        for (int i = 0; i < this.ReferenceFile.Length; i++)
        {
            this.ReferenceFile[i] = Utils.NormalizeUrl(this.ReferenceFile[i]);
        }
    }

    // Split reference files into jar references vs. plain file references.
    Utils.CategorizedFiles(this.ReferenceFile, out IList<string> jars, out IList<string> files);

    bool isSparkDotNet = this.Language == LanguageType.SparkDotNet;
    // For Spark .NET, submit the well-known launcher jar/class and pass the user's
    // definition file and class through as the first two job arguments.
    var batchRequest = new SparkBatchJobOptions(this.Name, isSparkDotNet ? SynapseConstants.SparkDotNetJarFile : this.MainDefinitionFile)
    {
        // PySpark jobs carry no class name; others use the user's (or the .NET launcher's).
        ClassName = isSparkDotNet ? SynapseConstants.SparkDotNetClassName : (this.Language == LanguageType.PySpark ? null : this.MainClassName),
        Arguments = isSparkDotNet
            ? new List<string> { this.MainDefinitionFile, this.MainClassName }
                .Concat(this.CommandLineArgument ?? new string[0]).ToArray()
            : this.CommandLineArgument,
        Jars = jars,
        Files = files,
        // Spark .NET ships the definition archive extracted into the UDFs folder.
        Archives = isSparkDotNet ? new List<string> { $"{this.MainDefinitionFile}#{SynapseConstants.SparkDotNetUdfsFolderName}" } : null,
        Configuration = this.Configuration?.ToDictionary(),
        // Driver and executors both use the node size selected by ExecutorSize.
        ExecutorMemory = SynapseConstants.ComputeNodeSizes[this.ExecutorSize].Memory + "g",
        ExecutorCores = SynapseConstants.ComputeNodeSizes[this.ExecutorSize].Cores,
        DriverMemory = SynapseConstants.ComputeNodeSizes[this.ExecutorSize].Memory + "g",
        DriverCores = SynapseConstants.ComputeNodeSizes[this.ExecutorSize].Cores,
        ExecutorCount = this.ExecutorCount
    };

    // Ensure the relative path of UDFs is added to the assembly search paths ("--conf").
    if (isSparkDotNet)
    {
        batchRequest.Configuration = batchRequest.Configuration ?? new Dictionary<string, string>();
        string udfsRelativePath = "./" + SynapseConstants.SparkDotNetUdfsFolderName;
        batchRequest.Configuration.TryGetValue(SynapseConstants.SparkDotNetAssemblySearchPathsKey, out string pathValue);
        // Merge with any user-provided search paths, trimming blanks and avoiding duplicates.
        var paths = pathValue?.Split(',').Select(path => path.Trim()).Where(path => !string.IsNullOrEmpty(path)).ToList() ?? new List<string>();
        if (!paths.Contains(udfsRelativePath))
        {
            paths.Add(udfsRelativePath);
        }
        batchRequest.Configuration[SynapseConstants.SparkDotNetAssemblySearchPathsKey] = string.Join(",", paths);
    }

    // Honor -WhatIf/-Confirm before actually submitting the job.
    if (this.ShouldProcess(this.SparkPoolName, string.Format(Resources.SubmittingSynapseSparkJob, this.SparkPoolName, this.WorkspaceName)))
    {
        var jobInformation = SynapseAnalyticsClient.SubmitSparkBatchJob(batchRequest, waitForCompletion: false);
        WriteObject(new PSSynapseSparkJob(jobInformation));
    }
}
public virtual async Task<Response<SparkBatchJob>> CreateSparkBatchJobAsync(SparkBatchJobOptions sparkBatchJobOptions, bool? detailed = null, CancellationToken cancellationToken = default)
{
    // Wrap the async REST call in a diagnostic scope so failures are surfaced to tracing.
    using var scope = _clientDiagnostics.CreateScope("SparkBatchClient.CreateSparkBatchJob");
    scope.Start();
    try
    {
        Response<SparkBatchJob> response = await RestClient.CreateSparkBatchJobAsync(sparkBatchJobOptions, detailed, cancellationToken).ConfigureAwait(false);
        return response;
    }
    catch (Exception e)
    {
        // Record the failure on the scope, then rethrow preserving the stack trace.
        scope.Failed(e);
        throw;
    }
}
private async Task<SparkBatchOperation> StartCreateSparkBatchJobInternal(bool async, SparkBatchJobOptions sparkBatchJobOptions, bool? detailed = null, CancellationToken cancellationToken = default)
{
    using DiagnosticScope scope = _clientDiagnostics.CreateScope($"{nameof(SparkBatchClient)}.{nameof(StartCreateSparkBatchJob)}");
    scope.Start();
    try
    {
        // Issue the create call on the requested (sync or async) code path.
        Response<SparkBatchJob> batchSession = async
            ? await RestClient.CreateSparkBatchJobAsync(sparkBatchJobOptions, detailed, cancellationToken).ConfigureAwait(false)
            : RestClient.CreateSparkBatchJob(sparkBatchJobOptions, detailed, cancellationToken);

        // Wrap the initial response in a long-running operation the caller can poll.
        return new SparkBatchOperation(this, _clientDiagnostics, batchSession);
    }
    catch (Exception e)
    {
        // Record the failure on the scope, then rethrow preserving the stack trace.
        scope.Failed(e);
        throw;
    }
}
public virtual SparkBatchOperation StartCreateSparkBatchJob(SparkBatchJobOptions sparkBatchJobOptions, bool? detailed = null, CancellationToken cancellationToken = default)
{
    // Synchronous entry point delegating to the shared internal implementation.
    return StartCreateSparkBatchJobInternal(false, sparkBatchJobOptions, detailed, cancellationToken).EnsureCompleted();
}
public virtual async Task<SparkBatchOperation> StartCreateSparkBatchJobAsync(SparkBatchJobOptions sparkBatchJobOptions, bool? detailed = null, CancellationToken cancellationToken = default)
{
    // Asynchronous entry point delegating to the shared internal implementation.
    return await StartCreateSparkBatchJobInternal(true, sparkBatchJobOptions, detailed, cancellationToken).ConfigureAwait(false);
}