/// <summary>
/// Verifies that submitting a Spark batch job at least reaches a "started" Livy state
/// (Starting, Running, or Success) and that the job appears in the batch job listing.
/// </summary>
public async Task TestSparkBatchJobCompletesWhenJobStarts()
{
    SparkBatchClient client = CreateClient();

    // Submit the Spark job and wait for the long-running create operation to finish.
    SparkBatchJobOptions createParams = SparkTestUtilities.CreateSparkJobRequestParameters(Recording, TestEnvironment);
    SparkBatchOperation createOperation = await client.StartCreateSparkBatchJobAsync(createParams);
    SparkBatchJob jobCreateResponse = await createOperation.WaitForCompletionAsync();

    // The submission counts as successful once the job reaches any of these Livy states.
    bool jobStarted =
        jobCreateResponse.State == LivyStates.Starting ||
        jobCreateResponse.State == LivyStates.Running ||
        jobCreateResponse.State == LivyStates.Success;
    Assert.True(
        jobStarted,
        string.Format(
            "Job: {0} did not return success. Current job state: {1}. Error (if any): {2}",
            jobCreateResponse.Id,
            jobCreateResponse.State,
            string.Join(", ", jobCreateResponse.Errors ?? new List<SparkServiceError>())));

    // The submitted job must show up when listing Spark batch jobs.
    List<SparkBatchJob> listJobResponse = await SparkTestUtilities.ListSparkBatchJobsAsync(client);
    Assert.NotNull(listJobResponse);
    Assert.IsTrue(listJobResponse.Any(job => job.Id == jobCreateResponse.Id));
}
/// <summary>
/// Verifies that a submitted Spark batch job runs to completion — Livy state "success"
/// with a Succeeded result — and that it appears in the batch job listing.
/// </summary>
public async Task TestSparkBatchJob()
{
    SparkBatchClient client = CreateClient();

    // Submit the Spark job and block until the create operation completes.
    SparkBatchJobOptions createParams = SparkTestUtilities.CreateSparkJobRequestParameters(Recording, TestEnvironment);
    SparkBatchOperation createOperation = await client.StartCreateSparkBatchJobAsync(createParams);
    SparkBatchJob jobCreateResponse = await createOperation.WaitForCompletionAsync();

    // Completion requires both the Livy "success" state and a Succeeded result.
    bool succeeded =
        "success".Equals(jobCreateResponse.State, StringComparison.OrdinalIgnoreCase) &&
        jobCreateResponse.Result == SparkBatchJobResultType.Succeeded;
    Assert.True(
        succeeded,
        string.Format(
            "Job: {0} did not return success. Current job state: {1}. Actual result: {2}. Error (if any): {3}",
            jobCreateResponse.Id,
            jobCreateResponse.State,
            jobCreateResponse.Result,
            string.Join(", ", jobCreateResponse.Errors ?? new List<SparkServiceError>())));

    // The submitted job must show up when listing Spark batch jobs.
    List<SparkBatchJob> listJobResponse = await SparkTestUtilities.ListSparkBatchJobsAsync(client);
    Assert.NotNull(listJobResponse);
    Assert.IsTrue(listJobResponse.Any(job => job.Id == jobCreateResponse.Id));
}
/// <summary>
/// Submits a Spark batch job, polls it to a terminal state, verifies it completed
/// successfully, and checks it appears in the batch job listing.
/// </summary>
public async Task TestSparkBatchJob()
{
    // Submit the Spark job.
    SparkBatchJobOptions createParams = this.CreateSparkJobRequestParameters();
    SparkBatchJob jobCreateResponse = (await SparkBatchClient.CreateSparkBatchJobAsync(createParams)).Value;

    // Poll until the service reports a terminal state for the submission.
    SparkBatchJob getJobResponse = await this.PollSparkBatchJobSubmissionAsync(jobCreateResponse);

    // Success requires both the Livy "success" state and a Succeeded result.
    bool succeeded =
        "success".Equals(getJobResponse.State, StringComparison.OrdinalIgnoreCase) &&
        getJobResponse.Result == SparkBatchJobResultType.Succeeded;
    Assert.True(
        succeeded,
        string.Format(
            "Job: {0} did not return success. Current job state: {1}. Actual result: {2}. Error (if any): {3}",
            getJobResponse.Id,
            getJobResponse.State,
            getJobResponse.Result,
            string.Join(", ", getJobResponse.Errors ?? new List<SparkServiceError>())));

    // The submitted job must show up when listing Spark batch jobs.
    List<SparkBatchJob> listJobResponse = await this.ListSparkBatchJobsAsync();
    Assert.NotNull(listJobResponse);
    Assert.IsTrue(listJobResponse.Any(job => job.Id == getJobResponse.Id));
}
/// <summary>
/// Sample: create a SparkBatchClient, submit a word-count batch job synchronously,
/// list batch jobs, retrieve the submitted job, and cancel it.
/// </summary>
public void SubmitSparkJobSync()
{
    // Connection settings read from environment variables via TestEnvironment:
    // the Synapse workspace endpoint, the Spark pool name, and the ADLS Gen2
    // storage account / file system associated with the workspace.
    string endpoint = TestEnvironment.EndpointUrl;
    string sparkPoolName = TestEnvironment.SparkPoolName;
    string storageAccount = TestEnvironment.StorageAccountName;
    string fileSystem = TestEnvironment.StorageFileSystemName;

    #region Snippet:CreateSparkBatchClient
    SparkBatchClient client = new SparkBatchClient(new Uri(endpoint), sparkPoolName, new DefaultAzureCredential());
    #endregion

    #region Snippet:SubmitSparkBatchJob
    string name = $"batch-{Guid.NewGuid()}";
    string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/wordcount.zip", fileSystem, storageAccount);
    SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
    {
        ClassName = "WordCount",
        Arguments =
        {
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/shakespeare.txt", fileSystem, storageAccount),
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/result/", fileSystem, storageAccount),
        },
        DriverMemory = "28g",
        DriverCores = 4,
        ExecutorMemory = "28g",
        ExecutorCores = 4,
        ExecutorCount = 2
    };

    SparkBatchJob jobCreated = client.CreateSparkBatchJob(request);
    #endregion

    #region Snippet:ListSparkBatchJobs
    Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
    foreach (SparkBatchJob job in jobs.Value.Sessions)
    {
        Console.WriteLine(job.Name);
    }
    #endregion

    #region Snippet:GetSparkBatchJob
    SparkBatchJob retrievedJob = client.GetSparkBatchJob(jobCreated.Id);
    Debug.WriteLine($"Job is returned with name {retrievedJob.Name} and state {retrievedJob.State}");
    #endregion

    // NOTE(review): the region is named DeleteSparkBatchJob but the operation is a cancel;
    // the region name is kept unchanged because docs tooling includes snippets by name.
    #region Snippet:DeleteSparkBatchJob
    Response operation = client.CancelSparkBatchJob(jobCreated.Id);
    #endregion
}
/// <summary>
/// Lists all Spark batch jobs and verifies that each listed job can be fetched
/// individually by id and matches its listing entry.
/// </summary>
public async Task TestGetSparkBatchJob()
{
    SparkBatchJobCollection sparkJobs = (await SparkBatchClient.GetSparkBatchJobsAsync()).Value;
    foreach (SparkBatchJob expectedSparkJob in sparkJobs.Sessions)
    {
        try
        {
            SparkBatchJob actualSparkJob = await SparkBatchClient.GetSparkBatchJobAsync(expectedSparkJob.Id);
            ValidateSparkBatchJob(expectedSparkJob, actualSparkJob);
        }
        catch (Azure.RequestFailedException)
        {
            // Best effort: a job returned by the list call may be removed (e.g. by a
            // concurrent test run) before the per-id get; don't fail the test on that race.
        }
    }
}
/// <summary>
/// Sample: create a SparkBatchClient, submit a word-count batch job, list batch jobs,
/// and cancel the submitted job. Lines marked /*@@*/ run in tests but are hidden in docs;
/// lines marked //@@ appear in docs but are not compiled.
/// </summary>
public void SparkSample()
{
    #region Snippet:CreateBatchClient
    // Replace the string below with your actual endpoint url.
    string endpoint = "<my-endpoint-url>";
    /*@@*/ endpoint = TestEnvironment.EndpointUrl;
    string sparkPoolName = TestEnvironment.SparkPoolName;
    SparkBatchClient client = new SparkBatchClient(endpoint: new Uri(endpoint), sparkPoolName: sparkPoolName, credential: new DefaultAzureCredential());
    #endregion

    // Environment variable with the storage account associated with the Synapse workspace endpoint.
    string storageAccount = TestEnvironment.StorageAccountName;
    // Environment variable with the file system of the storage account.
    string fileSystem = TestEnvironment.StorageFileSystemName;

    #region Snippet:CreateBatchJob
    string name = $"batchSample";
    string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/wordcount.jar", fileSystem, storageAccount);
    SparkBatchJobOptions options = new SparkBatchJobOptions(name: name, file: file)
    {
        ClassName = "WordCount",
        Arguments =
        {
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/shakespeare.txt", fileSystem, storageAccount),
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/result/", fileSystem, storageAccount),
        },
        DriverMemory = "28g",
        DriverCores = 4,
        ExecutorMemory = "28g",
        ExecutorCores = 4,
        ExecutorCount = 2
    };

    SparkBatchJob jobCreated = client.CreateSparkBatchJob(options);
    #endregion

    #region Snippet:ListSparkBatchJobs
    Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
    foreach (SparkBatchJob job in jobs.Value.Sessions)
    {
        Console.WriteLine(job.Name);
    }
    #endregion

    #region Snippet:DeleteSparkBatchJob
    /*@@*/ int jobId = jobs.Value.Sessions.First().Id;
    // Replace the integer below with your actual job ID.
    //@@ int jobId = 0;
    Response operation = client.CancelSparkBatchJob(jobId);
    #endregion
}
/// <summary>
/// Creates Spark batch and session clients for the given workspace and pool,
/// resolving the workspace development endpoint from the Azure environment.
/// </summary>
/// <param name="workspaceName">Name of the Synapse workspace.</param>
/// <param name="sparkPoolName">Name of the Spark pool within the workspace.</param>
/// <param name="context">Azure context supplying environment and credentials; must not be null.</param>
/// <exception cref="AzPSInvalidOperationException">Thrown when no Azure context is available.</exception>
public SynapseAnalyticsSparkClient(string workspaceName, string sparkPoolName, IAzureContext context)
{
    // Without an Azure context we can resolve neither the endpoint suffix nor credentials.
    if (context == null)
    {
        throw new AzPSInvalidOperationException(Resources.InvalidDefaultSubscription);
    }

    // Build the workspace endpoint from the environment's Synapse DNS suffix.
    string suffix = context.Environment.GetEndpoint(AzureEnvironment.ExtendedEndpoint.AzureSynapseAnalyticsEndpointSuffix);
    Uri uri = new Uri("https://" + workspaceName + "." + suffix);

    _sparkBatchClient = new SparkBatchClient(uri, sparkPoolName, new AzureSessionCredential(context));
    _sparkSessionClient = new SparkSessionClient(uri, sparkPoolName, new AzureSessionCredential(context));
}
/// <summary>
/// Sample: create a SparkBatchClient, submit a word-count batch job synchronously,
/// retrieve the submitted job, and cancel it.
/// </summary>
public void SubmitSparkJobSync()
{
    // Connection settings read from environment variables via TestEnvironment:
    // the Synapse workspace endpoint, the Spark pool name, and the ADLS Gen2
    // storage account / file system associated with the workspace.
    string workspaceUrl = TestEnvironment.WorkspaceUrl;
    string sparkPoolName = TestEnvironment.SparkPoolName;
    string storageAccount = TestEnvironment.StorageAccountName;
    string fileSystem = TestEnvironment.StorageFileSystemName;

    #region Snippet:SparkBatchSample1SparkBatchClient
    SparkBatchClient client = new SparkBatchClient(new Uri(workspaceUrl), sparkPoolName, new DefaultAzureCredential());
    #endregion

    #region Snippet:SparkBatchSample1SubmitSparkJob
    string name = $"batch-{Guid.NewGuid()}";
    string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/wordcount.jar", fileSystem, storageAccount);
    SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
    {
        ClassName = "WordCount",
        Arguments = new List<string>
        {
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/shakespeare.txt", fileSystem, storageAccount),
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/result/", fileSystem, storageAccount),
        },
        DriverMemory = "28g",
        DriverCores = 4,
        ExecutorMemory = "28g",
        ExecutorCores = 4,
        ExecutorCount = 2
    };

    SparkBatchJob jobCreated = client.CreateSparkBatchJob(request);
    #endregion

    #region Snippet:SparkBatchSample1GetSparkJob
    SparkBatchJob job = client.GetSparkBatchJob(jobCreated.Id);
    Debug.WriteLine($"Job is returned with name {job.Name} and state {job.State}");
    #endregion

    #region Snippet:SparkBatchSample1CancelSparkJob
    Response operation = client.CancelSparkBatchJob(jobCreated.Id);
    #endregion
}
/// <summary>
/// Sample setup: builds a SparkBatchClient from environment-provided workspace settings
/// and stores it for use by the other samples.
/// </summary>
public void CreateClient()
{
    // Workspace endpoint and Spark pool name come from environment variables via TestEnvironment.
    string workspaceUrl = TestEnvironment.WorkspaceUrl;
    string sparkPoolName = TestEnvironment.SparkPoolName;

    #region Snippet:CreateBatchClient
    // Create a new access Spark batch client using the default credential from Azure.Identity using environment variables previously set,
    // including AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, and AZURE_TENANT_ID.
    SparkBatchClient client = new SparkBatchClient(endpoint: new Uri(workspaceUrl), sparkPoolName: sparkPoolName, credential: new DefaultAzureCredential());
    #endregion

    this.batchClient = client;
}
/// <summary>
/// Retrieves all Spark batch jobs by paging through the service in fixed-size pages.
/// </summary>
/// <param name="client">Client used to query batch jobs.</param>
/// <param name="detailed">Whether to request detailed job information.</param>
/// <returns>All batch jobs accumulated across pages.</returns>
public static async Task<List<SparkBatchJob>> ListSparkBatchJobsAsync(SparkBatchClient client, bool detailed = true)
{
    const int pageSize = 20;
    List<SparkBatchJob> batches = new List<SparkBatchJob>();
    int from = 0;
    int fetched;
    do
    {
        SparkBatchJobCollection page = (await client.GetSparkBatchJobsAsync(detailed: detailed, from: from, size: pageSize)).Value;
        // NOTE(review): Total is treated here as the number of sessions in this page
        // (a short page ends the loop) — confirm against the Livy/service contract.
        fetched = page.Total;
        from += fetched;
        batches.AddRange(page.Sessions);
    } while (fetched == pageSize);
    return batches;
}
/// <summary>
/// Lists all Spark batch jobs and, best effort, verifies each listed job can be
/// fetched individually and matches its listing entry.
/// </summary>
public async Task TestGetSparkBatchJob()
{
    SparkBatchJobCollection sparkJobs = (await SparkBatchClient.GetSparkBatchJobsAsync()).Value;
    foreach (SparkBatchJob expectedSparkJob in sparkJobs.Sessions)
    {
        try
        {
            SparkBatchJob actualSparkJob = await SparkBatchClient.GetSparkBatchJobAsync(expectedSparkJob.Id);
            ValidateSparkBatchJob(expectedSparkJob, actualSparkJob);
        }
        catch (Azure.RequestFailedException)
        {
            // Deliberately swallowed: a listed job can disappear (e.g. deleted by a
            // concurrent run) between the list call and the per-id get.
        }
    }
}
/// <summary>
/// Sample: create a SparkBatchClient, submit a word-count batch job, poll the
/// long-running operation to completion, list jobs, retrieve the job, and cancel it.
/// </summary>
public void SubmitSparkJobSync()
{
    #region Snippet:CreateSparkBatchClient
#if SNIPPET
    // Replace the strings below with the spark, endpoint, and file system information
    string sparkPoolName = "<my-spark-pool-name>";
    string endpoint = "<my-endpoint-url>";
    string storageAccount = "<my-storage-account-name>";
    string fileSystem = "<my-storage-filesystem-name>";
#else
    string sparkPoolName = TestEnvironment.SparkPoolName;
    string endpoint = TestEnvironment.EndpointUrl;
    string storageAccount = TestEnvironment.StorageAccountName;
    string fileSystem = TestEnvironment.StorageFileSystemName;
#endif
    SparkBatchClient client = new SparkBatchClient(new Uri(endpoint), sparkPoolName, new DefaultAzureCredential());
    #endregion

    #region Snippet:SubmitSparkBatchJob
    string name = $"batch-{Guid.NewGuid()}";
    string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/wordcount.zip", fileSystem, storageAccount);
    SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
    {
        ClassName = "WordCount",
        Arguments =
        {
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/shakespeare.txt", fileSystem, storageAccount),
            string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/result/", fileSystem, storageAccount),
        },
        DriverMemory = "28g",
        DriverCores = 4,
        ExecutorMemory = "28g",
        ExecutorCores = 4,
        ExecutorCount = 2
    };

    // Start the long-running create operation and poll every 2 seconds until it completes.
    SparkBatchOperation createOperation = client.StartCreateSparkBatchJob(request);
    while (!createOperation.HasCompleted)
    {
        System.Threading.Thread.Sleep(2000);
        createOperation.UpdateStatus();
    }
    SparkBatchJob jobCreated = createOperation.Value;
    #endregion

    #region Snippet:ListSparkBatchJobs
    Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
    foreach (SparkBatchJob job in jobs.Value.Sessions)
    {
        Console.WriteLine(job.Name);
    }
    #endregion

    #region Snippet:GetSparkBatchJob
    SparkBatchJob retrievedJob = client.GetSparkBatchJob(jobCreated.Id);
    Debug.WriteLine($"Job is returned with name {retrievedJob.Name} and state {retrievedJob.State}");
    #endregion

    #region Snippet:CancelSparkBatchJob
    Response operation = client.CancelSparkBatchJob(jobCreated.Id);
    #endregion
}