Beispiel #1
0
        /// <summary>
        /// Submits a Spark batch job, waits on the create operation, and checks that the returned job is in a
        /// starting, running, or success state and that it shows up in the listed batch jobs.
        /// </summary>
        public async Task TestSparkBatchJobCompletesWhenJobStarts()
        {
            SparkBatchClient batchClient = CreateClient();

            // Build the request and kick off the submission.
            SparkBatchJobOptions jobOptions = SparkTestUtilities.CreateSparkJobRequestParameters(Recording, TestEnvironment);
            SparkBatchOperation submission = await batchClient.StartCreateSparkBatchJobAsync(jobOptions);
            SparkBatchJob submittedJob = await submission.WaitForCompletionAsync();

            // Any of these three states counts as "the submission started".
            bool hasStarted = LivyStates.Starting == submittedJob.State
                || LivyStates.Running == submittedJob.State
                || LivyStates.Success == submittedJob.State;

            // Diagnostic text shown when the assertion fails.
            string failureMessage = string.Format(
                "Job: {0} did not return success. Current job state: {1}. Error (if any): {2}",
                submittedJob.Id,
                submittedJob.State,
                string.Join(", ", submittedJob.Errors ?? new List<SparkServiceError>()));

            Assert.True(hasStarted, failureMessage);

            // The submitted job must be present in the full listing.
            List<SparkBatchJob> allJobs = await SparkTestUtilities.ListSparkBatchJobsAsync(batchClient);

            Assert.NotNull(allJobs);
            Assert.IsTrue(allJobs.Any(candidate => candidate.Id == submittedJob.Id));
        }
        /// <summary>
        /// Submits a Spark batch job, waits for the create operation to complete, and checks that the job
        /// reports state "success" (case-insensitive) with a Succeeded result and appears in the job listing.
        /// </summary>
        public async Task TestSparkBatchJob()
        {
            SparkBatchClient batchClient = CreateClient();

            // Build the request and kick off the submission.
            SparkBatchJobOptions jobOptions = SparkTestUtilities.CreateSparkJobRequestParameters(Recording, TestEnvironment);
            SparkBatchOperation submission = await batchClient.StartCreateSparkBatchJobAsync(jobOptions);
            SparkBatchJob finishedJob = await submission.WaitForCompletionAsync();

            // Both the state string and the result enum have to indicate success.
            bool succeeded = string.Equals("success", finishedJob.State, StringComparison.OrdinalIgnoreCase)
                && finishedJob.Result == SparkBatchJobResultType.Succeeded;

            // Diagnostic text shown when the assertion fails.
            string failureMessage = string.Format(
                "Job: {0} did not return success. Current job state: {1}. Actual result: {2}. Error (if any): {3}",
                finishedJob.Id,
                finishedJob.State,
                finishedJob.Result,
                string.Join(", ", finishedJob.Errors ?? new List<SparkServiceError>()));

            Assert.True(succeeded, failureMessage);

            // The submitted job must be present in the full listing.
            List<SparkBatchJob> allJobs = await SparkTestUtilities.ListSparkBatchJobsAsync(batchClient);

            Assert.NotNull(allJobs);
            Assert.IsTrue(allJobs.Any(candidate => candidate.Id == finishedJob.Id));
        }
        /// <summary>
        /// Creates a Spark batch job, polls it through <c>PollSparkBatchJobSubmissionAsync</c>, and checks that
        /// the polled job reports state "success" (case-insensitive) with a Succeeded result and appears in
        /// the job listing.
        /// </summary>
        public async Task TestSparkBatchJob()
        {
            // Create the job from the fixture's request parameters.
            SparkBatchJobOptions jobOptions = this.CreateSparkJobRequestParameters();
            var createResponse = await SparkBatchClient.CreateSparkBatchJobAsync(jobOptions);
            SparkBatchJob createdJob = createResponse.Value;

            // Hand the created job to the polling helper and use what it returns for the checks below.
            SparkBatchJob polledJob = await this.PollSparkBatchJobSubmissionAsync(createdJob);

            // Both the state string and the result enum have to indicate success.
            bool succeeded = string.Equals("success", polledJob.State, StringComparison.OrdinalIgnoreCase)
                && polledJob.Result == SparkBatchJobResultType.Succeeded;

            // Diagnostic text shown when the assertion fails.
            string failureMessage = string.Format(
                "Job: {0} did not return success. Current job state: {1}. Actual result: {2}. Error (if any): {3}",
                polledJob.Id,
                polledJob.State,
                polledJob.Result,
                string.Join(", ", polledJob.Errors ?? new List<SparkServiceError>()));

            Assert.True(succeeded, failureMessage);

            // The job must be present in the full listing.
            List<SparkBatchJob> allJobs = await this.ListSparkBatchJobsAsync();

            Assert.NotNull(allJobs);
            Assert.IsTrue(allJobs.Any(candidate => candidate.Id == polledJob.Id));
        }
        /// <summary>
        /// Synchronous sample: creates a <c>SparkBatchClient</c>, submits a "WordCount" batch job whose file and
        /// arguments live under <c>samples/net/wordcount</c> in the workspace storage, lists the batch jobs,
        /// fetches the created job by id, and finally cancels it.
        /// </summary>
        public void SubmitSparkJobSync()
        {
            // All four settings are read from the test environment.
            string endpoint = TestEnvironment.EndpointUrl;                 // Synapse workspace endpoint
            string sparkPoolName = TestEnvironment.SparkPoolName;          // Synapse Spark pool name
            string storageAccount = TestEnvironment.StorageAccountName;    // ADLS Gen2 storage account
            string fileSystem = TestEnvironment.StorageFileSystemName;     // file system inside that account

            #region Snippet:CreateSparkBatchClient
            Uri endpointUri = new Uri(endpoint);
            SparkBatchClient client = new SparkBatchClient(endpointUri, sparkPoolName, new DefaultAzureCredential());
            #endregion

            #region Snippet:SubmitSparkBatchJob
            string name = $"batch-{Guid.NewGuid()}";
            string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/wordcount.zip", fileSystem, storageAccount);

            // The job receives two arguments: the input text and the output folder.
            string inputPath = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/shakespeare.txt", fileSystem, storageAccount);
            string outputPath = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/result/", fileSystem, storageAccount);

            SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
            {
                ClassName = "WordCount",
                Arguments = { inputPath, outputPath },
                DriverMemory = "28g",
                DriverCores = 4,
                ExecutorMemory = "28g",
                ExecutorCores = 4,
                ExecutorCount = 2
            };

            SparkBatchJob jobCreated = client.CreateSparkBatchJob(request);
            #endregion

            #region Snippet:ListSparkBatchJobs
            Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
            foreach (SparkBatchJob listedJob in jobs.Value.Sessions)
            {
                Console.WriteLine(listedJob.Name);
            }
            #endregion

            #region Snippet:GetSparkBatchJob
            SparkBatchJob retrievedJob = client.GetSparkBatchJob(jobCreated.Id);
            Debug.WriteLine($"Job is returned with name {retrievedJob.Name} and state {retrievedJob.State}");
            #endregion

            #region Snippet:DeleteSparkBatchJob
            Response operation = client.CancelSparkBatchJob(jobCreated.Id);
            #endregion
        }
        /// <summary>
        /// Lists the Spark batch jobs, then fetches each one by id and validates the fetched job against the
        /// listed one via <c>ValidateSparkBatchJob</c>.
        /// </summary>
        public async Task TestGetSparkBatchJob()
        {
            var listResponse = await SparkBatchClient.GetSparkBatchJobsAsync();
            SparkBatchJobCollection listedJobs = listResponse.Value;

            foreach (SparkBatchJob listed in listedJobs.Sessions)
            {
                // Look the same job up individually and compare it with its list entry.
                SparkBatchJob fetched = await SparkBatchClient.GetSparkBatchJobAsync(listed.Id);
                ValidateSparkBatchJob(listed, fetched);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Sample that creates a <c>SparkBatchClient</c>, submits a "WordCount" batch job whose jar and
        /// arguments live under <c>samples/java/wordcount</c> in the workspace storage, lists the batch jobs,
        /// and then cancels the job this sample submitted.
        /// </summary>
        public void SparkSample()
        {
            #region Snippet:CreateBatchClient
            // Replace the string below with your actual endpoint url.
            string endpoint = "<my-endpoint-url>";
            /*@@*/ endpoint = TestEnvironment.EndpointUrl;
            string           sparkPoolName = TestEnvironment.SparkPoolName;
            SparkBatchClient client        = new SparkBatchClient(endpoint: new Uri(endpoint), sparkPoolName: sparkPoolName, credential: new DefaultAzureCredential());
            #endregion

            // Environment variable with the storage account associated with the Synapse workspace endpoint.
            string storageAccount = TestEnvironment.StorageAccountName;

            // Environment variable with the file system of the storage account.
            string fileSystem = TestEnvironment.StorageFileSystemName;

            #region Snippet:CreateBatchJob
            // Plain literal: the name has no placeholders, so no interpolation is needed.
            string name = "batchSample";
            string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/wordcount.jar", fileSystem, storageAccount);
            SparkBatchJobOptions options = new SparkBatchJobOptions(name: name, file: file)
            {
                ClassName = "WordCount",
                Arguments =
                {
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/shakespeare.txt", fileSystem, storageAccount),
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/result/",         fileSystem, storageAccount),
                },
                DriverMemory   = "28g",
                DriverCores    = 4,
                ExecutorMemory = "28g",
                ExecutorCores  = 4,
                ExecutorCount  = 2
            };

            SparkBatchJob jobCreated = client.CreateSparkBatchJob(options);
            #endregion

            #region Snippet:ListSparkBatchJobs
            Response <SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
            foreach (SparkBatchJob job in jobs.Value.Sessions)
            {
                Console.WriteLine(job.Name);
            }
            #endregion

            #region Snippet:DeleteSparkBatchJob
            // Cancel the job created above rather than whichever job happens to be listed first:
            // the first listed job may belong to someone else, and the list could be empty.
            /*@@*/ int jobId = jobCreated.Id;
            // Replace the integer below with your actual job ID.
            //@@ int jobId = 0;
            Response operation = client.CancelSparkBatchJob(jobId);
            #endregion
        }
        /// <summary>
        /// Builds the Spark batch and Spark session clients for a workspace and Spark pool.
        /// </summary>
        /// <param name="workspaceName">Workspace name; becomes the host prefix of the endpoint URI.</param>
        /// <param name="sparkPoolName">Spark pool name handed to both clients.</param>
        /// <param name="context">Azure context that supplies the endpoint suffix and the session credentials.</param>
        /// <exception cref="AzPSInvalidOperationException">Thrown when <paramref name="context"/> is null.</exception>
        public SynapseAnalyticsSparkClient(string workspaceName, string sparkPoolName, IAzureContext context)
        {
            // Guard: nothing below can work without a context.
            if (context == null)
            {
                throw new AzPSInvalidOperationException(Resources.InvalidDefaultSubscription);
            }

            // Endpoint has the form https://<workspaceName>.<suffix>, with the suffix taken from the environment.
            string endpointSuffix = context.Environment.GetEndpoint(AzureEnvironment.ExtendedEndpoint.AzureSynapseAnalyticsEndpointSuffix);
            string endpointText = string.Concat("https://", workspaceName, ".", endpointSuffix);
            Uri workspaceEndpoint = new Uri(endpointText);

            // Each client gets its own credential instance built from the same context.
            _sparkBatchClient = new SparkBatchClient(workspaceEndpoint, sparkPoolName, new AzureSessionCredential(context));
            _sparkSessionClient = new SparkSessionClient(workspaceEndpoint, sparkPoolName, new AzureSessionCredential(context));
        }
Beispiel #8
0
        /// <summary>
        /// Synchronous sample: creates a <c>SparkBatchClient</c>, submits a "WordCount" batch job whose jar and
        /// arguments live under <c>samples/java/wordcount</c> in the workspace storage, fetches the created
        /// job by id, and finally cancels it.
        /// </summary>
        public void SubmitSparkJobSync()
        {
            // All four settings are read from the test environment.
            string workspaceUrl = TestEnvironment.WorkspaceUrl;            // Synapse workspace endpoint
            string sparkPoolName = TestEnvironment.SparkPoolName;          // Synapse Spark pool name
            string storageAccount = TestEnvironment.StorageAccountName;    // ADLS Gen2 storage account
            string fileSystem = TestEnvironment.StorageFileSystemName;     // file system inside that account

            #region Snippet:SparkBatchSample1SparkBatchClient
            Uri workspaceUri = new Uri(workspaceUrl);
            SparkBatchClient client = new SparkBatchClient(workspaceUri, sparkPoolName, new DefaultAzureCredential());
            #endregion

            #region Snippet:SparkBatchSample1SubmitSparkJob
            string name = $"batch-{Guid.NewGuid()}";
            string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/wordcount.jar", fileSystem, storageAccount);

            // The job receives two arguments: the input text and the output folder.
            List<string> jobArguments = new List<string>
            {
                string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/shakespeare.txt", fileSystem, storageAccount),
                string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/result/", fileSystem, storageAccount),
            };

            SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
            {
                ClassName = "WordCount",
                Arguments = jobArguments,
                DriverMemory = "28g",
                DriverCores = 4,
                ExecutorMemory = "28g",
                ExecutorCores = 4,
                ExecutorCount = 2
            };

            SparkBatchJob jobCreated = client.CreateSparkBatchJob(request);
            #endregion

            #region Snippet:SparkBatchSample1GetSparkJob
            SparkBatchJob job = client.GetSparkBatchJob(jobCreated.Id);
            Debug.WriteLine($"Job is returned with name {job.Name} and state {job.State}");
            #endregion

            #region Snippet:SparkBatchSample1CancelSparkJob
            Response operation = client.CancelSparkBatchJob(jobCreated.Id);
            #endregion
        }
Beispiel #9
0
        /// <summary>
        /// Creates a <c>SparkBatchClient</c> from the test environment's workspace URL and Spark pool name and
        /// stores it in <c>batchClient</c>.
        /// </summary>
        public void CreateClient()
        {
            // Workspace endpoint and Spark pool name both come from the test environment.
            string workspaceUrl = TestEnvironment.WorkspaceUrl;
            string sparkPoolName = TestEnvironment.SparkPoolName;

            #region Snippet:CreateBatchClient
            // Create a new access Spark batch client using the default credential from Azure.Identity using environment variables previously set,
            // including AZURE_CLIENT_ID, AZURE_CLIENT_SECRET, and AZURE_TENANT_ID.
            SparkBatchClient client = new SparkBatchClient(
                endpoint: new Uri(workspaceUrl),
                sparkPoolName: sparkPoolName,
                credential: new DefaultAzureCredential());
            #endregion

            // Keep the client on the instance for later use.
            this.batchClient = client;
        }
        /// <summary>
        /// Retrieves all Spark batch jobs visible to <paramref name="client"/> by requesting pages of 20 jobs
        /// until a page comes back with fewer jobs than requested.
        /// </summary>
        /// <param name="client">Client used to issue the paged list requests.</param>
        /// <param name="detailed">Forwarded unchanged to <c>GetSparkBatchJobsAsync</c>.</param>
        /// <returns>The jobs from every page, in the order the pages were returned.</returns>
        public static async Task <List <SparkBatchJob> > ListSparkBatchJobsAsync(SparkBatchClient client, bool detailed = true)
        {
            const int pageSize = 20;

            List <SparkBatchJob> batches = new List <SparkBatchJob>();
            int from = 0;
            int fetched;

            do
            {
                SparkBatchJobCollection page = (await client.GetSparkBatchJobsAsync(detailed: detailed, from: from, size: pageSize)).Value;

                // Count what was actually appended instead of trusting page.Total.
                // NOTE(review): Total may be the overall job count rather than this page's size, in which
                // case it would skip pages or stop early - confirm against the service.
                int countBefore = batches.Count;
                if (page.Sessions != null)
                {
                    batches.AddRange(page.Sessions);
                }
                fetched = batches.Count - countBefore;

                // Next request starts right after the jobs received so far.
                from += fetched;
            } while (fetched == pageSize); // a short (or empty) page means there is nothing left to fetch
            return(batches);
        }
        /// <summary>
        /// Lists the Spark batch jobs, then fetches each one by id and validates the fetched job against the
        /// listed one via <c>ValidateSparkBatchJob</c>. A job whose lookup or validation raises
        /// <c>Azure.RequestFailedException</c> is skipped.
        /// </summary>
        public async Task TestGetSparkBatchJob()
        {
            var listResponse = await SparkBatchClient.GetSparkBatchJobsAsync();
            SparkBatchJobCollection listedJobs = listResponse.Value;

            foreach (SparkBatchJob listed in listedJobs.Sessions)
            {
                try
                {
                    // Look the same job up individually and compare it with its list entry.
                    SparkBatchJob fetched = await SparkBatchClient.GetSparkBatchJobAsync(listed.Id);
                    ValidateSparkBatchJob(listed, fetched);
                }
                catch (Azure.RequestFailedException)
                {
                    // NOTE(review): request failures are swallowed here, so the test still passes when every
                    // lookup fails. Presumably this tolerates jobs that vanish between list and get - confirm
                    // and consider narrowing to that case.
                    continue;
                }
            }
        }
        /// <summary>
        /// Synchronous sample: creates a <c>SparkBatchClient</c>, starts a "WordCount" batch job whose file and
        /// arguments live under <c>samples/net/wordcount</c> in the workspace storage, polls the create
        /// operation every two seconds until it has completed, lists the batch jobs, fetches the created job
        /// by id, and finally cancels it.
        /// </summary>
        public void SubmitSparkJobSync()
        {
            #region Snippet:CreateSparkBatchClient
#if SNIPPET
            // Replace the strings below with the spark, endpoint, and file system information
            string sparkPoolName  = "<my-spark-pool-name>";
            string endpoint       = "<my-endpoint-url>";
            string storageAccount = "<my-storage-account-name>";
            string fileSystem     = "<my-storage-filesystem-name>";
#else
            string sparkPoolName  = TestEnvironment.SparkPoolName;
            string endpoint       = TestEnvironment.EndpointUrl;
            string storageAccount = TestEnvironment.StorageAccountName;
            string fileSystem     = TestEnvironment.StorageFileSystemName;
#endif

            Uri endpointUri = new Uri(endpoint);
            SparkBatchClient client = new SparkBatchClient(endpointUri, sparkPoolName, new DefaultAzureCredential());
            #endregion

            #region Snippet:SubmitSparkBatchJob
            string name = $"batch-{Guid.NewGuid()}";
            string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/wordcount.zip", fileSystem, storageAccount);

            // The job receives two arguments: the input text and the output folder.
            string inputPath = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/shakespeare.txt", fileSystem, storageAccount);
            string outputPath = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/result/", fileSystem, storageAccount);

            SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
            {
                ClassName = "WordCount",
                Arguments = { inputPath, outputPath },
                DriverMemory = "28g",
                DriverCores = 4,
                ExecutorMemory = "28g",
                ExecutorCores = 4,
                ExecutorCount = 2
            };

            // Start the operation, then poll: check, wait two seconds, refresh the status, repeat.
            SparkBatchOperation createOperation = client.StartCreateSparkBatchJob(request);
            while (true)
            {
                if (createOperation.HasCompleted)
                {
                    break;
                }

                System.Threading.Thread.Sleep(2000);
                createOperation.UpdateStatus();
            }
            SparkBatchJob jobCreated = createOperation.Value;
            #endregion

            #region Snippet:ListSparkBatchJobs
            Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
            foreach (SparkBatchJob listedJob in jobs.Value.Sessions)
            {
                Console.WriteLine(listedJob.Name);
            }
            #endregion

            #region Snippet:GetSparkBatchJob
            SparkBatchJob retrievedJob = client.GetSparkBatchJob(jobCreated.Id);
            Debug.WriteLine($"Job is returned with name {retrievedJob.Name} and state {retrievedJob.State}");
            #endregion

            #region Snippet:CancelSparkBatchJob
            Response operation = client.CancelSparkBatchJob(jobCreated.Id);
            #endregion
        }