Example #1
        public void CreateSparkBatchJob()
        {
            // Environment variable with the storage account associated with the Synapse workspace endpoint.
            string storageAccount = TestEnvironment.StorageAccountName;

            // Environment variable with the file system of the storage account.
            string fileSystem = TestEnvironment.StorageFileSystemName;

            #region Snippet:CreateBatchJob
            string name = "batchSample";
            string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/wordcount.jar", fileSystem, storageAccount);
            SparkBatchJobOptions options = new SparkBatchJobOptions(name: name, file: file)
            {
                ClassName = "WordCount",
                Arguments = new List<string>
                {
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/shakespeare.txt", fileSystem, storageAccount),
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/result/", fileSystem, storageAccount),
                },
                DriverMemory   = "28g",
                DriverCores    = 4,
                ExecutorMemory = "28g",
                ExecutorCores  = 4,
                ExecutorCount  = 2
            };

            SparkBatchJob jobCreated = batchClient.CreateSparkBatchJob(options);
            #endregion
        }
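
Note that CreateSparkBatchJob returns as soon as the service accepts the submission, so the returned job is typically still queued or running. A minimal polling sketch (an illustration only, assuming this SDK version exposes State as a string and that GetSparkBatchJob is available on the same client, as later examples show; the SparkBatchOperation pattern in the next example is the preferred approach):

            SparkBatchJob current = batchClient.GetSparkBatchJob(jobCreated.Id);
            // "success", "dead", and "error" are terminal Livy states.
            while (!"success".Equals(current.State, StringComparison.OrdinalIgnoreCase) &&
                   !"dead".Equals(current.State, StringComparison.OrdinalIgnoreCase) &&
                   !"error".Equals(current.State, StringComparison.OrdinalIgnoreCase))
            {
                System.Threading.Thread.Sleep(2000);
                current = batchClient.GetSparkBatchJob(jobCreated.Id);
            }

Example #2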
        public async Task TestSparkBatchJob()
        {
            SparkBatchClient client = CreateClient();

            // Submit the Spark job
            SparkBatchJobOptions createParams    = SparkTestUtilities.CreateSparkJobRequestParameters(Recording, TestEnvironment);
            SparkBatchOperation  createOperation = await client.StartCreateSparkBatchJobAsync(createParams);

            SparkBatchJob jobCreateResponse = await createOperation.WaitForCompletionAsync();

            // Verify the Spark batch job completes successfully
            Assert.True("success".Equals(jobCreateResponse.State, StringComparison.OrdinalIgnoreCase) && jobCreateResponse.Result == SparkBatchJobResultType.Succeeded,
                        string.Format(
                            "Job: {0} did not return success. Current job state: {1}. Actual result: {2}. Error (if any): {3}",
                            jobCreateResponse.Id,
                            jobCreateResponse.State,
                            jobCreateResponse.Result,
                            string.Join(", ", jobCreateResponse.Errors ?? new List<SparkServiceError>())
                            )
                        );

            // Get the list of Spark batch jobs and check that the submitted job exists
            List<SparkBatchJob> listJobResponse = await SparkTestUtilities.ListSparkBatchJobsAsync(client);

            Assert.NotNull(listJobResponse);
            Assert.IsTrue(listJobResponse.Any(job => job.Id == jobCreateResponse.Id));
        }
Example #3
        public async Task TestSparkBatchJobCompletesWhenJobStarts()
        {
            SparkBatchClient client = CreateClient();

            // Submit the Spark job
            SparkBatchJobOptions createParams    = SparkTestUtilities.CreateSparkJobRequestParameters(Recording, TestEnvironment);
            SparkBatchOperation  createOperation = await client.StartCreateSparkBatchJobAsync(createParams);

            SparkBatchJob jobCreateResponse = await createOperation.WaitForCompletionAsync();

            // Verify the Spark batch job submission starts successfully
            Assert.True(LivyStates.Starting == jobCreateResponse.State || LivyStates.Running == jobCreateResponse.State || LivyStates.Success == jobCreateResponse.State,
                        string.Format(
                            "Job: {0} did not return success. Current job state: {1}. Error (if any): {2}",
                            jobCreateResponse.Id,
                            jobCreateResponse.State,
                            string.Join(", ", jobCreateResponse.Errors ?? new List<SparkServiceError>())
                            )
                        );

            // Get the list of Spark batch jobs and check that the submitted job exists
            List<SparkBatchJob> listJobResponse = await SparkTestUtilities.ListSparkBatchJobsAsync(client);

            Assert.NotNull(listJobResponse);
            Assert.IsTrue(listJobResponse.Any(job => job.Id == jobCreateResponse.Id));
        }
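Example #4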
        public async Task TestSparkBatchJob()
        {
            // Submit the Spark job
            SparkBatchJobOptions createParams      = this.CreateSparkJobRequestParameters();
            SparkBatchJob        jobCreateResponse = (await SparkBatchClient.CreateSparkBatchJobAsync(createParams)).Value;

            // Poll the Spark job until it finishes
            SparkBatchJob getJobResponse = await this.PollSparkBatchJobSubmissionAsync(jobCreateResponse);

            // Verify the Spark batch job completes successfully
            Assert.True("success".Equals(getJobResponse.State, StringComparison.OrdinalIgnoreCase) && getJobResponse.Result == SparkBatchJobResultType.Succeeded,
                        string.Format(
                            "Job: {0} did not return success. Current job state: {1}. Actual result: {2}. Error (if any): {3}",
                            getJobResponse.Id,
                            getJobResponse.State,
                            getJobResponse.Result,
                            string.Join(", ", getJobResponse.Errors ?? new List<SparkServiceError>())
                            )
                        );

            // Get the list of Spark batch jobs and check that the submitted job exists
            List<SparkBatchJob> listJobResponse = await this.ListSparkBatchJobsAsync();

            Assert.NotNull(listJobResponse);
            Assert.IsTrue(listJobResponse.Any(job => job.Id == getJobResponse.Id));
        }
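Example #5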
        public void SubmitSparkJobSync()
        {
            // Environment variable with the Synapse workspace endpoint.
            string endpoint = TestEnvironment.EndpointUrl;

            // Environment variable with the Synapse Spark pool name.
            string sparkPoolName = TestEnvironment.SparkPoolName;

            // Environment variable with the ADLS Gen2 storage account associated with the Synapse workspace.
            string storageAccount = TestEnvironment.StorageAccountName;

            // Environment variable with the file system of ADLS Gen2 storage account associated with the Synapse workspace.
            string fileSystem = TestEnvironment.StorageFileSystemName;

            #region Snippet:CreateSparkBatchClient
            SparkBatchClient client = new SparkBatchClient(new Uri(endpoint), sparkPoolName, new DefaultAzureCredential());
            #endregion

            #region Snippet:SubmitSparkBatchJob
            string name = $"batch-{Guid.NewGuid()}";
            string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/wordcount.zip", fileSystem, storageAccount);
            SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
            {
                ClassName = "WordCount",
                Arguments =
                {
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/shakespeare.txt", fileSystem, storageAccount),
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/result/",         fileSystem, storageAccount),
                },
                DriverMemory   = "28g",
                DriverCores    = 4,
                ExecutorMemory = "28g",
                ExecutorCores  = 4,
                ExecutorCount  = 2
            };

            SparkBatchJob jobCreated = client.CreateSparkBatchJob(request);
            #endregion

            #region Snippet:ListSparkBatchJobs
            Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
            foreach (SparkBatchJob job in jobs.Value.Sessions)
            {
                Console.WriteLine(job.Name);
            }
            #endregion

            #region Snippet:GetSparkBatchJob
            SparkBatchJob retrievedJob = client.GetSparkBatchJob(jobCreated.Id);
            Debug.WriteLine($"Job is returned with name {retrievedJob.Name} and state {retrievedJob.State}");
            #endregion

            #region Snippet:DeleteSparkBatchJob
            Response operation = client.CancelSparkBatchJob(jobCreated.Id);
            #endregion
        }
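Example #6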
        public SparkBatchJob SubmitSparkBatchJob(SparkBatchJobOptions sparkBatchJobOptions, bool waitForCompletion)
        {
            var batch = _sparkBatchClient.CreateSparkBatchJob(sparkBatchJobOptions, detailed: true);

            if (!waitForCompletion)
            {
                return batch;
            }

            return PollSparkBatchJobSubmission(batch);
        }
Example #7
        public SparkBatchJob SubmitSparkBatchJob(SparkBatchJobOptions sparkBatchJobOptions, bool waitForCompletion)
        {
            var batch = _sparkBatchClient.StartCreateSparkBatchJob(sparkBatchJobOptions, detailed: true);

            if (!waitForCompletion)
            {
                return GetSparkBatchJob(int.Parse(batch.Id));
            }

            return batch.Poll().Value;
        }
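
Either helper variant might be consumed along these lines (a sketch; synapseClient is a hypothetical instance of the class that defines SubmitSparkBatchJob, and options is built as in the earlier samples):

            // Hypothetical caller: submit without blocking, then report the initial state.
            SparkBatchJob submitted = synapseClient.SubmitSparkBatchJob(options, waitForCompletion: false);
            Console.WriteLine($"Submitted job {submitted.Id} in state {submitted.State}");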
Example #8
        public void SparkSample()
        {
            #region Snippet:CreateBatchClient
            // Replace the string below with your actual endpoint url.
            string endpoint = "<my-endpoint-url>";
            /*@@*/ endpoint = TestEnvironment.EndpointUrl;
            string           sparkPoolName = TestEnvironment.SparkPoolName;
            SparkBatchClient client        = new SparkBatchClient(endpoint: new Uri(endpoint), sparkPoolName: sparkPoolName, credential: new DefaultAzureCredential());
            #endregion

            // Environment variable with the storage account associated with the Synapse workspace endpoint.
            string storageAccount = TestEnvironment.StorageAccountName;

            // Environment variable with the file system of the storage account.
            string fileSystem = TestEnvironment.StorageFileSystemName;

            #region Snippet:CreateBatchJob
            string name = "batchSample";
            string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/wordcount.jar", fileSystem, storageAccount);
            SparkBatchJobOptions options = new SparkBatchJobOptions(name: name, file: file)
            {
                ClassName = "WordCount",
                Arguments =
                {
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/shakespeare.txt", fileSystem, storageAccount),
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/result/",         fileSystem, storageAccount),
                },
                DriverMemory   = "28g",
                DriverCores    = 4,
                ExecutorMemory = "28g",
                ExecutorCores  = 4,
                ExecutorCount  = 2
            };

            SparkBatchJob jobCreated = client.CreateSparkBatchJob(options);
            #endregion

            #region Snippet:ListSparkBatchJobs
            Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
            foreach (SparkBatchJob job in jobs.Value.Sessions)
            {
                Console.WriteLine(job.Name);
            }
            #endregion

            #region Snippet:DeleteSparkBatchJob
            /*@@*/ int jobId = jobs.Value.Sessions.First().Id;
            // Replace the integer below with your actual job ID.
            //@@ int jobId = 0;
            Response operation = client.CancelSparkBatchJob(jobId);
            #endregion
        }
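Example #9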
 public virtual Response<SparkBatchJob> CreateSparkBatchJob(SparkBatchJobOptions sparkBatchJobOptions, bool? detailed = null, CancellationToken cancellationToken = default)
 {
     using var scope = _clientDiagnostics.CreateScope("SparkBatchClient.CreateSparkBatchJob");
     scope.Start();
     try
     {
         return RestClient.CreateSparkBatchJob(sparkBatchJobOptions, detailed, cancellationToken);
     }
     catch (Exception e)
     {
         scope.Failed(e);
         throw;
     }
 }
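
Callers of this wrapper receive an Azure.Core Response<SparkBatchJob>: the deserialized job is on Value (an implicit conversion to SparkBatchJob also exists), and the raw HTTP details come from GetRawResponse(). A short sketch:

     Response<SparkBatchJob> response = client.CreateSparkBatchJob(options, detailed: true);
     SparkBatchJob job    = response.Value;                   // deserialized payload
     int           status = response.GetRawResponse().Status; // underlying HTTP status code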
Example #10
        public void SubmitSparkJobSync()
        {
            // Environment variable with the Synapse workspace endpoint.
            string workspaceUrl = TestEnvironment.WorkspaceUrl;

            // Environment variable with the Synapse Spark pool name.
            string sparkPoolName = TestEnvironment.SparkPoolName;

            // Environment variable with the ADLS Gen2 storage account associated with the Synapse workspace.
            string storageAccount = TestEnvironment.StorageAccountName;

            // Environment variable with the file system of ADLS Gen2 storage account associated with the Synapse workspace.
            string fileSystem = TestEnvironment.StorageFileSystemName;

            #region Snippet:SparkBatchSample1SparkBatchClient
            SparkBatchClient client = new SparkBatchClient(new Uri(workspaceUrl), sparkPoolName, new DefaultAzureCredential());
            #endregion

            #region Snippet:SparkBatchSample1SubmitSparkJob
            string name = $"batch-{Guid.NewGuid()}";
            string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/wordcount.jar", fileSystem, storageAccount);
            SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
            {
                ClassName = "WordCount",
                Arguments = new List<string>
                {
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/shakespeare.txt", fileSystem, storageAccount),
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/java/wordcount/result/", fileSystem, storageAccount),
                },
                DriverMemory   = "28g",
                DriverCores    = 4,
                ExecutorMemory = "28g",
                ExecutorCores  = 4,
                ExecutorCount  = 2
            };

            SparkBatchJob jobCreated = client.CreateSparkBatchJob(request);
            #endregion

            #region Snippet:SparkBatchSample1GetSparkJob
            SparkBatchJob job = client.GetSparkBatchJob(jobCreated.Id);
            Debug.WriteLine($"Job is returned with name {job.Name} and state {job.State}");
            #endregion

            #region Snippet:SparkBatchSample1CancelSparkJob
            Response operation = client.CancelSparkBatchJob(jobCreated.Id);
            #endregion
        }
        public void SubmitSparkJobSync()
        {
            #region Snippet:CreateSparkBatchClient
#if SNIPPET
            // Replace the strings below with the spark, endpoint, and file system information
            string sparkPoolName  = "<my-spark-pool-name>";
            string endpoint       = "<my-endpoint-url>";
            string storageAccount = "<my-storage-account-name>";
            string fileSystem     = "<my-storage-filesystem-name>";
#else
            string sparkPoolName  = TestEnvironment.SparkPoolName;
            string endpoint       = TestEnvironment.EndpointUrl;
            string storageAccount = TestEnvironment.StorageAccountName;
            string fileSystem     = TestEnvironment.StorageFileSystemName;
#endif

            SparkBatchClient client = new SparkBatchClient(new Uri(endpoint), sparkPoolName, new DefaultAzureCredential());
            #endregion

            #region Snippet:SubmitSparkBatchJob
            string name = $"batch-{Guid.NewGuid()}";
            string file = string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/wordcount.zip", fileSystem, storageAccount);
            SparkBatchJobOptions request = new SparkBatchJobOptions(name, file)
            {
                ClassName = "WordCount",
                Arguments =
                {
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/shakespeare.txt", fileSystem, storageAccount),
                    string.Format("abfss://{0}@{1}.dfs.core.windows.net/samples/net/wordcount/result/",         fileSystem, storageAccount),
                },
                DriverMemory   = "28g",
                DriverCores    = 4,
                ExecutorMemory = "28g",
                ExecutorCores  = 4,
                ExecutorCount  = 2
            };

            SparkBatchOperation createOperation = client.StartCreateSparkBatchJob(request);
            while (!createOperation.HasCompleted)
            {
                System.Threading.Thread.Sleep(2000);
                createOperation.UpdateStatus();
            }
            SparkBatchJob jobCreated = createOperation.Value;
            #endregion

            #region Snippet:ListSparkBatchJobs
            Response<SparkBatchJobCollection> jobs = client.GetSparkBatchJobs();
            foreach (SparkBatchJob job in jobs.Value.Sessions)
            {
                Console.WriteLine(job.Name);
            }
            #endregion

            #region Snippet:GetSparkBatchJob
            SparkBatchJob retrievedJob = client.GetSparkBatchJob(jobCreated.Id);
            Debug.WriteLine($"Job is returned with name {retrievedJob.Name} and state {retrievedJob.State}");
            #endregion

            #region Snippet:CancelSparkBatchJob
            Response operation = client.CancelSparkBatchJob(jobCreated.Id);
            #endregion
        }
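
The Thread.Sleep/UpdateStatus loop above has a more compact async equivalent, shown earlier in the test samples: StartCreateSparkBatchJobAsync returns a SparkBatchOperation whose WaitForCompletionAsync performs the polling. A sketch for an async context:

            SparkBatchOperation createOperation = await client.StartCreateSparkBatchJobAsync(request);
            SparkBatchJob jobCreated = await createOperation.WaitForCompletionAsync();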
        public override void ExecuteCmdlet()
        {
            this.Language = LanguageType.Parse(this.Language);
            if (string.IsNullOrEmpty(this.MainClassName))
            {
                if (LanguageType.SparkDotNet == this.Language || LanguageType.Spark == this.Language)
                {
                    throw new SynapseException(Resources.MissingMainClassName);
                }
            }

            if (this.IsParameterBound(c => c.SparkPoolObject))
            {
                var resourceIdentifier = new ResourceIdentifier(this.SparkPoolObject.Id);
                this.WorkspaceName = resourceIdentifier.ParentResource;
                this.WorkspaceName = this.WorkspaceName.Substring(this.WorkspaceName.LastIndexOf('/') + 1);
                this.SparkPoolName = resourceIdentifier.ResourceName;
            }

            this.MainDefinitionFile = Utils.NormalizeUrl(this.MainDefinitionFile);
            if (this.CommandLineArgument != null)
            {
                for (int i = 0; i < this.CommandLineArgument.Length; i++)
                {
                    this.CommandLineArgument[i] = Utils.NormalizeUrl(this.CommandLineArgument[i]);
                }
            }

            if (this.ReferenceFile != null)
            {
                for (int i = 0; i < this.ReferenceFile.Length; i++)
                {
                    this.ReferenceFile[i] = Utils.NormalizeUrl(this.ReferenceFile[i]);
                }
            }

            Utils.CategorizedFiles(this.ReferenceFile, out IList<string> jars, out IList<string> files);
            bool isSparkDotNet = this.Language == LanguageType.SparkDotNet;
            var  batchRequest  = new SparkBatchJobOptions(this.Name, isSparkDotNet ? SynapseConstants.SparkDotNetJarFile : this.MainDefinitionFile)
            {
                ClassName = isSparkDotNet
                    ? SynapseConstants.SparkDotNetClassName
                    : (this.Language == LanguageType.PySpark ? null : this.MainClassName),
                Arguments = isSparkDotNet
                    ? new List<string> { this.MainDefinitionFile, this.MainClassName }
                        .Concat(this.CommandLineArgument ?? new string[0]).ToArray()
                    : this.CommandLineArgument,
                Jars     = jars,
                Files    = files,
                Archives = isSparkDotNet
                    ? new List<string> { $"{this.MainDefinitionFile}#{SynapseConstants.SparkDotNetUdfsFolderName}" }
                    : null,
                Configuration  = this.Configuration?.ToDictionary(),
                ExecutorMemory = SynapseConstants.ComputeNodeSizes[this.ExecutorSize].Memory + "g",
                ExecutorCores  = SynapseConstants.ComputeNodeSizes[this.ExecutorSize].Cores,
                DriverMemory   = SynapseConstants.ComputeNodeSizes[this.ExecutorSize].Memory + "g",
                DriverCores    = SynapseConstants.ComputeNodeSizes[this.ExecutorSize].Cores,
                ExecutorCount  = this.ExecutorCount
            };

            // Ensure the relative path of the UDFs is added to "--conf".
            if (isSparkDotNet)
            {
                batchRequest.Configuration = batchRequest.Configuration ?? new Dictionary<string, string>();
                string udfsRelativePath = "./" + SynapseConstants.SparkDotNetUdfsFolderName;
                batchRequest.Configuration.TryGetValue(SynapseConstants.SparkDotNetAssemblySearchPathsKey, out string pathValue);
                var paths = pathValue?.Split(',').Select(path => path.Trim()).Where(path => !string.IsNullOrEmpty(path)).ToList() ?? new List<string>();
                if (!paths.Contains(udfsRelativePath))
                {
                    paths.Add(udfsRelativePath);
                }

                batchRequest.Configuration[SynapseConstants.SparkDotNetAssemblySearchPathsKey] = string.Join(",", paths);
            }

            if (this.ShouldProcess(this.SparkPoolName, string.Format(Resources.SubmittingSynapseSparkJob, this.SparkPoolName, this.WorkspaceName)))
            {
                var jobInformation = SynapseAnalyticsClient.SubmitSparkBatchJob(batchRequest, waitForCompletion: false);
                WriteObject(new PSSynapseSparkJob(jobInformation));
            }
        }
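
The assembly-search-path handling above boils down to merging one entry into a comma-separated configuration value. Isolated as a standalone helper (a hypothetical sketch, not part of the cmdlet):

        // Hypothetical helper mirroring the merge performed above: append a path to a
        // comma-separated config value, trimming entries and skipping duplicates.
        private static string MergeSearchPath(string existingValue, string newPath)
        {
            List<string> paths = existingValue?.Split(',')
                .Select(path => path.Trim())
                .Where(path => !string.IsNullOrEmpty(path))
                .ToList() ?? new List<string>();
            if (!paths.Contains(newPath))
            {
                paths.Add(newPath);
            }

            return string.Join(",", paths);
        }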
 public virtual async Task<Response<SparkBatchJob>> CreateSparkBatchJobAsync(SparkBatchJobOptions sparkBatchJobOptions, bool? detailed = null, CancellationToken cancellationToken = default)
 {
     using var scope = _clientDiagnostics.CreateScope("SparkBatchClient.CreateSparkBatchJob");
     scope.Start();
     try
     {
         return await RestClient.CreateSparkBatchJobAsync(sparkBatchJobOptions, detailed, cancellationToken).ConfigureAwait(false);
     }
     catch (Exception e)
     {
         scope.Failed(e);
         throw;
     }
 }
 private async Task<SparkBatchOperation> StartCreateSparkBatchJobInternal(bool async, SparkBatchJobOptions sparkBatchJobOptions, bool? detailed = null, CancellationToken cancellationToken = default)
 {
     using DiagnosticScope scope = _clientDiagnostics.CreateScope($"{nameof(SparkBatchClient)}.{nameof(StartCreateSparkBatchJob)}");
     scope.Start();
     try
     {
         Response <SparkBatchJob> batchSession;
         if (async)
         {
             batchSession = await RestClient.CreateSparkBatchJobAsync(sparkBatchJobOptions, detailed, cancellationToken).ConfigureAwait(false);
         }
         else
         {
             batchSession = RestClient.CreateSparkBatchJob(sparkBatchJobOptions, detailed, cancellationToken);
         }
         return new SparkBatchOperation(this, _clientDiagnostics, batchSession);
     }
     catch (Exception e)
     {
         scope.Failed(e);
         throw;
     }
 }
 public virtual SparkBatchOperation StartCreateSparkBatchJob(SparkBatchJobOptions sparkBatchJobOptions, bool? detailed = null, CancellationToken cancellationToken = default)
 => StartCreateSparkBatchJobInternal(false, sparkBatchJobOptions, detailed, cancellationToken).EnsureCompleted();
 public virtual async Task<SparkBatchOperation> StartCreateSparkBatchJobAsync(SparkBatchJobOptions sparkBatchJobOptions, bool? detailed = null, CancellationToken cancellationToken = default)
 => await StartCreateSparkBatchJobInternal(true, sparkBatchJobOptions, detailed, cancellationToken).ConfigureAwait(false);
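
Both public entry points delegate to StartCreateSparkBatchJobInternal, using the bool async flag to pick the blocking or awaitable REST call while sharing a single diagnostics code path. A caller chooses whichever fits its context (a sketch, reusing an options object built as in the samples above):

 // Synchronous: blocks until the service accepts the submission.
 SparkBatchOperation syncOperation = client.StartCreateSparkBatchJob(options);

 // Asynchronous: the same behavior without blocking the calling thread.
 SparkBatchOperation asyncOperation = await client.StartCreateSparkBatchJobAsync(options);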