/// <summary>
/// Creates a job whose job manager task runs a custom executable, uploads the
/// files that executable requires to blob storage, and commits the job to the service.
/// </summary>
/// <param name="batchClient">The BatchClient to use when interacting with the Batch service.</param>
/// <param name="storageAccount">The cloud storage account to upload files to.</param>
/// <param name="jobId">The ID of the job.</param>
/// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
private async Task SubmitJobAsync(BatchClient batchClient, CloudStorageAccount storageAccount, string jobId)
{
    // Start from an empty, unbound job and point it at the configured pool.
    var job = batchClient.JobOperations.CreateJob();
    job.Id = jobId;
    job.PoolInformation = new PoolInformation { PoolId = this.jobManagerSettings.PoolId };

    // Stage the job manager's required files in blob storage, then describe them
    // to the task as resource files addressed through a container SAS.
    var blobContainer = this.jobManagerSettings.BlobContainer;
    await SampleHelpers.UploadResourcesAsync(storageAccount, blobContainer, JobManagerRequiredFiles);
    var sas = SampleHelpers.ConstructContainerSas(storageAccount, blobContainer);
    var resourceFiles = SampleHelpers.GetResourceFiles(sas, JobManagerRequiredFiles);

    // Environment handed to the job manager process.
    // The batch account name is deliberately omitted: the batch service already provides
    // an environment variable containing the account name to every task.
    var environment = new List<EnvironmentSetting>();
    environment.Add(new EnvironmentSetting("SAMPLE_BATCH_KEY", this.accountSettings.BatchAccountKey));
    environment.Add(new EnvironmentSetting("SAMPLE_BATCH_URL", this.accountSettings.BatchServiceUrl));
    environment.Add(new EnvironmentSetting("SAMPLE_STORAGE_ACCOUNT", this.accountSettings.StorageAccountName));
    environment.Add(new EnvironmentSetting("SAMPLE_STORAGE_KEY", this.accountSettings.StorageAccountKey));
    environment.Add(new EnvironmentSetting("SAMPLE_STORAGE_URL", this.accountSettings.StorageServiceUrl));

    var managerTask = new JobManagerTask
    {
        Id = JobManagerTaskId,
        CommandLine = JobManagerTaskExe,
        ResourceFiles = resourceFiles,
        KillJobOnCompletion = true,
        EnvironmentSettings = environment
    };
    job.JobManagerTask = managerTask;

    // Committing the unbound job is what actually creates it in the service.
    await job.CommitAsync();
}
/// <summary>
/// Populates Azure Storage with the required files, submits a job (with a job manager task)
/// to the Azure Batch service on an auto pool, waits for the job manager task, and writes
/// the reducer output to the console.
/// </summary>
/// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
/// <exception cref="TimeoutException">
/// Thrown when waiting for the job manager task timed out and the preceding task success check did not throw.
/// </exception>
public async Task RunAsync()
{
    Console.WriteLine("Running with the following settings: ");
    Console.WriteLine("----------------------------------------");
    Console.WriteLine(this.textSearchSettings.ToString());
    Console.WriteLine(this.accountSettings.ToString());

    CloudStorageAccount cloudStorageAccount = new CloudStorageAccount(
        new StorageCredentials(
            this.accountSettings.StorageAccountName,
            this.accountSettings.StorageAccountKey),
        this.accountSettings.StorageServiceUrl,
        useHttps: true);

    //Upload resources if required.
    if (this.textSearchSettings.ShouldUploadResources)
    {
        Console.WriteLine("Splitting file: {0} into {1} subfiles",
            Constants.TextFilePath,
            this.textSearchSettings.NumberOfMapperTasks);

        //Split the text file into the correct number of files for consumption by the mapper tasks.
        FileSplitter splitter = new FileSplitter();
        List<string> mapperTaskFiles = await splitter.SplitAsync(
            Constants.TextFilePath,
            this.textSearchSettings.NumberOfMapperTasks);

        //Upload the executables together with the split input files.
        List<string> files = Constants.RequiredExecutableFiles.Union(mapperTaskFiles).ToList();

        await SampleHelpers.UploadResourcesAsync(
            cloudStorageAccount,
            this.textSearchSettings.BlobContainer,
            files);
    }

    //Generate a SAS for the container.
    string containerSasUrl = SampleHelpers.ConstructContainerSas(
        cloudStorageAccount,
        this.textSearchSettings.BlobContainer);

    //Set up the Batch Service credentials used to authenticate with the Batch Service.
    BatchSharedKeyCredentials credentials = new BatchSharedKeyCredentials(
        this.accountSettings.BatchServiceUrl,
        this.accountSettings.BatchAccountName,
        this.accountSettings.BatchAccountKey);

    using (BatchClient batchClient = await BatchClient.OpenAsync(credentials))
    {
        //
        // Construct the job properties in local memory before committing them to the Batch Service.
        //

        //Allow enough compute nodes in the pool to run each mapper task, and 1 extra to run the job manager.
        int numberOfPoolComputeNodes = 1 + this.textSearchSettings.NumberOfMapperTasks;

        //Define the pool specification for the pool which the job will run on.
        PoolSpecification poolSpecification = new PoolSpecification()
        {
            TargetDedicated = numberOfPoolComputeNodes,
            VirtualMachineSize = "small",
            //You can learn more about os families and versions at:
            //http://azure.microsoft.com/documentation/articles/cloud-services-guestos-update-matrix
            OSFamily = "4",
            TargetOSVersion = "*"
        };

        //Use the auto pool feature of the Batch Service to create a pool when the job is created.
        //This creates a new pool for each job which is added.
        AutoPoolSpecification autoPoolSpecification = new AutoPoolSpecification()
        {
            AutoPoolIdPrefix = "TextSearchPool",
            KeepAlive = false,
            PoolLifetimeOption = PoolLifetimeOption.Job,
            PoolSpecification = poolSpecification
        };

        //Define the pool information for this job -- it will run on the pool defined by the auto pool specification above.
        PoolInformation poolInformation = new PoolInformation()
        {
            AutoPoolSpecification = autoPoolSpecification
        };

        //Define the job manager for this job. This job manager will run first and will submit the tasks for
        //the job. The job manager is the executable which manages the lifetime of the job
        //and all tasks which should run for the job. In this case, the job manager submits the mapper and reducer tasks.
        List<ResourceFile> jobManagerResourceFiles = SampleHelpers.GetResourceFiles(containerSasUrl, Constants.RequiredExecutableFiles);
        const string jobManagerTaskId = "JobManager";

        JobManagerTask jobManagerTask = new JobManagerTask()
        {
            ResourceFiles = jobManagerResourceFiles,
            CommandLine = Constants.JobManagerExecutable,

            //Determines if the job should terminate when the job manager process exits.
            KillJobOnCompletion = true,
            Id = jobManagerTaskId
        };

        //Create the unbound job in local memory. An object which exists only in local memory (and not on the Batch Service) is "unbound".
        string jobId = Environment.GetEnvironmentVariable("USERNAME") + DateTime.UtcNow.ToString("yyyyMMdd-HHmmss");

        CloudJob unboundJob = batchClient.JobOperations.CreateJob(jobId, poolInformation);
        unboundJob.JobManagerTask = jobManagerTask; //Assign the job manager task to this job

        try
        {
            //Commit the unbound job to the Batch Service.
            Console.WriteLine("Adding job: {0} to the Batch Service.", unboundJob.Id);
            await unboundJob.CommitAsync(); //Issues a request to the Batch Service to add the job which was defined above.

            //
            // Wait for the job manager task to complete.
            //

            //An object which is backed by a corresponding Batch Service object is "bound."
            CloudJob boundJob = await batchClient.JobOperations.GetJobAsync(jobId);

            CloudTask boundJobManagerTask = await boundJob.GetTaskAsync(jobManagerTaskId);

            TimeSpan maxJobCompletionTimeout = TimeSpan.FromMinutes(30);

            // Monitor the current tasks to see when they are done.
            // Occasionally a task may get killed and requeued during an upgrade or hardware failure, including the job manager
            // task. The job manager will be re-run in this case. Robustness against this was not added into the sample for
            // simplicity, but should be added into any production code.
            Console.WriteLine("Waiting for job's tasks to complete");

            TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor();
            bool timedOut = await taskStateMonitor.WaitAllAsync(new List<CloudTask> { boundJobManagerTask }, TaskState.Completed, maxJobCompletionTimeout);

            Console.WriteLine("Done waiting for job manager task.");

            //Pull the latest task state from the service before inspecting it.
            await boundJobManagerTask.RefreshAsync();

            //Check to ensure the job manager task exited successfully.
            //NOTE(review): this runs before the timeout check below, so on a timeout the exception that
            //surfaces depends on how CheckForTaskSuccessAsync treats a task that has not completed -- confirm.
            await Helpers.CheckForTaskSuccessAsync(boundJobManagerTask, dumpStandardOutOnTaskSuccess: false);

            if (timedOut)
            {
                throw new TimeoutException("Timed out waiting for job manager task to complete.");
            }

            //
            // Download and write out the reducer tasks output
            //

            string reducerText = await SampleHelpers.DownloadBlobTextAsync(cloudStorageAccount, this.textSearchSettings.BlobContainer, Constants.ReducerTaskResultBlobName);
            Console.WriteLine("Reducer reuslts:");
            Console.WriteLine(reducerText);
        }
        finally
        {
            //Delete the job.
            //This will delete the auto pool associated with the job as long as the pool
            //keep alive property is set to false.
            if (this.textSearchSettings.ShouldDeleteJob)
            {
                Console.WriteLine("Deleting job {0}", jobId);

                //Use the asynchronous delete so this async method never blocks a thread on a service call.
                await batchClient.JobOperations.DeleteJobAsync(jobId);
            }

            //Note that there were files uploaded to a container specified in the
            //configuration file. This container will not be deleted or cleaned up by this sample.
        }
    }
}
/// <summary>
/// Entry point of the job manager task: submits the mapper tasks, waits for them,
/// submits the reducer task, waits for it, and uploads the reducer's output to Azure storage.
/// </summary>
/// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
public async Task RunAsync()
{
    Console.WriteLine("JobManager for account: {0}, job: {1} has started...",
        this.accountName,
        this.jobId);
    Console.WriteLine();

    Console.WriteLine("JobManager running with the following settings: ");
    Console.WriteLine("----------------------------------------");
    Console.WriteLine(this.configurationSettings.ToString());

    // Shared-key credentials for authenticating against the Batch Service.
    var batchCredentials = new BatchSharedKeyCredentials(
        this.configurationSettings.BatchServiceUrl,
        this.configurationSettings.BatchAccountName,
        this.configurationSettings.BatchAccountKey);

    // Storage account holding the task inputs and, at the end, the reducer result.
    var storageCredentials = new StorageCredentials(
        this.configurationSettings.StorageAccountName,
        this.configurationSettings.StorageAccountKey);
    var storageAccount = new CloudStorageAccount(
        storageCredentials,
        this.configurationSettings.StorageServiceUrl,
        useHttps: true);

    using (var client = await BatchClient.OpenAsync(batchCredentials))
    {
        // The container SAS gives the Batch Service access to the files
        // required to run the mapper and reducer tasks.
        var sas = SampleHelpers.ConstructContainerSas(
            storageAccount,
            this.configurationSettings.BlobContainer);

        // 1. Submit the mapper tasks and wait for them to complete.
        await this.SubmitMapperTasksAsync(client, sas);
        await this.WaitForMapperTasksToCompleteAsync(client);

        // 2. Submit the reducer task and wait for it to complete.
        await this.SubmitReducerTaskAsync(client, sas);
        var reducerOutput = await this.WaitForReducerTaskToCompleteAsync(client);

        // 3. Upload the reducer result to Azure storage for consumption later.
        await SampleHelpers.UploadBlobTextAsync(
            storageAccount,
            this.configurationSettings.BlobContainer,
            Constants.ReducerTaskResultBlobName,
            reducerOutput);

        // The job manager has completed.
        Console.WriteLine("JobManager completed successfully.");
    }
}
/// <summary>
/// Populates Azure Storage with the required files, submits a job with mapper and reducer
/// tasks to the Azure Batch service on an auto pool, waits for the tasks, and writes the
/// reducer output to the console.
/// </summary>
/// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
public async Task RunAsync()
{
    Console.WriteLine("Running with the following settings: ");
    Console.WriteLine("----------------------------------------");
    Console.WriteLine(this.textSearchSettings.ToString());
    Console.WriteLine(this.accountSettings.ToString());

    CloudStorageAccount cloudStorageAccount = new CloudStorageAccount(
        new StorageCredentials(
            this.accountSettings.StorageAccountName,
            this.accountSettings.StorageAccountKey),
        this.accountSettings.StorageServiceUrl,
        useHttps: true);

    //Make sure the output container exists before any task is given a SAS for it.
    Console.WriteLine($"Creating container {this.textSearchSettings.OutputBlobContainer} if it doesn't exist...");
    var blobClient = cloudStorageAccount.CreateCloudBlobClient();
    var outputContainer = blobClient.GetContainerReference(this.textSearchSettings.OutputBlobContainer);
    await outputContainer.CreateIfNotExistsAsync();

    //Upload resources if required.
    if (this.textSearchSettings.ShouldUploadResources)
    {
        Console.WriteLine("Splitting file: {0} into {1} subfiles",
            Constants.TextFilePath,
            this.textSearchSettings.NumberOfMapperTasks);

        //Split the text file into the correct number of files for consumption by the mapper tasks.
        FileSplitter splitter = new FileSplitter();
        List<string> mapperTaskFiles = await splitter.SplitAsync(
            Constants.TextFilePath,
            this.textSearchSettings.NumberOfMapperTasks);

        //Upload the executables together with the split input files.
        List<string> files = Constants.RequiredExecutableFiles.Union(mapperTaskFiles).ToList();

        await SampleHelpers.UploadResourcesAsync(
            cloudStorageAccount,
            this.textSearchSettings.InputBlobContainer,
            files);
    }

    //Generate a SAS for each container: read-only for the input, read/write for the output.
    string inputContainerSasUrl = SampleHelpers.ConstructContainerSas(
        cloudStorageAccount,
        this.textSearchSettings.InputBlobContainer,
        permissions: WindowsAzure.Storage.Blob.SharedAccessBlobPermissions.Read);

    string outputContainerSasUrl = SampleHelpers.ConstructContainerSas(
        cloudStorageAccount,
        this.textSearchSettings.OutputBlobContainer,
        permissions: WindowsAzure.Storage.Blob.SharedAccessBlobPermissions.Read | WindowsAzure.Storage.Blob.SharedAccessBlobPermissions.Write);

    //Set up the Batch Service credentials used to authenticate with the Batch Service.
    BatchSharedKeyCredentials credentials = new BatchSharedKeyCredentials(
        this.accountSettings.BatchServiceUrl,
        this.accountSettings.BatchAccountName,
        this.accountSettings.BatchAccountKey);

    using (BatchClient batchClient = BatchClient.Open(credentials))
    {
        //
        // Construct the job properties in local memory before committing them to the Batch Service.
        //

        //Allow enough compute nodes in the pool to run each mapper task
        int numberOfPoolComputeNodes = this.textSearchSettings.NumberOfMapperTasks;

        //Define the pool specification for the pool which the job will run on.
        PoolSpecification poolSpecification = new PoolSpecification()
        {
            TargetDedicatedComputeNodes = numberOfPoolComputeNodes,
            VirtualMachineSize = "standard_d1_v2",
            //You can learn more about os families and versions at:
            //http://azure.microsoft.com/documentation/articles/cloud-services-guestos-update-matrix
            CloudServiceConfiguration = new CloudServiceConfiguration(osFamily: "5")
        };

        //Use the auto pool feature of the Batch Service to create a pool when the job is created.
        //This creates a new pool for each job which is added.
        AutoPoolSpecification autoPoolSpecification = new AutoPoolSpecification()
        {
            AutoPoolIdPrefix = "TextSearchPool",
            KeepAlive = false,
            PoolLifetimeOption = PoolLifetimeOption.Job,
            PoolSpecification = poolSpecification
        };

        //Define the pool information for this job -- it will run on the pool defined by the auto pool specification above.
        PoolInformation poolInformation = new PoolInformation()
        {
            AutoPoolSpecification = autoPoolSpecification
        };

        //Create the unbound job in local memory. An object which exists only in local memory (and not on the Batch Service) is "unbound".
        string jobId = Environment.GetEnvironmentVariable("USERNAME") + DateTime.UtcNow.ToString("yyyyMMdd-HHmmss");

        CloudJob unboundJob = batchClient.JobOperations.CreateJob(jobId, poolInformation);
        unboundJob.UsesTaskDependencies = true;

        try
        {
            //Commit the unbound job to the Batch Service.
            Console.WriteLine($"Adding job: {unboundJob.Id} to the Batch Service.");
            await unboundJob.CommitAsync(); //Issues a request to the Batch Service to add the job which was defined above.

            // Add tasks to the job
            var mapperTasks = CreateMapperTasks(inputContainerSasUrl, outputContainerSasUrl);
            var reducerTask = CreateReducerTask(inputContainerSasUrl, outputContainerSasUrl, mapperTasks);

            var tasksToAdd = Enumerable.Concat(mapperTasks, new[] { reducerTask });

            //Submit the unbound task collection to the Batch Service.
            //Use the AddTask method which takes a collection of CloudTasks for the best performance.
            Console.WriteLine("Submitting {0} mapper tasks", this.textSearchSettings.NumberOfMapperTasks);
            Console.WriteLine("Submitting 1 reducer task");
            await batchClient.JobOperations.AddTaskAsync(jobId, tasksToAdd);

            //An object which is backed by a corresponding Batch Service object is "bound."
            CloudJob boundJob = await batchClient.JobOperations.GetJobAsync(jobId);

            // Update the job now that we've added tasks so that when all of the tasks which we have added
            // are complete, the job will automatically move to the completed state.
            // The asynchronous variants are used so this async method never blocks a thread on a service call.
            boundJob.OnAllTasksComplete = OnAllTasksComplete.TerminateJob;
            await boundJob.CommitAsync();
            await boundJob.RefreshAsync();

            //
            // Wait for the tasks to complete.
            //
            List<CloudTask> tasks = await batchClient.JobOperations.ListTasks(jobId).ToListAsync();
            TimeSpan maxJobCompletionTimeout = TimeSpan.FromMinutes(30);

            // Monitor the current tasks to see when they are done.
            // Occasionally a task may get killed and requeued during an upgrade or hardware failure,
            // Robustness against this was not added into the sample for
            // simplicity, but should be added into any production code.
            Console.WriteLine("Waiting for job's tasks to complete");

            TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor();
            try
            {
                await taskStateMonitor.WhenAll(tasks, TaskState.Completed, maxJobCompletionTimeout);
            }
            finally
            {
                Console.WriteLine("Done waiting for all tasks to complete");

                // Refresh the task list
                tasks = await batchClient.JobOperations.ListTasks(jobId).ToListAsync();

                //Check to ensure every task (mappers and reducer) exited successfully.
                //This runs even when the wait above threw, so task failures are still reported.
                foreach (var task in tasks)
                {
                    await Helpers.CheckForTaskSuccessAsync(task, dumpStandardOutOnTaskSuccess: false);
                }
            }

            //
            // Download and write out the reducer tasks output
            //
            string reducerText = await SampleHelpers.DownloadBlobTextAsync(cloudStorageAccount, this.textSearchSettings.OutputBlobContainer, Constants.ReducerTaskResultBlobName);
            Console.WriteLine("Reducer reuslts:");
            Console.WriteLine(reducerText);
        }
        finally
        {
            //Delete the job.
            //This will delete the auto pool associated with the job as long as the pool
            //keep alive property is set to false.
            if (this.textSearchSettings.ShouldDeleteJob)
            {
                Console.WriteLine($"Deleting job {jobId}");
                await batchClient.JobOperations.DeleteJobAsync(jobId);
            }

            //Optionally remove both the input and the output containers used by this run.
            if (this.textSearchSettings.ShouldDeleteContainers)
            {
                Console.WriteLine("Deleting containers");
                var inputContainer = blobClient.GetContainerReference(this.textSearchSettings.InputBlobContainer);
                await inputContainer.DeleteIfExistsAsync();
                await outputContainer.DeleteIfExistsAsync();
            }
        }
    }
}