/// <summary>
/// Waits up to two minutes for the reducer task to reach the completed state, reports on its outcome,
/// and hands back the string produced by <c>Helpers.CheckForTaskSuccessAsync</c>.
/// </summary>
/// <param name="batchClient">The BatchClient to use when interacting with the Batch service.</param>
/// <returns>
/// The value returned by <c>Helpers.CheckForTaskSuccessAsync</c> for the reducer task
/// (presumably the task's standard output -- confirm against that helper).
/// </returns>
/// <exception cref="TimeoutException">The reducer task did not complete within the timeout.</exception>
private async Task<string> WaitForReducerTaskToCompleteAsync(BatchClient batchClient)
{
    // Look up the bound reducer task so that it can be monitored.
    CloudTask reducerTask = await batchClient.JobOperations.GetTaskAsync(this.jobId, Constants.ReducerTaskId);

    TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor();
    List<CloudTask> monitoredTasks = new List<CloudTask> { reducerTask };
    TimeSpan reducerTimeout = TimeSpan.FromMinutes(2);

    bool timedOut = await monitor.WaitAllAsync(monitoredTasks, TaskState.Completed, reducerTimeout);

    // Pull the latest view of the task from the Batch service before inspecting it.
    await reducerTask.RefreshAsync();

    // The outcome is dumped before the timeout check so that diagnostics are printed even when the wait timed out.
    string stdOut = await Helpers.CheckForTaskSuccessAsync(reducerTask, dumpStandardOutOnTaskSuccess: true);

    if (!timedOut)
    {
        return stdOut;
    }

    // The reducer task never reached the completed state in time: report and fail.
    const string errorMessage = "Reducer task did not complete within expected timeout.";

    Console.WriteLine("Task {0} is in state: {1}", reducerTask.Id, reducerTask.State);
    Console.WriteLine(errorMessage);

    throw new TimeoutException(errorMessage);
}
/// <summary>
/// Waits (up to ten minutes) until every task in the given job has reached the completed state, then
/// writes the standard output file of each task to the console.
/// </summary>
/// <param name="batchClient">The BatchClient to use when interacting with the Batch service.</param>
/// <param name="jobId">The ID of the job whose tasks are awaited.</param>
/// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
/// <exception cref="TimeoutException">Not all tasks completed within the timeout.</exception>
private static async Task WaitForJobAndPrintOutputAsync(BatchClient batchClient, string jobId)
{
    Console.WriteLine("Waiting for all tasks to complete on job: {0} ...", jobId);

    // The task state monitor does the polling for us; we only say which state to wait for.
    TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor();
    List<CloudTask> jobTasks = await batchClient.JobOperations.ListTasks(jobId).ToListAsync();

    // The timeout is generous: if the pool is still being resized, the nodes first have to
    // become idle before any task can be scheduled onto them.
    TimeSpan completionTimeout = TimeSpan.FromMinutes(10);
    bool timedOut = await monitor.WaitAllAsync(jobTasks, TaskState.Completed, completionTimeout);

    if (timedOut)
    {
        throw new TimeoutException("Timed out waiting for tasks");
    }

    // Every task is complete; print what each one wrote to standard out.
    foreach (CloudTask completedTask in jobTasks)
    {
        Console.WriteLine("Task {0}", completedTask.Id);

        NodeFile stdOutFile = await completedTask.GetNodeFileAsync(Constants.StandardOutFileName);
        string stdOutContents = await stdOutFile.ReadAsStringAsync();

        Console.WriteLine("Standard out:");
        Console.WriteLine(stdOutContents);
        Console.WriteLine();
    }
}
/// <summary>
/// Waits up to five minutes for every task whose id starts with <c>Constants.MapperTaskPrefix</c> to
/// reach the completed state, then prints each mapper task's state and runs
/// <c>Helpers.CheckForTaskSuccessAsync</c> on it.
/// </summary>
/// <param name="batchClient">The BatchClient to use when interacting with the Batch service.</param>
/// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
/// <exception cref="TimeoutException">Not all mapper tasks completed within the timeout.</exception>
private async Task WaitForMapperTasksToCompleteAsync(BatchClient batchClient)
{
    Console.WriteLine("Waiting for the mapper tasks to complete...");

    // Restrict every listing below to the mapper tasks by filtering on the task id prefix.
    string mapperFilterClause = string.Format("startswith(id, '{0}')", Constants.MapperTaskPrefix);
    DetailLevel mapperTaskIdFilter = new ODATADetailLevel() { FilterClause = mapperFilterClause };

    IEnumerable<CloudTask> mapperTasks = batchClient.JobOperations.ListTasks(this.jobId, detailLevel: mapperTaskIdFilter);

    // Waiting here matters when KillJobOnCompletion = TRUE: once the job manager exits,
    // any task still running under the job is killed.
    TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor();
    bool timedOut = await monitor.WaitAllAsync(mapperTasks, TaskState.Completed, TimeSpan.FromMinutes(5));

    // List the mapper tasks again to report on the state each one ended up in.
    IPagedEnumerable<CloudTask> mapperTaskResults = batchClient.JobOperations.ListTasks(this.jobId, detailLevel: mapperTaskIdFilter);

    await mapperTaskResults.ForEachAsync(async mapperTask =>
    {
        Console.WriteLine("Task {0} is in state: {1}", mapperTask.Id, mapperTask.State);

        await Helpers.CheckForTaskSuccessAsync(mapperTask, dumpStandardOutOnTaskSuccess: false);

        Console.WriteLine();
    });

    if (!timedOut)
    {
        return;
    }

    // Some mapper task never reached the completed state, so the job manager cannot continue.
    const string errorMessage = "Mapper tasks did not complete within expected timeout.";

    Console.WriteLine(errorMessage);

    throw new TimeoutException(errorMessage);
}
/// <summary>
/// Waits for the supplied tasks to reach the completed state and then prints the standard output and
/// standard error files of each task to the console.
/// </summary>
/// <param name="batchClient">The BatchClient to use when interacting with the Batch service.</param>
/// <param name="tasks">The tasks to wait for.</param>
/// <param name="timeout">How long to wait. If the tasks are not all complete once this has elapsed, an exception is thrown.</param>
/// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
/// <exception cref="TimeoutException">Not all tasks completed within <paramref name="timeout"/>.</exception>
public static async Task WaitForTasksAndPrintOutputAsync(BatchClient batchClient, IEnumerable<CloudTask> tasks, TimeSpan timeout)
{
    // The task state monitor does the polling; we only say which state to wait for.
    TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor();

    // Snapshot the sequence once so the wait and the output dump see the same tasks.
    List<CloudTask> monitoredTasks = tasks.ToList();

    bool timedOut = await monitor.WaitAllAsync(monitoredTasks, TaskState.Completed, timeout).ConfigureAwait(false);

    if (timedOut)
    {
        throw new TimeoutException("Timed out waiting for tasks");
    }

    // Output files to print for each task, paired by index with the heading printed above each one.
    string[] outputFileNames = { Constants.StandardOutFileName, Constants.StandardErrorFileName };
    string[] outputHeadings = { "Standard out:", "Standard error:" };

    foreach (CloudTask completedTask in monitoredTasks)
    {
        Console.WriteLine("Task {0}", completedTask.Id);

        for (int i = 0; i < outputFileNames.Length; i++)
        {
            // Fetch and read the file first, then print its heading followed by its contents.
            NodeFile outputFile = await completedTask.GetNodeFileAsync(outputFileNames[i]).ConfigureAwait(false);
            string outputText = await outputFile.ReadAsStringAsync().ConfigureAwait(false);

            Console.WriteLine(outputHeadings[i]);
            Console.WriteLine(outputText);
        }

        Console.WriteLine();
    }
}
/// <summary>
/// Monitors the tasks of the specified job for completion and returns a value indicating whether all tasks
/// completed successfully within the timeout period. The job is terminated once the wait is over, whether
/// or not it timed out.
/// </summary>
/// <param name="batchClient">A <see cref="BatchClient"/>.</param>
/// <param name="jobId">The id of the job containing the tasks that should be monitored.</param>
/// <param name="timeout">The period of time to wait for the tasks to reach the completed state.</param>
/// <returns><c>true</c> if all tasks in the specified job completed with an exit code of 0 and without a scheduling
/// error within the specified timeout period, otherwise <c>false</c>.</returns>
private static async Task<bool> MonitorTasks(BatchClient batchClient, string jobId, TimeSpan timeout)
{
    bool allTasksSuccessful = true;
    const string successMessage = "All tasks reached state Completed.";
    const string failureMessage = "One or more tasks failed to reach the Completed state within the timeout period.";

    // Obtain the collection of tasks currently managed by the job. Note that we use a detail level to
    // specify that only the "id" property of each task should be populated. Using a detail level for
    // all list operations helps to lower response time from the Batch service.
    //
    // The tasks are listed for the jobId argument so that the job being monitored is always the same
    // job that is terminated below.
    ODATADetailLevel detail = new ODATADetailLevel(selectClause: "id");
    List<CloudTask> tasks = await batchClient.JobOperations.ListTasks(jobId, detail).ToListAsync();

    Console.WriteLine("Awaiting task completion, timeout in {0}...", timeout.ToString());

    // We use a TaskStateMonitor to monitor the state of our tasks. In this case, we will wait for all tasks to
    // reach the Completed state.
    TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor();
    bool timedOut = await taskStateMonitor.WaitAllAsync(tasks, TaskState.Completed, timeout);

    if (timedOut)
    {
        allTasksSuccessful = false;

        await batchClient.JobOperations.TerminateJobAsync(jobId, failureMessage);

        Console.WriteLine(failureMessage);
    }
    else
    {
        await batchClient.JobOperations.TerminateJobAsync(jobId, successMessage);

        // All tasks have reached the "Completed" state, however, this does not guarantee all tasks completed successfully.
        // Here we further check each task's ExecutionInfo property to ensure that it did not encounter a scheduling error
        // or return a non-zero exit code.

        // Update the detail level to populate only the task id and executionInfo properties.
        // We refresh the tasks below, and need only this information for each task.
        detail.SelectClause = "id, executionInfo";

        foreach (CloudTask task in tasks)
        {
            // Populate the task's properties with the latest info from the Batch service
            await task.RefreshAsync(detail);

            if (task.ExecutionInformation.SchedulingError != null)
            {
                // A scheduling error indicates a problem starting the task on the node. It is important to note that
                // the task's state can be "Completed," yet still have encountered a scheduling error.
                allTasksSuccessful = false;

                Console.WriteLine("WARNING: Task [{0}] encountered a scheduling error: {1}", task.Id, task.ExecutionInformation.SchedulingError.Message);
            }
            else if (task.ExecutionInformation.ExitCode != 0)
            {
                // A non-zero exit code may indicate that the application executed by the task encountered an error
                // during execution. As not every application returns non-zero on failure by default (e.g. robocopy),
                // your implementation of error checking may differ from this example.
                allTasksSuccessful = false;

                Console.WriteLine("WARNING: Task [{0}] returned a non-zero exit code - this may indicate task execution or completion failure.", task.Id);
            }
        }
    }

    if (allTasksSuccessful)
    {
        Console.WriteLine("Success! All tasks completed successfully within the specified timeout period.");
    }

    return allTasksSuccessful;
}
/// <summary>
/// Populates Azure Storage with the required files, and
/// submits the job to the Azure Batch service. Then waits for the job manager task to
/// complete, prints the reducer output downloaded from blob storage, and finally deletes
/// the job when the settings ask for it.
/// </summary>
/// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
/// <exception cref="TimeoutException">The job manager task did not complete within 30 minutes.</exception>
public async Task RunAsync()
{
    Console.WriteLine("Running with the following settings: ");
    Console.WriteLine("----------------------------------------");
    Console.WriteLine(this.textSearchSettings.ToString());
    Console.WriteLine(this.accountSettings.ToString());

    CloudStorageAccount cloudStorageAccount = new CloudStorageAccount(
        new StorageCredentials(
            this.accountSettings.StorageAccountName,
            this.accountSettings.StorageAccountKey),
        this.accountSettings.StorageServiceUrl,
        useHttps: true);

    //Upload resources if required.
    if (this.textSearchSettings.ShouldUploadResources)
    {
        Console.WriteLine("Splitting file: {0} into {1} subfiles",
            Constants.TextFilePath,
            this.textSearchSettings.NumberOfMapperTasks);

        //Split the text file into the correct number of files for consumption by the mapper tasks.
        FileSplitter splitter = new FileSplitter();
        List<string> mapperTaskFiles = await splitter.SplitAsync(
            Constants.TextFilePath,
            this.textSearchSettings.NumberOfMapperTasks);

        List<string> files = Constants.RequiredExecutableFiles.Union(mapperTaskFiles).ToList();

        await SampleHelpers.UploadResourcesAsync(
            cloudStorageAccount,
            this.textSearchSettings.BlobContainer,
            files);
    }

    //Generate a SAS for the container.
    string containerSasUrl = SampleHelpers.ConstructContainerSas(
        cloudStorageAccount,
        this.textSearchSettings.BlobContainer);

    //Set up the Batch Service credentials used to authenticate with the Batch Service.
    BatchSharedKeyCredentials credentials = new BatchSharedKeyCredentials(
        this.accountSettings.BatchServiceUrl,
        this.accountSettings.BatchAccountName,
        this.accountSettings.BatchAccountKey);

    using (BatchClient batchClient = await BatchClient.OpenAsync(credentials))
    {
        //
        // Construct the job properties in local memory before committing them to the Batch Service.
        //

        //Allow enough compute nodes in the pool to run each mapper task, and 1 extra to run the job manager.
        int numberOfPoolComputeNodes = 1 + this.textSearchSettings.NumberOfMapperTasks;

        //Define the pool specification for the pool which the job will run on.
        PoolSpecification poolSpecification = new PoolSpecification()
        {
            TargetDedicated = numberOfPoolComputeNodes,
            VirtualMachineSize = "small",
            //You can learn more about os families and versions at:
            //http://azure.microsoft.com/documentation/articles/cloud-services-guestos-update-matrix
            OSFamily = "4",
            TargetOSVersion = "*"
        };

        //Use the auto pool feature of the Batch Service to create a pool when the job is created.
        //This creates a new pool for each job which is added.
        AutoPoolSpecification autoPoolSpecification = new AutoPoolSpecification()
        {
            AutoPoolIdPrefix = "TextSearchPool",
            KeepAlive = false,
            PoolLifetimeOption = PoolLifetimeOption.Job,
            PoolSpecification = poolSpecification
        };

        //Define the pool information for this job -- it will run on the pool defined by the auto pool specification above.
        PoolInformation poolInformation = new PoolInformation()
        {
            AutoPoolSpecification = autoPoolSpecification
        };

        //Define the job manager for this job. This job manager will run first and will submit the tasks for
        //the job. The job manager is the executable which manages the lifetime of the job
        //and all tasks which should run for the job. In this case, the job manager submits the mapper and reducer tasks.
        List<ResourceFile> jobManagerResourceFiles = SampleHelpers.GetResourceFiles(containerSasUrl, Constants.RequiredExecutableFiles);
        const string jobManagerTaskId = "JobManager";

        JobManagerTask jobManagerTask = new JobManagerTask()
        {
            ResourceFiles = jobManagerResourceFiles,
            CommandLine = Constants.JobManagerExecutable,

            //Determines if the job should terminate when the job manager process exits.
            KillJobOnCompletion = true,
            Id = jobManagerTaskId
        };

        //Create the unbound job in local memory. An object which exists only in local memory
        //(and not on the Batch Service) is "unbound".
        string jobId = Environment.GetEnvironmentVariable("USERNAME") + DateTime.UtcNow.ToString("yyyyMMdd-HHmmss");

        CloudJob unboundJob = batchClient.JobOperations.CreateJob(jobId, poolInformation);
        unboundJob.JobManagerTask = jobManagerTask; //Assign the job manager task to this job

        try
        {
            //Commit the unbound job to the Batch Service.
            Console.WriteLine("Adding job: {0} to the Batch Service.", unboundJob.Id);
            await unboundJob.CommitAsync(); //Issues a request to the Batch Service to add the job which was defined above.

            //
            // Wait for the job manager task to complete.
            //

            //An object which is backed by a corresponding Batch Service object is "bound."
            CloudJob boundJob = await batchClient.JobOperations.GetJobAsync(jobId);

            CloudTask boundJobManagerTask = await boundJob.GetTaskAsync(jobManagerTaskId);

            TimeSpan maxJobCompletionTimeout = TimeSpan.FromMinutes(30);

            // Monitor the current tasks to see when they are done.
            // Occasionally a task may get killed and requeued during an upgrade or hardware failure, including the job manager
            // task. The job manager will be re-run in this case. Robustness against this was not added into the sample for
            // simplicity, but should be added into any production code.
            Console.WriteLine("Waiting for job's tasks to complete");

            TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor();
            bool timedOut = await taskStateMonitor.WaitAllAsync(new List<CloudTask> { boundJobManagerTask }, TaskState.Completed, maxJobCompletionTimeout);

            Console.WriteLine("Done waiting for job manager task.");

            await boundJobManagerTask.RefreshAsync();

            //Check to ensure the job manager task exited successfully.
            //This runs before the timeout check so that diagnostics are printed even when the wait timed out.
            await Helpers.CheckForTaskSuccessAsync(boundJobManagerTask, dumpStandardOutOnTaskSuccess: false);

            if (timedOut)
            {
                throw new TimeoutException("Timed out waiting for job manager task to complete.");
            }

            //
            // Download and write out the reducer tasks output
            //

            string reducerText = await SampleHelpers.DownloadBlobTextAsync(cloudStorageAccount, this.textSearchSettings.BlobContainer, Constants.ReducerTaskResultBlobName);
            Console.WriteLine("Reducer reuslts:");
            Console.WriteLine(reducerText);
        }
        finally
        {
            //Delete the job.
            //This will delete the auto pool associated with the job as long as the pool
            //keep alive property is set to false.
            if (this.textSearchSettings.ShouldDeleteJob)
            {
                Console.WriteLine("Deleting job {0}", jobId);

                //Use the asynchronous delete so that this async method never blocks a thread on a service call.
                await batchClient.JobOperations.DeleteJobAsync(jobId);
            }

            //Note that there were files uploaded to a container specified in the
            //configuration file. This container will not be deleted or cleaned up by this sample.
        }
    }
}
/// <summary>
/// Runs the job manager task: submits one mapper task per split file, waits for the mapper tasks
/// to complete, then submits the reducer task and waits for it as well.
/// </summary>
/// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
/// <exception cref="TimeoutException">
/// The mapper tasks did not complete within five minutes, or the reducer task did not complete within two minutes.
/// </exception>
public async Task RunAsync()
{
    Console.WriteLine("JobManager for account: {0}, job: {1} has started...", this.accountName, this.jobId);
    Console.WriteLine();

    Console.WriteLine("JobManager running with the following settings: ");
    Console.WriteLine("----------------------------------------");
    Console.WriteLine(this.configurationSettings.ToString());

    // Credentials used to authenticate against the Batch Service.
    BatchSharedKeyCredentials sharedKeyCredentials = new BatchSharedKeyCredentials(
        this.configurationSettings.BatchServiceUrl,
        this.configurationSettings.BatchAccountName,
        this.configurationSettings.BatchAccountKey);

    using (BatchClient batchClient = await BatchClient.OpenAsync(sharedKeyCredentials))
    {
        // A container SAS gives the Batch Service access to the files the mapper and reducer tasks need.
        string containerSas = Helpers.ConstructContainerSas(
            this.configurationSettings.StorageAccountName,
            this.configurationSettings.StorageAccountKey,
            this.configurationSettings.StorageServiceUrl,
            this.configurationSettings.BlobContainer);

        //
        // Submit mapper tasks.
        //
        Console.WriteLine("Submitting {0} mapper tasks.", this.configurationSettings.NumberOfMapperTasks);

        // Build every mapper task locally first so they can all be submitted in a single call.
        List<CloudTask> mapperTasks = new List<CloudTask>();

        for (int mapperIndex = 0; mapperIndex < this.configurationSettings.NumberOfMapperTasks; mapperIndex++)
        {
            string mapperTaskId = Helpers.GetMapperTaskId(mapperIndex);
            string splitFileName = Helpers.GetSplitFileName(mapperIndex);
            string splitFileSource = Helpers.ConstructBlobSource(containerSas, splitFileName);

            string mapperCommandLine = string.Format("{0} -MapperTask {1}", Constants.TextSearchExe, splitFileSource);
            CloudTask mapperTask = new CloudTask(mapperTaskId, mapperCommandLine);

            // The exes, dlls and configuration files the mapper task needs in order to run.
            IReadOnlyList<string> requiredFiles = Constants.RequiredExecutableFiles;
            mapperTask.ResourceFiles = Helpers.GetResourceFiles(containerSas, requiredFiles);

            mapperTasks.Add(mapperTask);
        }

        // The collection overload of AddTask gives the best performance when submitting many tasks.
        await batchClient.JobOperations.AddTaskAsync(this.jobId, mapperTasks);

        //
        // Wait for the mapper tasks to complete.
        //
        Console.WriteLine("Waiting for the mapper tasks to complete...");

        // Restrict the listings below to the mapper tasks by filtering on the task id prefix.
        string mapperFilterClause = string.Format("startswith(id, '{0}')", Constants.MapperTaskPrefix);
        DetailLevel mapperTaskIdFilter = new ODATADetailLevel() { FilterClause = mapperFilterClause };

        IEnumerable<CloudTask> pendingMapperTasks = batchClient.JobOperations.ListTasks(this.jobId, detailLevel: mapperTaskIdFilter);

        // The same monitor is reused further down for the reducer task.
        TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor();
        bool mappersTimedOut = await monitor.WaitAllAsync(pendingMapperTasks, TaskState.Completed, TimeSpan.FromMinutes(5));

        // List the mapper tasks again to report on the state each one ended up in.
        IPagedEnumerable<CloudTask> mapperTaskResults = batchClient.JobOperations.ListTasks(this.jobId, detailLevel: mapperTaskIdFilter);

        await mapperTaskResults.ForEachAsync(async mapperTask =>
        {
            Console.WriteLine("Task {0} is in state: {1}", mapperTask.Id, mapperTask.State);

            await Helpers.CheckForTaskSuccessAsync(mapperTask, dumpStandardOutOnTaskSuccess: false);

            Console.WriteLine();
        });

        // Without every mapper task completed the job manager cannot continue.
        if (mappersTimedOut)
        {
            const string mapperErrorMessage = "Mapper tasks did not complete within expected timeout.";

            Console.WriteLine(mapperErrorMessage);

            throw new TimeoutException(mapperErrorMessage);
        }

        //
        // Create the reducer task.
        //
        string reducerCommandLine = string.Format("{0} -ReducerTask", Constants.TextSearchExe);

        Console.WriteLine("Adding the reducer task: {0}", Constants.ReducerTaskId);
        CloudTask reducerTaskToAdd = new CloudTask(Constants.ReducerTaskId, reducerCommandLine);

        // The exes, dlls and configuration files the reducer task needs in order to run.
        reducerTaskToAdd.ResourceFiles = Helpers.GetResourceFiles(containerSas, Constants.RequiredExecutableFiles);

        // Ask the Batch Service to add the reducer task to the job.
        await batchClient.JobOperations.AddTaskAsync(this.jobId, reducerTaskToAdd);

        //
        // Wait for the reducer task to complete.
        //

        // Fetch the bound reducer task so that it can be monitored.
        CloudTask reducerTask = await batchClient.JobOperations.GetTaskAsync(this.jobId, Constants.ReducerTaskId);

        List<CloudTask> reducerTasks = new List<CloudTask> { reducerTask };
        bool reducerTimedOut = await monitor.WaitAllAsync(reducerTasks, TaskState.Completed, TimeSpan.FromMinutes(2));

        // Pull the latest view of the reducer task from the Batch Service.
        await reducerTask.RefreshAsync();

        // The outcome is dumped before the timeout check so diagnostics are printed even on timeout.
        await Helpers.CheckForTaskSuccessAsync(reducerTask, dumpStandardOutOnTaskSuccess: true);

        // The reducer task may not have completed within the expected timeout.
        if (reducerTimedOut)
        {
            const string reducerErrorMessage = "Reducer task did not complete within expected timeout.";

            Console.WriteLine("Task {0} is in state: {1}", reducerTask.Id, reducerTask.State);
            Console.WriteLine(reducerErrorMessage);

            throw new TimeoutException(reducerErrorMessage);
        }

        // The job manager has completed.
        Console.WriteLine("JobManager completed successfully.");
    }
}