/// <summary>
        /// Waits up to two minutes for the reducer task of this job to reach the Completed state and then
        /// validates it through <c>Helpers.CheckForTaskSuccessAsync</c>.
        /// </summary>
        /// <param name="batchClient">The BatchClient used to look up and monitor the reducer task.</param>
        /// <returns>
        /// The string returned by <c>Helpers.CheckForTaskSuccessAsync</c> (presumably the task's standard
        /// output, given the flag that is passed -- confirm against the helper).
        /// </returns>
        /// <exception cref="TimeoutException">The task state monitor reported that the wait timed out.</exception>
        private async Task <string> WaitForReducerTaskToCompleteAsync(BatchClient batchClient)
        {
            TimeSpan reducerTimeout = TimeSpan.FromMinutes(2);

            // Fetch the bound (service-backed) reducer task so that it can be monitored.
            CloudTask reducerTask = await batchClient.JobOperations.GetTaskAsync(this.jobId, Constants.ReducerTaskId);

            TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor();
            List<CloudTask> monitoredTasks = new List<CloudTask> { reducerTask };

            bool waitTimedOut = await monitor.WaitAllAsync(monitoredTasks, TaskState.Completed, reducerTimeout);

            // Pull the latest task information from the Batch Service before inspecting it.
            await reducerTask.RefreshAsync();

            // The check runs even after a timeout so that its diagnostics are emitted for debugging.
            string reducerOutput = await Helpers.CheckForTaskSuccessAsync(reducerTask, dumpStandardOutOnTaskSuccess: true);

            if (!waitTimedOut)
            {
                return reducerOutput;
            }

            // The reducer task did not complete in the expected time: report its state and fail.
            const string errorMessage = "Reducer task did not complete within expected timeout.";

            Console.WriteLine("Task {0} is in state: {1}", reducerTask.Id, reducerTask.State);
            Console.WriteLine(errorMessage);

            throw new TimeoutException(errorMessage);
        }
Example #2
0
        /// <summary>
        /// Waits for all tasks under the specified job to complete and then prints each task's output to the console.
        /// </summary>
        /// <param name="batchClient">The BatchClient to use when interacting with the Batch service.</param>
        /// <param name="jobId">The ID of the job.</param>
        /// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
        private static async Task WaitForJobAndPrintOutputAsync(BatchClient batchClient, string jobId)
        {
            Console.WriteLine("Waiting for all tasks to complete on job: {0} ...", jobId);

            // The task state monitor blocks (asynchronously) until every task reaches the requested state.
            TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor();

            List<CloudTask> jobTasks = await batchClient.JobOperations.ListTasks(jobId).ToListAsync();

            // The timeout is generous: if the pool is still resizing, nodes must first become idle
            // before any task can be scheduled onto them.
            TimeSpan completionTimeout = TimeSpan.FromMinutes(10);

            bool waitTimedOut = await monitor.WaitAllAsync(jobTasks, TaskState.Completed, completionTimeout);

            if (waitTimedOut)
            {
                throw new TimeoutException("Timed out waiting for tasks");
            }

            // Print the standard output file of each task, in listing order.
            for (int taskIndex = 0; taskIndex < jobTasks.Count; taskIndex++)
            {
                CloudTask jobTask = jobTasks[taskIndex];

                Console.WriteLine("Task {0}", jobTask.Id);

                NodeFile stdOutFile = await jobTask.GetNodeFileAsync(Constants.StandardOutFileName);
                string stdOutText = await stdOutFile.ReadAsStringAsync();

                Console.WriteLine("Standard out:");
                Console.WriteLine(stdOutText);

                Console.WriteLine();
            }
        }
        /// <summary>
        /// Waits up to five minutes for every mapper task of this job to reach the Completed state, then
        /// lists the mapper tasks again and runs <c>Helpers.CheckForTaskSuccessAsync</c> on each of them.
        /// </summary>
        /// <param name="batchClient">The BatchClient used to list and monitor the mapper tasks.</param>
        /// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
        /// <exception cref="TimeoutException">The task state monitor reported that the wait timed out.</exception>
        private async Task WaitForMapperTasksToCompleteAsync(BatchClient batchClient)
        {
            Console.WriteLine("Waiting for the mapper tasks to complete...");

            // Select only the mapper tasks: their ids all start with the mapper prefix.
            ODATADetailLevel mapperFilter = new ODATADetailLevel();
            mapperFilter.FilterClause = string.Format("startswith(id, '{0}')", Constants.MapperTaskPrefix);

            IEnumerable<CloudTask> mapperTasksToWatch = batchClient.JobOperations.ListTasks(
                this.jobId,
                detailLevel: mapperFilter);

            // Waiting matters when KillJobOnCompletion = TRUE: once the job manager exits, every task
            // still running under the job is killed.
            TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor();
            TimeSpan mapperTimeout = TimeSpan.FromMinutes(5);

            bool waitTimedOut = await monitor.WaitAllAsync(mapperTasksToWatch, TaskState.Completed, mapperTimeout);

            // List the mapper tasks a second time so that the state being analyzed is the post-wait state.
            IPagedEnumerable<CloudTask> mapperTasksAfterWait = batchClient.JobOperations.ListTasks(
                this.jobId,
                detailLevel: mapperFilter);

            await mapperTasksAfterWait.ForEachAsync(async mapperTask =>
            {
                Console.WriteLine("Task {0} is in state: {1}", mapperTask.Id, mapperTask.State);

                await Helpers.CheckForTaskSuccessAsync(mapperTask, dumpStandardOutOnTaskSuccess: false);

                Console.WriteLine();
            });

            if (!waitTimedOut)
            {
                return;
            }

            // Not every task reached the desired state in time, so the job manager cannot continue.
            const string errorMessage = "Mapper tasks did not complete within expected timeout.";
            Console.WriteLine(errorMessage);

            throw new TimeoutException(errorMessage);
        }
Example #4
0
        /// <summary>
        /// Waits for the specified tasks to complete and then prints each task's standard output and standard error to the console.
        /// </summary>
        /// <param name="batchClient">The BatchClient to use when interacting with the Batch service.</param>
        /// <param name="tasks">The tasks to wait for.</param>
        /// <param name="timeout">The timeout.  After this time has elapsed, if the tasks are not complete an exception will be thrown.</param>
        /// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
        public static async Task WaitForTasksAndPrintOutputAsync(BatchClient batchClient, IEnumerable <CloudTask> tasks, TimeSpan timeout)
        {
            // The task state monitor waits until every supplied task reaches the requested state.
            TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor();

            // Materialize the sequence once so the wait and the output dump see the same task objects.
            List<CloudTask> materializedTasks = tasks.ToList();

            bool waitTimedOut = await monitor
                .WaitAllAsync(materializedTasks, TaskState.Completed, timeout)
                .ConfigureAwait(continueOnCapturedContext: false);

            if (waitTimedOut)
            {
                throw new TimeoutException("Timed out waiting for tasks");
            }

            // Heading printed for each node file (key) and the node file to read (value), in print order.
            KeyValuePair<string, string>[] outputFiles =
            {
                new KeyValuePair<string, string>("Standard out:", Constants.StandardOutFileName),
                new KeyValuePair<string, string>("Standard error:", Constants.StandardErrorFileName)
            };

            foreach (CloudTask completedTask in materializedTasks)
            {
                Console.WriteLine("Task {0}", completedTask.Id);

                foreach (KeyValuePair<string, string> outputFile in outputFiles)
                {
                    // Fetch the file from the node and read it fully before printing anything for it.
                    NodeFile nodeFile = await completedTask
                        .GetNodeFileAsync(outputFile.Value)
                        .ConfigureAwait(continueOnCapturedContext: false);

                    string fileText = await nodeFile
                        .ReadAsStringAsync()
                        .ConfigureAwait(continueOnCapturedContext: false);

                    Console.WriteLine(outputFile.Key);
                    Console.WriteLine(fileText);
                }

                Console.WriteLine();
            }
        }
Example #5
0
        /// <summary>
        /// Monitors the specified tasks for completion and returns a value indicating whether all tasks completed successfully
        /// within the timeout period.
        /// </summary>
        /// <param name="batchClient">A <see cref="BatchClient"/>.</param>
        /// <param name="jobId">The id of the job containing the tasks that should be monitored.</param>
        /// <param name="timeout">The period of time to wait for the tasks to reach the completed state.</param>
        /// <returns><c>true</c> if all tasks in the specified job completed with an exit code of 0 within the specified timeout period, otherwise <c>false</c>.</returns>
        private static async Task <bool> MonitorTasks(BatchClient batchClient, string jobId, TimeSpan timeout)
        {
            bool         allTasksSuccessful = true;
            const string successMessage     = "All tasks reached state Completed.";
            const string failureMessage     = "One or more tasks failed to reach the Completed state within the timeout period.";

            // Obtain the collection of tasks currently managed by the job. Note that we use a detail level to
            // specify that only the "id" property of each task should be populated. Using a detail level for
            // all list operations helps to lower response time from the Batch service.
            //
            // The tasks are listed for the jobId passed by the caller, so the job that is monitored is
            // always the same job that is terminated below.
            ODATADetailLevel detail = new ODATADetailLevel(selectClause: "id");
            List <CloudTask> tasks  = await batchClient.JobOperations.ListTasks(jobId, detail).ToListAsync();

            Console.WriteLine("Awaiting task completion, timeout in {0}...", timeout.ToString());

            // We use a TaskStateMonitor to monitor the state of our tasks. In this case, we will wait for all tasks to
            // reach the Completed state.
            TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor();
            bool             timedOut         = await taskStateMonitor.WaitAllAsync(tasks, TaskState.Completed, timeout);

            if (timedOut)
            {
                allTasksSuccessful = false;

                // The job is terminated on timeout as well, with the failure message as the terminate reason.
                await batchClient.JobOperations.TerminateJobAsync(jobId, failureMessage);

                Console.WriteLine(failureMessage);
            }
            else
            {
                await batchClient.JobOperations.TerminateJobAsync(jobId, successMessage);

                // All tasks have reached the "Completed" state, however, this does not guarantee all tasks completed successfully.
                // Here we further check each task's ExecutionInfo property to ensure that it did not encounter a scheduling error
                // or return a non-zero exit code.

                // Update the detail level to populate only the task id and executionInfo properties.
                // We refresh the tasks below, and need only this information for each task.
                detail.SelectClause = "id, executionInfo";

                foreach (CloudTask task in tasks)
                {
                    // Populate the task's properties with the latest info from the Batch service
                    await task.RefreshAsync(detail);

                    if (task.ExecutionInformation.SchedulingError != null)
                    {
                        // A scheduling error indicates a problem starting the task on the node. It is important to note that
                        // the task's state can be "Completed," yet still have encountered a scheduling error.

                        allTasksSuccessful = false;

                        Console.WriteLine("WARNING: Task [{0}] encountered a scheduling error: {1}", task.Id, task.ExecutionInformation.SchedulingError.Message);
                    }
                    else if (task.ExecutionInformation.ExitCode != 0)
                    {
                        // A non-zero exit code may indicate that the application executed by the task encountered an error
                        // during execution. As not every application returns non-zero on failure by default (e.g. robocopy),
                        // your implementation of error checking may differ from this example.

                        allTasksSuccessful = false;

                        Console.WriteLine("WARNING: Task [{0}] returned a non-zero exit code - this may indicate task execution or completion failure.", task.Id);
                    }
                }
            }

            if (allTasksSuccessful)
            {
                Console.WriteLine("Success! All tasks completed successfully within the specified timeout period.");
            }

            return(allTasksSuccessful);
        }
        /// <summary>
        /// Populates Azure Storage with the required files, and
        /// submits the job to the Azure Batch service.
        /// </summary>
        public async Task RunAsync()
        {
            Console.WriteLine("Running with the following settings: ");
            Console.WriteLine("----------------------------------------");
            Console.WriteLine(this.textSearchSettings.ToString());
            Console.WriteLine(this.accountSettings.ToString());

            CloudStorageAccount cloudStorageAccount = new CloudStorageAccount(
                new StorageCredentials(
                    this.accountSettings.StorageAccountName,
                    this.accountSettings.StorageAccountKey),
                this.accountSettings.StorageServiceUrl,
                useHttps: true);

            //Upload resources if required.
            if (this.textSearchSettings.ShouldUploadResources)
            {
                Console.WriteLine("Splitting file: {0} into {1} subfiles",
                                  Constants.TextFilePath,
                                  this.textSearchSettings.NumberOfMapperTasks);

                //Split the text file into the correct number of files for consumption by the mapper tasks.
                FileSplitter  splitter        = new FileSplitter();
                List <string> mapperTaskFiles = await splitter.SplitAsync(
                    Constants.TextFilePath,
                    this.textSearchSettings.NumberOfMapperTasks);

                //Upload the executables together with the split files; Union drops duplicate names.
                List <string> files = Constants.RequiredExecutableFiles.Union(mapperTaskFiles).ToList();

                await SampleHelpers.UploadResourcesAsync(
                    cloudStorageAccount,
                    this.textSearchSettings.BlobContainer,
                    files);
            }

            //Generate a SAS for the container.
            string containerSasUrl = SampleHelpers.ConstructContainerSas(
                cloudStorageAccount,
                this.textSearchSettings.BlobContainer);

            //Set up the Batch Service credentials used to authenticate with the Batch Service.
            BatchSharedKeyCredentials credentials = new BatchSharedKeyCredentials(
                this.accountSettings.BatchServiceUrl,
                this.accountSettings.BatchAccountName,
                this.accountSettings.BatchAccountKey);

            using (BatchClient batchClient = await BatchClient.OpenAsync(credentials))
            {
                //
                // Construct the job properties in local memory before committing them to the Batch Service.
                //

                //Allow enough compute nodes in the pool to run each mapper task, and 1 extra to run the job manager.
                int numberOfPoolComputeNodes = 1 + this.textSearchSettings.NumberOfMapperTasks;

                //Define the pool specification for the pool which the job will run on.
                PoolSpecification poolSpecification = new PoolSpecification()
                {
                    TargetDedicated    = numberOfPoolComputeNodes,
                    VirtualMachineSize = "small",
                    //You can learn more about os families and versions at:
                    //http://azure.microsoft.com/documentation/articles/cloud-services-guestos-update-matrix
                    OSFamily        = "4",
                    TargetOSVersion = "*"
                };

                //Use the auto pool feature of the Batch Service to create a pool when the job is created.
                //This creates a new pool for each job which is added.
                AutoPoolSpecification autoPoolSpecification = new AutoPoolSpecification()
                {
                    AutoPoolIdPrefix   = "TextSearchPool",
                    KeepAlive          = false,
                    PoolLifetimeOption = PoolLifetimeOption.Job,
                    PoolSpecification  = poolSpecification
                };

                //Define the pool information for this job -- it will run on the pool defined by the auto pool specification above.
                PoolInformation poolInformation = new PoolInformation()
                {
                    AutoPoolSpecification = autoPoolSpecification
                };

                //Define the job manager for this job.  This job manager will run first and will submit the tasks for
                //the job.  The job manager is the executable which manages the lifetime of the job
                //and all tasks which should run for the job.  In this case, the job manager submits the mapper and reducer tasks.
                List <ResourceFile> jobManagerResourceFiles = SampleHelpers.GetResourceFiles(containerSasUrl, Constants.RequiredExecutableFiles);
                const string        jobManagerTaskId        = "JobManager";

                JobManagerTask jobManagerTask = new JobManagerTask()
                {
                    ResourceFiles = jobManagerResourceFiles,
                    CommandLine   = Constants.JobManagerExecutable,

                    //Determines if the job should terminate when the job manager process exits.
                    KillJobOnCompletion = true,
                    Id = jobManagerTaskId
                };

                //Create the unbound job in local memory.  An object which exists only in local memory (and not on the Batch Service) is "unbound".
                //The job id is the USERNAME environment variable followed by a UTC timestamp.
                string jobId = Environment.GetEnvironmentVariable("USERNAME") + DateTime.UtcNow.ToString("yyyyMMdd-HHmmss");

                CloudJob unboundJob = batchClient.JobOperations.CreateJob(jobId, poolInformation);
                unboundJob.JobManagerTask = jobManagerTask; //Assign the job manager task to this job

                try
                {
                    //Commit the unbound job to the Batch Service.
                    Console.WriteLine("Adding job: {0} to the Batch Service.", unboundJob.Id);
                    await unboundJob.CommitAsync(); //Issues a request to the Batch Service to add the job which was defined above.

                    //
                    // Wait for the job manager task to complete.
                    //

                    //An object which is backed by a corresponding Batch Service object is "bound."
                    CloudJob boundJob = await batchClient.JobOperations.GetJobAsync(jobId);

                    CloudTask boundJobManagerTask = await boundJob.GetTaskAsync(jobManagerTaskId);

                    TimeSpan maxJobCompletionTimeout = TimeSpan.FromMinutes(30);

                    // Monitor the current tasks to see when they are done.
                    // Occasionally a task may get killed and requeued during an upgrade or hardware failure, including the job manager
                    // task.  The job manager will be re-run in this case.  Robustness against this was not added into the sample for
                    // simplicity, but should be added into any production code.
                    Console.WriteLine("Waiting for job's tasks to complete");

                    TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor();
                    bool             timedOut         = await taskStateMonitor.WaitAllAsync(new List <CloudTask> {
                        boundJobManagerTask
                    }, TaskState.Completed, maxJobCompletionTimeout);

                    Console.WriteLine("Done waiting for job manager task.");

                    await boundJobManagerTask.RefreshAsync();

                    //Check to ensure the job manager task exited successfully.
                    await Helpers.CheckForTaskSuccessAsync(boundJobManagerTask, dumpStandardOutOnTaskSuccess : false);

                    if (timedOut)
                    {
                        throw new TimeoutException("Timed out waiting for job manager task to complete.");
                    }

                    //
                    // Download and write out the reducer tasks output
                    //

                    string reducerText = await SampleHelpers.DownloadBlobTextAsync(cloudStorageAccount, this.textSearchSettings.BlobContainer, Constants.ReducerTaskResultBlobName);

                    Console.WriteLine("Reducer results:");
                    Console.WriteLine(reducerText);
                }
                finally
                {
                    //Delete the job.
                    //This will delete the auto pool associated with the job as long as the pool
                    //keep alive property is set to false.
                    if (this.textSearchSettings.ShouldDeleteJob)
                    {
                        Console.WriteLine("Deleting job {0}", jobId);

                        //Use the asynchronous delete so that this async method never blocks a thread on a
                        //service call (awaiting inside a finally block requires C# 6 or later).
                        await batchClient.JobOperations.DeleteJobAsync(jobId);
                    }

                    //Note that there were files uploaded to a container specified in the
                    //configuration file.  This container will not be deleted or cleaned up by this sample.
                }
            }
        }
Example #7
0
        /// <summary>
        /// Runs the job manager task.
        /// </summary>
        public async Task RunAsync()
        {
            Console.WriteLine("JobManager for account: {0}, job: {1} has started...",
                              this.accountName,
                              this.jobId);
            Console.WriteLine();

            Console.WriteLine("JobManager running with the following settings: ");
            Console.WriteLine("----------------------------------------");
            Console.WriteLine(this.configurationSettings.ToString());

            // Shared-key credentials used to authenticate against the Batch Service.
            BatchSharedKeyCredentials credentials = new BatchSharedKeyCredentials(
                this.configurationSettings.BatchServiceUrl,
                this.configurationSettings.BatchAccountName,
                this.configurationSettings.BatchAccountKey);

            using (BatchClient batchClient = await BatchClient.OpenAsync(credentials))
            {
                // A container SAS lets the Batch Service fetch the files that the mapper and
                // reducer tasks need in order to run.
                string sasUrl = Helpers.ConstructContainerSas(
                    this.configurationSettings.StorageAccountName,
                    this.configurationSettings.StorageAccountKey,
                    this.configurationSettings.StorageServiceUrl,
                    this.configurationSettings.BlobContainer);

                //
                // Phase 1: submit the mapper tasks.
                //
                Console.WriteLine("Submitting {0} mapper tasks.", this.configurationSettings.NumberOfMapperTasks);

                List<CloudTask> mapperTasks = new List<CloudTask>();

                for (int mapperIndex = 0; mapperIndex < this.configurationSettings.NumberOfMapperTasks; mapperIndex++)
                {
                    string mapperTaskId    = Helpers.GetMapperTaskId(mapperIndex);
                    string splitFileName   = Helpers.GetSplitFileName(mapperIndex);
                    string splitFileSource = Helpers.ConstructBlobSource(sasUrl, splitFileName);

                    string mapperCommandLine = string.Format("{0} -MapperTask {1}", Constants.TextSearchExe, splitFileSource);

                    // Files (exes, dlls and configuration files) every mapper task needs on its node.
                    IReadOnlyList<string> requiredFiles = Constants.RequiredExecutableFiles;

                    // Each task gets its own freshly built resource file list.
                    CloudTask mapperTask = new CloudTask(mapperTaskId, mapperCommandLine)
                    {
                        ResourceFiles = Helpers.GetResourceFiles(sasUrl, requiredFiles)
                    };

                    mapperTasks.Add(mapperTask);
                }

                // Adding the whole collection in one call gives the best performance.
                await batchClient.JobOperations.AddTaskAsync(this.jobId, mapperTasks);

                //
                // Phase 2: wait for the mapper tasks to complete.
                //
                Console.WriteLine("Waiting for the mapper tasks to complete...");

                // Select only the mapper tasks: their ids all start with the mapper prefix.
                ODATADetailLevel mapperFilter = new ODATADetailLevel();
                mapperFilter.FilterClause = string.Format("startswith(id, '{0}')", Constants.MapperTaskPrefix);

                IEnumerable<CloudTask> mapperTasksToWatch = batchClient.JobOperations.ListTasks(
                    this.jobId,
                    detailLevel: mapperFilter);

                // The same monitor instance is reused further down for the reducer task.
                TaskStateMonitor monitor = batchClient.Utilities.CreateTaskStateMonitor();

                bool mappersTimedOut = await monitor.WaitAllAsync(mapperTasksToWatch, TaskState.Completed, TimeSpan.FromMinutes(5));

                // List the mapper tasks again so that the state being analyzed is the post-wait state.
                IPagedEnumerable<CloudTask> mapperTasksAfterWait = batchClient.JobOperations.ListTasks(
                    this.jobId,
                    detailLevel: mapperFilter);

                await mapperTasksAfterWait.ForEachAsync(async mapperTask =>
                {
                    Console.WriteLine("Task {0} is in state: {1}", mapperTask.Id, mapperTask.State);

                    await Helpers.CheckForTaskSuccessAsync(mapperTask, dumpStandardOutOnTaskSuccess: false);

                    Console.WriteLine();
                });

                // Without every mapper task in the desired state the job manager cannot continue.
                if (mappersTimedOut)
                {
                    const string mapperTimeoutMessage = "Mapper tasks did not complete within expected timeout.";
                    Console.WriteLine(mapperTimeoutMessage);

                    throw new TimeoutException(mapperTimeoutMessage);
                }

                //
                // Phase 3: create and submit the reducer task.
                //
                string reducerCommandLine = string.Format("{0} -ReducerTask", Constants.TextSearchExe);

                Console.WriteLine("Adding the reducer task: {0}", Constants.ReducerTaskId);

                // The reducer needs the same set of executables and configuration files.
                CloudTask reducerTaskToAdd = new CloudTask(Constants.ReducerTaskId, reducerCommandLine)
                {
                    ResourceFiles = Helpers.GetResourceFiles(sasUrl, Constants.RequiredExecutableFiles)
                };

                await batchClient.JobOperations.AddTaskAsync(this.jobId, reducerTaskToAdd);

                //
                // Phase 4: wait for the reducer task to complete.
                //

                // Fetch the bound (service-backed) reducer task so that it can be monitored.
                CloudTask reducerTask = await batchClient.JobOperations.GetTaskAsync(this.jobId, Constants.ReducerTaskId);
                List<CloudTask> reducerTasks = new List<CloudTask> { reducerTask };

                bool reducerTimedOut = await monitor.WaitAllAsync(reducerTasks, TaskState.Completed, TimeSpan.FromMinutes(2));

                // Pull the latest task information from the Batch Service before inspecting it.
                await reducerTask.RefreshAsync();

                // The check runs even after a timeout so that its diagnostics are emitted for debugging.
                await Helpers.CheckForTaskSuccessAsync(reducerTask, dumpStandardOutOnTaskSuccess: true);

                // The reducer task may not have completed within the expected timeout.
                if (reducerTimedOut)
                {
                    const string reducerTimeoutMessage = "Reducer task did not complete within expected timeout.";

                    Console.WriteLine("Task {0} is in state: {1}", reducerTask.Id, reducerTask.State);

                    Console.WriteLine(reducerTimeoutMessage);
                    throw new TimeoutException(reducerTimeoutMessage);
                }

                // The job manager has completed.
                Console.WriteLine("JobManager completed successfully.");
            }
        }