/// <summary>
        /// Builds one unbound mapper task per split file and submits them all to the job in a single request.
        /// </summary>
        /// <param name="batchClient">The BatchClient used to talk to the Batch service.</param>
        /// <param name="containerSas">The container SAS used to build the blob source and resource file references.</param>
        /// <returns>An asynchronous <see cref="Task"/> representing the submission.</returns>
        private async Task SubmitMapperTasksAsync(BatchClient batchClient, string containerSas)
        {
            Console.WriteLine("Submitting {0} mapper tasks.", this.configurationSettings.NumberOfMapperTasks);

            // Tasks are accumulated locally so that they can be sent with one bulk call below.
            var pendingMapperTasks = new List<CloudTask>();

            for (int mapperIndex = 0; mapperIndex < this.configurationSettings.NumberOfMapperTasks; ++mapperIndex)
            {
                // Each mapper gets its own id and its own split of the input file.
                string mapperId      = Helpers.GetMapperTaskId(mapperIndex);
                string splitBlobName = Helpers.GetSplitFileName(mapperIndex);
                string splitBlobPath = SampleHelpers.ConstructBlobSource(containerSas, splitBlobName);

                // The mapper executable receives the blob path of its split as its only argument.
                string mapperCommandLine = string.Format("{0} {1}", Constants.MapperTaskExecutable, splitBlobPath);

                CloudTask mapperTask = new CloudTask(mapperId, mapperCommandLine)
                {
                    // The exes, dlls and configuration files the mapper needs in order to run.
                    ResourceFiles = SampleHelpers.GetResourceFiles(containerSas, Constants.RequiredExecutableFiles)
                };

                pendingMapperTasks.Add(mapperTask);
            }

            // Hand the whole collection to the Batch service at once instead of issuing one request per task.
            await batchClient.JobOperations.AddTaskAsync(this.jobId, pendingMapperTasks);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the unbound reducer task: it consumes the mapper outputs as resource files, uploads its
        /// own stdout as the job result, and depends on every supplied mapper task.
        /// </summary>
        /// <param name="inputContainerSas">Container SAS used to reference the required executable files.</param>
        /// <param name="outputContainerSas">Container SAS used both to read the mapper outputs and to upload the reducer result.</param>
        /// <param name="mapperTasks">The mapper tasks the reducer must wait for.</param>
        /// <returns>The configured, not yet submitted, reducer task.</returns>
        private CloudTask CreateReducerTask(string inputContainerSas, string outputContainerSas, IEnumerable <CloudTask> mapperTasks)
        {
            CloudTask reducerTask = new CloudTask(Constants.ReducerTaskId, Constants.ReducerTaskExecutable);

            // Start with the exes, dlls and configuration files the reducer itself needs.
            List<ResourceFile> resourceFiles = SampleHelpers.GetResourceFiles(inputContainerSas, Constants.RequiredExecutableFiles);

            // Then add one entry per mapper; the names requested from the output container are the mapper task ids.
            var mapperOutputNames = Enumerable
                .Range(0, this.textSearchSettings.NumberOfMapperTasks)
                .Select(Helpers.GetMapperTaskId);
            resourceFiles.AddRange(SampleHelpers.GetResourceFiles(outputContainerSas, mapperOutputNames));

            reducerTask.ResourceFiles = resourceFiles;

            // The reducer's stdout becomes the result blob for the whole job, uploaded only on task success.
            var resultDestination = new OutputFileDestination(
                container: new OutputFileBlobContainerDestination(outputContainerSas, path: Constants.ReducerTaskResultBlobName));
            var uploadOnSuccess = new OutputFileUploadOptions(uploadCondition: OutputFileUploadCondition.TaskSuccess);

            reducerTask.OutputFiles = new List<OutputFile>
            {
                new OutputFile(
                    filePattern: "..\\stdout.txt",
                    destination: resultDestination,
                    uploadOptions: uploadOnSuccess)
            };

            // Declare the dependency on the mapper tasks so the reducer is ordered after them.
            reducerTask.DependsOn = TaskDependencies.OnTasks(mapperTasks);

            return reducerTask;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Lazily produces one unbound mapper task per split file.
        /// </summary>
        /// <remarks>
        /// This method is an iterator: nothing is created until the result is enumerated, and every
        /// enumeration builds a fresh set of <see cref="CloudTask"/> instances. Callers that need a stable
        /// set of objects should materialize the sequence once (for example with ToList()).
        /// </remarks>
        /// <param name="inputContainerSas">Container SAS used to reference the executables and the split input files.</param>
        /// <param name="outputContainerSas">Container SAS each mapper uploads its stdout to.</param>
        /// <returns>A deferred sequence of configured, not yet submitted, mapper tasks.</returns>
        private IEnumerable <CloudTask> CreateMapperTasks(string inputContainerSas, string outputContainerSas)
        {
            for (int i = 0; i < this.textSearchSettings.NumberOfMapperTasks; i++)
            {
                string taskId            = Helpers.GetMapperTaskId(i);
                string fileBlobName      = Helpers.GetSplitFileName(i);
                string mapperFileBlobSas = SampleHelpers.ConstructBlobSource(inputContainerSas, fileBlobName);

                // The command line refers to the split file by name only; the file itself is attached
                // below as a resource file whose file path is that same name.
                string    commandLine       = string.Format("{0} {1}", Constants.MapperTaskExecutable, fileBlobName);
                CloudTask unboundMapperTask = new CloudTask(taskId, commandLine);

                //The set of files (exes, dlls and configuration files) required to run the mapper task. They have already been uploaded
                //so just get their sas's
                IReadOnlyList <string> mapperTaskRequiredFiles = Constants.RequiredExecutableFiles;
                List <ResourceFile>    mapperTaskResourceFiles = SampleHelpers.GetResourceFiles(inputContainerSas, mapperTaskRequiredFiles);
                mapperTaskResourceFiles.Add(ResourceFile.FromUrl(mapperFileBlobSas, fileBlobName));

                // Upload the mapper's stdout to the output container under the task id, only on task success.
                unboundMapperTask.OutputFiles = new List <OutputFile>
                {
                    new OutputFile(
                        filePattern: "..\\stdout.txt",
                        destination: new OutputFileDestination(
                            container: new OutputFileBlobContainerDestination(outputContainerSas, path: taskId)),
                        uploadOptions: new OutputFileUploadOptions(uploadCondition: OutputFileUploadCondition.TaskSuccess))
                };
                unboundMapperTask.ResourceFiles = mapperTaskResourceFiles;

                yield return(unboundMapperTask);
            }
        }
        /// <summary>
        /// Creates the reducer task with its required executable files and adds it to the job.
        /// </summary>
        /// <param name="batchClient">The BatchClient used to talk to the Batch service.</param>
        /// <param name="containerSas">The container SAS used to build the resource file references.</param>
        /// <returns>An asynchronous <see cref="Task"/> representing the submission.</returns>
        private async Task SubmitReducerTaskAsync(BatchClient batchClient, string containerSas)
        {
            Console.WriteLine("Adding the reducer task: {0}", Constants.ReducerTaskId);

            CloudTask reducerTask = new CloudTask(Constants.ReducerTaskId, Constants.ReducerTaskExecutable)
            {
                // The exes, dlls and configuration files the reducer needs in order to run.
                ResourceFiles = SampleHelpers.GetResourceFiles(containerSas, Constants.RequiredExecutableFiles)
            };

            // Ask the Batch service to add the single reducer task to the job.
            await batchClient.JobOperations.AddTaskAsync(this.jobId, reducerTask);
        }
        /// <summary>
        /// Creates a job on the configured pool and commits it with a job manager task attached.
        /// The job manager's required files are uploaded to blob storage first and referenced as resource files.
        /// </summary>
        /// <param name="batchClient">The BatchClient to use when interacting with the Batch service.</param>
        /// <param name="storageAccount">The cloud storage account to upload files to.</param>
        /// <param name="jobId">The ID of the job.</param>
        /// <returns>An asynchronous <see cref="Task"/> representing the operation.</returns>
        private async Task SubmitJobAsync(BatchClient batchClient, CloudStorageAccount storageAccount, string jobId)
        {
            // Start from an empty unbound job and point it at the configured pool.
            CloudJob job = batchClient.JobOperations.CreateJob();
            job.Id = jobId;
            job.PoolInformation = new PoolInformation { PoolId = this.jobManagerSettings.PoolId };

            // The job manager's files must be in blob storage before they can be referenced as resource files.
            await SampleHelpers.UploadResourcesAsync(storageAccount, this.jobManagerSettings.BlobContainer, JobManagerRequiredFiles);

            string blobContainerSas = SampleHelpers.ConstructContainerSas(storageAccount, this.jobManagerSettings.BlobContainer);
            List<ResourceFile> managerResourceFiles = SampleHelpers.GetResourceFiles(blobContainerSas, JobManagerRequiredFiles);

            // Account details handed to the job manager through environment variables.
            // The batch account name is intentionally not passed: the batch service already provides
            // an environment variable containing the account name to each task.
            var managerEnvironment = new List<EnvironmentSetting>
            {
                new EnvironmentSetting("SAMPLE_BATCH_KEY", this.accountSettings.BatchAccountKey),
                new EnvironmentSetting("SAMPLE_BATCH_URL", this.accountSettings.BatchServiceUrl),

                new EnvironmentSetting("SAMPLE_STORAGE_ACCOUNT", this.accountSettings.StorageAccountName),
                new EnvironmentSetting("SAMPLE_STORAGE_KEY", this.accountSettings.StorageAccountKey),
                new EnvironmentSetting("SAMPLE_STORAGE_URL", this.accountSettings.StorageServiceUrl),
            };

            JobManagerTask managerTask = new JobManagerTask();
            managerTask.Id                  = JobManagerTaskId;
            managerTask.CommandLine         = JobManagerTaskExe;
            managerTask.ResourceFiles       = managerResourceFiles;
            managerTask.KillJobOnCompletion = true;
            managerTask.EnvironmentSettings = managerEnvironment;

            job.JobManagerTask = managerTask;

            // Committing the unbound job creates it in the service.
            await job.CommitAsync();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Populates Azure Storage with the required files, and
        /// submits the job to the Azure Batch service.
        /// </summary>
        /// <remarks>
        /// Steps, in order: print the settings; optionally split the text file and upload it together with
        /// the required executables; create a job on an auto pool with a job manager task; wait up to
        /// 30 minutes for the job manager task to complete; download and print the reducer result blob.
        /// The job is deleted in a finally block when <c>ShouldDeleteJob</c> is set. The uploaded blobs are
        /// never cleaned up by this method.
        /// </remarks>
        /// <returns>An asynchronous <see cref="Task"/> representing the whole run.</returns>
        /// <exception cref="TimeoutException">The job manager task did not complete within the timeout.</exception>
        public async Task RunAsync()
        {
            Console.WriteLine("Running with the following settings: ");
            Console.WriteLine("----------------------------------------");
            Console.WriteLine(this.textSearchSettings.ToString());
            Console.WriteLine(this.accountSettings.ToString());

            CloudStorageAccount cloudStorageAccount = new CloudStorageAccount(
                new StorageCredentials(
                    this.accountSettings.StorageAccountName,
                    this.accountSettings.StorageAccountKey),
                this.accountSettings.StorageServiceUrl,
                useHttps: true);

            //Upload resources if required.
            if (this.textSearchSettings.ShouldUploadResources)
            {
                Console.WriteLine("Splitting file: {0} into {1} subfiles",
                                  Constants.TextFilePath,
                                  this.textSearchSettings.NumberOfMapperTasks);

                //Split the text file into the correct number of files for consumption by the mapper tasks.
                FileSplitter  splitter        = new FileSplitter();
                List <string> mapperTaskFiles = await splitter.SplitAsync(
                    Constants.TextFilePath,
                    this.textSearchSettings.NumberOfMapperTasks);

                // Upload the executables and the split files together in one call.
                List <string> files = Constants.RequiredExecutableFiles.Union(mapperTaskFiles).ToList();

                await SampleHelpers.UploadResourcesAsync(
                    cloudStorageAccount,
                    this.textSearchSettings.BlobContainer,
                    files);
            }

            //Generate a SAS for the container.
            string containerSasUrl = SampleHelpers.ConstructContainerSas(
                cloudStorageAccount,
                this.textSearchSettings.BlobContainer);

            //Set up the Batch Service credentials used to authenticate with the Batch Service.
            BatchSharedKeyCredentials credentials = new BatchSharedKeyCredentials(
                this.accountSettings.BatchServiceUrl,
                this.accountSettings.BatchAccountName,
                this.accountSettings.BatchAccountKey);

            using (BatchClient batchClient = await BatchClient.OpenAsync(credentials))
            {
                //
                // Construct the job properties in local memory before committing them to the Batch Service.
                //

                //Allow enough compute nodes in the pool to run each mapper task, and 1 extra to run the job manager.
                int numberOfPoolComputeNodes = 1 + this.textSearchSettings.NumberOfMapperTasks;

                //Define the pool specification for the pool which the job will run on.
                PoolSpecification poolSpecification = new PoolSpecification()
                {
                    TargetDedicated    = numberOfPoolComputeNodes,
                    VirtualMachineSize = "small",
                    //You can learn more about os families and versions at:
                    //http://azure.microsoft.com/documentation/articles/cloud-services-guestos-update-matrix
                    OSFamily        = "4",
                    TargetOSVersion = "*"
                };

                //Use the auto pool feature of the Batch Service to create a pool when the job is created.
                //This creates a new pool for each job which is added.
                AutoPoolSpecification autoPoolSpecification = new AutoPoolSpecification()
                {
                    AutoPoolIdPrefix   = "TextSearchPool",
                    KeepAlive          = false,
                    PoolLifetimeOption = PoolLifetimeOption.Job,
                    PoolSpecification  = poolSpecification
                };

                //Define the pool information for this job -- it will run on the pool defined by the auto pool specification above.
                PoolInformation poolInformation = new PoolInformation()
                {
                    AutoPoolSpecification = autoPoolSpecification
                };

                //Define the job manager for this job.  This job manager will run first and will submit the tasks for
                //the job.  The job manager is the executable which manages the lifetime of the job
                //and all tasks which should run for the job.  In this case, the job manager submits the mapper and reducer tasks.
                List <ResourceFile> jobManagerResourceFiles = SampleHelpers.GetResourceFiles(containerSasUrl, Constants.RequiredExecutableFiles);
                const string        jobManagerTaskId        = "JobManager";

                JobManagerTask jobManagerTask = new JobManagerTask()
                {
                    ResourceFiles = jobManagerResourceFiles,
                    CommandLine   = Constants.JobManagerExecutable,

                    //Determines if the job should terminate when the job manager process exits.
                    KillJobOnCompletion = true,
                    Id = jobManagerTaskId
                };

                //Create the unbound job in local memory.  An object which exists only in local memory (and not on the Batch Service) is "unbound".
                //The job id is the USERNAME environment variable followed by a UTC timestamp.
                string jobId = Environment.GetEnvironmentVariable("USERNAME") + DateTime.UtcNow.ToString("yyyyMMdd-HHmmss");

                CloudJob unboundJob = batchClient.JobOperations.CreateJob(jobId, poolInformation);
                unboundJob.JobManagerTask = jobManagerTask; //Assign the job manager task to this job

                try
                {
                    //Commit the unbound job to the Batch Service.
                    Console.WriteLine("Adding job: {0} to the Batch Service.", unboundJob.Id);
                    await unboundJob.CommitAsync(); //Issues a request to the Batch Service to add the job which was defined above.

                    //
                    // Wait for the job manager task to complete.
                    //

                    //An object which is backed by a corresponding Batch Service object is "bound."
                    CloudJob boundJob = await batchClient.JobOperations.GetJobAsync(jobId);

                    CloudTask boundJobManagerTask = await boundJob.GetTaskAsync(jobManagerTaskId);

                    TimeSpan maxJobCompletionTimeout = TimeSpan.FromMinutes(30);

                    // Monitor the current tasks to see when they are done.
                    // Occasionally a task may get killed and requeued during an upgrade or hardware failure, including the job manager
                    // task.  The job manager will be re-run in this case.  Robustness against this was not added into the sample for
                    // simplicity, but should be added into any production code.
                    Console.WriteLine("Waiting for job's tasks to complete");

                    TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor();
                    bool             timedOut         = await taskStateMonitor.WaitAllAsync(new List <CloudTask> {
                        boundJobManagerTask
                    }, TaskState.Completed, maxJobCompletionTimeout);

                    Console.WriteLine("Done waiting for job manager task.");

                    // Re-read the task from the service so the checks below see its latest state.
                    await boundJobManagerTask.RefreshAsync();

                    //Check to ensure the job manager task exited successfully.
                    // NOTE(review): this runs before the timeout check; if the helper throws for a task that
                    // has not completed, the TimeoutException below is never reached -- confirm intended order.
                    await Helpers.CheckForTaskSuccessAsync(boundJobManagerTask, dumpStandardOutOnTaskSuccess : false);

                    if (timedOut)
                    {
                        throw new TimeoutException("Timed out waiting for job manager task to complete.");
                    }

                    //
                    // Download and write out the reducer tasks output
                    //

                    string reducerText = await SampleHelpers.DownloadBlobTextAsync(cloudStorageAccount, this.textSearchSettings.BlobContainer, Constants.ReducerTaskResultBlobName);

                    Console.WriteLine("Reducer results:");
                    Console.WriteLine(reducerText);
                }
                finally
                {
                    //Delete the job.
                    //This will delete the auto pool associated with the job as long as the pool
                    //keep alive property is set to false.
                    if (this.textSearchSettings.ShouldDeleteJob)
                    {
                        Console.WriteLine("Deleting job {0}", jobId);
                        // NOTE(review): synchronous call inside an async method; it also runs when the commit
                        // above failed. Kept as-is -- consider the async variant if the language version allows
                        // await in a finally block.
                        batchClient.JobOperations.DeleteJob(jobId);
                    }

                    //Note that there were files uploaded to a container specified in the
                    //configuration file.  This container will not be deleted or cleaned up by this sample.
                }
            }
        }