Пример #1
0
        /// <summary>
        /// Populates Azure Storage with the required files, and
        /// submits the job to the Azure Batch service.
        /// </summary>
        public async Task RunAsync()
        {
            Console.WriteLine("Running with the following settings: ");
            Console.WriteLine("----------------------------------------");
            Console.WriteLine(this.textSearchSettings.ToString());
            Console.WriteLine(this.accountSettings.ToString());

            CloudStorageAccount cloudStorageAccount = new CloudStorageAccount(
                new StorageCredentials(
                    this.accountSettings.StorageAccountName,
                    this.accountSettings.StorageAccountKey),
                this.accountSettings.StorageServiceUrl,
                useHttps: true);

            //Upload resources if required.
            if (this.textSearchSettings.ShouldUploadResources)
            {
                Console.WriteLine("Splitting file: {0} into {1} subfiles",
                                  Constants.TextFilePath,
                                  this.textSearchSettings.NumberOfMapperTasks);

                //Split the text file into the correct number of files for consumption by the mapper tasks.
                FileSplitter  splitter        = new FileSplitter();
                List <string> mapperTaskFiles = await splitter.SplitAsync(
                    Constants.TextFilePath,
                    this.textSearchSettings.NumberOfMapperTasks);

                List <string> files = Constants.RequiredExecutableFiles.Union(mapperTaskFiles).ToList();

                await SampleHelpers.UploadResourcesAsync(
                    cloudStorageAccount,
                    this.textSearchSettings.BlobContainer,
                    files);
            }

            //Generate a SAS for the container.
            string containerSasUrl = SampleHelpers.ConstructContainerSas(
                cloudStorageAccount,
                this.textSearchSettings.BlobContainer);

            //Set up the Batch Service credentials used to authenticate with the Batch Service.
            BatchSharedKeyCredentials credentials = new BatchSharedKeyCredentials(
                this.accountSettings.BatchServiceUrl,
                this.accountSettings.BatchAccountName,
                this.accountSettings.BatchAccountKey);

            using (BatchClient batchClient = await BatchClient.OpenAsync(credentials))
            {
                //
                // Construct the job properties in local memory before commiting them to the Batch Service.
                //

                //Allow enough compute nodes in the pool to run each mapper task, and 1 extra to run the job manager.
                int numberOfPoolComputeNodes = 1 + this.textSearchSettings.NumberOfMapperTasks;

                //Define the pool specification for the pool which the job will run on.
                PoolSpecification poolSpecification = new PoolSpecification()
                {
                    TargetDedicated    = numberOfPoolComputeNodes,
                    VirtualMachineSize = "small",
                    //You can learn more about os families and versions at:
                    //http://azure.microsoft.com/documentation/articles/cloud-services-guestos-update-matrix
                    OSFamily        = "4",
                    TargetOSVersion = "*"
                };

                //Use the auto pool feature of the Batch Service to create a pool when the job is created.
                //This creates a new pool for each job which is added.
                AutoPoolSpecification autoPoolSpecification = new AutoPoolSpecification()
                {
                    AutoPoolIdPrefix   = "TextSearchPool",
                    KeepAlive          = false,
                    PoolLifetimeOption = PoolLifetimeOption.Job,
                    PoolSpecification  = poolSpecification
                };

                //Define the pool information for this job -- it will run on the pool defined by the auto pool specification above.
                PoolInformation poolInformation = new PoolInformation()
                {
                    AutoPoolSpecification = autoPoolSpecification
                };

                //Define the job manager for this job.  This job manager will run first and will submit the tasks for
                //the job.  The job manager is the executable which manages the lifetime of the job
                //and all tasks which should run for the job.  In this case, the job manager submits the mapper and reducer tasks.
                List <ResourceFile> jobManagerResourceFiles = SampleHelpers.GetResourceFiles(containerSasUrl, Constants.RequiredExecutableFiles);
                const string        jobManagerTaskId        = "JobManager";

                JobManagerTask jobManagerTask = new JobManagerTask()
                {
                    ResourceFiles = jobManagerResourceFiles,
                    CommandLine   = Constants.JobManagerExecutable,

                    //Determines if the job should terminate when the job manager process exits.
                    KillJobOnCompletion = true,
                    Id = jobManagerTaskId
                };

                //Create the unbound job in local memory.  An object which exists only in local memory (and not on the Batch Service) is "unbound".
                string jobId = Environment.GetEnvironmentVariable("USERNAME") + DateTime.UtcNow.ToString("yyyyMMdd-HHmmss");

                CloudJob unboundJob = batchClient.JobOperations.CreateJob(jobId, poolInformation);
                unboundJob.JobManagerTask = jobManagerTask; //Assign the job manager task to this job

                try
                {
                    //Commit the unbound job to the Batch Service.
                    Console.WriteLine("Adding job: {0} to the Batch Service.", unboundJob.Id);
                    await unboundJob.CommitAsync(); //Issues a request to the Batch Service to add the job which was defined above.

                    //
                    // Wait for the job manager task to complete.
                    //

                    //An object which is backed by a corresponding Batch Service object is "bound."
                    CloudJob boundJob = await batchClient.JobOperations.GetJobAsync(jobId);

                    CloudTask boundJobManagerTask = await boundJob.GetTaskAsync(jobManagerTaskId);

                    TimeSpan maxJobCompletionTimeout = TimeSpan.FromMinutes(30);

                    // Monitor the current tasks to see when they are done.
                    // Occasionally a task may get killed and requeued during an upgrade or hardware failure, including the job manager
                    // task.  The job manager will be re-run in this case.  Robustness against this was not added into the sample for
                    // simplicity, but should be added into any production code.
                    Console.WriteLine("Waiting for job's tasks to complete");

                    TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor();
                    bool             timedOut         = await taskStateMonitor.WaitAllAsync(new List <CloudTask> {
                        boundJobManagerTask
                    }, TaskState.Completed, maxJobCompletionTimeout);

                    Console.WriteLine("Done waiting for job manager task.");

                    await boundJobManagerTask.RefreshAsync();

                    //Check to ensure the job manager task exited successfully.
                    await Helpers.CheckForTaskSuccessAsync(boundJobManagerTask, dumpStandardOutOnTaskSuccess : false);

                    if (timedOut)
                    {
                        throw new TimeoutException(string.Format("Timed out waiting for job manager task to complete."));
                    }

                    //
                    // Download and write out the reducer tasks output
                    //

                    string reducerText = await SampleHelpers.DownloadBlobTextAsync(cloudStorageAccount, this.textSearchSettings.BlobContainer, Constants.ReducerTaskResultBlobName);

                    Console.WriteLine("Reducer reuslts:");
                    Console.WriteLine(reducerText);
                }
                finally
                {
                    //Delete the job.
                    //This will delete the auto pool associated with the job as long as the pool
                    //keep alive property is set to false.
                    if (this.textSearchSettings.ShouldDeleteJob)
                    {
                        Console.WriteLine("Deleting job {0}", jobId);
                        batchClient.JobOperations.DeleteJob(jobId);
                    }

                    //Note that there were files uploaded to a container specified in the
                    //configuration file.  This container will not be deleted or cleaned up by this sample.
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Populates Azure Storage with the required files, and
        /// submits the job to the Azure Batch service.
        /// </summary>
        public async Task RunAsync()
        {
            Console.WriteLine("Running with the following settings: ");
            Console.WriteLine("----------------------------------------");
            Console.WriteLine(this.textSearchSettings.ToString());
            Console.WriteLine(this.accountSettings.ToString());

            CloudStorageAccount cloudStorageAccount = new CloudStorageAccount(
                new StorageCredentials(
                    this.accountSettings.StorageAccountName,
                    this.accountSettings.StorageAccountKey),
                this.accountSettings.StorageServiceUrl,
                useHttps: true);

            //Upload resources if required
            Console.WriteLine($"Creating container {this.textSearchSettings.OutputBlobContainer} if it doesn't exist...");
            var blobClient      = cloudStorageAccount.CreateCloudBlobClient();
            var outputContainer = blobClient.GetContainerReference(this.textSearchSettings.OutputBlobContainer);
            await outputContainer.CreateIfNotExistsAsync();

            if (this.textSearchSettings.ShouldUploadResources)
            {
                Console.WriteLine("Splitting file: {0} into {1} subfiles",
                                  Constants.TextFilePath,
                                  this.textSearchSettings.NumberOfMapperTasks);

                //Split the text file into the correct number of files for consumption by the mapper tasks.
                FileSplitter  splitter        = new FileSplitter();
                List <string> mapperTaskFiles = await splitter.SplitAsync(
                    Constants.TextFilePath,
                    this.textSearchSettings.NumberOfMapperTasks);

                List <string> files = Constants.RequiredExecutableFiles.Union(mapperTaskFiles).ToList();

                await SampleHelpers.UploadResourcesAsync(
                    cloudStorageAccount,
                    this.textSearchSettings.InputBlobContainer,
                    files);
            }

            //Generate a SAS for the container.
            string inputContainerSasUrl = SampleHelpers.ConstructContainerSas(
                cloudStorageAccount,
                this.textSearchSettings.InputBlobContainer,
                permissions: WindowsAzure.Storage.Blob.SharedAccessBlobPermissions.Read);

            string outputContainerSasUrl = SampleHelpers.ConstructContainerSas(
                cloudStorageAccount,
                this.textSearchSettings.OutputBlobContainer,
                permissions: WindowsAzure.Storage.Blob.SharedAccessBlobPermissions.Read |
                WindowsAzure.Storage.Blob.SharedAccessBlobPermissions.Write);

            //Set up the Batch Service credentials used to authenticate with the Batch Service.
            BatchSharedKeyCredentials credentials = new BatchSharedKeyCredentials(
                this.accountSettings.BatchServiceUrl,
                this.accountSettings.BatchAccountName,
                this.accountSettings.BatchAccountKey);

            using (BatchClient batchClient = BatchClient.Open(credentials))
            {
                //
                // Construct the job properties in local memory before commiting them to the Batch Service.
                //

                //Allow enough compute nodes in the pool to run each mapper task
                int numberOfPoolComputeNodes = this.textSearchSettings.NumberOfMapperTasks;

                //Define the pool specification for the pool which the job will run on.
                PoolSpecification poolSpecification = new PoolSpecification()
                {
                    TargetDedicatedComputeNodes = numberOfPoolComputeNodes,
                    VirtualMachineSize          = "standard_d1_v2",
                    //You can learn more about os families and versions at:
                    //http://azure.microsoft.com/documentation/articles/cloud-services-guestos-update-matrix
                    CloudServiceConfiguration = new CloudServiceConfiguration(osFamily: "5")
                };

                //Use the auto pool feature of the Batch Service to create a pool when the job is created.
                //This creates a new pool for each job which is added.
                AutoPoolSpecification autoPoolSpecification = new AutoPoolSpecification()
                {
                    AutoPoolIdPrefix   = "TextSearchPool",
                    KeepAlive          = false,
                    PoolLifetimeOption = PoolLifetimeOption.Job,
                    PoolSpecification  = poolSpecification
                };

                //Define the pool information for this job -- it will run on the pool defined by the auto pool specification above.
                PoolInformation poolInformation = new PoolInformation()
                {
                    AutoPoolSpecification = autoPoolSpecification
                };

                //Create the unbound job in local memory.  An object which exists only in local memory (and not on the Batch Service) is "unbound".
                string jobId = Environment.GetEnvironmentVariable("USERNAME") + DateTime.UtcNow.ToString("yyyyMMdd-HHmmss");

                CloudJob unboundJob = batchClient.JobOperations.CreateJob(jobId, poolInformation);
                unboundJob.UsesTaskDependencies = true;

                try
                {
                    //Commit the unbound job to the Batch Service.
                    Console.WriteLine($"Adding job: {unboundJob.Id} to the Batch Service.");
                    await unboundJob.CommitAsync(); //Issues a request to the Batch Service to add the job which was defined above.

                    // Add tasks to the job
                    var mapperTasks = CreateMapperTasks(inputContainerSasUrl, outputContainerSasUrl);
                    var reducerTask = CreateReducerTask(inputContainerSasUrl, outputContainerSasUrl, mapperTasks);

                    var tasksToAdd = Enumerable.Concat(mapperTasks, new[] { reducerTask });

                    //Submit the unbound task collection to the Batch Service.
                    //Use the AddTask method which takes a collection of CloudTasks for the best performance.
                    Console.WriteLine("Submitting {0} mapper tasks", this.textSearchSettings.NumberOfMapperTasks);
                    Console.WriteLine("Submitting 1 reducer task");
                    await batchClient.JobOperations.AddTaskAsync(jobId, tasksToAdd);

                    //An object which is backed by a corresponding Batch Service object is "bound."
                    CloudJob boundJob = await batchClient.JobOperations.GetJobAsync(jobId);

                    // Update the job now that we've added tasks so that when all of the tasks which we have added
                    // are complete, the job will automatically move to the completed state.
                    boundJob.OnAllTasksComplete = OnAllTasksComplete.TerminateJob;
                    boundJob.Commit();
                    boundJob.Refresh();

                    //
                    // Wait for the tasks to complete.
                    //
                    List <CloudTask> tasks = await batchClient.JobOperations.ListTasks(jobId).ToListAsync();

                    TimeSpan maxJobCompletionTimeout = TimeSpan.FromMinutes(30);

                    // Monitor the current tasks to see when they are done.
                    // Occasionally a task may get killed and requeued during an upgrade or hardware failure,
                    // Robustness against this was not added into the sample for
                    // simplicity, but should be added into any production code.
                    Console.WriteLine("Waiting for job's tasks to complete");

                    TaskStateMonitor taskStateMonitor = batchClient.Utilities.CreateTaskStateMonitor();
                    try
                    {
                        await taskStateMonitor.WhenAll(tasks, TaskState.Completed, maxJobCompletionTimeout);
                    }
                    finally
                    {
                        Console.WriteLine("Done waiting for all tasks to complete");

                        // Refresh the task list
                        tasks = await batchClient.JobOperations.ListTasks(jobId).ToListAsync();

                        //Check to ensure the job manager task exited successfully.
                        foreach (var task in tasks)
                        {
                            await Helpers.CheckForTaskSuccessAsync(task, dumpStandardOutOnTaskSuccess : false);
                        }
                    }

                    //
                    // Download and write out the reducer tasks output
                    //
                    string reducerText = await SampleHelpers.DownloadBlobTextAsync(cloudStorageAccount, this.textSearchSettings.OutputBlobContainer, Constants.ReducerTaskResultBlobName);

                    Console.WriteLine("Reducer reuslts:");
                    Console.WriteLine(reducerText);
                }
                finally
                {
                    //Delete the job.
                    //This will delete the auto pool associated with the job as long as the pool
                    //keep alive property is set to false.
                    if (this.textSearchSettings.ShouldDeleteJob)
                    {
                        Console.WriteLine($"Deleting job {jobId}");
                        await batchClient.JobOperations.DeleteJobAsync(jobId);
                    }

                    if (this.textSearchSettings.ShouldDeleteContainers)
                    {
                        Console.WriteLine("Deleting containers");
                        var inputContainer = blobClient.GetContainerReference(this.textSearchSettings.InputBlobContainer);
                        await inputContainer.DeleteIfExistsAsync();

                        await outputContainer.DeleteIfExistsAsync();
                    }
                }
            }
        }