public static JobSettings FromAppSettings()
{
    var settings = new JobSettings();

    foreach (var field in typeof(JobSettings).GetFields(BindingFlags.Public | BindingFlags.Instance))
    {
        var raw = ConfigurationManager.AppSettings[field.Name];

        if (field.FieldType == typeof(String))
        {
            field.SetValue(settings, raw);
        }
        else if (field.FieldType == typeof(Int32))
        {
            field.SetValue(settings, Int32.TryParse(raw, out var intValue) ? intValue : default(int));
        }
        else if (field.FieldType == typeof(Boolean))
        {
            field.SetValue(settings, Boolean.TryParse(raw, out var boolValue) ? boolValue : default(bool));
        }
        else
        {
            throw new NotSupportedException($"The type { field.FieldType.Name } cannot be parsed.");
        }
    }

    return settings;
}
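// Illustrative sketch, not part of the original sample: FromAppSettings assumes that JobSettings
// exposes public instance fields whose names match appSettings keys and whose types are string,
// int, or bool; a missing or unparseable value falls back to the field type's default.  The keys
// below are examples drawn from the fields used elsewhere in this file.
//
//   <appSettings>
//     <add key="BatchAccountName" value="mybatchaccount" />
//     <add key="RetryMaxCount"    value="5" />
//     <add key="ShouldDeleteJob"  value="true" />
//   </appSettings>
//
//   var settings = JobSettings.FromAppSettings();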
private static async Task<IEnumerable<string>> SubmitMoveTasksAsync(JobSettings settings, BatchClient batchClient, string blobContainerName, IEnumerable<string> blobNames, string jobId)
{
    if (String.IsNullOrEmpty(jobId))
    {
        throw new ArgumentNullException(nameof(jobId));
    }

    // Create the mover tasks, ensuring that the needed executable is staged.
    var moverExe = $"{ typeof(BlobMover.EntryPoint).Assembly.GetName().Name }.exe";
    var fileArtifacts = new ConcurrentBag<ConcurrentDictionary<Type, IFileStagingArtifact>>();
    var stagingStorageAccount = new StagingStorageAccount(settings.BatchBlobStorageName, settings.BatchBlobStorageKey, settings.BatchBlobSTorageUrl);
    var moverFilesToStage = new List<IFileStagingProvider> { new FileToStage(moverExe, stagingStorageAccount) };

    var moverCloudTasks = blobNames.Select(blobName =>
        new CloudTask($"Mover-{ blobName }", $"{ moverExe } { blobContainerName } { blobName }") { FilesToStage = moverFilesToStage });

    await batchClient.JobOperations.AddTaskAsync(jobId, moverCloudTasks, fileStagingArtifacts: fileArtifacts).ConfigureAwait(false);

    // Return the distinct names of the blob containers that file staging created, so that the
    // caller can clean them up once the job is complete.
    return fileArtifacts
        .SelectMany(dict => dict)
        .Select(kvp => kvp.Value)
        .OfType<SequentialFileStagingArtifact>()
        .Select(artifact => artifact.BlobContainerCreated)
        .Distinct();
}
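// Illustrative sketch, not part of the original sample: the names returned by SubmitMoveTasksAsync
// identify the blob containers that file staging created, so a caller can remove them once the job
// has finished.  The helper name and its CloudBlobClient parameter are assumptions for illustration.
private static async Task CleanUpStagingContainersAsync(CloudBlobClient blobClient, IEnumerable<string> stagingContainerNames)
{
    foreach (var stagingContainerName in stagingContainerNames)
    {
        // DeleteIfExistsAsync is safe to call even if staging did not actually create the container.
        await blobClient.GetContainerReference(stagingContainerName).DeleteIfExistsAsync().ConfigureAwait(false);
    }
}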
private static async Task ProcessAsync(JobSettings settings, string jobId, string containerName, string blobName)
{
    // Simulate splitting of the target blob into chunks by just making some copies of the source blob.
    var client = CloudStorageAccount.Parse(settings.BatchBlobStorageConnection).CreateCloudBlobClient();
    var container = client.GetContainerReference(containerName);
    var blob = (CloudBlockBlob)container.GetBlobReferenceFromServer(blobName);

    var leaseDuration = TimeSpan.FromSeconds(60);
    var leaseName = await blob.AcquireLeaseAsync(leaseDuration).ConfigureAwait(false);
    var lease = AccessCondition.GenerateLeaseCondition(leaseName);

    var copies = await Task.WhenAll(Enumerable.Range(0, 5).Select(index =>
        EntryPoint.CopyBlobAsync(container, blob, $"{ Path.GetFileNameWithoutExtension(blobName) }-{ index }{ Path.GetExtension(blobName) }", lease, leaseDuration))).ConfigureAwait(false);

    blob.Delete(DeleteSnapshotsOption.IncludeSnapshots, lease);

    // Create tasks for the job that will process each chunk.  In this case, a chunk is one of the
    // copies that we made to simulate the split.
    using (var batchClient = await BatchClient.OpenAsync(new BatchSharedKeyCredentials(settings.BatchAccountUrl, settings.BatchAccountName, settings.BatchAccountKey)))
    {
        // Add a retry policy.  The built-in policies are No Retry (default), Linear Retry, and Exponential Retry.
        batchClient.CustomBehaviors.Add(RetryPolicyProvider.ExponentialRetryProvider(TimeSpan.FromSeconds(settings.RetryDeltaBackoff), settings.RetryMaxCount));

        try
        {
            // Create tasks with file references here.  The existing resource list is reused, since those files have already been staged.
            //
            // For input, in our case, we're letting each task access its blob directly.  You could also use file staging here, if you'd
            // rather the task not know that it is working against a blob.  However, since you most likely want to process the chunk for
            // redaction and then upload the result, it seems like a better idea to work through storage for both input and output.
            var moverPath = $"{ typeof(BlobMover.EntryPoint).Assembly.GetName().Name }.exe";
            var jobResources = batchClient.JobOperations.GetJob(jobId)?.JobManagerTask.ResourceFiles;

            // Log the resource files associated with the job manager task, for diagnostic purposes.
            if (jobResources != null)
            {
                foreach (var jobResource in jobResources)
                {
                    Console.WriteLine(jobResource);
                }
            }

            var copyTasks = copies
                .Where(result => result.Created)
                .Select(result => new CloudTask(Path.GetFileNameWithoutExtension(result.Name), $"{ moverPath } { containerName } { result.Name } { settings.BatchBlobStorageConnection }") { ResourceFiles = jobResources });

            await batchClient.JobOperations.AddTaskAsync(jobId, copyTasks).ConfigureAwait(false);

            // Wait for all of the work associated with processing the chunks to complete.
            await EntryPoint.WaitForChildTasksAsync(batchClient, jobId, TimeSpan.FromHours(2)).ConfigureAwait(false);

            // This is where we would create the final tasks to process the results for each of the
            // processed chunks.  Afterward, we would have to perform another WaitForChildTasksAsync to
            // ensure that the work is complete before cleaning up.
            //
            // Because we just simulated work by copying the fake chunk blobs, I skipped those steps.  The code
            // would be virtually identical to the other code in the try block above.
            //
            // Alternatively, you could perform the final processing directly here.
        }

        catch (Exception ex)
        {
            // Surfacing information from failed tasks can be a challenge.  I suspect that there are more efficient and easier ways to do so,
            // but for my purposes, a rudimentary capture and upload to blob storage was very helpful.
            //
            // NOTE: This catch block is doing a bunch of things, and things that could fail.  It goes without saying that this isn't best
            //       practice.
            var outBlob = container.GetBlockBlobReference("task-errors.txt");

            using (var memStream = new MemoryStream())
            using (var writer = new StreamWriter(memStream))
            {
                writer.WriteLine(ex.GetType().Name);
                writer.WriteLine(ex.Message);
                writer.WriteLine();
                writer.WriteLine(ex.StackTrace);
                writer.WriteLine();

                if (ex.InnerException != null)
                {
                    writer.WriteLine(ex.InnerException.GetType().Name);
                    writer.WriteLine(ex.InnerException.Message);
                    writer.WriteLine();
                    writer.WriteLine(ex.InnerException.StackTrace);
                    writer.WriteLine();
                }

                writer.Flush();
                memStream.Position = 0;

                outBlob.UploadFromStream(memStream);
            }

            await batchClient.JobOperations.TerminateJobAsync(jobId).ConfigureAwait(false);
            throw;
        }

        finally
        {
            // Clean the resource container used for job file storage.
            //
            // If we used file staging rather than blob storage access to seed the individual processing tasks, we'd have to clean those
            // containers here as well.  Those are a bit awkward to get; you have to create a bag to hold them and discover them when the
            // tasks are added to the job.  See line 205 of https://github.com/Azure/azure-batch-samples/blob/master/CSharp/GettingStarted/02_PoolsAndResourceFiles/JobSubmitter/JobSubmitter.cs
            await client.GetContainerReference(settings.JobResourceContainerName).DeleteIfExistsAsync().ConfigureAwait(false);

            // Delete the job to ensure the tasks are cleaned up.
            if ((!String.IsNullOrEmpty(jobId)) && (settings.ShouldDeleteJob))
            {
                await batchClient.JobOperations.DeleteJobAsync(jobId).ConfigureAwait(false);
            }
        }
    }
}
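// Illustrative sketch, not part of the original sample: WaitForChildTasksAsync is defined elsewhere
// in the sample, but a helper of that shape could be built on the Batch TaskStateMonitor, which
// waits until every task in the job reaches the Completed state or the timeout elapses.  The name
// below is hypothetical, to avoid colliding with the real implementation.
private static async Task WaitForAllTasksSketchAsync(BatchClient batchClient, string jobId, TimeSpan timeout)
{
    // Snapshot the tasks currently associated with the job.
    var tasks = await batchClient.JobOperations.ListTasks(jobId).ToListAsync().ConfigureAwait(false);

    // Wait for every task to reach the Completed state; the monitor surfaces an exception if the
    // timeout window elapses first.
    await batchClient.Utilities.CreateTaskStateMonitor()
        .WhenAll(tasks, TaskState.Completed, timeout)
        .ConfigureAwait(false);
}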