/// <summary>
/// Anonymizes every blob listed under <paramref name="inputBlobPrefix"/> in the input container
/// whose name is accepted by <c>IsInputFileInJsonFormat</c>, writing each result to the output container.
/// </summary>
/// <param name="inputData">Supplies the source/destination connection strings and the skip-existing flag.</param>
/// <param name="inputContainer">Container whose blobs are listed and read.</param>
/// <param name="outputContainer">Container that receives the anonymized blobs.</param>
/// <param name="inputBlobPrefix">Prefix used to list input blobs; also passed to the output-name mapping.</param>
/// <param name="outputBlobPrefix">Prefix passed to <c>GetOutputBlobName</c> when building the output blob name.</param>
/// <returns>A task that completes when the partitioned executor has finished.</returns>
private async Task AnonymizeBlobsInJsonFormat(ActivityInputData inputData, BlobContainerClient inputContainer, BlobContainerClient outputContainer, string inputBlobPrefix, string outputBlobPrefix)
{
    // The listing is left as a deferred sequence; the reader pulls from it on demand.
    var listedBlobs = inputContainer.GetBlobs(BlobTraits.None, BlobStates.None, inputBlobPrefix, default);
    IEnumerable<BlobItem> jsonBlobs = listedBlobs.Where(item => IsInputFileInJsonFormat(item.Name));
    var blobReader = new FhirEnumerableReader<BlobItem>(jsonBlobs);

    // Handles a single blob. The executor expects a string result, so an empty string is returned.
    async Task<string> ProcessBlobAsync(BlobItem blob)
    {
        string targetName = GetOutputBlobName(blob.Name, inputBlobPrefix, outputBlobPrefix);
        Console.WriteLine($"[{blob.Name}]:Processing... output to container '{outputContainer.Name}'");

        var sourceClient = new BlobClient(inputData.SourceStorageConnectionString, inputContainer.Name, blob.Name, BlobClientOptions.Value);
        var targetClient = new BlockBlobClient(inputData.DestinationStorageConnectionString, outputContainer.Name, targetName, BlobClientOptions.Value);

        // The existence check is only issued when skipping is enabled.
        if (inputData.SkipExistedFile)
        {
            bool alreadyThere = await targetClient.ExistsAsync().ConfigureAwait(false);
            if (alreadyThere)
            {
                Console.WriteLine($"[{blob.Name}]:'{targetName}' already exist. Skip");
                return string.Empty;
            }
        }

        // Remove any previous output before writing the new anonymized blob.
        await targetClient.DeleteIfExistsAsync().ConfigureAwait(false);
        await AnonymizeSingleBlobInJsonFormatAsync(sourceClient, targetClient, blob.Name, inputBlobPrefix).ConfigureAwait(false);
        return string.Empty;
    }

    Func<BlobItem, Task<string>> anonymizeBlobFunc = ProcessBlobAsync;
    var partitionedExecutor = new FhirPartitionedExecutor<BlobItem, string>(blobReader, null, anonymizeBlobFunc)
    {
        PartitionCount = Environment.ProcessorCount * 2,
        BatchSize = 1,
    };

    await partitionedExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false);
}
/// <summary>
/// Reads every item from a <see cref="FhirEnumerableReader{T}"/> built over the strings "0".."986"
/// and checks that items come back in the original order and that exactly 987 items are read.
/// </summary>
public async Task GivenAFhirEnumerableReader_ReadData_ResultShouldBeReturnedInOrder()
{
    int end = 987;
    // Lambda parameter is named differently from the counter below to avoid shadowing.
    var reader = new FhirEnumerableReader<string>(Enumerable.Range(0, end).Select(value => value.ToString()));

    int i = 0;
    string nextLine = null;

    // The loop treats a null result from NextAsync as the end of the data.
    while ((nextLine = await reader.NextAsync()) != null)
    {
        // xUnit's Assert.Equal takes (expected, actual).
        Assert.Equal(i.ToString(), nextLine);
        i++;
    }

    Assert.Equal(end, i);
}
/// <summary>
/// Anonymizes every <c>*.json</c> file found in the input folder (recursively when
/// <c>_options.IsRecursive</c> is set) by running <c>FileAnonymize</c> through a partitioned executor,
/// and prints running completed/failed totals to the console as progress is reported.
/// </summary>
/// <returns>A task that completes when the executor has finished.</returns>
public async Task AnonymizeAsync()
{
    var directorySearchOption = _options.IsRecursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
    var resourceFileList = Directory.EnumerateFiles(_inputFolder, "*.json", directorySearchOption).ToList();
    // List<T>.Count property instead of the LINQ Count() extension.
    Console.WriteLine($"Find {resourceFileList.Count} json resource files in '{_inputFolder}'.");

    var reader = new FhirEnumerableReader<string>(resourceFileList);
    var executor = new FhirPartitionedExecutor<string, string>(reader, null)
    {
        KeepOrder = false,
        BatchSize = 1,
        PartitionCount = Environment.ProcessorCount * 2,
    };

    executor.AnonymizerFunctionAsync = async file =>
    {
        try
        {
            return await FileAnonymize(file).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // Log which file failed, then rethrow so the failure still propagates to the executor.
            Console.Error.WriteLine($"Error:\nResource: {file}\nErrorMessage: {ex}");
            throw;
        }
    };

    Stopwatch stopWatch = Stopwatch.StartNew();
    int completedCount = 0;
    int failedCount = 0;

    var progress = new Progress<BatchAnonymizeProgressDetail>();
    progress.ProgressChanged += (obj, args) =>
    {
        // Progress<T> handlers may run concurrently on thread-pool threads when no
        // SynchronizationContext was captured, so log the totals returned by Interlocked.Add
        // rather than re-reading the shared counters without synchronisation.
        int completedSoFar = Interlocked.Add(ref completedCount, args.ProcessCompleted);
        int failedSoFar = Interlocked.Add(ref failedCount, args.ProcessFailed);
        Console.WriteLine($"[{stopWatch.Elapsed}][tid:{args.CurrentThreadId}]: {completedSoFar} Process completed. {failedSoFar} Process failed.");
    };

    await executor.ExecuteAsync(cancellationToken: CancellationToken.None, false, progress).ConfigureAwait(false);
}