/// <summary>
/// Anonymizes every blob listed under <paramref name="inputBlobPrefix"/> for which
/// <c>IsInputFileInJsonFormat</c> returns true, running the per-blob work through a
/// <c>FhirPartitionedExecutor</c>.
/// </summary>
/// <param name="inputData">Supplies the source/destination connection strings and the <c>SkipExistedFile</c> flag.</param>
/// <param name="inputContainer">Container whose blobs are listed and read.</param>
/// <param name="outputContainer">Container that receives the output blobs.</param>
/// <param name="inputBlobPrefix">Prefix used to list input blobs; also passed to <c>GetOutputBlobName</c>.</param>
/// <param name="outputBlobPrefix">Prefix passed to <c>GetOutputBlobName</c> when deriving the output blob name.</param>
/// <returns>A task that completes when the executor has finished.</returns>
private async Task AnonymizeBlobsInJsonFormat(ActivityInputData inputData, BlobContainerClient inputContainer, BlobContainerClient outputContainer, string inputBlobPrefix, string outputBlobPrefix)
{
    // Per-blob work item handed to the executor. Every path yields an empty string;
    // the actual output is written to the destination blob.
    async Task<string> ProcessBlobAsync(BlobItem item)
    {
        string targetName = GetOutputBlobName(item.Name, inputBlobPrefix, outputBlobPrefix);
        Console.WriteLine($"[{item.Name}]:Processing... output to container '{outputContainer.Name}'");

        var source = new BlobClient(inputData.SourceStorageConnectionString, inputContainer.Name, item.Name, BlobClientOptions.Value);
        var target = new BlockBlobClient(inputData.DestinationStorageConnectionString, outputContainer.Name, targetName, BlobClientOptions.Value);

        if (inputData.SkipExistedFile && await target.ExistsAsync().ConfigureAwait(false))
        {
            // Output is already present and the caller asked to keep existing files.
            Console.WriteLine($"[{item.Name}]:'{targetName}' already exist. Skip");
        }
        else
        {
            // Remove any previous output before writing the new one.
            await target.DeleteIfExistsAsync().ConfigureAwait(false);
            await AnonymizeSingleBlobInJsonFormatAsync(source, target, item.Name, inputBlobPrefix).ConfigureAwait(false);
        }

        return string.Empty;
    }

    // The listing is filtered lazily; blobs are pulled as the reader is consumed.
    IEnumerable<BlobItem> jsonBlobs = inputContainer
        .GetBlobs(BlobTraits.None, BlobStates.None, inputBlobPrefix, default)
        .Where(candidate => IsInputFileInJsonFormat(candidate.Name));
    var blobReader = new FhirEnumerableReader<BlobItem>(jsonBlobs);

    // Typed delegate so the executor constructor receives exactly Func<BlobItem, Task<string>>.
    Func<BlobItem, Task<string>> anonymizeBlobFunc = ProcessBlobAsync;

    // null is passed as the second constructor argument; one blob per batch,
    // with twice as many partitions as logical processors.
    var executor = new FhirPartitionedExecutor<BlobItem, string>(blobReader, null, anonymizeBlobFunc)
    {
        PartitionCount = Environment.ProcessorCount * 2,
        BatchSize = 1,
    };

    await executor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false);
}
/// <summary>
/// Sequentially anonymizes every blob listed under <paramref name="inputBlobPrefix"/> for which
/// <c>IsInputFileInJsonFormat</c> returns false.
/// </summary>
/// <param name="inputData">Supplies the source/destination connection strings and the <c>SkipExistedFile</c> flag.</param>
/// <param name="inputContainer">Container whose blobs are listed and read.</param>
/// <param name="outputContainer">Container that receives the output blobs.</param>
/// <param name="inputBlobPrefix">Prefix used to list input blobs; also passed to <c>GetOutputBlobName</c>.</param>
/// <param name="outputBlobPrefix">Prefix passed to <c>GetOutputBlobName</c> when deriving the output blob name.</param>
/// <returns>A task that completes once all listed blobs have been processed or skipped.</returns>
private async Task AnonymizeBlobsInNdJsonFormat(ActivityInputData inputData, BlobContainerClient inputContainer, BlobContainerClient outputContainer, string inputBlobPrefix, string outputBlobPrefix)
{
    // ConfigureAwait(false) is applied to the async enumeration as well, so that every await
    // in this method consistently avoids capturing the caller's synchronization context.
    await foreach (BlobItem blob in inputContainer.GetBlobsAsync(BlobTraits.None, BlobStates.None, inputBlobPrefix, default).ConfigureAwait(false))
    {
        // Blobs recognised as JSON are handled by AnonymizeBlobsInJsonFormat.
        if (IsInputFileInJsonFormat(blob.Name))
        {
            continue;
        }

        string outputBlobName = GetOutputBlobName(blob.Name, inputBlobPrefix, outputBlobPrefix);
        Console.WriteLine($"[{blob.Name}]:Processing... output to container '{outputContainer.Name}'");

        var inputBlobClient = new BlobClient(inputData.SourceStorageConnectionString, inputContainer.Name, blob.Name, BlobClientOptions.Value);
        var outputBlobClient = new BlockBlobClient(inputData.DestinationStorageConnectionString, outputContainer.Name, outputBlobName, BlobClientOptions.Value);

        if (inputData.SkipExistedFile && await outputBlobClient.ExistsAsync().ConfigureAwait(false))
        {
            // Output is already present and the caller asked to keep existing files.
            Console.WriteLine($"[{blob.Name}]:'{outputBlobName}' already exist. Skip");
            continue;
        }

        // Remove any previous output before writing the new one.
        await outputBlobClient.DeleteIfExistsAsync().ConfigureAwait(false);
        await AnonymizeSingleBlobInNdJsonFormatAsync(inputBlobClient, outputBlobClient, blob.Name, inputBlobPrefix).ConfigureAwait(false);
    }
}
/// <summary>
/// Anonymizes a dataset stored in blob storage: first the blobs handled by
/// <c>AnonymizeBlobsInJsonFormat</c>, then those handled by <c>AnonymizeBlobsInNdJsonFormat</c>.
/// </summary>
/// <param name="inputData">
/// Source/destination connection strings, container names and folder paths, plus processing options.
/// </param>
/// <param name="force">Not read anywhere in this method body.</param>
/// <returns>A task that completes when both passes have finished.</returns>
/// <exception cref="Exception">Thrown when the source container does not exist.</exception>
public async Task AnonymizeDataset(ActivityInputData inputData, bool force)
{
    // Lower-case with the invariant culture: a culture-sensitive ToLower() can produce
    // characters (e.g. the Turkish dotless 'ı') that are not valid in a container name.
    string inputContainerName = inputData.SourceContainerName.ToLowerInvariant();
    var inputContainer = new BlobContainerClient(inputData.SourceStorageConnectionString, inputContainerName);

    bool inputContainerExists = await inputContainer.ExistsAsync().ConfigureAwait(false);
    if (!inputContainerExists)
    {
        throw new Exception($"Error: The specified container {inputData.SourceContainerName} does not exist.");
    }

    string outputContainerName = inputData.DestinationContainerName.ToLowerInvariant();
    var outputContainer = new BlobContainerClient(inputData.DestinationStorageConnectionString, outputContainerName);
    await outputContainer.CreateIfNotExistsAsync().ConfigureAwait(false);

    string inputBlobPrefix = GetBlobPrefixFromFolderPath(inputData.SourceFolderPath);
    string outputBlobPrefix = GetBlobPrefixFromFolderPath(inputData.DestinationFolderPath);

    // The two passes run one after the other; each lists the same prefix and picks its
    // own blobs via IsInputFileInJsonFormat.
    await AnonymizeBlobsInJsonFormat(inputData, inputContainer, outputContainer, inputBlobPrefix, outputBlobPrefix).ConfigureAwait(false);
    await AnonymizeBlobsInNdJsonFormat(inputData, inputContainer, outputContainer, inputBlobPrefix, outputBlobPrefix).ConfigureAwait(false);
}