/// <summary>
/// Anonymizes every blob listed under <paramref name="inputBlobPrefix"/> whose name
/// passes <c>IsInputFileInJsonFormat</c>, dispatching the per-blob work through a
/// <c>FhirPartitionedExecutor</c>.
/// </summary>
/// <param name="inputData">Supplies the source/destination connection strings and the <c>SkipExistedFile</c> flag.</param>
/// <param name="inputContainer">Container whose blobs are listed and read.</param>
/// <param name="outputContainer">Container that receives the anonymized blobs.</param>
/// <param name="inputBlobPrefix">Prefix used to filter the blob listing.</param>
/// <param name="outputBlobPrefix">Prefix handed to <c>GetOutputBlobName</c> when deriving output names.</param>
/// <returns>A task that completes when the executor has finished.</returns>
private async Task AnonymizeBlobsInJsonFormat(ActivityInputData inputData, BlobContainerClient inputContainer, BlobContainerClient outputContainer, string inputBlobPrefix, string outputBlobPrefix)
{
    // Work item run by the executor for each listed blob. The returned string is
    // always empty; the executor is created without a consumer (null) below.
    async Task<string> AnonymizeOneAsync(BlobItem item)
    {
        string targetName = GetOutputBlobName(item.Name, inputBlobPrefix, outputBlobPrefix);
        Console.WriteLine($"[{item.Name}]:Processing... output to container '{outputContainer.Name}'");

        BlobClient source = new BlobClient(inputData.SourceStorageConnectionString, inputContainer.Name, item.Name, BlobClientOptions.Value);
        BlockBlobClient target = new BlockBlobClient(inputData.DestinationStorageConnectionString, outputContainer.Name, targetName, BlobClientOptions.Value);

        // The existence check is only issued when skipping is enabled (short-circuit).
        bool skip = inputData.SkipExistedFile && await target.ExistsAsync().ConfigureAwait(false);
        if (skip)
        {
            Console.WriteLine($"[{item.Name}]:'{targetName}' already exist. Skip");
        }
        else
        {
            // Remove any previous output before writing the new one.
            await target.DeleteIfExistsAsync().ConfigureAwait(false);
            await AnonymizeSingleBlobInJsonFormatAsync(source, target, item.Name, inputBlobPrefix).ConfigureAwait(false);
        }

        return string.Empty;
    }

    var listing = inputContainer.GetBlobs(BlobTraits.None, BlobStates.None, inputBlobPrefix, default);
    IEnumerable<BlobItem> jsonBlobs = listing.Where(candidate => IsInputFileInJsonFormat(candidate.Name));
    var blobReader = new FhirEnumerableReader<BlobItem>(jsonBlobs);

    Func<BlobItem, Task<string>> workItem = AnonymizeOneAsync;
    var partitionedExecutor = new FhirPartitionedExecutor<BlobItem, string>(blobReader, null, workItem)
    {
        PartitionCount = Environment.ProcessorCount * 2,
        BatchSize = 1,
    };

    await partitionedExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false);
}
/// <summary>
/// Anonymizes, one at a time, every blob listed under <paramref name="inputBlobPrefix"/>
/// whose name does NOT pass <c>IsInputFileInJsonFormat</c>; such blobs are handled by
/// <c>AnonymizeSingleBlobInNdJsonFormatAsync</c>.
/// </summary>
/// <param name="inputData">Supplies the source/destination connection strings and the <c>SkipExistedFile</c> flag.</param>
/// <param name="inputContainer">Container whose blobs are listed and read.</param>
/// <param name="outputContainer">Container that receives the anonymized blobs.</param>
/// <param name="inputBlobPrefix">Prefix used to filter the blob listing.</param>
/// <param name="outputBlobPrefix">Prefix handed to <c>GetOutputBlobName</c> when deriving output names.</param>
/// <returns>A task that completes when all matching blobs have been processed or skipped.</returns>
private async Task AnonymizeBlobsInNdJsonFormat(ActivityInputData inputData, BlobContainerClient inputContainer, BlobContainerClient outputContainer, string inputBlobPrefix, string outputBlobPrefix)
{
    // ConfigureAwait(false) is applied to the enumeration and to every await below so
    // that no await in this method captures the caller's synchronization context.
    await foreach (BlobItem blob in inputContainer.GetBlobsAsync(BlobTraits.None, BlobStates.None, inputBlobPrefix, default).ConfigureAwait(false))
    {
        // Blobs recognized as JSON are skipped here.
        if (IsInputFileInJsonFormat(blob.Name))
        {
            continue;
        }

        string outputBlobName = GetOutputBlobName(blob.Name, inputBlobPrefix, outputBlobPrefix);
        Console.WriteLine($"[{blob.Name}]:Processing... output to container '{outputContainer.Name}'");

        var inputBlobClient = new BlobClient(inputData.SourceStorageConnectionString, inputContainer.Name, blob.Name, BlobClientOptions.Value);
        var outputBlobClient = new BlockBlobClient(inputData.DestinationStorageConnectionString, outputContainer.Name, outputBlobName, BlobClientOptions.Value);

        // The existence check is only issued when skipping is enabled (short-circuit).
        if (inputData.SkipExistedFile && await outputBlobClient.ExistsAsync().ConfigureAwait(false))
        {
            Console.WriteLine($"[{blob.Name}]:'{outputBlobName}' already exist. Skip");
            continue;
        }

        // Remove any previous output before writing the new one.
        await outputBlobClient.DeleteIfExistsAsync().ConfigureAwait(false);
        await AnonymizeSingleBlobInNdJsonFormatAsync(inputBlobClient, outputBlobClient, blob.Name, inputBlobPrefix).ConfigureAwait(false);
    }
}
/// <summary>
/// Anonymizes every blob under the source folder prefix of the source container and
/// writes the results to the destination container, processing blobs sequentially.
/// </summary>
/// <param name="inputData">Supplies connection strings, container names and folder paths for source and destination.</param>
/// <param name="force">
/// When <c>true</c>, an existing output blob is deleted and regenerated; when <c>false</c>,
/// blobs whose output already exists are skipped and reported at the end.
/// </param>
/// <returns>A task that completes when all blobs have been processed or skipped.</returns>
/// <exception cref="Exception">Thrown when the source container does not exist.</exception>
public async Task AnonymizeDataset(ActivityInputData inputData, bool force)
{
    var inputContainer = new BlobContainerClient(inputData.SourceStorageConnectionString, inputData.SourceContainerName.ToLower());
    if (!await inputContainer.ExistsAsync())
    {
        throw new Exception($"Error: The specified container {inputData.SourceContainerName} does not exist.");
    }

    string outputContainerName = inputData.DestinationContainerName.ToLower();
    var outputContainer = new BlobContainerClient(inputData.DestinationStorageConnectionString, outputContainerName);
    await outputContainer.CreateIfNotExistsAsync();

    string inputBlobPrefix = GetBlobPrefixFromFolderPath(inputData.SourceFolderPath);
    string outputBlobPrefix = GetBlobPrefixFromFolderPath(inputData.DestinationFolderPath);

    // Names of input blobs that were skipped because their output already exists.
    var skippedBlobList = new List<string>();

    await foreach (BlobItem blob in inputContainer.GetBlobsAsync(BlobTraits.None, BlobStates.None, inputBlobPrefix, default))
    {
        string outputBlobName = GetOutputBlobName(blob.Name, inputBlobPrefix, outputBlobPrefix);
        Console.WriteLine($"[{blob.Name}]:Processing... output to container '{outputContainerName}'");

        var inputBlobClient = inputContainer.GetBlobClient(blob.Name);
        var outputBlobClient = outputContainer.GetBlobClient(outputBlobName);

        bool isOutputExist = await outputBlobClient.ExistsAsync();
        if (isOutputExist)
        {
            if (!force)
            {
                Console.WriteLine($"Blob file {blob.Name} already exists in {inputData.DestinationContainerName}, skipping..");
                skippedBlobList.Add(blob.Name);
                continue;
            }

            // Forced run: drop the stale output so it can be rewritten.
            await outputBlobClient.DeleteAsync();
        }

        if (IsInputFileInJsonFormat(blob.Name))
        {
            await AnonymizeBlobInJsonFormatAsync(inputBlobClient, outputBlobClient, blob.Name);
        }
        else
        {
            await AnonymizeBlobInNdJsonFormatAsync(inputBlobClient, outputBlobClient, blob.Name);
        }
    }

    if (skippedBlobList.Count > 0)
    {
        // Join the names explicitly: List<string>.ToString() would only print the type name.
        Console.WriteLine($"Skipped {skippedBlobList.Count} files already exists in destination container: {string.Join(", ", skippedBlobList)}");
        Console.WriteLine($"If you want to overwrite existing blob in {inputData.DestinationContainerName} container, please use the -f or --force flag");
    }
}
/// <summary>
/// Verifies the source container exists, ensures the destination container exists, then
/// anonymizes the source blobs in two passes: <c>AnonymizeBlobsInJsonFormat</c> followed by
/// <c>AnonymizeBlobsInNdJsonFormat</c>.
/// </summary>
/// <param name="inputData">Supplies connection strings, container names and folder paths for source and destination.</param>
/// <param name="force">Not read by this method body.</param>
/// <returns>A task that completes when both passes have finished.</returns>
/// <exception cref="Exception">Thrown when the source container does not exist.</exception>
public async Task AnonymizeDataset(ActivityInputData inputData, bool force)
{
    // Container names are lower-cased before being used to build the clients.
    string sourceName = inputData.SourceContainerName.ToLower();
    BlobContainerClient source = new BlobContainerClient(inputData.SourceStorageConnectionString, sourceName);

    bool sourceExists = await source.ExistsAsync();
    if (!sourceExists)
    {
        throw new Exception($"Error: The specified container {inputData.SourceContainerName} does not exist.");
    }

    string destinationName = inputData.DestinationContainerName.ToLower();
    BlobContainerClient destination = new BlobContainerClient(inputData.DestinationStorageConnectionString, destinationName);
    await destination.CreateIfNotExistsAsync();

    string sourcePrefix = GetBlobPrefixFromFolderPath(inputData.SourceFolderPath);
    string destinationPrefix = GetBlobPrefixFromFolderPath(inputData.DestinationFolderPath);

    // First pass: blobs recognized as JSON. Second pass: everything else.
    await AnonymizeBlobsInJsonFormat(inputData, source, destination, sourcePrefix, destinationPrefix).ConfigureAwait(false);
    await AnonymizeBlobsInNdJsonFormat(inputData, source, destination, sourcePrefix, destinationPrefix).ConfigureAwait(false);
}