/// <summary>
/// Copies a source blob to a new target blob through a <see cref="FhirPartitionedExecutor{TSource, TResult}"/>
/// whose transform returns each item unchanged, then asserts that both blobs report the same content length.
/// The target container is created for this run and deleted again in the <c>finally</c> block.
/// </summary>
/// <param name="connectionString">Storage connection string used for the source and target clients.</param>
/// <param name="containerName">Container that holds the source blob.</param>
/// <param name="blobName">Name of the source blob.</param>
public async Task GivenABlobFile_WhenExecutorWithoutAnonymize_DataShouldBeSame(string connectionString, string containerName, string blobName)
{
    // Random names keep the target isolated from other runs.
    string targetContainerName = Guid.NewGuid().ToString("N");
    string targetBlobName = Guid.NewGuid().ToString("N");

    BlobContainerClient containerClient = new BlobContainerClient(connectionString, targetContainerName);
    await containerClient.CreateIfNotExistsAsync();

    try
    {
        BlobClient sourceBlobClient = new BlobClient(connectionString, containerName, blobName, DataFactoryCustomActivity.BlobClientOptions.Value);
        BlockBlobClient targetBlobClient = new BlockBlobClient(connectionString, targetContainerName, targetBlobName, DataFactoryCustomActivity.BlobClientOptions.Value);

        using FhirBlobDataStream stream = new FhirBlobDataStream(sourceBlobClient);
        using FhirStreamReader reader = new FhirStreamReader(stream);
        FhirBlobConsumer consumer = new FhirBlobConsumer(targetBlobClient);

        // Identity transform: the executor should pass every item through untouched.
        var executor = new FhirPartitionedExecutor<string, string>(reader, consumer, content => content);
        await executor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false);

        // Query the properties asynchronously instead of blocking inside an async test.
        var sourceProperties = await sourceBlobClient.GetPropertiesAsync();
        var targetProperties = await targetBlobClient.GetPropertiesAsync();

        Assert.Equal(sourceProperties.Value.ContentLength, targetProperties.Value.ContentLength);
    }
    finally
    {
        await containerClient.DeleteIfExistsAsync().ConfigureAwait(false);
    }
}
/// <summary>
/// Feeds generated test batches to a <see cref="FhirBlobConsumer"/> that targets a new blob in a
/// temporary container on development storage. It then downloads the blob and checks that its lines
/// match the generated items in order, that no extra line follows, and that the total reported through
/// the progress handler equals the blob's content length. The container is deleted afterwards.
/// </summary>
public async Task GivenAFhirBlobConsumer_WhenConsumeData_AllDataShouldbeUploaded()
{
    string connectionString = "UseDevelopmentStorage=true";
    string containerName = Guid.NewGuid().ToString("N");
    string blobName = Guid.NewGuid().ToString("N");

    var containerClient = new BlobContainerClient(connectionString, containerName);
    await containerClient.CreateIfNotExistsAsync();

    try
    {
        var blobClient = new BlockBlobClient(connectionString, containerName, blobName);
        await blobClient.DeleteIfExistsAsync();

        var consumer = new FhirBlobConsumer(blobClient);

        // Sum every value reported by the consumer.
        // NOTE(review): Progress<T> raises ProgressChanged asynchronously; confirm that all
        // callbacks have run before the final length assertion reads the total.
        long reportedTotal = 0;
        var progress = new Progress<long>();
        progress.ProgressChanged += (sender, reported) =>
        {
            Interlocked.Add(ref reportedTotal, reported);
        };
        consumer.ProgressHandler = progress;

        // The same seed is used for upload and verification so both passes see identical data.
        int seed = DateTime.Now.Second % 30;

        foreach (var uploadBatch in GenerateTestData(20, 10000, seed))
        {
            await consumer.ConsumeAsync(uploadBatch);
        }

        await consumer.CompleteAsync();

        var download = await blobClient.DownloadAsync();
        using var lineReader = new StreamReader(download.Value.Content);

        foreach (var expectedBatch in GenerateTestData(20, 10000, seed))
        {
            foreach (var expectedLine in expectedBatch)
            {
                var actualLine = await lineReader.ReadLineAsync();
                Assert.Equal(expectedLine, actualLine);
            }
        }

        // Nothing may remain in the blob after the expected lines.
        var trailingLine = await lineReader.ReadLineAsync();
        Assert.Null(trailingLine);

        var blobProperties = await blobClient.GetPropertiesAsync();
        Assert.Equal(blobProperties.Value.ContentLength, reportedTotal);
    }
    finally
    {
        await containerClient.DeleteIfExistsAsync();
    }
}
/// <summary>
/// Anonymizes a single blob: items read from <paramref name="inputBlobClient"/> are transformed with
/// <c>AnonymizerEngine.AnonymizeJson</c> and handed to a <see cref="FhirBlobConsumer"/> that writes to
/// <paramref name="outputBlobClient"/>, driven by a <see cref="FhirPartitionedExecutor{TSource, TResult}"/>.
/// Progress counters are printed to the console as they are reported.
/// </summary>
/// <param name="inputBlobClient">Client for the blob to read.</param>
/// <param name="outputBlobClient">Client for the blob to write.</param>
/// <param name="blobName">Blob name; passed to the engine's file context and used as a log prefix.</param>
/// <param name="inputFolderPrefix">Input folder prefix passed to the engine's file context.</param>
private async Task AnonymizeSingleBlobInNdJsonFormatAsync(BlobClient inputBlobClient, BlockBlobClient outputBlobClient, string blobName, string inputFolderPrefix)
{
    int processedCount = 0;
    int skippedCount = 0;
    int consumedCount = 0;

    using FhirBlobDataStream inputStream = new FhirBlobDataStream(inputBlobClient);
    // The reader is disposable too; release it when the method exits rather than leaking it.
    using FhirStreamReader reader = new FhirStreamReader(inputStream);
    FhirBlobConsumer consumer = new FhirBlobConsumer(outputBlobClient);
    var engine = AnonymizerEngine.CreateWithFileContext(_configFile, blobName, inputFolderPrefix);

    Func<string, string> anonymizerFunction = (item) =>
    {
        try
        {
            return engine.AnonymizeJson(item);
        }
        catch (Exception ex)
        {
            // Short notice on stdout, full details on stderr, then rethrow with the original stack trace.
            Console.WriteLine($"[{blobName}]: Anonymize partial failed, you can find detail error message in stderr.txt.");
            Console.Error.WriteLine($"[{blobName}]: Resource: {item}\nErrorMessage: {ex.Message}\n Details: {ex.ToString()}\nStackTrace: {ex.StackTrace}");
            throw;
        }
    };

    Stopwatch stopWatch = Stopwatch.StartNew();
    FhirPartitionedExecutor<string, string> executor = new FhirPartitionedExecutor<string, string>(reader, consumer, anonymizerFunction);
    executor.PartitionCount = Environment.ProcessorCount * 2;

    Progress<BatchAnonymizeProgressDetail> progress = new Progress<BatchAnonymizeProgressDetail>();
    progress.ProgressChanged += (obj, args) =>
    {
        // Each report is added to the running totals atomically.
        Interlocked.Add(ref processedCount, args.ProcessCompleted);
        Interlocked.Add(ref skippedCount, args.ProcessSkipped);
        Interlocked.Add(ref consumedCount, args.ConsumeCompleted);

        Console.WriteLine($"[{stopWatch.Elapsed.ToString()}][tid:{args.CurrentThreadId}]: {processedCount} Completed. {skippedCount} Skipped. {consumedCount} consume completed.");
    };

    await executor.ExecuteAsync(CancellationToken.None, progress).ConfigureAwait(false);
}