/// <summary>
/// Copies a source blob into a freshly created container through a
/// <c>FhirPartitionedExecutor</c> configured with an identity transform, then
/// checks that the target blob has the same content length as the source blob.
/// </summary>
/// <param name="connectionString">Storage connection string used for both the source and the target clients.</param>
/// <param name="containerName">Name of the container that holds the source blob.</param>
/// <param name="blobName">Name of the source blob.</param>
public async Task GivenABlobFile_WhenExecutorWithoutAnonymize_DataShouldBeSame(string connectionString, string containerName, string blobName)
{
    // Random names keep concurrent or repeated runs from colliding on the target.
    string targetContainerName = Guid.NewGuid().ToString("N");
    string targetBlobName = Guid.NewGuid().ToString("N");

    BlobContainerClient containerClient = new BlobContainerClient(connectionString, targetContainerName);
    await containerClient.CreateIfNotExistsAsync();

    try
    {
        BlobClient sourceBlobClient = new BlobClient(connectionString, containerName, blobName, DataFactoryCustomActivity.BlobClientOptions.Value);
        BlockBlobClient targetBlobClient = new BlockBlobClient(connectionString, targetContainerName, targetBlobName, DataFactoryCustomActivity.BlobClientOptions.Value);

        using FhirBlobDataStream stream = new FhirBlobDataStream(sourceBlobClient);
        using FhirStreamReader reader = new FhirStreamReader(stream);
        FhirBlobConsumer consumer = new FhirBlobConsumer(targetBlobClient);

        // Identity transform: every item is forwarded to the consumer unchanged.
        var executor = new FhirPartitionedExecutor<string, string>(reader, consumer, content => content);
        await executor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false);

        // Use the async property lookups instead of blocking inside an async test.
        var sourceProperties = await sourceBlobClient.GetPropertiesAsync().ConfigureAwait(false);
        var targetProperties = await targetBlobClient.GetPropertiesAsync().ConfigureAwait(false);

        Assert.Equal(sourceProperties.Value.ContentLength, targetProperties.Value.ContentLength);
    }
    finally
    {
        // Always remove the temporary target container, even when the assertion fails.
        await containerClient.DeleteIfExistsAsync().ConfigureAwait(false);
    }
}
/// <summary>
/// Generates a test blob in a temporary container on development storage, reads it
/// back line by line through <c>FhirBlobDataStream</c>, and checks that every line
/// matches the expected content in order.
/// </summary>
public async Task GivenAFhirBlobStream_WhenDownloadData_AllDataShouldbeReturned()
{
    string containerName = Guid.NewGuid().ToString("N");
    string blobName = Guid.NewGuid().ToString("N");

    BlobContainerClient containerClient = new BlobContainerClient("UseDevelopmentStorage=true", containerName);

    try
    {
        await containerClient.CreateIfNotExistsAsync();
        var blobClient = containerClient.GetBlobClient(blobName);

        List<string> expectedResult = await GenerateTestBlob(blobClient);

        // Dispose the stream and reader when the try block exits, i.e. before the
        // container is deleted in the finally block.
        using FhirBlobDataStream stream = new FhirBlobDataStream(blobClient);
        using StreamReader reader = new StreamReader(stream);

        for (int i = 0; i < expectedResult.Count; ++i)
        {
            var content = await reader.ReadLineAsync();
            Assert.Equal(expectedResult[i], content);
        }
    }
    finally
    {
        // Clean up the temporary container regardless of the test outcome.
        await containerClient.DeleteIfExistsAsync();
    }
}
/// <summary>
/// Replaces the stream's download function with one that delays the first attempt
/// for each range offset by 10 seconds while the stream's block download timeout is
/// set to 5 seconds with a retry count of 1. Checks that all lines are still read
/// correctly and that every requested offset was attempted exactly twice.
/// </summary>
public async Task GivenAFhirBlobStream_WhenDownloadDataTimeout_OperationShouldBeRetried()
{
    string containerName = Guid.NewGuid().ToString("N");
    string blobName = Guid.NewGuid().ToString("N");

    BlobContainerClient containerClient = new BlobContainerClient("UseDevelopmentStorage=true", containerName);

    try
    {
        await containerClient.CreateIfNotExistsAsync();
        var blobClient = containerClient.GetBlobClient(blobName);

        List<string> expectedResult = await GenerateTestBlob(blobClient);

        // Dispose the stream and reader when the try block exits, i.e. before the
        // container is deleted in the finally block.
        using FhirBlobDataStream stream = new FhirBlobDataStream(blobClient);

        // Number of download attempts observed per range offset.
        Dictionary<long, int> enterRecord = new Dictionary<long, int>();

        stream.BlockDownloadTimeoutRetryCount = 1;
        stream.BlockDownloadTimeoutInSeconds = 5;
        stream.DownloadDataFunc = async (client, range) =>
        {
            // A missing key leaves previousAttempts at 0, which matches a first attempt.
            enterRecord.TryGetValue(range.Offset, out int previousAttempts);
            enterRecord[range.Offset] = previousAttempts + 1;

            if (previousAttempts < 1)
            {
                // Delay the first attempt past the 5 second timeout configured above.
                // Task.Delay is used instead of Thread.Sleep so no thread is blocked.
                await Task.Delay(TimeSpan.FromSeconds(10)).ConfigureAwait(false);
            }

            var downloadInfo = await client.DownloadAsync(range).ConfigureAwait(false);
            return downloadInfo.Value.Content;
        };

        using StreamReader reader = new StreamReader(stream);
        for (int i = 0; i < expectedResult.Count; ++i)
        {
            var content = await reader.ReadLineAsync();
            Assert.Equal(expectedResult[i], content);
        }

        // Each offset must have been entered twice: the delayed attempt plus one retry.
        foreach (int count in enterRecord.Values)
        {
            Assert.Equal(2, count);
        }
    }
    finally
    {
        // Clean up the temporary container regardless of the test outcome.
        await containerClient.DeleteIfExistsAsync();
    }
}
/// <summary>
/// Reads the input blob through <c>FhirStreamReader</c>, anonymizes each item with an
/// <c>AnonymizerEngine</c> created for this blob, and writes the results to the output
/// blob via a <c>FhirPartitionedExecutor</c>. Progress is written to the console.
/// </summary>
/// <param name="inputBlobClient">Client for the blob to read.</param>
/// <param name="outputBlobClient">Client for the block blob that receives the output.</param>
/// <param name="blobName">Blob name; passed to the engine's file context and used as a log prefix.</param>
/// <param name="inputFolderPrefix">Input folder prefix passed to the engine's file context.</param>
private async Task AnonymizeSingleBlobInNdJsonFormatAsync(BlobClient inputBlobClient, BlockBlobClient outputBlobClient, string blobName, string inputFolderPrefix)
{
    // Running totals; updated from progress callbacks, hence the Interlocked usage below.
    int processedCount = 0;
    int skippedCount = 0;
    int consumedCount = 0;

    using FhirBlobDataStream inputStream = new FhirBlobDataStream(inputBlobClient);
    // Dispose the reader as well, as other callers of FhirStreamReader do.
    using FhirStreamReader reader = new FhirStreamReader(inputStream);
    FhirBlobConsumer consumer = new FhirBlobConsumer(outputBlobClient);

    var engine = AnonymizerEngine.CreateWithFileContext(_configFile, blobName, inputFolderPrefix);
    Func<string, string> anonymizerFunction = (item) =>
    {
        try
        {
            return engine.AnonymizeJson(item);
        }
        catch (Exception ex)
        {
            // Short notice on stdout, full detail on stderr, then rethrow to the executor.
            Console.WriteLine($"[{blobName}]: Anonymize partial failed, you can find detail error message in stderr.txt.");
            Console.Error.WriteLine($"[{blobName}]: Resource: {item}\nErrorMessage: {ex.Message}\n Details: {ex.ToString()}\nStackTrace: {ex.StackTrace}");
            throw;
        }
    };

    Stopwatch stopWatch = Stopwatch.StartNew();
    FhirPartitionedExecutor<string, string> executor = new FhirPartitionedExecutor<string, string>(reader, consumer, anonymizerFunction);
    executor.PartitionCount = Environment.ProcessorCount * 2;

    Progress<BatchAnonymizeProgressDetail> progress = new Progress<BatchAnonymizeProgressDetail>();
    progress.ProgressChanged += (obj, args) =>
    {
        // Log the totals returned by Interlocked.Add so the message reflects the
        // state right after this update, not whatever another callback wrote since.
        int processedTotal = Interlocked.Add(ref processedCount, args.ProcessCompleted);
        int skippedTotal = Interlocked.Add(ref skippedCount, args.ProcessSkipped);
        int consumedTotal = Interlocked.Add(ref consumedCount, args.ConsumeCompleted);

        Console.WriteLine($"[{stopWatch.Elapsed.ToString()}][tid:{args.CurrentThreadId}]: {processedTotal} Completed. {skippedTotal} Skipped. {consumedTotal} consume completed.");
    };

    await executor.ExecuteAsync(CancellationToken.None, progress).ConfigureAwait(false);
}