// Streams the source blob through FhirPartitionedExecutor with an identity
// transform into a blob inside a freshly created, randomly named container,
// then checks that source and target report the same content length.
// The temporary container is removed whether or not the assertion passes.
//
// connectionString: storage connection string used for both source and target.
// containerName / blobName: location of the existing source blob.
public async Task GivenABlobFile_WhenExecutorWithoutAnonymize_DataShouldBeSame(string connectionString, string containerName, string blobName)
        {
            string targetContainerName = Guid.NewGuid().ToString("N");
            string targetBlobName      = Guid.NewGuid().ToString("N");

            BlobContainerClient containerClient = new BlobContainerClient(connectionString, targetContainerName);
            await containerClient.CreateIfNotExistsAsync();

            try
            {
                BlobClient      sourceBlobClient = new BlobClient(connectionString, containerName, blobName, DataFactoryCustomActivity.BlobClientOptions.Value);
                BlockBlobClient targetBlobClient = new BlockBlobClient(connectionString, targetContainerName, targetBlobName, DataFactoryCustomActivity.BlobClientOptions.Value);

                using FhirBlobDataStream stream = new FhirBlobDataStream(sourceBlobClient);
                using FhirStreamReader reader   = new FhirStreamReader(stream);
                FhirBlobConsumer consumer = new FhirBlobConsumer(targetBlobClient);

                // Identity transform: every item is written to the target unchanged.
                var executor = new FhirPartitionedExecutor <string, string>(reader, consumer, content => content);
                await executor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false);

                // Query blob properties asynchronously instead of blocking on the
                // synchronous GetProperties() overloads inside an async method.
                var sourceProperties = await sourceBlobClient.GetPropertiesAsync().ConfigureAwait(false);
                var targetProperties = await targetBlobClient.GetPropertiesAsync().ConfigureAwait(false);

                Assert.Equal(sourceProperties.Value.ContentLength, targetProperties.Value.ContentLength);
            }
            finally
            {
                await containerClient.DeleteIfExistsAsync().ConfigureAwait(false);
            }
        }
Example #2
0
        // Reads a test blob back line by line through FhirBlobDataStream and checks
        // each line against the list returned by GenerateTestBlob.
        // Runs against the local development storage account; the randomly named
        // container is deleted in the finally block.
        public async Task GivenAFhirBlobStream_WhenDownloadData_AllDataShouldbeReturned()
        {
            string containerName = Guid.NewGuid().ToString("N");
            string blobName      = Guid.NewGuid().ToString("N");
            BlobContainerClient containerClient = new BlobContainerClient("UseDevelopmentStorage=true", containerName);

            try
            {
                await containerClient.CreateIfNotExistsAsync();

                var blobClient = containerClient.GetBlobClient(blobName);

                // NOTE(review): GenerateTestBlob is defined elsewhere; presumably it uploads
                // the blob and returns the lines it wrote - confirm against the helper.
                List <string> expectedResult = await GenerateTestBlob(blobClient);

                // Dispose the stream and reader when the try block ends, i.e. before the
                // container is deleted, so the test does not leak them.
                using FhirBlobDataStream stream = new FhirBlobDataStream(blobClient);
                using StreamReader       reader = new StreamReader(stream);
                for (int i = 0; i < expectedResult.Count; ++i)
                {
                    var content = await reader.ReadLineAsync();

                    Assert.Equal(expectedResult[i], content);
                }
            }
            finally
            {
                await containerClient.DeleteIfExistsAsync();
            }
        }
Example #3
0
        // Replaces the stream's download delegate with one that stalls for 10 seconds
        // the first time each range offset is requested, while the stream is configured
        // with a 5 second block timeout and a single retry. The test then checks that
        // all expected lines are still read and that every offset was requested
        // exactly twice (the stalled attempt plus one retry).
        public async Task GivenAFhirBlobStream_WhenDownloadDataTimeout_OperationShouldBeRetried()
        {
            string containerName = Guid.NewGuid().ToString("N");
            string blobName      = Guid.NewGuid().ToString("N");
            BlobContainerClient containerClient = new BlobContainerClient("UseDevelopmentStorage=true", containerName);

            try
            {
                await containerClient.CreateIfNotExistsAsync();

                var blobClient = containerClient.GetBlobClient(blobName);

                // NOTE(review): GenerateTestBlob is defined elsewhere; presumably it uploads
                // the blob and returns the lines it wrote - confirm against the helper.
                List <string> expectedResult = await GenerateTestBlob(blobClient);

                // Dispose the stream (and the reader below) at the end of the try block.
                using FhirBlobDataStream stream      = new FhirBlobDataStream(blobClient);
                Dictionary <long, int>   enterRecord = new Dictionary <long, int>();
                stream.BlockDownloadTimeoutRetryCount = 1;
                stream.BlockDownloadTimeoutInSeconds  = 5;
                stream.DownloadDataFunc = async(client, range) =>
                {
                    // Number of earlier requests for this offset (0 when the key is absent);
                    // record the current request before any delay.
                    enterRecord.TryGetValue(range.Offset, out int previousAttempts);
                    enterRecord[range.Offset] = previousAttempts + 1;

                    if (previousAttempts < 1)
                    {
                        // First request for this offset: stall for longer than the configured
                        // timeout, without blocking a thread.
                        await Task.Delay(TimeSpan.FromSeconds(10)).ConfigureAwait(false);
                    }

                    var downloadInfo = await client.DownloadAsync(range).ConfigureAwait(false);

                    return(downloadInfo.Value.Content);
                };

                using StreamReader reader = new StreamReader(stream);
                for (int i = 0; i < expectedResult.Count; ++i)
                {
                    var content = await reader.ReadLineAsync();

                    Assert.Equal(expectedResult[i], content);
                }

                // Guard against a vacuous pass: the injected delegate must have been used.
                Assert.NotEmpty(enterRecord);
                foreach (int count in enterRecord.Values)
                {
                    Assert.Equal(2, count);
                }
            }
            finally
            {
                await containerClient.DeleteIfExistsAsync();
            }
        }
        /// <summary>
        /// Reads <paramref name="inputBlobClient"/> through a <c>FhirStreamReader</c>, passes every item
        /// to <c>AnonymizerEngine.AnonymizeJson</c>, and writes the results to
        /// <paramref name="outputBlobClient"/> using a <c>FhirPartitionedExecutor</c>.
        /// Running totals are printed to the console on every progress report.
        /// </summary>
        /// <param name="inputBlobClient">Client for the blob to read.</param>
        /// <param name="outputBlobClient">Client for the block blob that receives the output.</param>
        /// <param name="blobName">Blob name; used as a log prefix and passed to the engine as file context.</param>
        /// <param name="inputFolderPrefix">Passed to the engine as file context together with <paramref name="blobName"/>.</param>
        /// <returns>A task that completes when the executor has finished.</returns>
        /// <remarks>
        /// An exception thrown while anonymizing an item is logged to stdout/stderr and rethrown.
        /// </remarks>
        private async Task AnonymizeSingleBlobInNdJsonFormatAsync(BlobClient inputBlobClient, BlockBlobClient outputBlobClient, string blobName, string inputFolderPrefix)
        {
            int processedCount = 0;
            int skippedCount   = 0;
            int consumedCount  = 0;

            using FhirBlobDataStream inputStream = new FhirBlobDataStream(inputBlobClient);
            // The reader is disposable as well; release it together with the stream.
            using FhirStreamReader reader = new FhirStreamReader(inputStream);
            FhirBlobConsumer consumer = new FhirBlobConsumer(outputBlobClient);
            var engine = AnonymizerEngine.CreateWithFileContext(_configFile, blobName, inputFolderPrefix);
            Func <string, string> anonymizerFunction = (item) =>
            {
                try
                {
                    return(engine.AnonymizeJson(item));
                }
                catch (Exception ex)
                {
                    // Short notice on stdout, full detail on stderr, then let the failure propagate.
                    Console.WriteLine($"[{blobName}]: Anonymize partial failed, you can find detail error message in stderr.txt.");
                    Console.Error.WriteLine($"[{blobName}]: Resource: {item}\nErrorMessage: {ex.Message}\n Details: {ex.ToString()}\nStackTrace: {ex.StackTrace}");
                    throw;
                }
            };

            Stopwatch stopWatch = Stopwatch.StartNew();
            FhirPartitionedExecutor <string, string> executor = new FhirPartitionedExecutor <string, string>(reader, consumer, anonymizerFunction);

            // Two partitions per logical processor.
            executor.PartitionCount = Environment.ProcessorCount * 2;
            Progress <BatchAnonymizeProgressDetail> progress = new Progress <BatchAnonymizeProgressDetail>();

            progress.ProgressChanged += (obj, args) =>
            {
                // Interlocked.Add returns the updated total; print those values so each log
                // line reflects the totals produced by this update rather than a later re-read.
                int processedTotal = Interlocked.Add(ref processedCount, args.ProcessCompleted);
                int skippedTotal   = Interlocked.Add(ref skippedCount, args.ProcessSkipped);
                int consumedTotal  = Interlocked.Add(ref consumedCount, args.ConsumeCompleted);

                Console.WriteLine($"[{stopWatch.Elapsed.ToString()}][tid:{args.CurrentThreadId}]: {processedTotal} Completed. {skippedTotal} Skipped. {consumedTotal} consume completed.");
            };

            await executor.ExecuteAsync(CancellationToken.None, progress).ConfigureAwait(false);
        }