public async Task GivenABlobFile_WhenExecutorWithoutAnonymize_DataShouldBeSame(string connectionString, string containerName, string blobName)
        {
            // Round-trips the source blob through the partitioned executor with an identity
            // transform into a throwaway container, then checks that the source blob and the
            // copy report the same content length.
            string targetContainerName = Guid.NewGuid().ToString("N");
            string targetBlobName = Guid.NewGuid().ToString("N");

            var containerClient = new BlobContainerClient(connectionString, targetContainerName);
            await containerClient.CreateIfNotExistsAsync();

            try
            {
                var sourceBlobClient = new BlobClient(connectionString, containerName, blobName, DataFactoryCustomActivity.BlobClientOptions.Value);
                var targetBlobClient = new BlockBlobClient(connectionString, targetContainerName, targetBlobName, DataFactoryCustomActivity.BlobClientOptions.Value);

                using var stream = new FhirBlobDataStream(sourceBlobClient);
                using var reader = new FhirStreamReader(stream);
                var consumer = new FhirBlobConsumer(targetBlobClient);

                // The transform returns its input unchanged, so no anonymization takes place.
                var executor = new FhirPartitionedExecutor<string, string>(reader, consumer, content => content);
                await executor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false);

                // Fetch the blob properties asynchronously instead of blocking inside an async test.
                var sourceProperties = await sourceBlobClient.GetPropertiesAsync().ConfigureAwait(false);
                var targetProperties = await targetBlobClient.GetPropertiesAsync().ConfigureAwait(false);

                Assert.Equal(sourceProperties.Value.ContentLength, targetProperties.Value.ContentLength);
            }
            finally
            {
                // Always remove the temporary container, even when the assertion fails.
                await containerClient.DeleteIfExistsAsync().ConfigureAwait(false);
            }
        }
        /// <summary>
        /// Anonymizes every <c>*.ndjson</c> file found in <paramref name="inputFolder"/> and writes
        /// each result to the path returned by <c>GetResourceOutputFileName</c>.
        /// </summary>
        /// <param name="inputFolder">Folder that is searched for bulk data files.</param>
        /// <param name="outputFolder">Folder handed to <c>GetResourceOutputFileName</c> to derive each output path.</param>
        /// <param name="isRecursive">
        /// When <c>true</c>, sub-folders are searched as well and the directory of each output
        /// file is created before the file is written.
        /// </param>
        public void AnonymizeBulkDataFolder(string inputFolder, string outputFolder, bool isRecursive)
        {
            SearchOption searchOption = isRecursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
            List<string> bulkResourceFileList = Directory.EnumerateFiles(inputFolder, "*.ndjson", searchOption).ToList();

            Console.WriteLine($"Find {bulkResourceFileList.Count} bulk data resource files in '{inputFolder}'.");

            foreach (string bulkResourceFileName in bulkResourceFileList)
            {
                Console.WriteLine($"Processing {bulkResourceFileName}");

                string bulkResourceOutputFileName = GetResourceOutputFileName(bulkResourceFileName, inputFolder, outputFolder);
                if (isRecursive)
                {
                    // Nested input folders need their counterpart created on the output side.
                    string resourceOutputFolder = Path.GetDirectoryName(bulkResourceOutputFileName);
                    Directory.CreateDirectory(resourceOutputFolder);
                }

                int completedCount = 0;
                int failedCount = 0;
                int consumeCompletedCount = 0;

                // The input is only read, so request read access explicitly; FileMode.Open alone
                // asks for read/write access and fails on read-only files.
                using (FileStream inputStream = new FileStream(bulkResourceFileName, FileMode.Open, FileAccess.Read))
                using (FileStream outputStream = new FileStream(bulkResourceOutputFileName, FileMode.Create))
                {
                    using FhirStreamReader reader = new FhirStreamReader(inputStream);
                    using FhirStreamConsumer consumer = new FhirStreamConsumer(outputStream);
                    Func<string, string> anonymizeFunction = content => _engine.AnonymizeJson(content);

                    Stopwatch stopWatch = Stopwatch.StartNew();

                    FhirPartitionedExecutor executor = new FhirPartitionedExecutor(reader, consumer, anonymizeFunction);
                    executor.PartitionCount = Environment.ProcessorCount;

                    Progress<BatchAnonymizeProgressDetail> progress = new Progress<BatchAnonymizeProgressDetail>();
                    progress.ProgressChanged += (obj, args) =>
                    {
                        // Each report carries increments, which are folded into running totals.
                        Interlocked.Add(ref completedCount, args.ProcessCompleted);
                        Interlocked.Add(ref failedCount, args.ProcessFailed);
                        Interlocked.Add(ref consumeCompletedCount, args.ConsumeCompleted);
                        Console.WriteLine($"[{stopWatch.Elapsed.ToString()}]: {completedCount} Process completed. {failedCount} Process failed. {consumeCompletedCount} Consume completed.");
                    };

                    // This method is synchronous, so block until the executor has finished.
                    executor.ExecuteAsync(CancellationToken.None, false, progress).Wait();
                }

                Console.WriteLine($"Finished processing '{bulkResourceFileName}'!");
            }
        }
        public async Task GivenAFhirStreamReader_WhenLoadData_ShouldLoadAllDataFromStream()
        {
            // Lines written to the stream, in order; the reader is expected to hand back the
            // very same sequence (including the empty line) and then null.
            string[] expectedLines = { "abc", "bcd", "" };

            using var buffer = new MemoryStream();
            using var lineWriter = new StreamWriter(buffer);
            foreach (string line in expectedLines)
            {
                await lineWriter.WriteLineAsync(line);
            }

            lineWriter.Flush();

            // Rewind so the reader starts from the first written line.
            buffer.Seek(0, SeekOrigin.Begin);
            using var fhirReader = new FhirStreamReader(buffer);

            foreach (string expected in expectedLines)
            {
                Assert.Equal(expected, await fhirReader.NextAsync());
            }

            Assert.Null(await fhirReader.NextAsync());
        }
        /// <summary>
        /// Reads the input blob through a <c>FhirStreamReader</c>, anonymizes each item with an
        /// engine created for this blob, and writes the results to the output blob while
        /// printing running progress totals to the console.
        /// </summary>
        /// <param name="inputBlobClient">Client for the blob that is read.</param>
        /// <param name="outputBlobClient">Client for the block blob that receives the output.</param>
        /// <param name="blobName">Blob name; used for the engine's file context and as a log prefix.</param>
        /// <param name="inputFolderPrefix">Folder prefix passed to the engine's file context.</param>
        private async Task AnonymizeSingleBlobInNdJsonFormatAsync(BlobClient inputBlobClient, BlockBlobClient outputBlobClient, string blobName, string inputFolderPrefix)
        {
            int processedCount = 0;
            int skippedCount = 0;
            int consumedCount = 0;

            using FhirBlobDataStream inputStream = new FhirBlobDataStream(inputBlobClient);
            // Dispose the reader as well, matching how FhirStreamReader is used elsewhere.
            using FhirStreamReader reader = new FhirStreamReader(inputStream);
            FhirBlobConsumer consumer = new FhirBlobConsumer(outputBlobClient);
            var engine = AnonymizerEngine.CreateWithFileContext(_configFile, blobName, inputFolderPrefix);

            Func<string, string> anonymizerFunction = item =>
            {
                try
                {
                    return engine.AnonymizeJson(item);
                }
                catch (Exception ex)
                {
                    // Short notice on stdout, full details on stderr; the exception still propagates.
                    Console.WriteLine($"[{blobName}]: Anonymize partial failed, you can find detail error message in stderr.txt.");
                    Console.Error.WriteLine($"[{blobName}]: Resource: {item}\nErrorMessage: {ex.Message}\n Details: {ex.ToString()}\nStackTrace: {ex.StackTrace}");
                    throw;
                }
            };

            Stopwatch stopWatch = Stopwatch.StartNew();
            FhirPartitionedExecutor<string, string> executor = new FhirPartitionedExecutor<string, string>(reader, consumer, anonymizerFunction);
            executor.PartitionCount = Environment.ProcessorCount * 2;

            Progress<BatchAnonymizeProgressDetail> progress = new Progress<BatchAnonymizeProgressDetail>();
            progress.ProgressChanged += (obj, args) =>
            {
                // Each report carries increments, which are folded into running totals.
                Interlocked.Add(ref processedCount, args.ProcessCompleted);
                Interlocked.Add(ref skippedCount, args.ProcessSkipped);
                Interlocked.Add(ref consumedCount, args.ConsumeCompleted);

                Console.WriteLine($"[{stopWatch.Elapsed.ToString()}][tid:{args.CurrentThreadId}]: {processedCount} Completed. {skippedCount} Skipped. {consumedCount} consume completed.");
            };

            await executor.ExecuteAsync(CancellationToken.None, progress).ConfigureAwait(false);
        }
        /// <summary>
        /// Anonymizes every <c>*.ndjson</c> file found in the configured input folder and writes
        /// each result to the path returned by <c>GetResourceOutputFileName</c>, printing running
        /// progress totals to the console.
        /// </summary>
        /// <returns>A task that completes once all files have been processed.</returns>
        public async Task AnonymizeAsync()
        {
            SearchOption searchOption = _isRecursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
            List<string> bulkResourceFileList = Directory.EnumerateFiles(_inputFolder, "*.ndjson", searchOption).ToList();

            Console.WriteLine($"Find {bulkResourceFileList.Count} bulk data resource files in '{_inputFolder}'.");

            foreach (string bulkResourceFileName in bulkResourceFileList)
            {
                Console.WriteLine($"Processing {bulkResourceFileName}");

                string bulkResourceOutputFileName = GetResourceOutputFileName(bulkResourceFileName, _inputFolder, _outputFolder);
                if (_isRecursive)
                {
                    // Nested input folders need their counterpart created on the output side.
                    string resourceOutputFolder = Path.GetDirectoryName(bulkResourceOutputFileName);
                    Directory.CreateDirectory(resourceOutputFolder);
                }

                int completedCount = 0;
                int failedCount = 0;
                int consumeCompletedCount = 0;

                // The input is only read, so request read access explicitly; FileMode.Open alone
                // asks for read/write access and fails on read-only files.
                using (FileStream inputStream = new FileStream(bulkResourceFileName, FileMode.Open, FileAccess.Read))
                using (FileStream outputStream = new FileStream(bulkResourceOutputFileName, FileMode.Create))
                {
                    using FhirStreamReader reader = new FhirStreamReader(inputStream);
                    using FhirStreamConsumer consumer = new FhirStreamConsumer(outputStream);
                    Func<string, string> anonymizeFunction = content =>
                    {
                        try
                        {
                            // NOTE(review): an engine is created for every resource; hoisting it out of
                            // the lambda would save work, but only if the engine may be shared across
                            // partitions — confirm before changing.
                            var engine = AnonymizerEngine.CreateWithFileContext(_configFilePath, bulkResourceFileName, _inputFolder);
                            var settings = new AnonymizerSettings()
                            {
                                IsPrettyOutput = false,
                                ValidateInput = _validateInput,
                                ValidateOutput = _validateOutput
                            };
                            return engine.AnonymizeJson(content, settings);
                        }
                        catch (Exception ex)
                        {
                            // Log the offending resource and the error, then let the exception propagate.
                            Console.Error.WriteLine($"Error:\nResource: {content}\nErrorMessage: {ex.ToString()}");
                            throw;
                        }
                    };

                    Stopwatch stopWatch = Stopwatch.StartNew();

                    FhirPartitionedExecutor<string, string> executor = new FhirPartitionedExecutor<string, string>(reader, consumer, anonymizeFunction);
                    executor.PartitionCount = Environment.ProcessorCount * 2;

                    Progress<BatchAnonymizeProgressDetail> progress = new Progress<BatchAnonymizeProgressDetail>();
                    progress.ProgressChanged += (obj, args) =>
                    {
                        // Each report carries increments, which are folded into running totals.
                        Interlocked.Add(ref completedCount, args.ProcessCompleted);
                        Interlocked.Add(ref failedCount, args.ProcessFailed);
                        Interlocked.Add(ref consumeCompletedCount, args.ConsumeCompleted);

                        Console.WriteLine($"[{stopWatch.Elapsed.ToString()}][tid:{args.CurrentThreadId}]: {completedCount} Process completed. {failedCount} Process failed. {consumeCompletedCount} Consume completed.");
                    };

                    await executor.ExecuteAsync(CancellationToken.None, false, progress).ConfigureAwait(false);
                }

                Console.WriteLine($"Finished processing '{bulkResourceFileName}'!");
            }
        }