/// <summary>
        /// Anonymizes every "*.ndjson" file found in <paramref name="inputFolder"/> and writes each
        /// result to the path computed by <c>GetResourceOutputFileName</c>.
        /// </summary>
        /// <param name="inputFolder">Folder that is searched for bulk data resource files.</param>
        /// <param name="outputFolder">Folder handed to <c>GetResourceOutputFileName</c> to derive each output path.</param>
        /// <param name="isRecursive">
        /// When true, sub-folders are searched as well and the folder of each output file is
        /// created before the file is written.
        /// </param>
        public void AnonymizeBulkDataFolder(string inputFolder, string outputFolder, bool isRecursive)
        {
            var directorySearchOption = isRecursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
            var bulkResourceFileList  = Directory.EnumerateFiles(inputFolder, "*.ndjson", directorySearchOption).ToList();

            Console.WriteLine($"Find {bulkResourceFileList.Count} bulk data resource files in '{inputFolder}'.");

            foreach (var bulkResourceFileName in bulkResourceFileList)
            {
                Console.WriteLine($"Processing {bulkResourceFileName}");

                var bulkResourceOutputFileName = GetResourceOutputFileName(bulkResourceFileName, inputFolder, outputFolder);
                if (isRecursive)
                {
                    var resourceOutputFolder = Path.GetDirectoryName(bulkResourceOutputFileName);
                    Directory.CreateDirectory(resourceOutputFolder);
                }

                // Running totals for the current file; updated from the progress callback.
                int completedCount        = 0;
                int failedCount           = 0;
                int consumeCompletedCount = 0;

                // The input is only read, so request read access explicitly. The two-argument
                // FileStream constructor asks for read/write access, which fails on read-only files.
                using (FileStream inputStream = new FileStream(bulkResourceFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
                using (FileStream outputStream = new FileStream(bulkResourceOutputFileName, FileMode.Create))
                {
                    using FhirStreamReader reader     = new FhirStreamReader(inputStream);
                    using FhirStreamConsumer consumer = new FhirStreamConsumer(outputStream);
                    Func <string, string> anonymizeFunction = (content) => _engine.AnonymizeJson(content);

                    Stopwatch stopWatch = Stopwatch.StartNew();

                    FhirPartitionedExecutor executor = new FhirPartitionedExecutor(reader, consumer, anonymizeFunction);
                    executor.PartitionCount = Environment.ProcessorCount;
                    Progress <BatchAnonymizeProgressDetail> progress = new Progress <BatchAnonymizeProgressDetail>();
                    progress.ProgressChanged += (obj, args) =>
                    {
                        // Handlers may run concurrently when no synchronization context is captured.
                        // Print the values returned by Interlocked.Add instead of re-reading the
                        // shared counters, so each line reports totals that include this update.
                        int completedTotal       = Interlocked.Add(ref completedCount, args.ProcessCompleted);
                        int failedTotal          = Interlocked.Add(ref failedCount, args.ProcessFailed);
                        int consumeCompleteTotal = Interlocked.Add(ref consumeCompletedCount, args.ConsumeCompleted);
                        Console.WriteLine($"[{stopWatch.Elapsed.ToString()}]: {completedTotal} Process completed. {failedTotal} Process failed. {consumeCompleteTotal} Consume completed.");
                    };

                    // This method is synchronous, so the task is blocked on here. Wait() is kept
                    // (rather than GetAwaiter().GetResult()) so failures still surface as
                    // AggregateException, which existing callers may rely on.
                    executor.ExecuteAsync(CancellationToken.None, false, progress).Wait();
                }

                Console.WriteLine($"Finished processing '{bulkResourceFileName}'!");
            }
        }
Example #2
0
        // Verifies that lines handed to FhirStreamConsumer come back from the underlying stream
        // one per line, in order, including the empty entry, followed by end-of-stream.
        public async Task GivenAFhirStreamConsumer_WhenConsumeData_ShouldReadAllDataFromStream()
        {
            string[] expectedLines = { "abc", "bcd", "" };

            using MemoryStream sink = new MemoryStream();
            using FhirStreamConsumer streamConsumer = new FhirStreamConsumer(sink);

            int consumedCount = await streamConsumer.ConsumeAsync(new List<string>(expectedLines));
            Assert.Equal(3, consumedCount);

            await streamConsumer.CompleteAsync();

            // Rewind so the written content can be read back from the start.
            sink.Seek(0, SeekOrigin.Begin);
            using StreamReader lineReader = new StreamReader(sink);
            foreach (string expectedLine in expectedLines)
            {
                Assert.Equal(expectedLine, await lineReader.ReadLineAsync());
            }

            // Nothing may follow the three written lines.
            Assert.Null(await lineReader.ReadLineAsync());
        }
        /// <summary>
        /// Anonymizes every "*.ndjson" file found in <c>_inputFolder</c> and writes each result to
        /// the path computed by <c>GetResourceOutputFileName</c>. Sub-folders are searched, and
        /// output folders created, only when <c>_isRecursive</c> is set.
        /// </summary>
        /// <returns>A task that completes once all discovered files have been processed.</returns>
        public async Task AnonymizeAsync()
        {
            var directorySearchOption = _isRecursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
            var bulkResourceFileList  = Directory.EnumerateFiles(_inputFolder, "*.ndjson", directorySearchOption).ToList();

            Console.WriteLine($"Find {bulkResourceFileList.Count} bulk data resource files in '{_inputFolder}'.");

            foreach (var bulkResourceFileName in bulkResourceFileList)
            {
                Console.WriteLine($"Processing {bulkResourceFileName}");

                var bulkResourceOutputFileName = GetResourceOutputFileName(bulkResourceFileName, _inputFolder, _outputFolder);
                if (_isRecursive)
                {
                    var resourceOutputFolder = Path.GetDirectoryName(bulkResourceOutputFileName);
                    Directory.CreateDirectory(resourceOutputFolder);
                }

                // Running totals for the current file; updated from the progress callback.
                int completedCount        = 0;
                int failedCount           = 0;
                int consumeCompletedCount = 0;

                // The input is only read, so request read access explicitly. The two-argument
                // FileStream constructor asks for read/write access, which fails on read-only files.
                using (FileStream inputStream = new FileStream(bulkResourceFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
                using (FileStream outputStream = new FileStream(bulkResourceOutputFileName, FileMode.Create))
                {
                    using FhirStreamReader reader     = new FhirStreamReader(inputStream);
                    using FhirStreamConsumer consumer = new FhirStreamConsumer(outputStream);
                    Func <string, string> anonymizeFunction = (content) =>
                    {
                        try
                        {
                            // An engine is created per resource with the current file as context.
                            var engine   = AnonymizerEngine.CreateWithFileContext(_configFilePath, bulkResourceFileName, _inputFolder);
                            var settings = new AnonymizerSettings()
                            {
                                IsPrettyOutput = false,
                                ValidateInput  = _validateInput,
                                ValidateOutput = _validateOutput
                            };
                            return engine.AnonymizeJson(content, settings);
                        }
                        catch (Exception ex)
                        {
                            // NOTE(review): this writes the raw, un-anonymized resource to stderr;
                            // confirm that is acceptable wherever this output is captured.
                            Console.Error.WriteLine($"Error:\nResource: {content}\nErrorMessage: {ex.ToString()}");
                            throw;
                        }
                    };

                    Stopwatch stopWatch = Stopwatch.StartNew();

                    FhirPartitionedExecutor <string, string> executor = new FhirPartitionedExecutor <string, string>(reader, consumer, anonymizeFunction);
                    executor.PartitionCount = Environment.ProcessorCount * 2;

                    Progress <BatchAnonymizeProgressDetail> progress = new Progress <BatchAnonymizeProgressDetail>();
                    progress.ProgressChanged += (obj, args) =>
                    {
                        // Handlers may run concurrently when no synchronization context is captured.
                        // Print the values returned by Interlocked.Add instead of re-reading the
                        // shared counters, so each line reports totals that include this update.
                        int completedTotal       = Interlocked.Add(ref completedCount, args.ProcessCompleted);
                        int failedTotal          = Interlocked.Add(ref failedCount, args.ProcessFailed);
                        int consumeCompleteTotal = Interlocked.Add(ref consumeCompletedCount, args.ConsumeCompleted);

                        Console.WriteLine($"[{stopWatch.Elapsed.ToString()}][tid:{args.CurrentThreadId}]: {completedTotal} Process completed. {failedTotal} Process failed. {consumeCompleteTotal} Consume completed.");
                    };

                    await executor.ExecuteAsync(CancellationToken.None, false, progress).ConfigureAwait(false);
                }

                Console.WriteLine($"Finished processing '{bulkResourceFileName}'!");
            }
        }