/// <summary>
/// Runs every "*.ndjson" file found in <paramref name="inputFolder"/> through the anonymization
/// engine and writes each result to the path returned by <c>GetResourceOutputFileName</c>.
/// Progress is reported on the console while each file is processed.
/// </summary>
/// <param name="inputFolder">Folder that is searched for "*.ndjson" files.</param>
/// <param name="outputFolder">Folder handed to <c>GetResourceOutputFileName</c> to derive each output path.</param>
/// <param name="isRecursive">
/// When <c>true</c>, sub-folders are searched as well and the directory of every output file is
/// created before writing; when <c>false</c>, only the top-level folder is searched.
/// </param>
/// <exception cref="AggregateException">
/// Thrown when the executor task fails, because the task is awaited synchronously with <c>Wait()</c>.
/// </exception>
public void AnonymizeBulkDataFolder(string inputFolder, string outputFolder, bool isRecursive)
{
    var directorySearchOption = isRecursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
    var bulkResourceFileList = Directory.EnumerateFiles(inputFolder, "*.ndjson", directorySearchOption).ToList();
    Console.WriteLine($"Find {bulkResourceFileList.Count} bulk data resource files in '{inputFolder}'.");

    foreach (var bulkResourceFileName in bulkResourceFileList)
    {
        Console.WriteLine($"Processing {bulkResourceFileName}");

        var bulkResourceOutputFileName = GetResourceOutputFileName(bulkResourceFileName, inputFolder, outputFolder);
        if (isRecursive)
        {
            var resourceOutputFolder = Path.GetDirectoryName(bulkResourceOutputFileName);
            Directory.CreateDirectory(resourceOutputFolder);
        }

        // Running totals for this file; updated from the progress callback below.
        int completedCount = 0;
        int failedCount = 0;
        int consumeCompletedCount = 0;

        // The input is only read, so request read access explicitly. The FileMode-only constructor
        // asks for read/write access, which fails on read-only files.
        using (FileStream inputStream = new FileStream(bulkResourceFileName, FileMode.Open, FileAccess.Read))
        using (FileStream outputStream = new FileStream(bulkResourceOutputFileName, FileMode.Create))
        {
            using FhirStreamReader reader = new FhirStreamReader(inputStream);
            using FhirStreamConsumer consumer = new FhirStreamConsumer(outputStream);
            Func<string, string> anonymizeFunction = (content) => _engine.AnonymizeJson(content);

            Stopwatch stopWatch = Stopwatch.StartNew();

            FhirPartitionedExecutor executor = new FhirPartitionedExecutor(reader, consumer, anonymizeFunction);
            executor.PartitionCount = Environment.ProcessorCount;

            Progress<BatchAnonymizeProgressDetail> progress = new Progress<BatchAnonymizeProgressDetail>();
            progress.ProgressChanged += (obj, args) =>
            {
                // Each report carries increments, which are added atomically to the running totals.
                Interlocked.Add(ref completedCount, args.ProcessCompleted);
                Interlocked.Add(ref failedCount, args.ProcessFailed);
                Interlocked.Add(ref consumeCompletedCount, args.ConsumeCompleted);

                Console.WriteLine($"[{stopWatch.Elapsed.ToString()}]: {completedCount} Process completed. {failedCount} Process failed. {consumeCompletedCount} Consume completed.");
            };

            // This method is synchronous, so the task is blocked on here; failures surface
            // wrapped in AggregateException.
            executor.ExecuteAsync(CancellationToken.None, false, progress).Wait();
        }

        Console.WriteLine($"Finished processing '{bulkResourceFileName}'!");
    }
}
/// <summary>
/// Feeds three strings (the last one empty) to a <c>FhirStreamConsumer</c> backed by a memory stream,
/// checks that the reported count is 3, completes the consumer, and then reads the stream back
/// line by line expecting exactly those three lines followed by end of stream.
/// </summary>
public async Task GivenAFhirStreamConsumer_WhenConsumeData_ShouldReadAllDataFromStream()
{
    string[] expectedLines = { "abc", "bcd", "" };

    using var target = new MemoryStream();
    using var streamConsumer = new FhirStreamConsumer(target);

    int consumedCount = await streamConsumer.ConsumeAsync(new List<string>(expectedLines));
    Assert.Equal(3, consumedCount);

    await streamConsumer.CompleteAsync();

    // Rewind so the written content can be read back from the start.
    target.Position = 0;
    using var lineReader = new StreamReader(target);

    foreach (string expectedLine in expectedLines)
    {
        Assert.Equal(expectedLine, await lineReader.ReadLineAsync());
    }

    // Nothing may follow the three consumed lines.
    Assert.Null(await lineReader.ReadLineAsync());
}
/// <summary>
/// Anonymizes every "*.ndjson" file found in the configured input folder and writes each result to
/// the path returned by <c>GetResourceOutputFileName</c>. Progress is reported on the console while
/// each file is processed.
/// </summary>
/// <remarks>
/// Sub-folders are searched, and output directories created, only when <c>_isRecursive</c> is set.
/// A resource that fails to anonymize is logged to standard error and the exception is rethrown
/// to the executor.
/// </remarks>
/// <returns>A task that completes once all files have been processed.</returns>
public async Task AnonymizeAsync()
{
    var directorySearchOption = _isRecursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
    var bulkResourceFileList = Directory.EnumerateFiles(_inputFolder, "*.ndjson", directorySearchOption).ToList();
    Console.WriteLine($"Find {bulkResourceFileList.Count} bulk data resource files in '{_inputFolder}'.");

    foreach (var bulkResourceFileName in bulkResourceFileList)
    {
        Console.WriteLine($"Processing {bulkResourceFileName}");

        var bulkResourceOutputFileName = GetResourceOutputFileName(bulkResourceFileName, _inputFolder, _outputFolder);
        if (_isRecursive)
        {
            var resourceOutputFolder = Path.GetDirectoryName(bulkResourceOutputFileName);
            Directory.CreateDirectory(resourceOutputFolder);
        }

        // Running totals for this file; updated from the progress callback below.
        int completedCount = 0;
        int failedCount = 0;
        int consumeCompletedCount = 0;

        // The input is only read, so request read access explicitly. The FileMode-only constructor
        // asks for read/write access, which fails on read-only files.
        using (FileStream inputStream = new FileStream(bulkResourceFileName, FileMode.Open, FileAccess.Read))
        using (FileStream outputStream = new FileStream(bulkResourceOutputFileName, FileMode.Create))
        {
            using FhirStreamReader reader = new FhirStreamReader(inputStream);
            using FhirStreamConsumer consumer = new FhirStreamConsumer(outputStream);

            Func<string, string> anonymizeFunction = (content) =>
            {
                try
                {
                    // NOTE(review): a new engine is built for every resource, presumably because
                    // the engine must not be shared between partitions; confirm before hoisting.
                    var engine = AnonymizerEngine.CreateWithFileContext(_configFilePath, bulkResourceFileName, _inputFolder);
                    var settings = new AnonymizerSettings()
                    {
                        IsPrettyOutput = false,
                        ValidateInput = _validateInput,
                        ValidateOutput = _validateOutput
                    };

                    return engine.AnonymizeJson(content, settings);
                }
                catch (Exception ex)
                {
                    // Log the failing resource, then rethrow with the original stack trace.
                    Console.Error.WriteLine($"Error:\nResource: {content}\nErrorMessage: {ex.ToString()}");
                    throw;
                }
            };

            Stopwatch stopWatch = Stopwatch.StartNew();

            FhirPartitionedExecutor<string, string> executor = new FhirPartitionedExecutor<string, string>(reader, consumer, anonymizeFunction);
            executor.PartitionCount = Environment.ProcessorCount * 2;

            Progress<BatchAnonymizeProgressDetail> progress = new Progress<BatchAnonymizeProgressDetail>();
            progress.ProgressChanged += (obj, args) =>
            {
                // Each report carries increments, which are added atomically to the running totals.
                Interlocked.Add(ref completedCount, args.ProcessCompleted);
                Interlocked.Add(ref failedCount, args.ProcessFailed);
                Interlocked.Add(ref consumeCompletedCount, args.ConsumeCompleted);

                Console.WriteLine($"[{stopWatch.Elapsed.ToString()}][tid:{args.CurrentThreadId}]: {completedCount} Process completed. {failedCount} Process failed. {consumeCompletedCount} Consume completed.");
            };

            await executor.ExecuteAsync(CancellationToken.None, false, progress).ConfigureAwait(false);
        }

        Console.WriteLine($"Finished processing '{bulkResourceFileName}'!");
    }
}