/// <summary>
/// Runs the JSON anonymization over every blob listed under <paramref name="inputBlobPrefix"/> in
/// <paramref name="inputContainer"/> whose name is accepted by <c>IsInputFileInJsonFormat</c>,
/// writing each result to a blob in <paramref name="outputContainer"/>.
/// </summary>
/// <param name="inputData">Provides the source/destination storage connection strings and the <c>SkipExistedFile</c> flag.</param>
/// <param name="inputContainer">Container whose blobs are listed and read.</param>
/// <param name="outputContainer">Container that receives the anonymized blobs.</param>
/// <param name="inputBlobPrefix">Prefix used to list input blobs; also passed to <c>GetOutputBlobName</c>.</param>
/// <param name="outputBlobPrefix">Prefix passed to <c>GetOutputBlobName</c> when deriving the output blob name.</param>
/// <returns>A task that completes when the partitioned executor has finished.</returns>
private async Task AnonymizeBlobsInJsonFormat(ActivityInputData inputData, BlobContainerClient inputContainer, BlobContainerClient outputContainer, string inputBlobPrefix, string outputBlobPrefix)
        {
            // Blobs below the input prefix, narrowed to the ones IsInputFileInJsonFormat accepts.
            var jsonBlobs = inputContainer
                .GetBlobs(BlobTraits.None, BlobStates.None, inputBlobPrefix, default)
                .Where(item => IsInputFileInJsonFormat(item.Name));

            var blobReader = new FhirEnumerableReader<BlobItem>(jsonBlobs);

            // Per-blob work item handed to the executor. The returned string is always empty.
            Func<BlobItem, Task<string>> processBlobAsync = async item =>
            {
                string targetBlobName = GetOutputBlobName(item.Name, inputBlobPrefix, outputBlobPrefix);
                Console.WriteLine($"[{item.Name}]:Processing... output to container '{outputContainer.Name}'");

                var sourceClient = new BlobClient(inputData.SourceStorageConnectionString, inputContainer.Name, item.Name, BlobClientOptions.Value);
                var targetClient = new BlockBlobClient(inputData.DestinationStorageConnectionString, outputContainer.Name, targetBlobName, BlobClientOptions.Value);

                // The existence check is only issued when skipping is enabled (short-circuit &&).
                bool skipBlob = inputData.SkipExistedFile && await targetClient.ExistsAsync().ConfigureAwait(false);

                if (skipBlob)
                {
                    Console.WriteLine($"[{item.Name}]:'{targetBlobName}' already exist. Skip");
                }
                else
                {
                    // Remove any previous output before writing the new one.
                    await targetClient.DeleteIfExistsAsync().ConfigureAwait(false);
                    await AnonymizeSingleBlobInJsonFormatAsync(sourceClient, targetClient, item.Name, inputBlobPrefix).ConfigureAwait(false);
                }

                return string.Empty;
            };

            // One blob per batch, two partitions per logical processor.
            var blobExecutor = new FhirPartitionedExecutor<BlobItem, string>(blobReader, null, processBlobAsync)
            {
                PartitionCount = Environment.ProcessorCount * 2,
                BatchSize = 1
            };

            await blobExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false);
        }
// Example #2
// 0
        /// <summary>
        /// Reads a <c>FhirEnumerableReader</c> built over the strings "0".."986" until
        /// <c>NextAsync</c> returns <c>null</c>, asserting that each item arrives in source order
        /// and that exactly <c>end</c> items were produced.
        /// </summary>
        public async Task GivenAFhirEnumerableReader_ReadData_ResultShouldBeReturnedInOrder()
        {
            int end    = 987;

            // The lambda parameter is deliberately not named 'i', so it cannot clash with the loop counter below.
            var reader = new FhirEnumerableReader<string>(Enumerable.Range(0, end).Select(value => value.ToString()));

            int    i        = 0;
            string nextLine = null;

            while ((nextLine = await reader.NextAsync()) != null)
            {
                // Assert.Equal takes (expected, actual); keeping that order makes failure messages accurate.
                Assert.Equal(i.ToString(), nextLine);
                i++;
            }

            // The number of items read must match the size of the source sequence.
            Assert.Equal(end, i);
        }
        /// <summary>
        /// Enumerates the <c>*.json</c> files in the input folder (recursively when
        /// <c>_options.IsRecursive</c> is set) and runs <c>FileAnonymize</c> on each one through a
        /// partitioned executor, printing running completed/failed totals to the console.
        /// </summary>
        /// <returns>A task that completes when the executor has finished.</returns>
        public async Task AnonymizeAsync()
        {
            var directorySearchOption = _options.IsRecursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
            var resourceFileList      = Directory.EnumerateFiles(_inputFolder, "*.json", directorySearchOption).ToList();

            // List<T>.Count is a property; no need for the LINQ Count() extension.
            Console.WriteLine($"Find {resourceFileList.Count} json resource files in '{_inputFolder}'.");

            FhirEnumerableReader<string>            reader   = new FhirEnumerableReader<string>(resourceFileList);
            FhirPartitionedExecutor<string, string> executor = new FhirPartitionedExecutor<string, string>(reader, null)
            {
                KeepOrder      = false,
                BatchSize      = 1,
                PartitionCount = Environment.ProcessorCount * 2
            };

            executor.AnonymizerFunctionAsync = async file =>
            {
                try
                {
                    return await FileAnonymize(file).ConfigureAwait(false);
                }
                catch (Exception ex)
                {
                    // Log which file failed, then rethrow with the original stack trace.
                    Console.Error.WriteLine($"Error:\nResource: {file}\nErrorMessage: {ex}");
                    throw;
                }
            };

            Stopwatch stopWatch = Stopwatch.StartNew();

            int completedCount = 0;
            int failedCount    = 0;
            Progress<BatchAnonymizeProgressDetail> progress = new Progress<BatchAnonymizeProgressDetail>();

            progress.ProgressChanged += (obj, args) =>
            {
                // Progress<T> can invoke handlers concurrently when no SynchronizationContext was captured,
                // so print the totals returned by Interlocked.Add rather than re-reading the shared
                // counters without synchronization.
                int completedSoFar = Interlocked.Add(ref completedCount, args.ProcessCompleted);
                int failedSoFar    = Interlocked.Add(ref failedCount, args.ProcessFailed);

                Console.WriteLine($"[{stopWatch.Elapsed.ToString()}][tid:{args.CurrentThreadId}]: {completedSoFar} Process completed. {failedSoFar} Process failed.");
            };

            await executor.ExecuteAsync(cancellationToken : CancellationToken.None, false, progress).ConfigureAwait(false);
        }