// End-to-end test: loads the mapping definitions from the test schema folder,
// initializes a local CDM folder from them, runs the transformation over the
// NDJSON test resources and then verifies that
//   1. the expected CDM schema files exist,
//   2. the per-table data directories exist,
//   3. every generated CSV has the same content as its ground-truth file.
public async Task GivenConfigurationSetAndFhirResources_WhenTransform_CdmFolderShouldBeGenerated()
        {
            // A fresh GUID-based folder name is used as the output location for this run.
            string cdmFolder = Guid.NewGuid().ToString("N");

            // Path.Combine keeps the schema path valid on platforms where '\' is not a separator.
            BaseMappingDefinitionLoader loader = new LocalMappingDefinitionLoader(Path.Combine("TestResource", "testSchema"));
            IEnumerable<TabularMappingDefinition> tabularMappings = loader.Load();
            CdmCorpusDefinition definition = CdmSchemaGenerator.InitLocalcdmCorpusDefinition(cdmFolder);

            CdmSchemaGenerator cdmSchemaGenerator = new CdmSchemaGenerator(definition);
            await cdmSchemaGenerator.InitializeCdmFolderAsync(tabularMappings);

            ISource source = new LocalNdjsonSource(Path.Combine("TestResource", "FhirResource"));
            ISink sink = new LocalCsvSink(cdmFolder)
            {
                // Relative CSV path used for each table.
                CsvFilePath = (string tableName) => $"data/Local{tableName}/{tableName}-partition-data.csv"
            };
            var transformer = new BasicFhirElementTabularTransformer();

            // The id generator is pinned to a constant value.
            // NOTE(review): presumably this keeps generated ids deterministic so the output
            // can be compared with the ground-truth files; the static hook is not restored
            // afterwards - confirm this does not leak into other tests.
            FhirElementTabularTransformer.IdGenerator = () => "0000";

            TransformationExecutor executor = new TransformationExecutor(source, sink, tabularMappings, transformer);

            await executor.ExecuteAsync();

            // The CDM schema files must have been generated.
            Assert.IsTrue(File.Exists(Path.Combine(cdmFolder, "default.manifest.cdm.json")));
            Assert.IsTrue(File.Exists(Path.Combine(cdmFolder, "LocalPatient.cdm.json")));
            Assert.IsTrue(File.Exists(Path.Combine(cdmFolder, "LocalPatientName.cdm.json")));

            // The flattened data directories must have been generated.
            Assert.IsTrue(Directory.Exists(Path.Combine(cdmFolder, "data", "LocalPatient")));
            Assert.IsTrue(Directory.Exists(Path.Combine(cdmFolder, "data", "LocalPatientName")));

            // The generated data must match the ground truth for every table.
            string[] tableNames =
            {
                "Patient",
                "AllergyIntolerance",
                "CarePlan",
                "Encounter",
                "Location",
                "Observation",

                "PatientName",
                "PatientFlattenJson",
                "EncounterClass",
                "CarePlanPeriod"
            };

            foreach (var tableName in tableNames)
            {
                var sourceFilePath = Path.Combine("TestResource", "TestOutput", $"Local{tableName}", $"{tableName}-partition-data.csv");

                // cdmFolder is already a string; wrapping it in an interpolated verbatim
                // string was redundant (and malformed as written).
                var targetFilePath = Path.Combine(cdmFolder, "data", $"Local{tableName}", $"{tableName}-partition-data.csv");
                Assert.IsTrue(CheckSameContent(sourceFilePath, targetFilePath), $"{sourceFilePath} and {targetFilePath} are different.");
            }
        }
Exemple #2
0
        // Command-line entry point for the local-folder workflow.
        // Options: --config, --input, --output and --maxDepth (default 3).
        // The handler loads the mapping definitions, initializes the CDM folder in the
        // output location and then transforms the NDJSON input into per-table CSV files.
        static async Task Main(string[] args)
        {
            TransformationLogging.LoggerFactory = LoggerFactory.Create(builder =>
            {
                builder.AddFilter("Microsoft", LogLevel.Warning)
                       .AddFilter("System", LogLevel.Warning)
                       .AddFilter("Microsoft.Health.Fhir.Transformation", LogLevel.Information)
                       .AddConsole();
            });

            ILogger logger = TransformationLogging.CreateLogger<Program>();

            var rootCommand = new RootCommand()
            {
                new Option<string>("--config"),
                new Option<string>("--input"),
                new Option<string>("--output"),
                new Option<int>("--maxDepth", getDefaultValue: () => 3),
            };

            rootCommand.Handler = CommandHandler.Create(
                new Func<string, string, string, int, Task>(async (config, input, output, maxDepth) =>
            {
                LocalMappingDefinitionLoader configLoader = new LocalMappingDefinitionLoader(config, maxDepth);
                TabularMappingDefinition[] mappings = configLoader.Load();
                FhirElementTabularTransformer transformer = new BasicFhirElementTabularTransformer();

                logger.LogInformation("Start to generate CDM schema.");
                CdmCorpusDefinition definition = CdmSchemaGenerator.InitLocalcdmCorpusDefinition(output);
                CdmSchemaGenerator cdmSchemaGenerator = new CdmSchemaGenerator(definition);

                // Await instead of blocking with Wait(): the handler is already async, and
                // awaiting surfaces the original exception rather than an AggregateException.
                await cdmSchemaGenerator.InitializeCdmFolderAsync(mappings);
                logger.LogInformation("Generate CDM schema completed.");

                // A new operation id per run is embedded in every CSV file name.
                string operationId = Guid.NewGuid().ToString("N");
                ISource source = new LocalNdjsonSource(input);
                ISink sink = new LocalCsvSink(output)
                {
                    CsvFilePath = (string tableName) =>
                    {
                        return $"data/Local{tableName}/partition-data-{operationId}.csv";
                    }
                };

                logger.LogInformation("Start to transform data.");

                // Log progress only when one of the two counters is a multiple of 100.
                IProgress<(int, int)> progressHandler = new Progress<(int, int)>(progress =>
                {
                    if (progress.Item1 % 100 == 0 || progress.Item2 % 100 == 0)
                    {
                        logger.LogInformation($"({progress.Item1} loaded, {progress.Item2} transformed) to CDM folder. {DateTime.UtcNow.ToLongTimeString()}");
                    }
                });

                TransformationExecutor executor = new TransformationExecutor(source, sink, mappings, transformer);
                await executor.ExecuteAsync(progressHandler);
                logger.LogInformation("Complete to transform data.");
            }));

            await rootCommand.InvokeAsync(args);
        }
Exemple #3
0
        // Command-line entry point for the ADLS workflow. Registers two sub-commands:
        //   generate-schema - loads the mapping definitions from storage, initializes the
        //                     sink and generates the CDM folder, then writes the entity
        //                     list via WriteActivityOutputs.
        //   transform-data  - loads the same mapping definitions and transforms one input
        //                     blob into per-table CSV files written through AdlsCsvSink.
        static async Task Main(string[] args)
        {
            TransformationLogging.LoggerFactory = LoggerFactory.Create(builder =>
            {
                builder.AddFilter("Microsoft", LogLevel.Warning)
                       .AddFilter("System", LogLevel.Warning)
                       .AddFilter("Microsoft.Health.Fhir.Transformation", LogLevel.Information)
                       .AddConsole();
            });
            System.Net.ServicePointManager.DefaultConnectionLimit = 10 * 1024;

            ILogger logger = TransformationLogging.CreateLogger<Program>();

            var rootCommand = new RootCommand();

            var generateSchemaCommand = new Command("generate-schema")
            {
                new Option<string>("--clientId"),
                new Option<string>("--tenantId"),
                new Option<string>("--adlsAccount"),
                new Option<string>("--cdmFileSystem"),
                new Option<string>("--configurationContainer", getDefaultValue: () => "config"),
                new Option<string>("--clientSecret"),
                new Option<int>("--maxDepth", getDefaultValue: () => 3)
            };

            generateSchemaCommand.Handler = CommandHandler.Create<string, string, string, string, string, string, int>(
                async (clientId, tenantId, adlsAccount, cdmFileSystem, configurationContainer, clientSecret, maxDepth) =>
            {
                logger.LogInformation("Start to generate CDM schema.");
                ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);

                StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
                TabularMappingDefinition[] mappings = configLoader.Load();

                AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential);
                await sink.InitAsync();
                await sink.CreateFileSystemClientIfNotExistAsync();

                CdmCorpusDefinition definition = InitAdlscdmCorpusDefinition(adlsAccount, "/" + cdmFileSystem, tenantId, clientId, clientSecret);
                CdmSchemaGenerator cdmSchemaGenerator = new CdmSchemaGenerator(definition);
                List<string> entities = await cdmSchemaGenerator.InitializeCdmFolderAsync(mappings, "adls");

                WriteActivityOutputs(entities);

                logger.LogInformation("Generate CDM schema completed.");
            });
            rootCommand.AddCommand(generateSchemaCommand);

            var transformDataCommand = new Command("transform-data")
            {
                new Option<string>("--clientId"),
                new Option<string>("--tenantId"),
                new Option<string>("--adlsAccount"),
                new Option<string>("--cdmFileSystem"),
                new Option<string>("--inputBlobUri"),
                new Option<string>("--configurationContainer"),
                new Option<string>("--clientSecret"),
                new Option<string>("--operationId"),

                // Declared as int with default 3 so that it matches both the handler's
                // int parameter and the generate-schema command; otherwise the two
                // commands could load the mapping definitions with different depths.
                new Option<int>("--maxDepth", getDefaultValue: () => 3),
            };

            // NOTE(review): the parameter order here (operationId before clientSecret) differs
            // from the option declaration order; this presumably relies on the command-line
            // library binding handler parameters by name - confirm.
            Func<string, string, string, string, string, string, string, string, int, Task> transformDataAction =
                async (clientId, tenantId, adlsAccount, cdmFileSystem, inputBlobUri, configurationContainer, operationId, clientSecret, maxDepth) =>
            {
                logger.LogInformation("Start to transform data.");
                ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);

                StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
                TabularMappingDefinition[] mappings = configLoader.Load();

                Uri inputUri = new Uri(inputBlobUri);
                ISource source = new StorageBlobNdjsonSource(inputUri, credential)
                {
                    ConcurrentCount = Environment.ProcessorCount * 2
                };

                // The input blob's file name (without extension) becomes part of every
                // output CSV name, together with the table name and the operation id.
                string fileName = Path.GetFileNameWithoutExtension(inputUri.AbsolutePath);
                AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential)
                {
                    CsvFilePath = (string tableName) =>
                    {
                        return $"data/Local{tableName}/partition-data-{tableName}-{fileName}-{operationId}.csv";
                    },
                    ConcurrentCount = Environment.ProcessorCount * 2
                };

                TransformationExecutor executor = new TransformationExecutor(source,
                                                                             sink,
                                                                             mappings,
                                                                             new BasicFhirElementTabularTransformer());
                executor.ConcurrentCount = Environment.ProcessorCount * 2;

                // Log progress only when one of the two counters is a multiple of 100.
                IProgress<(int, int)> progressHandler = new Progress<(int, int)>(progress =>
                {
                    if (progress.Item1 % 100 == 0 || progress.Item2 % 100 == 0)
                    {
                        logger.LogInformation($"({progress.Item1} loaded, {progress.Item2} transformed) to CDM folder. {DateTime.UtcNow.ToLongTimeString()}");
                    }
                });

                await executor.ExecuteAsync(progressHandler);

                logger.LogInformation("Transform data complete.");
            };

            transformDataCommand.Handler = HandlerDescriptor.FromDelegate(transformDataAction).GetCommandHandler();
            rootCommand.AddCommand(transformDataCommand);

            await rootCommand.InvokeAsync(args);
        }
        public async Task GivenListOfContent_WhenTransform_ResultShouldBeReturnedInOrder()
        {
            const int testResourceCount = 100000;
            const int tableCount        = 10;
            const int itemCount         = 14;

            int  itemLoaded   = 0;
            bool sourceOpened = false;
            bool sourceClosed = false;
            var  source       = new TestSource();

            source.OpenInternalAsync  = () => { sourceOpened = true; return(Task.CompletedTask); };
            source.CloseInternalAsync = () => { sourceClosed = true; return(Task.CompletedTask); };
            source.ReadInternalAsync  = () =>
            {
                if (itemLoaded >= testResourceCount)
                {
                    return(Task.FromResult <string>(null));
                }

                return(Task.FromResult <string>($"{itemLoaded++}"));
            };

            bool          sinkInit     = false;
            bool          sinkComplete = false;
            List <string> itemRecords  = new List <string>();
            var           sink         = new TestSink();

            sink.InitInternalAsync  = () => { sinkInit = true; return(Task.CompletedTask); };
            sink.WriteInternalAsync = (tableName, columns, entity) =>
            {
                itemRecords.Add($"{tableName}_{entity["index"].valueObj}");
                return(Task.CompletedTask);
            };
            sink.CompleteInternalAsync = () =>
            {
                Assert.AreEqual(testResourceCount * tableCount * itemCount, itemRecords.Count);

                int index      = 0;
                int tableIndex = 0;
                int itemIndex  = 0;

                for (; index < testResourceCount; ++index)
                {
                    for (; tableIndex < tableCount; ++tableIndex)
                    {
                        for (; itemIndex < itemCount; ++itemIndex)
                        {
                            int actualIndex = index * tableCount * itemCount + tableIndex * itemCount + itemIndex;
                            Assert.AreEqual($"{index}_{tableIndex}_{itemIndex}", itemRecords[actualIndex]);
                        }
                    }
                }

                sinkComplete = true;
                return(Task.CompletedTask);
            };

            TransformationExecutor executor = new TransformationExecutor(source, sink, null, null);

            executor.TransformToTabularEntitiesAsync =
                async(resourceContent, parser, tabularMappings, transformer) =>
            {
                var result = new List <(string tableName, string[] columns, Dictionary <string, (object, object)>[] entities)>();