Esempio n. 1
0
        public async Task GivenAStorageContainerWithConfig_WhenLoadDefinition_ConfigurationShouldBeReturned()
        {
            if (string.IsNullOrEmpty(_testSettings.AdlsAccountName))
            {
                return;
            }

            var    credential    = Program.GetClientSecretCredential(_testSettings.TenantId, _testSettings.ClientId, _testSettings.Secret);
            string containerName = Guid.NewGuid().ToString("N");

            (int tableCount, int propertiesGroupCount) = await PrepareConfigContainerAsync(credential, containerName);

            string storageServiceUri = $"https://{_testSettings.AdlsAccountName}.blob.core.windows.net";
            var    loader            = new StorageDefinitionLoader(new Uri(storageServiceUri), containerName, credential, 3);
            var    tables            = loader.LoadTableDefinitionsContent();

            Assert.IsTrue(tables.First().Length > 0);
            Assert.AreEqual(tableCount, tables.Count());

            var propertiesGroups = loader.LoadPropertiesGroupsContent();

            Assert.IsTrue(propertiesGroups.First().Length > 0);
            Assert.AreEqual(propertiesGroupCount, propertiesGroups.Count());
        }
Esempio n. 2
0
        static async Task Main(string[] args)
        {
            TransformationLogging.LoggerFactory = LoggerFactory.Create(builder => {
                builder.AddFilter("Microsoft", LogLevel.Warning)
                .AddFilter("System", LogLevel.Warning)
                .AddFilter("Microsoft.Health.Fhir.Transformation", LogLevel.Information)
                .AddConsole();
            });
            System.Net.ServicePointManager.DefaultConnectionLimit = 10 * 1024;

            ILogger logger = TransformationLogging.CreateLogger <Program>();

            var rootCommand = new RootCommand();

            var generateSchemaCommand = new Command("generate-schema")
            {
                new Option <string>("--clientId"),
                new Option <string>("--tenantId"),
                new Option <string>("--adlsAccount"),
                new Option <string>("--cdmFileSystem"),
                new Option <string>("--configurationContainer", getDefaultValue: () => "config"),
                new Option <string>("--clientSecret"),
                new Option <int>("--maxDepth", getDefaultValue: () => 3)
            };

            generateSchemaCommand.Handler = CommandHandler.Create <string, string, string, string, string, string, int>(
                async(clientId, tenantId, adlsAccount, cdmFileSystem, configurationContainer, clientSecret, maxDepth) =>
            {
                logger.LogInformation("Start to generate CDM schema.");
                ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);

                StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
                TabularMappingDefinition[] mappings  = configLoader.Load();

                AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential);
                await sink.InitAsync();
                await sink.CreateFileSystemClientIfNotExistAsync();

                CdmCorpusDefinition defination        = InitAdlscdmCorpusDefinition(adlsAccount, "/" + cdmFileSystem, tenantId, clientId, clientSecret);
                CdmSchemaGenerator cdmSchemaGenerator = new CdmSchemaGenerator(defination);
                List <string> entities = await cdmSchemaGenerator.InitializeCdmFolderAsync(mappings, "adls");

                WriteActivityOutputs(entities);

                logger.LogInformation("Generate CDM schema completed.");
            });
            rootCommand.AddCommand(generateSchemaCommand);

            var transformDataCommand = new Command("transform-data")
            {
                new Option <string>("--clientId"),
                new Option <string>("--tenantId"),
                new Option <string>("--adlsAccount"),
                new Option <string>("--cdmFileSystem"),
                new Option <string>("--inputBlobUri"),
                new Option <string>("--configurationContainer"),
                new Option <string>("--clientSecret"),
                new Option <string>("--operationId"),
                new Option <string>("--maxDepth"),
            };

            Func <string, string, string, string, string, string, string, string, int, Task> transformDataAction =
                async(clientId, tenantId, adlsAccount, cdmFileSystem, inputBlobUri, configurationContainer, operationId, clientSecret, maxDepth) =>
            {
                logger.LogInformation("Start to transform data.");
                ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);

                StorageDefinitionLoader    configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
                TabularMappingDefinition[] mappings     = configLoader.Load();

                Uri     inputUri = new Uri(inputBlobUri);
                ISource source   = new StorageBlobNdjsonSource(inputUri, credential)
                {
                    ConcurrentCount = Environment.ProcessorCount * 2
                };

                string      fileName = Path.GetFileNameWithoutExtension(inputUri.AbsolutePath);
                AdlsCsvSink sink     = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential)
                {
                    CsvFilePath = (string tableName) =>
                    {
                        return($"data/Local{tableName}/partition-data-{tableName}-{fileName}-{operationId}.csv");
                    },
                    ConcurrentCount = Environment.ProcessorCount * 2
                };

                TransformationExecutor executor = new TransformationExecutor(source,
                                                                             sink,
                                                                             mappings,
                                                                             new BasicFhirElementTabularTransformer());
                executor.ConcurrentCount = Environment.ProcessorCount * 2;
                IProgress <(int, int)> progressHandler = new Progress <(int, int)>(progress =>
                {
                    if (progress.Item1 % 100 == 0 || progress.Item2 % 100 == 0)
                    {
                        logger.LogInformation($"({progress.Item1} loaded, {progress.Item2} transformed) to CDM folder. {DateTime.UtcNow.ToLongTimeString()}");
                    }
                });

                await executor.ExecuteAsync(progressHandler);

                logger.LogInformation("Transform data complete.");
            };

            transformDataCommand.Handler = HandlerDescriptor.FromDelegate(transformDataAction).GetCommandHandler();
            rootCommand.AddCommand(transformDataCommand);

            await rootCommand.InvokeAsync(args);
        }