/// <summary>
/// Integration test: prepares a freshly named container with configuration content,
/// then verifies that <c>StorageDefinitionLoader</c> returns the expected number of
/// non-empty table definitions and properties groups from it.
/// </summary>
/// <remarks>
/// Returns without asserting anything when no ADLS account name is configured in the
/// test settings.
/// </remarks>
public async Task GivenAStorageContainerWithConfig_WhenLoadDefinition_ConfigurationShouldBeReturned()
{
    // No storage account configured: nothing to test against, so bail out quietly.
    if (string.IsNullOrEmpty(_testSettings.AdlsAccountName))
    {
        return;
    }

    var credential = Program.GetClientSecretCredential(_testSettings.TenantId, _testSettings.ClientId, _testSettings.Secret);

    // A random container name keeps concurrent or repeated runs from colliding.
    string containerName = Guid.NewGuid().ToString("N");
    (int tableCount, int propertiesGroupCount) = await PrepareConfigContainerAsync(credential, containerName);

    string storageServiceUri = $"https://{_testSettings.AdlsAccountName}.blob.core.windows.net";
    var loader = new StorageDefinitionLoader(new Uri(storageServiceUri), containerName, credential, 3);

    // Materialize once: the loader's result may be a deferred sequence, and enumerating it
    // twice (First + Count) would repeat the underlying reads.
    var tables = loader.LoadTableDefinitionsContent().ToList();

    // Check the count first so an empty result fails as an assertion rather than as an
    // InvalidOperationException thrown by First().
    Assert.AreEqual(tableCount, tables.Count);
    Assert.IsTrue(tables.First().Length > 0);

    var propertiesGroups = loader.LoadPropertiesGroupsContent().ToList();
    Assert.AreEqual(propertiesGroupCount, propertiesGroups.Count);
    Assert.IsTrue(propertiesGroups.First().Length > 0);
}
/// <summary>
/// Command-line entry point. Configures console logging, registers the
/// <c>generate-schema</c> and <c>transform-data</c> sub-commands, and invokes the
/// command selected by <paramref name="args"/>.
/// </summary>
/// <param name="args">Raw command-line arguments passed to the process.</param>
/// <returns>A task that completes when the invoked command has finished.</returns>
static async Task Main(string[] args)
{
    TransformationLogging.LoggerFactory = LoggerFactory.Create(builder =>
    {
        builder.AddFilter("Microsoft", LogLevel.Warning)
               .AddFilter("System", LogLevel.Warning)
               .AddFilter("Microsoft.Health.Fhir.Transformation", LogLevel.Information)
               .AddConsole();
    });

    System.Net.ServicePointManager.DefaultConnectionLimit = 10 * 1024;
    ILogger logger = TransformationLogging.CreateLogger<Program>();

    var rootCommand = new RootCommand();

    // generate-schema: load the mapping definitions and write the CDM schema to ADLS.
    var generateSchemaCommand = new Command("generate-schema")
    {
        new Option<string>("--clientId"),
        new Option<string>("--tenantId"),
        new Option<string>("--adlsAccount"),
        new Option<string>("--cdmFileSystem"),
        new Option<string>("--configurationContainer", getDefaultValue: () => "config"),
        new Option<string>("--clientSecret"),
        new Option<int>("--maxDepth", getDefaultValue: () => 3)
    };
    generateSchemaCommand.Handler = CommandHandler.Create<string, string, string, string, string, string, int>(
        async (clientId, tenantId, adlsAccount, cdmFileSystem, configurationContainer, clientSecret, maxDepth) =>
        {
            logger.LogInformation("Start to generate CDM schema.");

            ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);
            StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
            TabularMappingDefinition[] mappings = configLoader.Load();

            AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential);
            await sink.InitAsync();
            await sink.CreateFileSystemClientIfNotExistAsync();

            CdmCorpusDefinition corpusDefinition = InitAdlscdmCorpusDefinition(adlsAccount, "/" + cdmFileSystem, tenantId, clientId, clientSecret);
            CdmSchemaGenerator cdmSchemaGenerator = new CdmSchemaGenerator(corpusDefinition);
            List<string> entities = await cdmSchemaGenerator.InitializeCdmFolderAsync(mappings, "adls");

            WriteActivityOutputs(entities);
            logger.LogInformation("Generate CDM schema completed.");
        });
    rootCommand.AddCommand(generateSchemaCommand);

    // transform-data: read the input blob, transform it with the mappings, write CSV to ADLS.
    var transformDataCommand = new Command("transform-data")
    {
        new Option<string>("--clientId"),
        new Option<string>("--tenantId"),
        new Option<string>("--adlsAccount"),
        new Option<string>("--cdmFileSystem"),
        new Option<string>("--inputBlobUri"),
        // Same default as generate-schema so both commands read the same container when the option is omitted.
        new Option<string>("--configurationContainer", getDefaultValue: () => "config"),
        new Option<string>("--clientSecret"),
        new Option<string>("--operationId"),
        // Declared as int with the same default as generate-schema: the handler parameter is an int,
        // and data should be transformed with the depth the schema was generated with.
        new Option<int>("--maxDepth", getDefaultValue: () => 3),
    };

    // The delegate is handed to HandlerDescriptor.FromDelegate below; parameter names mirror the option names.
    Func<string, string, string, string, string, string, string, string, int, Task> transformDataAction =
        async (clientId, tenantId, adlsAccount, cdmFileSystem, inputBlobUri, configurationContainer, operationId, clientSecret, maxDepth) =>
        {
            logger.LogInformation("Start to transform data.");

            ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);
            StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
            TabularMappingDefinition[] mappings = configLoader.Load();

            // Source, sink and executor all use the same degree of concurrency.
            int concurrency = Environment.ProcessorCount * 2;

            Uri inputUri = new Uri(inputBlobUri);
            ISource source = new StorageBlobNdjsonSource(inputUri, credential)
            {
                ConcurrentCount = concurrency
            };

            // The input file name and operation id are embedded in each output partition file name.
            string fileName = Path.GetFileNameWithoutExtension(inputUri.AbsolutePath);
            AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential)
            {
                CsvFilePath = (string tableName) => $"data/Local{tableName}/partition-data-{tableName}-{fileName}-{operationId}.csv",
                ConcurrentCount = concurrency
            };

            TransformationExecutor executor = new TransformationExecutor(source, sink, mappings, new BasicFhirElementTabularTransformer());
            executor.ConcurrentCount = concurrency;

            IProgress<(int, int)> progressHandler = new Progress<(int, int)>(progress =>
            {
                // Only log when either counter is a multiple of 100, to keep the output readable.
                if (progress.Item1 % 100 == 0 || progress.Item2 % 100 == 0)
                {
                    logger.LogInformation($"({progress.Item1} loaded, {progress.Item2} transformed) to CDM folder. {DateTime.UtcNow.ToLongTimeString()}");
                }
            });

            await executor.ExecuteAsync(progressHandler);
            logger.LogInformation("Transform data complete.");
        };
    transformDataCommand.Handler = HandlerDescriptor.FromDelegate(transformDataAction).GetCommandHandler();
    rootCommand.AddCommand(transformDataCommand);

    // InvokeAsync returns the command's exit code; expose it as the process exit code so
    // callers can detect parse errors and failed handlers instead of always seeing 0.
    Environment.ExitCode = await rootCommand.InvokeAsync(args);
}