// Integration test: creates a throw-away container, generates a test blob in it and checks
// that StorageBlobNdjsonSource.ReadAsync returns the expected entries one by one, in order.
// The test is a no-op when no blob endpoint is configured in the test settings.
public async Task GivenAStorageBlobNdjsonSource_WhenDownloadData_AllDataShouldbeReturned_Async()
{
    if (string.IsNullOrEmpty(_testSettings.BlobUri))
    {
        return;
    }

    var credential = Program.GetClientSecretCredential(_testSettings.TenantId, _testSettings.ClientId, _testSettings.Secret);

    // Fresh GUID-based names so every run works against its own container and blob.
    string containerName = Guid.NewGuid().ToString("N");
    string blobName = Guid.NewGuid().ToString("N");
    Uri containerUri = new Uri(_testSettings.BlobUri + "/" + containerName);
    Uri docUri = new Uri(_testSettings.BlobUri + "/" + containerName + "/" + blobName);

    BlobContainerClient containerClient = new BlobContainerClient(containerUri, credential);
    var source = new StorageBlobNdjsonSource(docUri, credential);

    try
    {
        // NOTE(review): the source is opened before the container/blob exist; this ordering is
        // kept from the original test - confirm OpenAsync does not need the blob to be present.
        await source.OpenAsync();
        await containerClient.CreateIfNotExistsAsync();

        var blobClient = containerClient.GetBlobClient(blobName);

        // Await instead of blocking on .Result: blocking inside an async test ties up a
        // thread for the whole upload and wraps failures in AggregateException.
        List<string> expectedResult = await GenerateTestBlobAsync(blobClient);

        for (int i = 0; i < expectedResult.Count; ++i)
        {
            var content = await source.ReadAsync();
            Assert.Equal(expectedResult[i], content);
        }
    }
    finally
    {
        // Always release the source and remove the temporary container, even on failure.
        await source.CloseAsync();
        await containerClient.DeleteIfExistsAsync();
    }
}
// Entry point of the transformation command-line tool.
// Configures console logging, then registers two sub-commands on the root command:
//   generate-schema - loads the tabular mapping definitions and initializes the CDM folder.
//   transform-data  - reads an NDJSON blob, transforms it with the mappings and writes CSV
//                     files through AdlsCsvSink.
// args: raw command-line arguments, forwarded to the System.CommandLine parser.
static async Task Main(string[] args)
{
    TransformationLogging.LoggerFactory = LoggerFactory.Create(builder =>
    {
        builder.AddFilter("Microsoft", LogLevel.Warning)
               .AddFilter("System", LogLevel.Warning)
               .AddFilter("Microsoft.Health.Fhir.Transformation", LogLevel.Information)
               .AddConsole();
    });

    // Raise the per-endpoint connection limit well above the default.
    System.Net.ServicePointManager.DefaultConnectionLimit = 10 * 1024;

    ILogger logger = TransformationLogging.CreateLogger<Program>();

    var rootCommand = new RootCommand();

    var generateSchemaCommand = new Command("generate-schema")
    {
        new Option<string>("--clientId"),
        new Option<string>("--tenantId"),
        new Option<string>("--adlsAccount"),
        new Option<string>("--cdmFileSystem"),
        new Option<string>("--configurationContainer", getDefaultValue: () => "config"),
        new Option<string>("--clientSecret"),
        new Option<int>("--maxDepth", getDefaultValue: () => 3)
    };

    generateSchemaCommand.Handler = CommandHandler.Create<string, string, string, string, string, string, int>(
        async (clientId, tenantId, adlsAccount, cdmFileSystem, configurationContainer, clientSecret, maxDepth) =>
        {
            logger.LogInformation("Start to generate CDM schema.");

            ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);
            StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
            TabularMappingDefinition[] mappings = configLoader.Load();

            // The sink is only used here to initialize and ensure the target file system exists.
            AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential);
            await sink.InitAsync();
            await sink.CreateFileSystemClientIfNotExistAsync();

            CdmCorpusDefinition definition = InitAdlscdmCorpusDefinition(adlsAccount, "/" + cdmFileSystem, tenantId, clientId, clientSecret);
            CdmSchemaGenerator cdmSchemaGenerator = new CdmSchemaGenerator(definition);
            List<string> entities = await cdmSchemaGenerator.InitializeCdmFolderAsync(mappings, "adls");

            WriteActivityOutputs(entities);

            logger.LogInformation("Generate CDM schema completed.");
        });

    rootCommand.AddCommand(generateSchemaCommand);

    var transformDataCommand = new Command("transform-data")
    {
        new Option<string>("--clientId"),
        new Option<string>("--tenantId"),
        new Option<string>("--adlsAccount"),
        new Option<string>("--cdmFileSystem"),
        new Option<string>("--inputBlobUri"),
        // Same default as generate-schema so both commands resolve the same container when
        // the option is omitted.
        new Option<string>("--configurationContainer", getDefaultValue: () => "config"),
        new Option<string>("--clientSecret"),
        new Option<string>("--operationId"),
        // Declared as int (it was string) to match the handler's int parameter, with the
        // same default depth as generate-schema.
        new Option<int>("--maxDepth", getDefaultValue: () => 3),
    };

    // The lambda parameter names are kept identical to the option names above; the handler
    // descriptor is built from this delegate, so do not rename them.
    Func<string, string, string, string, string, string, string, string, int, Task> transformDataAction =
        async (clientId, tenantId, adlsAccount, cdmFileSystem, inputBlobUri, configurationContainer, operationId, clientSecret, maxDepth) =>
        {
            logger.LogInformation("Start to transform data.");

            ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);
            StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
            TabularMappingDefinition[] mappings = configLoader.Load();

            Uri inputUri = new Uri(inputBlobUri);
            ISource source = new StorageBlobNdjsonSource(inputUri, credential)
            {
                ConcurrentCount = Environment.ProcessorCount * 2
            };

            // The input blob's file name (without extension) becomes part of each output CSV path.
            string fileName = Path.GetFileNameWithoutExtension(inputUri.AbsolutePath);
            AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential)
            {
                CsvFilePath = (string tableName) => $"data/Local{tableName}/partition-data-{tableName}-{fileName}-{operationId}.csv",
                ConcurrentCount = Environment.ProcessorCount * 2
            };

            TransformationExecutor executor = new TransformationExecutor(source, sink, mappings, new BasicFhirElementTabularTransformer());
            executor.ConcurrentCount = Environment.ProcessorCount * 2;

            // Log progress only when either counter hits a multiple of 100 to keep the log readable.
            IProgress<(int, int)> progressHandler = new Progress<(int, int)>(progress =>
            {
                if (progress.Item1 % 100 == 0 || progress.Item2 % 100 == 0)
                {
                    logger.LogInformation($"({progress.Item1} loaded, {progress.Item2} transformed) to CDM folder. {DateTime.UtcNow.ToLongTimeString()}");
                }
            });

            await executor.ExecuteAsync(progressHandler);

            logger.LogInformation("Transform data complete.");
        };

    transformDataCommand.Handler = HandlerDescriptor.FromDelegate(transformDataAction).GetCommandHandler();
    rootCommand.AddCommand(transformDataCommand);

    // NOTE(review): the exit code returned by InvokeAsync is discarded here, so the process
    // exit code presumably does not reflect handler failures - confirm whether callers need it.
    await rootCommand.InvokeAsync(args);
}
// Integration test: replaces the source stream's download function with one that stalls
// for 10 seconds the first time each block offset is requested, while the source is
// configured with a 5 second block timeout and a retry count of 1. The test then checks that
// all expected entries are still read in order and that every offset was requested exactly twice.
// The test is a no-op when no blob endpoint is configured in the test settings.
public async Task GivenAStorageBlobNdjsonSource_WhenDownloadDataTimeout_OperationShouldBeRetried_Async()
{
    if (string.IsNullOrEmpty(_testSettings.BlobUri))
    {
        return;
    }

    var credential = Program.GetClientSecretCredential(_testSettings.TenantId, _testSettings.ClientId, _testSettings.Secret);

    // Fresh GUID-based names so every run works against its own container and blob.
    string containerName = Guid.NewGuid().ToString("N");
    string blobName = Guid.NewGuid().ToString("N");
    Uri containerUri = new Uri(_testSettings.BlobUri + "/" + containerName);
    Uri docUri = new Uri(_testSettings.BlobUri + "/" + containerName + "/" + blobName);

    BlobContainerClient containerClient = new BlobContainerClient(containerUri, credential);
    var source = new StorageBlobNdjsonSource(docUri, credential);

    try
    {
        await containerClient.CreateIfNotExistsAsync();
        var blobClient = containerClient.GetBlobClient(blobName);

        // Await instead of blocking on .Result inside an async test.
        List<string> expectedResult = await GenerateTestBlobAsync(blobClient);

        // Number of download attempts seen per block offset.
        Dictionary<long, int> enterRecord = new Dictionary<long, int>();

        source.BlockDownloadTimeoutRetryCount = 1;
        source.BlockDownloadTimeoutInSeconds = 5;
        await source.OpenAsync();

        source._stream.DownloadDataFunc = async (client, range) =>
        {
            // Single lookup: a missing offset yields 0 previous attempts.
            enterRecord.TryGetValue(range.Offset, out int previousAttempts);
            enterRecord[range.Offset] = previousAttempts + 1;

            if (previousAttempts < 1)
            {
                // First attempt for this offset: stall longer than the configured timeout.
                // Task.Delay keeps the delegate asynchronous instead of blocking a thread.
                await Task.Delay(TimeSpan.FromSeconds(10));
            }

            var downloadInfo = await client.DownloadAsync(range);
            return downloadInfo.Value.Content;
        };

        for (int i = 0; i < expectedResult.Count; ++i)
        {
            var content = await source.ReadAsync();
            Assert.Equal(expectedResult[i], content);
        }

        // One stalled attempt plus one retry per offset.
        foreach (int count in enterRecord.Values)
        {
            Assert.Equal(2, count);
        }
    }
    finally
    {
        // Always release the source and remove the temporary container, even on failure.
        await source.CloseAsync();
        await containerClient.DeleteIfExistsAsync();
    }
}