Ejemplo n.º 1
0
        /// <summary>
        /// Calls <c>GenerateTestBlobAsync</c> for a blob in a freshly created container and checks
        /// that successive <c>StorageBlobNdjsonSource.ReadAsync</c> calls return the generated
        /// entries one by one, in order.
        /// </summary>
        /// <remarks>
        /// Returns without asserting anything when <c>_testSettings.BlobUri</c> is null or empty.
        /// The source is closed and the container deleted in the <c>finally</c> block, so cleanup
        /// runs whether or not the assertions pass.
        /// </remarks>
        /// <returns>A task that completes when the test has finished.</returns>
        public async Task GivenAStorageBlobNdjsonSource_WhenDownloadData_AllDataShouldbeReturned_Async()
        {
            // No blob endpoint configured: there is nothing to test against.
            if (string.IsNullOrEmpty(_testSettings.BlobUri))
            {
                return;
            }

            var credential = Program.GetClientSecretCredential(_testSettings.TenantId, _testSettings.ClientId, _testSettings.Secret);

            // Fresh GUID-based names give every run its own container and blob.
            string containerName = Guid.NewGuid().ToString("N");
            string blobName = Guid.NewGuid().ToString("N");

            Uri containerUri = new Uri(_testSettings.BlobUri + "/" + containerName);
            Uri docUri = new Uri(_testSettings.BlobUri + "/" + containerName + "/" + blobName);
            BlobContainerClient containerClient = new BlobContainerClient(containerUri, credential);
            var source = new StorageBlobNdjsonSource(docUri, credential);

            try
            {
                await source.OpenAsync();

                await containerClient.CreateIfNotExistsAsync();

                var blobClient = containerClient.GetBlobClient(blobName);

                // Await instead of blocking on .Result: blocking ties up a thread for the whole
                // operation and wraps any failure in an AggregateException.
                List<string> expectedResult = await GenerateTestBlobAsync(blobClient);

                for (int i = 0; i < expectedResult.Count; ++i)
                {
                    var content = await source.ReadAsync();

                    Assert.Equal(expectedResult[i], content);
                }
            }
            finally
            {
                await source.CloseAsync();

                await containerClient.DeleteIfExistsAsync();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Program entry point. Configures console logging, registers the <c>generate-schema</c>
        /// and <c>transform-data</c> commands on a root command, and invokes that root command
        /// with <paramref name="args"/>.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>A task that completes when the invoked command has finished.</returns>
        static async Task Main(string[] args)
        {
            TransformationLogging.LoggerFactory = LoggerFactory.Create(builder =>
            {
                builder.AddFilter("Microsoft", LogLevel.Warning)
                       .AddFilter("System", LogLevel.Warning)
                       .AddFilter("Microsoft.Health.Fhir.Transformation", LogLevel.Information)
                       .AddConsole();
            });
            System.Net.ServicePointManager.DefaultConnectionLimit = 10 * 1024;

            ILogger logger = TransformationLogging.CreateLogger<Program>();

            var rootCommand = new RootCommand();

            var generateSchemaCommand = new Command("generate-schema")
            {
                new Option<string>("--clientId"),
                new Option<string>("--tenantId"),
                new Option<string>("--adlsAccount"),
                new Option<string>("--cdmFileSystem"),
                new Option<string>("--configurationContainer", getDefaultValue: () => "config"),
                new Option<string>("--clientSecret"),
                new Option<int>("--maxDepth", getDefaultValue: () => 3)
            };

            generateSchemaCommand.Handler = CommandHandler.Create<string, string, string, string, string, string, int>(
                async (clientId, tenantId, adlsAccount, cdmFileSystem, configurationContainer, clientSecret, maxDepth) =>
            {
                logger.LogInformation("Start to generate CDM schema.");
                ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);

                StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
                TabularMappingDefinition[] mappings = configLoader.Load();

                AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential);
                await sink.InitAsync();
                await sink.CreateFileSystemClientIfNotExistAsync();

                CdmCorpusDefinition corpusDefinition = InitAdlscdmCorpusDefinition(adlsAccount, "/" + cdmFileSystem, tenantId, clientId, clientSecret);
                CdmSchemaGenerator cdmSchemaGenerator = new CdmSchemaGenerator(corpusDefinition);
                List<string> entities = await cdmSchemaGenerator.InitializeCdmFolderAsync(mappings, "adls");

                WriteActivityOutputs(entities);

                logger.LogInformation("Generate CDM schema completed.");
            });
            rootCommand.AddCommand(generateSchemaCommand);

            var transformDataCommand = new Command("transform-data")
            {
                new Option<string>("--clientId"),
                new Option<string>("--tenantId"),
                new Option<string>("--adlsAccount"),
                new Option<string>("--cdmFileSystem"),
                new Option<string>("--inputBlobUri"),
                new Option<string>("--configurationContainer"),
                new Option<string>("--clientSecret"),
                new Option<string>("--operationId"),

                // Declared as int to match the handler's int parameter, with the same default
                // as generate-schema so both commands pass the same depth to
                // StorageDefinitionLoader when the flag is omitted.
                new Option<int>("--maxDepth", getDefaultValue: () => 3),
            };

            // The lambda's parameter order differs from the option order above; the handler is
            // built from the delegate's parameter names, so the names (not positions) must match
            // the option names.
            Func<string, string, string, string, string, string, string, string, int, Task> transformDataAction =
                async (clientId, tenantId, adlsAccount, cdmFileSystem, inputBlobUri, configurationContainer, operationId, clientSecret, maxDepth) =>
            {
                logger.LogInformation("Start to transform data.");
                ClientSecretCredential credential = GetClientSecretCredential(tenantId, clientId, clientSecret);

                StorageDefinitionLoader configLoader = new StorageDefinitionLoader(GetStorageServiceEndpoint(adlsAccount), configurationContainer, credential, maxDepth);
                TabularMappingDefinition[] mappings = configLoader.Load();

                // Source, sink and executor all use the same concurrency setting.
                int concurrentCount = Environment.ProcessorCount * 2;

                Uri inputUri = new Uri(inputBlobUri);
                ISource source = new StorageBlobNdjsonSource(inputUri, credential)
                {
                    ConcurrentCount = concurrentCount
                };

                // The output CSV path embeds the table name, the input file name (without
                // extension) and the operation id.
                string fileName = Path.GetFileNameWithoutExtension(inputUri.AbsolutePath);
                AdlsCsvSink sink = new AdlsCsvSink(adlsAccount, cdmFileSystem, credential)
                {
                    CsvFilePath = (string tableName) =>
                    {
                        return $"data/Local{tableName}/partition-data-{tableName}-{fileName}-{operationId}.csv";
                    },
                    ConcurrentCount = concurrentCount
                };

                TransformationExecutor executor = new TransformationExecutor(source,
                                                                             sink,
                                                                             mappings,
                                                                             new BasicFhirElementTabularTransformer());
                executor.ConcurrentCount = concurrentCount;

                // Log only when either counter is a multiple of 100 to keep the output readable.
                IProgress<(int, int)> progressHandler = new Progress<(int, int)>(progress =>
                {
                    if (progress.Item1 % 100 == 0 || progress.Item2 % 100 == 0)
                    {
                        logger.LogInformation($"({progress.Item1} loaded, {progress.Item2} transformed) to CDM folder. {DateTime.UtcNow.ToLongTimeString()}");
                    }
                });

                await executor.ExecuteAsync(progressHandler);

                logger.LogInformation("Transform data complete.");
            };

            transformDataCommand.Handler = HandlerDescriptor.FromDelegate(transformDataAction).GetCommandHandler();
            rootCommand.AddCommand(transformDataCommand);

            // InvokeAsync returns the command's exit code. Publishing it through
            // Environment.ExitCode lets the calling process observe a non-zero result instead of
            // always seeing 0; Main's signature stays unchanged.
            Environment.ExitCode = await rootCommand.InvokeAsync(args);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Replaces the source's block download delegate with one that delays the first attempt
        /// for each offset by 10 seconds (longer than the configured 5-second block timeout),
        /// then checks that all expected entries are still read in order and that every offset
        /// was requested exactly twice.
        /// </summary>
        /// <remarks>
        /// Returns without asserting anything when <c>_testSettings.BlobUri</c> is null or empty.
        /// The source is closed and the container deleted in the <c>finally</c> block, so cleanup
        /// runs whether or not the assertions pass.
        /// </remarks>
        /// <returns>A task that completes when the test has finished.</returns>
        public async Task GivenAStorageBlobNdjsonSource_WhenDownloadDataTimeout_OperationShouldBeRetried_Async()
        {
            // No blob endpoint configured: there is nothing to test against.
            if (string.IsNullOrEmpty(_testSettings.BlobUri))
            {
                return;
            }

            var credential = Program.GetClientSecretCredential(_testSettings.TenantId, _testSettings.ClientId, _testSettings.Secret);

            // Fresh GUID-based names give every run its own container and blob.
            string containerName = Guid.NewGuid().ToString("N");
            string blobName = Guid.NewGuid().ToString("N");

            Uri containerUri = new Uri(_testSettings.BlobUri + "/" + containerName);
            Uri docUri = new Uri(_testSettings.BlobUri + "/" + containerName + "/" + blobName);
            BlobContainerClient containerClient = new BlobContainerClient(containerUri, credential);
            var source = new StorageBlobNdjsonSource(docUri, credential);

            try
            {
                await containerClient.CreateIfNotExistsAsync();

                var blobClient = containerClient.GetBlobClient(blobName);

                // Await instead of blocking on .Result: blocking ties up a thread for the whole
                // operation and wraps any failure in an AggregateException.
                List<string> expectedResult = await GenerateTestBlobAsync(blobClient);

                // Number of download attempts seen per block offset.
                Dictionary<long, int> enterRecord = new Dictionary<long, int>();
                source.BlockDownloadTimeoutRetryCount = 1;
                source.BlockDownloadTimeoutInSeconds = 5;
                await source.OpenAsync();

                source._stream.DownloadDataFunc = async (client, range) =>
                {
                    bool isFirstAttempt;

                    // A retry enters this delegate while the first attempt is still pending, and
                    // the source may also download several blocks at once (NOTE(review): confirm),
                    // so the plain Dictionary is guarded by a lock.
                    lock (enterRecord)
                    {
                        enterRecord.TryGetValue(range.Offset, out int previousAttempts);
                        isFirstAttempt = previousAttempts < 1;
                        enterRecord[range.Offset] = previousAttempts + 1;
                    }

                    if (isFirstAttempt)
                    {
                        // Outlast the 5-second timeout without blocking a thread.
                        await Task.Delay(TimeSpan.FromSeconds(10));
                    }

                    var downloadInfo = await client.DownloadAsync(range);

                    return downloadInfo.Value.Content;
                };

                for (int i = 0; i < expectedResult.Count; ++i)
                {
                    var content = await source.ReadAsync();

                    Assert.Equal(expectedResult[i], content);
                }

                // One delayed attempt plus one retry per offset.
                foreach (int count in enterRecord.Values)
                {
                    Assert.Equal(2, count);
                }
            }
            finally
            {
                await source.CloseAsync();

                await containerClient.DeleteIfExistsAsync();
            }
        }