/// <summary>
/// Writes a batch of stream data to blob storage as a single data file.
/// </summary>
/// <param name="data">The stream batch data to persist; must not be null.</param>
/// <param name="jobId">Identifier of the job that produced the data; must not be null.</param>
/// <param name="taskIndex">Index of the producing task within the job; used in the blob name.</param>
/// <param name="partId">Part number of the data file; used in the blob name.</param>
/// <param name="dateTime">Timestamp used to build the blob name.</param>
/// <param name="cancellationToken">Token observed by the blob upload.</param>
/// <returns>The URL of the written blob.</returns>
public async Task<string> WriteAsync(
    StreamBatchData data,
    string jobId,
    int taskIndex,
    int partId,
    DateTimeOffset dateTime,
    CancellationToken cancellationToken = default)
{
    EnsureArg.IsNotNull(data, nameof(data));
    EnsureArg.IsNotNull(jobId, nameof(jobId));

    var schemaType = data.SchemaType;
    var blobName = GetDataFileName(dateTime, schemaType, jobId, taskIndex, partId);
    var blobUrl = await _containerClient.UpdateBlobAsync(blobName, data.Value, cancellationToken);

    // Use a constant message template with a named placeholder (fixes CA2254) so the
    // blob URL is captured as a structured log property instead of being interpolated
    // into a per-call template string.
    _logger.LogInformation("Write stream batch data to {BlobUrl} successfully.", blobUrl);

    return blobUrl;
}
// Arrange an Arrow read BlockSize (50) small enough that every record in
// _testPatients exceeds it, so nothing can be converted to a parquet result;
// ProcessAsync is then expected to return null.
public static async Task GivenDataAllRecordsLengthLargerThanBlockSize_WhenProcess_NullResultShouldReturned()
{
    var arrowOptions = Options.Create(new ArrowConfiguration()
    {
        ReadOptions = new ArrowReadOptionsConfiguration() { BlockSize = 50 },
    });

    var processor = new ParquetDataProcessor(_fhirSchemaManager, arrowOptions, _nullParquetDataProcessorLogger);

    var inputRecords = new List<JObject>(_testPatients);
    var batchData = new JsonBatchData(inputRecords);

    StreamBatchData result = await processor.ProcessAsync(batchData, new ProcessParameters("Patient"));

    Assert.Null(result);
}