/// <summary>
/// Checks that <c>ConvertToParquetStream</c> throws <see cref="ArgumentException"/> both when the
/// input stream is <c>null</c> and when it is a zero-length stream.
/// </summary>
public void GivenNullOrEmptyInputStream_WhenConvertToParquetStream_ExceptionShouldBeThrown()
        {
            var converter = new ParquetConverterWrapper(TestConstants.TestSchemaMap);

            // Case 1: no stream is supplied at all.
            Assert.Throws<ArgumentException>(
                () => converter.ConvertToParquetStream("Patient", null));

            // Case 2: a stream is supplied but it holds zero bytes.
            var emptyStream = new MemoryStream(Encoding.UTF8.GetBytes(string.Empty));
            Assert.Throws<ArgumentException>(
                () => converter.ConvertToParquetStream("Patient", emptyStream));
        }
        /// <summary>
        /// Converts a minimal "Patient" JSON document with <c>ConvertToParquetStream</c> and checks
        /// that the returned stream has a length greater than zero.
        /// </summary>
        public void GivenValidInputData_WhenConvertToParquetStream_CorrectStreamShouldBeReturned()
        {
            // Arrange: a wrapper built from the test schema map and a default Arrow configuration.
            var converter = new ParquetConverterWrapper(TestConstants.TestSchemaMap, new ArrowConfiguration());
            var patientJson = "{\"resourceType\":\"Patient\",\"id\":\"example\"}";
            var patientBytes = Encoding.UTF8.GetBytes(patientJson);
            var source = new MemoryStream(patientBytes);

            // Act
            var converted = converter.ConvertToParquetStream("Patient", source);

            // Assert: only the presence of output is verified, not its contents.
            Assert.True(converted.Length > 0);
        }
Example #3
0
        /// <summary>
        /// Converts a batch of JSON data into a parquet stream for the schema type carried by
        /// <paramref name="processParameters"/>.
        /// </summary>
        /// <remarks>
        /// The method is not <c>async</c>: all work runs synchronously and failures are thrown directly
        /// to the caller instead of being captured in the returned task.
        /// </remarks>
        /// <param name="inputData">The JSON batch to preprocess and convert.</param>
        /// <param name="processParameters">
        /// Supplies the schema type used for preprocessing, schema lookup and conversion.
        /// </param>
        /// <param name="cancellationToken">
        /// Checked once on entry and forwarded to the preprocessing step.
        /// </param>
        /// <returns>
        /// A completed task holding the converted <see cref="StreamBatchData"/>, or a completed task
        /// holding <c>null</c> when no input stream was produced from the preprocessed data.
        /// </returns>
        /// <exception cref="OperationCanceledException">
        /// Thrown on entry when <paramref name="cancellationToken"/> is already cancelled.
        /// </exception>
        /// <exception cref="ParquetDataProcessorException">
        /// Thrown when no schema is found for the schema type, or when the parquet conversion fails
        /// (the original failure is attached as the inner exception).
        /// </exception>
        public Task<StreamBatchData> ProcessAsync(
            JsonBatchData inputData,
            ProcessParameters processParameters,
            CancellationToken cancellationToken = default)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // Preprocess data
            JsonBatchData preprocessedData = Preprocess(inputData, processParameters.SchemaType, cancellationToken);

            // Get FHIR schema for the input data.
            var schema = _fhirSchemaManager.GetSchema(processParameters.SchemaType);

            if (schema == null)
            {
                // Build the message once so the log entry and the exception always carry the same text.
                var schemaNotFoundMessage = $"The FHIR schema node could not be found for schema type '{processParameters.SchemaType}'.";
                _logger.LogError(schemaNotFoundMessage);
                throw new ParquetDataProcessorException(schemaNotFoundMessage);
            }

            var inputStream = ConvertJsonDataToStream(processParameters.SchemaType, preprocessedData.Values);

            if (inputStream == null)
            {
                // Return null if no data has been converted.
                return Task.FromResult<StreamBatchData>(null);
            }

            // Convert JSON data to parquet stream.
            try
            {
                var resultStream = _parquetConverterWrapper.ConvertToParquetStream(processParameters.SchemaType, inputStream);
                var batch = new StreamBatchData(
                    resultStream,
                    preprocessedData.Values.Count(),
                    processParameters.SchemaType);

                return Task.FromResult(batch);
            }
            catch (Exception ex)
            {
                var conversionFailedMessage = $"Exception happened when converting input data to parquet for \"{processParameters.SchemaType}\".";

                // Hand the exception itself to the logger so its type, message and stack trace are
                // recorded; logging the text alone loses the root cause.
                _logger.LogError(ex, conversionFailedMessage);
                throw new ParquetDataProcessorException(conversionFailedMessage, ex);
            }
        }
        /// <summary>
        /// Checks that <c>ConvertToParquetStream</c> throws <see cref="ArgumentException"/> when it is
        /// called with the supplied <paramref name="resourceType"/> and a non-empty input stream.
        /// </summary>
        /// <param name="resourceType">The resource type name passed to the converter.</param>
        public void GivenUnsupportedResourceType_WhenConvertToParquetStream_ExceptionShouldBeThrown(string resourceType)
        {
            var converter = new ParquetConverterWrapper(TestConstants.TestSchemaMap);

            // The payload is a non-empty placeholder string.
            var payload = new MemoryStream(Encoding.UTF8.GetBytes("content"));

            Assert.Throws<ArgumentException>(
                () => converter.ConvertToParquetStream(resourceType, payload));
        }