// A struct-typed field ("text") should pass through Preprocess unchanged for the Patient schema.
public static void GivenAValidStructData_WhenPreprocess_CorrectResultShouldBeReturned()
{
    var input = new JObject
    {
        ["text"] = new JObject
        {
            ["status"] = "generated",
            ["div"] = "Test div in text",
        },
    };

    // The processed struct field is expected to be identical to the raw input.
    var expected = new JObject
    {
        ["text"] = new JObject
        {
            ["status"] = "generated",
            ["div"] = "Test div in text",
        },
    };

    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

    var processed = processor.Preprocess(CreateTestJsonBatchData(input), "Patient");

    Assert.True(JToken.DeepEquals(processed.Values.First(), expected));
}
// A FHIR choice-type field ("effectivePeriod") should be re-nested under its base name
// ("effective") with the concrete type ("period") as an inner key.
public static void GivenAValidStructChoiceTypeData_WhenPreprocess_CorrectResultShouldBeReturned()
{
    var input = new JObject
    {
        ["effectivePeriod"] = new JObject
        {
            ["start"] = "1905-08-23",
        },
    };

    // Struct choice data type: effectivePeriod -> effective.period
    var expected = new JObject
    {
        ["effective"] = new JObject
        {
            ["period"] = new JObject
            {
                ["start"] = "1905-08-23",
            },
        },
    };

    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

    var processed = processor.Preprocess(CreateTestJsonBatchData(input), "Observation");

    Assert.True(JToken.DeepEquals(processed.Values.First(), expected));
}
// An array-typed field ("name") should pass through Preprocess unchanged for the Patient schema.
public static void GivenAValidArrayData_WhenPreprocess_CorrectResultShouldBeReturned()
{
    var input = new JObject
    {
        ["name"] = new JArray
        {
            new JObject
            {
                ["use"] = "official",
                ["family"] = "Chalmers",
                ["given"] = new JArray { "Peter", "James" },
            },
            new JObject
            {
                ["use"] = "maiden",
                ["given"] = new JArray { "Jim" },
            },
        },
    };

    // The processed array field is expected to be identical to the raw input.
    var expected = new JObject
    {
        ["name"] = new JArray
        {
            new JObject
            {
                ["use"] = "official",
                ["family"] = "Chalmers",
                ["given"] = new JArray { "Peter", "James" },
            },
            new JObject
            {
                ["use"] = "maiden",
                ["given"] = new JArray { "Jim" },
            },
        },
    };

    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

    var processed = processor.Preprocess(CreateTestJsonBatchData(input), "Patient");

    Assert.True(JToken.DeepEquals(processed.Values.First(), expected));
}
// Preprocess should throw ParquetDataProcessorException for a schema type that is not configured.
public static void GivenInvalidSchemaType_WhenPreprocess_ExceptionShouldBeReturned()
{
    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

    Assert.Throws<ParquetDataProcessorException>(
        () => processor.Preprocess(CreateTestJsonBatchData(_testPatient), "UnsupportedSchemaType"));
}
// ProcessAsync should throw ParquetDataProcessorException for an unrecognized resource type.
public static async Task GivenInvalidSchemaType_WhenProcess_ExceptionShouldBeThrown()
{
    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);
    var batch = new JsonBatchData(_testPatients);

    await Assert.ThrowsAsync<ParquetDataProcessorException>(
        () => processor.ProcessAsync(batch, new ProcessParameters("InvalidResourceType")));
}
// Preprocessing a full test Patient should match the checked-in expected NDJSON output.
public static void GivenAValidBasicSchema_WhenPreprocess_CorrectResultShouldBeReturned()
{
    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

    var processed = processor.Preprocess(CreateTestJsonBatchData(_testPatient), "Patient");

    var expected = TestUtils.LoadNdjsonData(Path.Combine(_expectTestDataFolder, "Expected_Processed_Patient.ndjson"));
    Assert.True(JToken.DeepEquals(processed.Values.First(), expected.First()));
}
// ProcessAsync over valid patients should produce a byte-identical match with the expected parquet file.
public static async Task GivenAValidInputData_WhenProcess_CorrectResultShouldBeReturned()
{
    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);
    var batch = new JsonBatchData(_testPatients);

    var processed = await processor.ProcessAsync(batch, new ProcessParameters("Patient"));

    using var actualStream = new MemoryStream();
    processed.Value.CopyTo(actualStream);

    var expectedStream = GetExpectedParquetStream(Path.Combine(_expectTestDataFolder, "Expected_Patient.parquet"));
    Assert.Equal(expectedStream.ToArray(), actualStream.ToArray());
}
// ProcessAsync should throw when a field's JSON shape contradicts the schema
// (here "name" is a string where the Patient schema expects an array).
public static async Task GivenInvalidJsonBatchData_WhenProcess_ExceptionShouldBeThrown()
{
    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

    var malformedPatient = new JObject
    {
        ["resourceType"] = "Patient",
        ["name"] = "Invalid field content, should be an array.",
    };
    var malformedBatch = new JsonBatchData(new List<JObject> { malformedPatient, malformedPatient });

    await Assert.ThrowsAsync<ParquetDataProcessorException>(
        () => processor.ProcessAsync(malformedBatch, new ProcessParameters("Patient")));
}
// Preprocess should throw for malformed field data and for a null record.
// Note: .Values.Count() forces enumeration of the lazily-evaluated result so the exception surfaces.
public static void GivenInvalidData_WhenPreprocess_ExceptionShouldBeReturned()
{
    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

    var malformedData = new JObject
    {
        ["name"] = "Invalid data fields, should be array.",
    };

    Assert.Throws<ParquetDataProcessorException>(
        () => processor.Preprocess(CreateTestJsonBatchData(malformedData), "Patient").Values.Count());

    Assert.Throws<ParquetDataProcessorException>(
        () => processor.Preprocess(CreateTestJsonBatchData(null), "Patient").Values.Count());
}
// ProcessAsync over a large batch (the large-patient set repeated 100x) should match
// the expected parquet output byte-for-byte.
public static async Task GivenAValidMultipleLargeInputData_WhenProcess_CorrectResultShouldBeReturned()
{
    var largePatientSet = TestUtils.LoadNdjsonData(Path.Combine(_testDataFolder, "Large_Patient.ndjson"));
    var repeatedPatients = Enumerable.Repeat(largePatientSet, 100).SelectMany(set => set);

    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);
    var batch = new JsonBatchData(repeatedPatients);

    var processed = await processor.ProcessAsync(batch, new ProcessParameters("Patient"));

    using var actualStream = new MemoryStream();
    processed.Value.CopyTo(actualStream);

    var expectedStream = GetExpectedParquetStream(Path.Combine(_expectTestDataFolder, "Expected_Patient_MultipleLargeSize.parquet"));
    Assert.Equal(expectedStream.ToArray(), actualStream.ToArray());
}
// When every record in the batch exceeds the Arrow BlockSize, no record survives
// conversion and ProcessAsync should return null.
public static async Task GivenDataAllRecordsLengthLargerThanBlockSize_WhenProcess_NullResultShouldReturned()
{
    // BlockSize is deliberately tiny so that all of the test patients are too long to be retained.
    var arrowOptions = Options.Create(new ArrowConfiguration()
    {
        ReadOptions = new ArrowReadOptionsConfiguration() { BlockSize = 50 },
    });

    var processor = new ParquetDataProcessor(_fhirSchemaManager, arrowOptions, _nullParquetDataProcessorLogger);
    var batch = new JsonBatchData(new List<JObject>(_testPatients));

    StreamBatchData processed = await processor.ProcessAsync(batch, new ProcessParameters("Patient"));

    Assert.Null(processed);
}
// Records longer than the Arrow BlockSize should be dropped, while short records are
// still converted to parquet.
public static async Task GivenDataWithSomeRecordsLengthLargerThanBlockSize_WhenProcess_LargeRecordsShouldBeIgnored()
{
    var shortPatientData = new JObject
    {
        ["resourceType"] = "Patient",
        ["id"] = "example",
    };
    var mixedPatients = new List<JObject>(_testPatients) { shortPatientData };

    // BlockSize is deliberately tiny so that only shortPatientData can be retained and converted.
    var arrowOptions = Options.Create(new ArrowConfiguration()
    {
        ReadOptions = new ArrowReadOptionsConfiguration() { BlockSize = 50 },
    });

    var processor = new ParquetDataProcessor(_fhirSchemaManager, arrowOptions, _nullParquetDataProcessorLogger);
    var batch = new JsonBatchData(mixedPatients);

    var processed = await processor.ProcessAsync(batch, new ProcessParameters("Patient"));

    using var actualStream = new MemoryStream();
    processed.Value.CopyTo(actualStream);

    var expectedStream = GetExpectedParquetStream(Path.Combine(_expectTestDataFolder, "Expected_Patient_IgnoreLargeLength.parquet"));
    Assert.Equal(expectedStream.ToArray(), actualStream.ToArray());
}
// Fields nested deeper than the schema supports (here contact.relationship.coding)
// should be serialized into a single JSON string rather than kept as structured JSON.
public static void GivenAValidDataWithDeepStructField_WhenPreprocess_DeepFieldsShouldBeWrappedIntoJsonString()
{
    var input = new JObject
    {
        ["contact"] = new JArray
        {
            new JObject
            {
                ["relationship"] = new JArray
                {
                    new JObject
                    {
                        ["coding"] = new JArray
                        {
                            new JObject
                            {
                                ["system"] = "http://terminology.hl7.org/CodeSystem/v2-0131",
                                ["code"] = "E",
                            },
                        },
                    },
                },
            },
        },
    };

    var expected = new JObject
    {
        ["contact"] = new JArray
        {
            new JObject
            {
                ["relationship"] = new JArray
                {
                    new JObject
                    {
                        ["coding"] = "[{\"system\":\"http://terminology.hl7.org/CodeSystem/v2-0131\",\"code\":\"E\"}]",
                    },
                },
            },
        },
    };

    var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

    var processed = processor.Preprocess(CreateTestJsonBatchData(input), "Patient");

    Assert.True(JToken.DeepEquals(processed.Values.First(), expected));
}