public static void GivenAValidStructData_WhenPreprocess_CorrectResultShouldBeReturned()
        {
            // A struct-shaped field ("text") is run through Preprocess against the
            // "Patient" schema and is asserted to come back unchanged.
            var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            var inputResource = new JObject
            {
                ["text"] = new JObject
                {
                    ["status"] = "generated",
                    ["div"] = "Test div in text",
                },
            };

            // The expected output mirrors the input field for field.
            var expectedResource = new JObject
            {
                ["text"] = new JObject
                {
                    ["status"] = "generated",
                    ["div"] = "Test div in text",
                },
            };

            var processed = processor.Preprocess(CreateTestJsonBatchData(inputResource), "Patient");

            // Only the first processed record is compared.
            Assert.True(JToken.DeepEquals(processed.Values.First(), expectedResource));
        }
        public static void GivenAValidStructChoiceTypeData_WhenPreprocess_CorrectResultShouldBeReturned()
        {
            // A choice-type field ("effectivePeriod") processed against the "Observation"
            // schema is asserted to be re-nested as "effective" -> "period".
            var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            var inputResource = new JObject
            {
                ["effectivePeriod"] = new JObject
                {
                    ["start"] = "1905-08-23",
                },
            };

            // Expected shape: the choice type becomes a nested struct under "effective".
            var expectedResource = new JObject
            {
                ["effective"] = new JObject
                {
                    ["period"] = new JObject
                    {
                        ["start"] = "1905-08-23",
                    },
                },
            };

            var processed = processor.Preprocess(CreateTestJsonBatchData(inputResource), "Observation");

            Assert.True(JToken.DeepEquals(processed.Values.First(), expectedResource));
        }
        public static void GivenAValidArrayData_WhenPreprocess_CorrectResultShouldBeReturned()
        {
            // An array-shaped field ("name") is run through Preprocess against the
            // "Patient" schema and is asserted to come back identical to the input.
            var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            var inputResource = new JObject
            {
                ["name"] = new JArray
                {
                    new JObject
                    {
                        ["use"] = "official",
                        ["family"] = "Chalmers",
                        ["given"] = new JArray { "Peter", "James" },
                    },
                    new JObject
                    {
                        ["use"] = "maiden",
                        ["given"] = new JArray { "Jim" },
                    },
                },
            };

            // The expected output mirrors the input element for element.
            var expectedResource = new JObject
            {
                ["name"] = new JArray
                {
                    new JObject
                    {
                        ["use"] = "official",
                        ["family"] = "Chalmers",
                        ["given"] = new JArray { "Peter", "James" },
                    },
                    new JObject
                    {
                        ["use"] = "maiden",
                        ["given"] = new JArray { "Jim" },
                    },
                },
            };

            var processed = processor.Preprocess(CreateTestJsonBatchData(inputResource), "Patient");

            Assert.True(JToken.DeepEquals(processed.Values.First(), expectedResource));
        }
        public static void GivenInvalidSchemaType_WhenPreprocess_ExceptionShouldBeReturned()
        {
            // Preprocess is called with a schema type name ("UnsupportedSchemaType") and
            // is asserted to throw ParquetDataProcessorException.
            var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            // The batch data is built inside the lambda so that everything the original
            // assertion covered stays within the Assert.Throws scope.
            Assert.Throws<ParquetDataProcessorException>(
                () => processor.Preprocess(CreateTestJsonBatchData(_testPatient), "UnsupportedSchemaType"));
        }
        public static async Task GivenInvalidSchemaType_WhenProcess_ExceptionShouldBeThrown()
        {
            // ProcessAsync is called with process parameters naming "InvalidResourceType"
            // and is asserted to throw ParquetDataProcessorException.
            var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);
            var patientBatch = new JsonBatchData(_testPatients);

            await Assert.ThrowsAsync<ParquetDataProcessorException>(
                () => processor.ProcessAsync(patientBatch, new ProcessParameters("InvalidResourceType")));
        }
        public static void GivenAValidBasicSchema_WhenPreprocess_CorrectResultShouldBeReturned()
        {
            // Preprocesses _testPatient against the "Patient" schema and compares the first
            // processed record with the first record of Expected_Processed_Patient.ndjson.
            var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            var processed = processor.Preprocess(CreateTestJsonBatchData(_testPatient), "Patient");

            var expectedFilePath = Path.Combine(_expectTestDataFolder, "Expected_Processed_Patient.ndjson");
            var expectedRecords = TestUtils.LoadNdjsonData(expectedFilePath);

            Assert.True(JToken.DeepEquals(processed.Values.First(), expectedRecords.First()));
        }
        public static async Task GivenAValidInputData_WhenProcess_CorrectResultShouldBeReturned()
        {
            // Converts _testPatients with the "Patient" schema and checks that the produced
            // stream is byte-for-byte equal to Expected_Patient.parquet.
            var parquetDataProcessor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            var jsonBatchData = new JsonBatchData(_testPatients);

            var resultBatchData = await parquetDataProcessor.ProcessAsync(jsonBatchData, new ProcessParameters("Patient"));

            // Fail with a clear assertion message, rather than a NullReferenceException
            // on the dereference below, if no result is produced.
            Assert.NotNull(resultBatchData);

            var expectedResult = GetExpectedParquetStream(Path.Combine(_expectTestDataFolder, "Expected_Patient.parquet"));

            // The scratch buffer is scoped so it is disposed even when the assertion fails.
            using (var resultStream = new MemoryStream())
            {
                resultBatchData.Value.CopyTo(resultStream);

                Assert.Equal(expectedResult.ToArray(), resultStream.ToArray());
            }
        }
        public static async Task GivenInvalidJsonBatchData_WhenProcess_ExceptionShouldBeThrown()
        {
            // A batch whose records carry a string in the "name" field (the fixture text
            // states it should be an array) is asserted to make ProcessAsync throw
            // ParquetDataProcessorException.
            var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            var malformedPatient = new JObject
            {
                ["resourceType"] = "Patient",
                ["name"] = "Invalid field content, should be an array.",
            };

            // The same malformed record is supplied twice.
            var malformedRecords = new List<JObject> { malformedPatient, malformedPatient };
            var malformedBatch = new JsonBatchData(malformedRecords);

            await Assert.ThrowsAsync<ParquetDataProcessorException>(
                () => processor.ProcessAsync(malformedBatch, new ProcessParameters("Patient")));
        }
        public static void GivenInvalidData_WhenPreprocess_ExceptionShouldBeReturned()
        {
            // Two invalid inputs are checked: a record whose "name" field is a string, and
            // batch data created from null. Both are asserted to raise
            // ParquetDataProcessorException.
            var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            var malformedRecord = new JObject
            {
                ["name"] = "Invalid data fields, should be array.",
            };

            // Count() enumerates Values; presumably Values is evaluated lazily, so the
            // enumeration is what triggers the exception - TODO confirm.
            Assert.Throws<ParquetDataProcessorException>(
                () => processor.Preprocess(CreateTestJsonBatchData(malformedRecord), "Patient").Values.Count());

            Assert.Throws<ParquetDataProcessorException>(
                () => processor.Preprocess(CreateTestJsonBatchData(null), "Patient").Values.Count());
        }
        public static async Task GivenAValidMultipleLargeInputData_WhenProcess_CorrectResultShouldBeReturned()
        {
            // Repeats the records of Large_Patient.ndjson 100 times, converts them with the
            // "Patient" schema and checks that the produced stream is byte-for-byte equal to
            // Expected_Patient_MultipleLargeSize.parquet.
            var largePatientSingleSet = TestUtils.LoadNdjsonData(Path.Combine(_testDataFolder, "Large_Patient.ndjson"));
            var largeTestData         = Enumerable.Repeat(largePatientSingleSet, 100).SelectMany(x => x);

            var parquetDataProcessor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            var jsonBatchData = new JsonBatchData(largeTestData);

            var resultBatchData = await parquetDataProcessor.ProcessAsync(jsonBatchData, new ProcessParameters("Patient"));

            // Fail with a clear assertion message, rather than a NullReferenceException
            // on the dereference below, if no result is produced.
            Assert.NotNull(resultBatchData);

            var expectedResult = GetExpectedParquetStream(Path.Combine(_expectTestDataFolder, "Expected_Patient_MultipleLargeSize.parquet"));

            // The scratch buffer is scoped so it is disposed even when the assertion fails.
            using (var resultStream = new MemoryStream())
            {
                resultBatchData.Value.CopyTo(resultStream);

                Assert.Equal(expectedResult.ToArray(), resultStream.ToArray());
            }
        }
        public static async Task GivenDataAllRecordsLengthLargerThanBlockSize_WhenProcess_NullResultShouldReturned()
        {
            // BlockSize is set to 50. The premise of this test (per its name) is that every
            // record in _testPatients is longer than that, so ProcessAsync is asserted to
            // return null.
            var smallBlockReadOptions = new ArrowReadOptionsConfiguration()
            {
                BlockSize = 50
            };
            var smallBlockOptions = Options.Create(new ArrowConfiguration()
            {
                ReadOptions = smallBlockReadOptions,
            });

            var processor = new ParquetDataProcessor(_fhirSchemaManager, smallBlockOptions, _nullParquetDataProcessorLogger);

            // Work on a copy of the shared fixture list.
            var patientBatch = new JsonBatchData(new List<JObject>(_testPatients));

            StreamBatchData processed = await processor.ProcessAsync(patientBatch, new ProcessParameters("Patient"));

            Assert.Null(processed);
        }
        public static async Task GivenDataWithSomeRecordsLengthLargerThanBlockSize_WhenProcess_LargeRecordsShouldBeIgnored()
        {
            // Appends one short record to a copy of _testPatients, converts the batch with
            // BlockSize = 50 and checks that the produced stream is byte-for-byte equal to
            // Expected_Patient_IgnoreLargeLength.parquet.
            var shortPatientData = new JObject
            {
                { "resourceType", "Patient" },
                { "id", "example" },
            };

            var testData = new List <JObject>(_testPatients)
            {
                shortPatientData
            };

            // Set BlockSize small here so that only shortPatientData is retained and converted to the parquet result.
            var arrowConfigurationOptions = Options.Create(new ArrowConfiguration()
            {
                ReadOptions = new ArrowReadOptionsConfiguration()
                {
                    BlockSize = 50
                },
            });

            var parquetDataProcessor = new ParquetDataProcessor(_fhirSchemaManager, arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            var jsonBatchData = new JsonBatchData(testData);

            var resultBatchData = await parquetDataProcessor.ProcessAsync(jsonBatchData, new ProcessParameters("Patient"));

            // Fail with a clear assertion message, rather than a NullReferenceException
            // on the dereference below, if no result is produced.
            Assert.NotNull(resultBatchData);

            var expectedResult = GetExpectedParquetStream(Path.Combine(_expectTestDataFolder, "Expected_Patient_IgnoreLargeLength.parquet"));

            // The scratch buffer is scoped so it is disposed even when the assertion fails.
            using (var resultStream = new MemoryStream())
            {
                resultBatchData.Value.CopyTo(resultStream);

                Assert.Equal(expectedResult.ToArray(), resultStream.ToArray());
            }
        }
        public static void GivenAValidDataWithDeepStructField_WhenPreprocess_DeepFieldsShouldBeWrappedIntoJsonString()
        {
            // The "coding" array nested under contact -> relationship is asserted to be
            // replaced by its compact JSON string after Preprocess with the "Patient" schema.
            var processor = new ParquetDataProcessor(_fhirSchemaManager, _arrowConfigurationOptions, _nullParquetDataProcessorLogger);

            var inputResource = new JObject
            {
                ["contact"] = new JArray
                {
                    new JObject
                    {
                        ["relationship"] = new JArray
                        {
                            new JObject
                            {
                                ["coding"] = new JArray
                                {
                                    new JObject
                                    {
                                        ["system"] = "http://terminology.hl7.org/CodeSystem/v2-0131",
                                        ["code"] = "E",
                                    },
                                },
                            },
                        },
                    },
                },
            };

            // Same outer structure, but "coding" is a string holding the serialized array.
            var expectedResource = new JObject
            {
                ["contact"] = new JArray
                {
                    new JObject
                    {
                        ["relationship"] = new JArray
                        {
                            new JObject
                            {
                                ["coding"] = "[{\"system\":\"http://terminology.hl7.org/CodeSystem/v2-0131\",\"code\":\"E\"}]",
                            },
                        },
                    },
                },
            };

            var processed = processor.Preprocess(CreateTestJsonBatchData(inputResource), "Patient");

            Assert.True(JToken.DeepEquals(processed.Values.First(), expectedResource));
        }