예제 #1
0
        /// <summary>
        /// Verifies that constructing a <see cref="TidyDataCsvExtractor"/> with the
        /// data path "Nodes/Manual/Assets/foo.csv" throws an
        /// <see cref="ArgumentException"/>.
        /// </summary>
        public void TidyExtractor_InvalidFilePaths_ThrowsArgumentException()
        {
            // Arrange
            // NOTE(review): presumably foo.csv does not exist in the test assets - confirm.
            string dataPath = @"Nodes/Manual/Assets/foo.csv";
            TidyDataCsvExtractor sut;
            Action construct = () =>
            {
                sut = new TidyDataCsvExtractor(
                    dataPath,
                    pathToFileWithValidDictionaryV1);
            };

            // Act and Assert
            Assert.Throws<ArgumentException>(construct);
        }
예제 #2
0
        /// <summary>
        /// Verifies that extracting <see cref="HandHarvestYieldV1"/> records from the
        /// data file referenced by <c>pathToFileWithNullsV1</c> yields ten observations.
        /// </summary>
        public void Extract_Nulls_ReturnsAllValues()
        {
            // Arrange
            const int expectedObservationCount = 10;
            TidyDataCsvExtractor extractor = new TidyDataCsvExtractor(
                pathToFileWithNullsV1, pathToFileWithValidDictionaryV1);

            // Act
            TidyData tidyData = extractor.Extract<HandHarvestYieldV1>();
            var observationCount = tidyData.Observations.Count;

            // Assert
            Assert.Equal(expectedObservationCount, observationCount);
        }
예제 #3
0
        /// <summary>
        /// Verifies that extracting <see cref="HandHarvestYieldV1"/> records from the
        /// slim valid data file produces a <see cref="TidyData"/> equal to the one
        /// supplied by <c>ManualArranger.GetTidyDataDerivedFromActualDataV1()</c>.
        /// </summary>
        public void Extract_ValidData_ReturnsExpected()
        {
            // Arrange
            TidyDataCsvExtractor extractor = new TidyDataCsvExtractor(
                pathToFileWithValidDataSlimV1, pathToFileWithValidDictionaryV1);
            TidyData expectedTidyData =
                ManualArranger.GetTidyDataDerivedFromActualDataV1();

            // Act
            TidyData extractedTidyData = extractor.Extract<HandHarvestYieldV1>();

            // Assert
            Assert.Equal(expectedTidyData, extractedTidyData);
        }
        /// <summary>
        /// End-to-end check of the soil grid point survey pipeline: extracts
        /// <see cref="SoilGridPointSurveyV1"/> observations from CSV, transforms them
        /// to <see cref="SoilSample"/> documents, bulk-loads them through a
        /// <see cref="DocumentLoader"/>, and asserts that 30 samples were produced
        /// and that the bulk load returned a non-empty result set.
        /// </summary>
        public async Task SoilGridPointSurveyV1ToCosmos_ActualData_CreatesExpectedRecords()
        {
            // Arrange
            const string projectName = "CookEastSoilGridPointSurvey";
            const string etlEventSchema =
                "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json";
            const string sampleSchema =
                "http://files.cafltar.org/data/schema/documentDb/v2/sample.json";

            TidyDataCsvExtractor csvExtractor = new TidyDataCsvExtractor(
                pathToFileWithValidSoilGridPointSurveyV1Data,
                pathToFileWithValidSoilGridPointSurveyV1Dictionary);

            var runEvent = new EtlEvent(
                "EtlEvent", "LocalProcess", etlEventSchema,
                projectName, "0.1", "", DateTime.UtcNow);

            var sampleTransformer =
                new CosmosDBSqlApiSampleV2Transformer<SoilGridPointSurveyV1, SoilSample>(
                    new MapFromSoilGridPointSurveyToSoilSample(),
                    sampleSchema,
                    runEvent.Id,
                    projectName,
                    "CookEast",
                    "SoilSample");

            DocumentLoader documentLoader = new DocumentLoader(client, "cafdb", "items");

            // Act
            TidyData tidyData = csvExtractor.Extract<SoilGridPointSurveyV1>();
            List<SoilSample> soilSamples = sampleTransformer.Transform(tidyData);
            StoredProcedureResponse<bool>[] bulkResponses =
                await documentLoader.LoadBulk(soilSamples);

            // Assert
            Assert.Equal(30, soilSamples.Count);
            Assert.NotEmpty(bulkResponses);
        }
예제 #5
0
        /// <summary>
        /// Runs the soil grid point survey ETL: reads the data and dictionary paths
        /// from appsettings.json, extracts <see cref="SoilGridPointSurveyV1"/>
        /// observations, transforms them to <see cref="SoilSample"/> documents and
        /// loads them one at a time through a <see cref="DocumentLoader"/>. An
        /// <see cref="EtlEvent"/> describing the run (inputs, outputs, logs, end
        /// time) is loaded last, whether or not the pipeline succeeded.
        /// </summary>
        /// <exception cref="FileNotFoundException">
        /// The configured data file or dictionary file does not exist.
        /// </exception>
        /// <exception cref="Exception">
        /// Creating the <see cref="DocumentClient"/> failed, or any step of the
        /// extract/transform/load pipeline threw; the original exception is
        /// attached as the inner exception.
        /// </exception>
        static async Task MainAsync()
        {
            EtlEvent etlEvent = new EtlEvent(
                "EtlEvent",
                "LocalProcess",
                "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
                "CookEastSoilGridPointSurvey",
                "1.0",
                "CookEastSoilGridPointSurvey_DotNet_SoilGridPointToCosmosDB",
                DateTime.UtcNow);

            var builder = new ConfigurationBuilder()
                          .SetBasePath(Directory.GetCurrentDirectory())
                          .AddJsonFile("appsettings.json");

            var configuration = builder.Build();

            JsonSerializerSettings serializerSettings = new JsonSerializerSettings
            {
                NullValueHandling = NullValueHandling.Ignore
            };

            string data = configuration["PathToData"];
            string dict = configuration["PathToDictionary"];

            // Fail fast, and say which input is missing, before touching Cosmos.
            if (!File.Exists(data))
            {
                throw new FileNotFoundException(
                    "Data file configured in 'PathToData' was not found", data);
            }

            if (!File.Exists(dict))
            {
                throw new FileNotFoundException(
                    "Dictionary file configured in 'PathToDictionary' was not found", dict);
            }

            etlEvent.Inputs.Add(data);
            etlEvent.Inputs.Add(dict);

            DocumentClient client;

            try
            {
                client = new DocumentClient(
                    new Uri(
                        configuration["CosmosServiceEndpoint"]),
                    configuration["CosmosAuthKey"],
                    serializerSettings);
            }
            catch (Exception e)
            {
                etlEvent.Logs.Add(
                    $"Error creating DocumentClient: {e.Message}");
                throw new Exception("Error creating DocumentClient", e);
            }

            // DocumentClient is IDisposable; release it once the run (including the
            // final EtlEvent write) has finished.
            using (client)
            {
                // Use the paths validated above instead of re-reading configuration.
                var extractor = new TidyDataCsvExtractor(data, dict);

                var transformer = new CosmosDBSqlApiSampleV2Transformer
                                  <SoilGridPointSurveyV1, SoilSample>(
                    new MapFromSoilGridPointSurveyToSoilSample(),
                    "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
                    etlEvent.Id,
                    "CookEastSoilGridPointSurvey",
                    "CookEast",
                    "SoilSample");

                var loader = new DocumentLoader(
                    client,
                    "cafdb",
                    "items");

                try
                {
                    TidyData extracted = extractor.Extract<SoilGridPointSurveyV1>();

                    etlEvent.Logs.Add(
                        $"Extracted TidyData with {extracted.Observations.Count} observations");

                    List<SoilSample> transformed = transformer.Transform(extracted);

                    etlEvent.Logs.Add(
                        $"Transformed TidyData to {transformed.Count} SoilSamples");

                    int docsLoaded = 0;
                    int docsError  = 0;
                    foreach (SoilSample sample in transformed)
                    {
                        ResourceResponse<Document> result =
                            await loader.LoadNoReplace(sample);

                        // Only a 201 Created counts as a successful load.
                        if (result.StatusCode == HttpStatusCode.Created)
                        {
                            etlEvent.Outputs.Add(result.Resource.Id);
                            docsLoaded++;
                        }
                        else
                        {
                            docsError++;
                        }

                        // Notify data written then pause to conserve Cosmos RU.
                        // Task.Delay yields the thread instead of blocking it.
                        Console.Write(".");
                        await Task.Delay(40);
                    }

                    etlEvent.Logs.Add(
                        $"Loaded {docsLoaded} SoilSamples");
                    etlEvent.Logs.Add(
                        $"Error loading {docsError} SoilSamples");
                }
                catch (Exception e)
                {
                    etlEvent.Logs.Add(
                        $"Error in ETL pipeline: {e.Message}");
                    throw new Exception("Error in ETL pipeline", e);
                }
                finally
                {
                    // Always record the run, successful or not.
                    etlEvent.DateTimeEnd = DateTime.UtcNow;
                    await loader.LoadNoReplace(etlEvent);
                }
            }
        }