Exemple #1
0
        /// <summary>
        /// Transforms the tidy data arranged from actual V1 hand harvest data and
        /// checks that the first resulting sample passes
        /// <c>ComparerUtil.AreVegetationSamplesRoughlyEqual</c> against the
        /// manually arranged expectation.
        /// </summary>
        public void Transform_ValidDataHandHarvestYieldV1_ReturnsExpected()
        {
            // Arrange
            var sut = new CosmosDBSqlApiSampleV2Transformer<HandHarvestYieldV1, VegetationSample>(
                new MapFromHandHarvestYieldV1ToVegetationSample(),
                "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
                "",
                "CookEastCropHandHarvest",
                "CookEast",
                "VegetationSample");
            TidyData input = ManualArranger.GetTidyDataDerivedFromActualDataV1();
            List<VegetationSample> expected =
                ManualArranger.GeHandHarvestSampleDerivedFromActualDataV1();

            // Act: only the first transformed sample is compared
            VegetationSample firstSample = sut.Transform(input).First();
            var actual = new List<VegetationSample> { firstSample };

            // Assert
            Assert.Equal(expected.Count, actual.Count);
            bool roughlyEqual =
                ComparerUtil.AreVegetationSamplesRoughlyEqual(expected, actual);
            Assert.True(roughlyEqual);
        }
        /// <summary>
        /// Builds a <see cref="TidyData"/> set from the extracted metadata and the
        /// extracted observations of type <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">Observation type to extract.</typeparam>
        /// <returns>Tidy data holding the metadata and the observations as <see cref="IObservation"/> items.</returns>
        public TidyData Extract<T>() where T : IObservation
        {
            // Initializer members are assigned in order: metadata first, then observations.
            return new TidyData
            {
                Metadata = ExtractMetadata(),
                Observations = ExtractObservations<T>()
                    .Cast<IObservation>()
                    .ToList()
            };
        }
Exemple #3
0
        /// <summary>
        /// Arranges a <see cref="TidyData"/> set that combines the metadata derived
        /// from actual V1 data with the V1 observations that contain nulls.
        /// </summary>
        public static TidyData GetTidyDataWithNullsV1()
        {
            var result = new TidyData();

            result.Metadata = GetMetadataDerivedFromActualDataV1();
            result.Observations = GetObservationsWithNullsV1();

            return result;
        }
        /// <summary>
        /// Transforms the two-variable measurement fixture into
        /// <c>DataTableOneVar</c> observations and checks the resulting
        /// observation and metadata variable counts.
        /// </summary>
        public void Transform_GenericHasLessPropertiesThanMeasurements_ReturnsOnlyGenericProperties()
        {
            // Arrange
            List<MeasurementV2> input =
                LoggerNetArranger.GetMeasurementV2TwoVariablesMultipleTimesteps();
            ManualTidyDataTransformer sut = new ManualTidyDataTransformer();

            // Act
            var actual = sut.Transform<DataTableOneVar>(input);

            // Assert
            int observationCount = actual.Observations.Count;
            int variableCount = actual.Metadata.Variables.Count;

            Assert.Equal(3, observationCount);
            Assert.Equal(2, variableCount);
        }
Exemple #5
0
        /// <summary>
        /// Extracts the V1 file that contains nulls and checks that the expected
        /// number of observations is returned.
        /// </summary>
        public void Extract_Nulls_ReturnsAllValues()
        {
            // Arrange
            const int expectedDocs = 10;
            var sut = new TidyDataCsvExtractor(
                pathToFileWithNullsV1,
                pathToFileWithValidDictionaryV1);

            // Act
            var actual = sut.Extract<HandHarvestYieldV1>();

            // Assert
            int observationCount = actual.Observations.Count;
            Assert.Equal(expectedDocs, observationCount);
        }
Exemple #6
0
        /// <summary>
        /// Extracts the slim valid V1 data file and checks that the result equals
        /// the tidy data arranged manually from the actual data.
        /// </summary>
        public void Extract_ValidData_ReturnsExpected()
        {
            // Arrange
            var sut = new TidyDataCsvExtractor(
                pathToFileWithValidDataSlimV1,
                pathToFileWithValidDictionaryV1);
            TidyData expected = ManualArranger.GetTidyDataDerivedFromActualDataV1();

            // Act
            TidyData extracted = sut.Extract<HandHarvestYieldV1>();

            // Assert
            Assert.Equal(expected, extracted);
        }
        /// <summary>
        /// Transforms the two-variable measurement fixture into
        /// <c>DataTableTwoVar</c> observations and checks the counts and the
        /// presence of one known <c>ParDensityTsAvg</c> value.
        /// </summary>
        public void Transform_ValidData_ExpectedResults()
        {
            // Arrange
            List<MeasurementV2> input =
                LoggerNetArranger.GetMeasurementV2TwoVariablesMultipleTimesteps();
            ManualTidyDataTransformer sut = new ManualTidyDataTransformer();

            // Act
            var actual = sut.Transform<DataTableTwoVar>(input);

            // Assert
            List<DataTableTwoVar> typedObservations = actual.Observations
                .Cast<DataTableTwoVar>()
                .ToList();

            Assert.Equal(3, actual.Observations.Count);
            Assert.Equal(3, actual.Metadata.Variables.Count);

            // At least one observation must carry the known fixture value.
            var match = typedObservations.FirstOrDefault(o => o.ParDensityTsAvg == 1806.077);
            Assert.NotNull(match);
        }
Exemple #8
0
        /// <summary>
        /// Writes the observations of a tidy data set to a CSV file and the
        /// variables of its metadata to a companion "_Dictionary" CSV file,
        /// both in <paramref name="dirPath"/>. Existing files with the same
        /// names are overwritten.
        /// </summary>
        /// <param name="data">Data to be written to CSV file</param>
        /// <param name="dirPath">Directory to write file to. Will be created if doesn't exist.</param>
        /// <param name="fileName">Name of the file, without extension. The current date, local to the machine, in ISO 8601 basic format (yyyyMMdd), will be added.</param>
        /// <returns>A string containing the name of the data file</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="data"/> is null.</exception>
        public string LoadToFile(TidyData data, string dirPath, string fileName)
        {
            // Fail before touching the file system rather than after creating an empty file.
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(dirPath);

            // Format with the invariant culture so the stamp is always Gregorian
            // yyyyMMdd, whatever culture/calendar the machine is configured with.
            string dateStamp = DateTime.Now.ToString(
                "yyyyMMdd",
                CultureInfo.InvariantCulture);

            string dataFileName = $"{fileName}_{dateStamp}.csv";
            string dictFileName = $"{fileName}_{dateStamp}_Dictionary.csv";

            using (var writer = new StreamWriter(
                       Path.Combine(dirPath, dataFileName),
                       false,
                       Encoding.UTF8))
            using (var csv = new CsvWriter(writer))
            {
                // Format datetime strings
                csv.Configuration.CultureInfo = CultureInfo.InvariantCulture;

                csv.Configuration.TypeConverterOptionsCache.GetOptions<DateTimeOffset>().Formats = new[] { "yyyy-MM-ddTHH:mm:ssK" };
                csv.Configuration.TypeConverterOptionsCache.GetOptions<DateTime>().Formats = new[] { "yyyy-MM-ddTHH:mm:ssK" };

                // Need to convert list of interface to list of obj: https://stackoverflow.com/a/54795960/1621156
                List<object> objects = new List<object>();
                foreach (var observation in data.Observations)
                {
                    objects.Add(observation);
                }
                csv.WriteRecords(objects);
            }

            using (var writer = new StreamWriter(
                       Path.Combine(dirPath, dictFileName),
                       false,
                       Encoding.UTF8))
            using (var csv = new CsvWriter(writer))
            {
                csv.WriteRecords(data.Metadata.Variables);
            }

            return dataFileName;
        }
Exemple #9
0
        /// <summary>
        /// Converts each observation of the tidy data into a sample of type
        /// <typeparamref name="U"/>, stamps it with the transformer's document
        /// type, project, area of interest, partition key and schema, and
        /// attaches one measurement per metadata variable that yields one.
        /// </summary>
        /// <param name="tidyData">Tidy data whose observations and metadata variables are read.</param>
        /// <returns>The samples that could be mapped; observations the map returns null for are skipped.</returns>
        public List<U> Transform(TidyData tidyData)
        {
            var transformed = new List<U>();

            foreach (T row in tidyData.Observations)
            {
                // The mapper fills the class specific data and returns null
                // when it is not able to map the observation.
                U mapped = Map.GetSample(row);
                if (mapped == null)
                {
                    continue;
                }

                mapped.Type = DocumentType;
                mapped.Project = Project;
                mapped.AreaOfInterest = AreaOfInterest;
                mapped.PartitionKey = $"{mapped.Type}_{mapped.AreaOfInterest}_{mapped.Name}";
                mapped.Schema = Schema;
                mapped.Measurements = new List<MeasurementV2>();

                // Create MeasurementV2s
                // Use a mapper that defines the variables to keep
                // if(variable in mappers.VariablesToKeep)
                foreach (Variable column in tidyData.Metadata.Variables)
                {
                    MeasurementV2 created = CreateMeasurementFromVariable(
                        column,
                        row,
                        tidyData.Metadata);

                    if (created == null)
                    {
                        continue;
                    }

                    mapped.Measurements.Add(created);
                }

                transformed.Add(mapped);
            }

            return transformed;
        }
Exemple #10
0
        /// <summary>
        /// End-to-end check: extracts MeasurementV2 documents from Cosmos,
        /// transforms them to tidy data, writes them to CSV and verifies that
        /// two files (data and dictionary) were produced.
        /// </summary>
        /// <remarks>
        /// Returns a Task instead of void so the test runner can await the
        /// test and observe its exceptions.
        /// </remarks>
        public async System.Threading.Tasks.Task ExtractMeasurementV2FromCosmosTransformToTidyWriteToCsv()
        {
            // Arrange
            string uri      = System.Configuration.ConfigurationManager.AppSettings["cosmosUri"];
            string key      = System.Configuration.ConfigurationManager.AppSettings["cosmosKey"];
            string dirPath  = @"Output";
            string fileName = "test";

            var extractor   = new CafDbExtractor(uri, key);
            var transformer = new ManualTidyDataTransformer();
            var loader      = new TidyDataCsvLoader();

            try
            {
                // Act
                List<MeasurementV2> measurements =
                    await extractor.ExtractMeasurementsV2(
                        "CafMeteorologyEcTower",
                        "CookEast",
                        "ParDensityTsAvg",
                        "2019-12-12",
                        "2019-12-13");

                TidyData tidyData =
                    transformer.Transform<ParDataTable>(measurements);

                loader.LoadToFile(tidyData, dirPath, fileName);

                // Assert
                var files = Directory.GetFiles(dirPath, $"{fileName}*");

                Assert.Equal(2, files.Length);
            }
            finally
            {
                // Cleanup runs even when the pipeline or the assertion fails, so
                // leftover files cannot skew the file count of a later run.
                // The directory may not exist if the failure happened before the write.
                if (Directory.Exists(dirPath))
                {
                    foreach (var file in Directory.GetFiles(dirPath, $"{fileName}*"))
                    {
                        File.Delete(file);
                    }
                }
            }
        }
Exemple #11
0
        /// <summary>
        /// Checks that transforming the V1 tidy data that contains nulls throws
        /// an <see cref="ArgumentNullException"/>.
        /// </summary>
        public void Transform_NullValuesHandHarvestYieldV1_ThrowsArgumentNullException()
        {
            // Arrange
            var sut = new CosmosDBSqlApiSampleV2Transformer<HandHarvestYieldV1, VegetationSample>(
                new MapFromHandHarvestYieldV1ToVegetationSample(),
                "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
                "",
                "CookEastCropHandHarvest",
                "CookEast",
                "VegetationSample");
            TidyData dataWithNulls = ManualArranger.GetTidyDataWithNullsV1();

            // Act: wrapped in a delegate so the assertion can observe the exception
            Action transformNulls = () => sut.Transform(dataWithNulls);

            // Assert
            Assert.Throws<ArgumentNullException>(transformNulls);
        }
        /// <summary>
        /// Runs the soil grid point survey pipeline (CSV extract, transform to
        /// SoilSample, bulk load) and checks the number of transformed samples
        /// and that the bulk load returned results.
        /// </summary>
        public async Task SoilGridPointSurveyV1ToCosmos_ActualData_CreatesExpectedRecords()
        {
            // Arrange
            TidyDataCsvExtractor extractor = new TidyDataCsvExtractor(
                pathToFileWithValidSoilGridPointSurveyV1Data,
                pathToFileWithValidSoilGridPointSurveyV1Dictionary);

            var etlEvent = new EtlEvent(
                "EtlEvent",
                "LocalProcess",
                "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
                "CookEastSoilGridPointSurvey",
                "0.1",
                "",
                DateTime.UtcNow);

            var transformer = new CosmosDBSqlApiSampleV2Transformer<SoilGridPointSurveyV1, SoilSample>(
                new MapFromSoilGridPointSurveyToSoilSample(),
                "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
                etlEvent.Id,
                "CookEastSoilGridPointSurvey",
                "CookEast",
                "SoilSample");

            DocumentLoader loader = new DocumentLoader(client, "cafdb", "items");

            // Act
            TidyData tidy = extractor.Extract<SoilGridPointSurveyV1>();
            List<SoilSample> samples = transformer.Transform(tidy);
            StoredProcedureResponse<bool>[] loadResults = await loader.LoadBulk(samples);

            // Assert
            Assert.Equal(30, samples.Count);
            Assert.NotEmpty(loadResults);
        }
Exemple #13
0
        /// <summary>
        /// Arranges a <see cref="TidyData"/> set with a single "DateTimeUtc"
        /// variable and a single <c>DateTimeObservation</c> dated
        /// 2019-12-01 08:15:00 UTC.
        /// </summary>
        public static TidyData GetTidyDataDerivedFromMockDateTimeObservation()
        {
            return new TidyData
            {
                Metadata = new Metadata
                {
                    Variables = new List<Variable>
                    {
                        new Variable
                        {
                            FieldName = "DateTimeUtc",
                            Units = "unitless",
                            Description = ""
                        }
                    }
                },
                Observations = new List<IObservation>
                {
                    new DateTimeObservation
                    {
                        DateTimeUtc = new DateTimeOffset(
                            new DateTime(2019, 12, 01, 8, 15, 00, DateTimeKind.Utc))
                    }
                }
            };
        }
Exemple #14
0
        /// <summary>
        /// Runs the soil grid point survey ETL: reads the data and dictionary
        /// paths from appsettings.json, extracts tidy data from the CSV files,
        /// transforms it to SoilSample documents and loads them one by one
        /// through a DocumentLoader. An EtlEvent describing the run (inputs,
        /// outputs, logs, end time) is loaded in the finally block.
        /// </summary>
        /// <exception cref="FileNotFoundException">Thrown when the data or dictionary file does not exist.</exception>
        /// <exception cref="Exception">Thrown, wrapping the original error, when the DocumentClient cannot be created or the pipeline fails.</exception>
        static async Task MainAsync()
        {
            EtlEvent etlEvent = new EtlEvent(
                "EtlEvent",
                "LocalProcess",
                "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
                "CookEastSoilGridPointSurvey",
                "1.0",
                "CookEastSoilGridPointSurvey_DotNet_SoilGridPointToCosmosDB",
                DateTime.UtcNow);

            var builder = new ConfigurationBuilder()
                          .SetBasePath(Directory.GetCurrentDirectory())
                          .AddJsonFile("appsettings.json");

            var configuration = builder.Build();

            JsonSerializerSettings serializerSettings = new JsonSerializerSettings
            {
                NullValueHandling = NullValueHandling.Ignore
            };

            string data = configuration["PathToData"];
            string dict = configuration["PathToDictionary"];

            // Report which input is missing instead of an anonymous exception.
            if (!File.Exists(data))
            {
                throw new FileNotFoundException("Data file not found", data);
            }

            if (!File.Exists(dict))
            {
                throw new FileNotFoundException("Dictionary file not found", dict);
            }

            etlEvent.Inputs.Add(data);
            etlEvent.Inputs.Add(dict);

            DocumentClient client;

            try
            {
                client = new DocumentClient(
                    new Uri(
                        configuration["CosmosServiceEndpoint"]),
                    configuration["CosmosAuthKey"],
                    serializerSettings);
            }
            catch (Exception e)
            {
                etlEvent.Logs.Add(
                    $"Error creating DocumentClient: {e.Message}");
                throw new Exception("Error creating DocumentClient", e);
            }

            // Use the paths that were validated above.
            var extractor = new TidyDataCsvExtractor(data, dict);

            var transformer = new CosmosDBSqlApiSampleV2Transformer
                              <SoilGridPointSurveyV1, SoilSample>(
                new MapFromSoilGridPointSurveyToSoilSample(),
                "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
                etlEvent.Id,
                "CookEastSoilGridPointSurvey",
                "CookEast",
                "SoilSample");

            var loader = new DocumentLoader(
                client,
                "cafdb",
                "items");

            try
            {
                TidyData extracted = extractor.Extract<SoilGridPointSurveyV1>();

                etlEvent.Logs.Add(
                    $"Extracted TidyData with {extracted.Observations.Count} observations");

                List<SoilSample> transformed = transformer.Transform(extracted);

                etlEvent.Logs.Add(
                    $"Transformed TidyData to {transformed.Count} SoilSamples");

                int docsLoaded = 0;
                int docsError  = 0;
                foreach (SoilSample sample in transformed)
                {
                    ResourceResponse<Document> result =
                        await loader.LoadNoReplace(sample);

                    if (result.StatusCode == HttpStatusCode.Created)
                    {
                        etlEvent.Outputs.Add(result.Resource.Id);
                        docsLoaded++;
                    }
                    else
                    {
                        docsError++;
                    }

                    // Notify data written then pause to conserve Cosmos RU.
                    // Task.Delay yields the thread instead of blocking it.
                    Console.Write(".");
                    await Task.Delay(40);
                }

                etlEvent.Logs.Add(
                    $"Loaded {docsLoaded} SoilSamples");
                etlEvent.Logs.Add(
                    $"Error loading {docsError} SoilSamples");
            }
            catch (Exception e)
            {
                etlEvent.Logs.Add(
                    $"Error in ETL pipeline: {e.Message}");
                throw new Exception("Error in ETL pipeline", e);
            }
            finally
            {
                // Record the run whether the pipeline succeeded or failed.
                etlEvent.DateTimeEnd = DateTime.UtcNow;
                await loader.LoadNoReplace(etlEvent);
            }
        }
Exemple #15
0
        // https://stackoverflow.com/a/10445840/1621156
        /// <summary>
        /// Transforms a list of MeasurementsV2 to TidyData
        ///
        /// Assumes measurements are timeseries and spreads on DateTime. For spreading on other keys, use overloaded functions
        /// Assumes DateTime is in UTC
        /// </summary>
        /// <typeparam name="T">
        /// Observation type to create per timestamp. Must have a 'DateTimeUtc'
        /// property; measurements whose name matches no property of
        /// <typeparamref name="T"/> are ignored.
        /// </typeparam>
        /// <param name="measurements">Measurements to spread into observations; must not be null or empty.</param>
        /// <returns>
        /// Tidy data with one observation per distinct measurement DateTime and
        /// metadata variables for 'DateTimeUtc' plus every measured variable
        /// that <typeparamref name="T"/> has a property for.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="measurements"/> is null or empty.</exception>
        /// <exception cref="ArgumentException">Thrown when <typeparamref name="T"/> has no 'DateTimeUtc' property.</exception>
        public TidyData Transform <T>(List <MeasurementV2> measurements)
            where T : IObservation
        {
            if (measurements == null)
            {
                throw new ArgumentNullException(nameof(measurements));
            }

            if (measurements.Count == 0)
            {
                // Exception type kept for callers that already catch it; the
                // text is now passed as the message rather than as the
                // parameter name.
                throw new ArgumentNullException(
                    nameof(measurements),
                    "No measurements to convert");
            }

            // Looked up once and reused for every observation below.
            var dateTimeProperty = typeof(T).GetProperty("DateTimeUtc");

            if (dateTimeProperty == null)
            {
                throw new ArgumentException("Generic class needs a 'DateTimeUtc' property");
            }

            TidyData tidyData = new TidyData();

            tidyData.Metadata = new Metadata()
            {
                Variables = new List <Variable>()
                {
                    new Variable()
                    {
                        FieldName   = "DateTimeUtc",
                        Units       = "unitless",
                        Description = "Date and time when measurement was collected, in UTC"
                    }
                }
            };

            // Build metadata
            // TODO: Add m.Description, once it's implemented in schema
            // Anonymous types compare by value, so Distinct() collapses
            // repeated name/unit pairs.
            var variables = measurements
                            .Select(m => new { m.Name, m.PhysicalQuantities.First().Unit })
                            .Distinct();

            foreach (var variable in variables)
            {
                // Only describe variables the observation type can hold.
                if (typeof(T).GetProperty(variable.Name) != null)
                {
                    tidyData.Metadata.Variables.Add(
                        new Variable()
                    {
                        FieldName = variable.Name,
                        Units     = variable.Unit
                    });
                }
            }

            // Build observations: one per distinct measurement DateTime
            List <IObservation> observations = new List <IObservation>();

            foreach (var group in measurements.GroupBy(m => m.DateTime))
            {
                var observation = (T)Activator.CreateInstance(typeof(T));

                // A missing DateTime falls back to DateTime.MinValue; the value
                // is then marked as UTC without being converted.
                DateTime dateTimeUtc = group.Key ?? DateTime.MinValue;
                dateTimeUtc = DateTime.SpecifyKind(dateTimeUtc, DateTimeKind.Utc);

                dateTimeProperty.SetValue(observation, new DateTimeOffset(dateTimeUtc));

                foreach (var dataPoint in group)
                {
                    var prop = typeof(T).GetProperty(dataPoint.Name);

                    if (prop != null)
                    {
                        prop.SetValue(observation, dataPoint.PhysicalQuantities.First().Value);
                    }
                }

                observations.Add(observation);
            }

            tidyData.Observations = observations;

            return(tidyData);
        }