/// <summary>
/// Transforming valid hand-harvest tidy data should yield the expected VegetationSamples.
/// </summary>
public void Transform_ValidDataHandHarvestYieldV1_ReturnsExpected()
{
    // Arrange
    var sut = new CosmosDBSqlApiSampleV2Transformer<HandHarvestYieldV1, VegetationSample>(
        new MapFromHandHarvestYieldV1ToVegetationSample(),
        "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
        "",
        "CookEastCropHandHarvest",
        "CookEast",
        "VegetationSample");
    TidyData input = ManualArranger.GetTidyDataDerivedFromActualDataV1();
    List<VegetationSample> expected = ManualArranger.GeHandHarvestSampleDerivedFromActualDataV1();

    // Act
    var actual = new List<VegetationSample> { sut.Transform(input).First() };

    // Assert
    Assert.Equal(expected.Count, actual.Count);
    Assert.True(ComparerUtil.AreVegetationSamplesRoughlyEqual(expected, actual));
}
/// <summary>
/// Builds a TidyData set from this extractor's sources: metadata plus the
/// observations parsed as <typeparamref name="T"/> (upcast to IObservation).
/// </summary>
public TidyData Extract<T>() where T : IObservation
{
    return new TidyData
    {
        Metadata = ExtractMetadata(),
        Observations = ExtractObservations<T>().Cast<IObservation>().ToList()
    };
}
/// <summary>
/// Arranges a TidyData fixture whose observations contain null values,
/// paired with metadata derived from the actual V1 data.
/// </summary>
public static TidyData GetTidyDataWithNullsV1() =>
    new TidyData
    {
        Metadata = GetMetadataDerivedFromActualDataV1(),
        Observations = GetObservationsWithNullsV1()
    };
/// <summary>
/// When the generic observation type declares fewer properties than the input
/// measurements, only the declared properties should survive the transform.
/// </summary>
public void Transform_GenericHasLessPropertiesThanMeasurements_ReturnsOnlyGenericProperties()
{
    // Arrange
    List<MeasurementV2> input = LoggerNetArranger.GetMeasurementV2TwoVariablesMultipleTimesteps();
    var sut = new ManualTidyDataTransformer();

    // Act
    TidyData result = sut.Transform<DataTableOneVar>(input);

    // Assert
    Assert.Equal(3, result.Observations.Count);
    Assert.Equal(2, result.Metadata.Variables.Count);
}
/// <summary>
/// Extracting a CSV containing null values should still yield every row.
/// </summary>
public void Extract_Nulls_ReturnsAllValues()
{
    // Arrange
    var sut = new TidyDataCsvExtractor(
        pathToFileWithNullsV1,
        pathToFileWithValidDictionaryV1);
    const int expectedDocs = 10;

    // Act
    TidyData result = sut.Extract<HandHarvestYieldV1>();

    // Assert
    Assert.Equal(expectedDocs, result.Observations.Count);
}
/// <summary>
/// Extracting the slim valid-data CSV should reproduce the manually-arranged TidyData.
/// </summary>
public void Extract_ValidData_ReturnsExpected()
{
    // Arrange
    var sut = new TidyDataCsvExtractor(
        pathToFileWithValidDataSlimV1,
        pathToFileWithValidDictionaryV1);
    TidyData expected = ManualArranger.GetTidyDataDerivedFromActualDataV1();

    // Act
    TidyData actual = sut.Extract<HandHarvestYieldV1>();

    // Assert
    Assert.Equal(expected, actual);
}
/// <summary>
/// Transforming two variables over multiple timesteps should produce one
/// observation per timestep and carry known values through the pivot.
/// </summary>
public void Transform_ValidData_ExpectedResults()
{
    // Arrange
    List<MeasurementV2> input = LoggerNetArranger.GetMeasurementV2TwoVariablesMultipleTimesteps();
    var sut = new ManualTidyDataTransformer();

    // Act
    TidyData result = sut.Transform<DataTableTwoVar>(input);

    // Assert
    List<DataTableTwoVar> observations = result.Observations.Cast<DataTableTwoVar>().ToList();
    Assert.Equal(3, result.Observations.Count);
    Assert.Equal(3, result.Metadata.Variables.Count);
    // NOTE(review): exact '==' on a double — assumes 1806.077 round-trips losslessly
    // from the arranged fixture; confirm against LoggerNetArranger.
    Assert.NotNull(observations.FirstOrDefault(m => m.ParDensityTsAvg == 1806.077));
}
/// <summary>
/// Writes a TidyData set to a pair of CSV files: one with the observations,
/// one with the data dictionary (metadata variables).
/// </summary>
/// <param name="data">Data to be written to CSV file</param>
/// <param name="dirPath">Directory to write file to. Will be created if doesn't exist.</param>
/// <param name="fileName">Name of the file, without extension. The current date, local to the machine, in ISO 8601 format, will be added.</param>
/// <returns>A string containing the name of the data file</returns>
public string LoadToFile(TidyData data, string dirPath, string fileName)
{
    if (!Directory.Exists(dirPath))
    {
        Directory.CreateDirectory(dirPath);
    }

    DateTime dt = DateTime.Now;
    // Format the stamp with the invariant culture so the file name always uses
    // Gregorian digits regardless of the machine's regional settings (CA1305).
    string dateStamp = dt.ToString("yyyyMMdd", CultureInfo.InvariantCulture);
    string dataFileName = $"{fileName}_{dateStamp}.csv";
    string dictFileName = $"{fileName}_{dateStamp}_Dictionary.csv";

    using (var writer = new StreamWriter(
        Path.Combine(dirPath, dataFileName), false, Encoding.UTF8))
    using (var csv = new CsvWriter(writer))
    {
        // Format datetime strings
        csv.Configuration.CultureInfo = CultureInfo.InvariantCulture;
        csv.Configuration.TypeConverterOptionsCache.GetOptions<DateTimeOffset>().Formats =
            new[] { "yyyy-MM-ddTHH:mm:ssK" };
        csv.Configuration.TypeConverterOptionsCache.GetOptions<DateTime>().Formats =
            new[] { "yyyy-MM-ddTHH:mm:ssK" };

        // Need to convert list of interface to list of obj: https://stackoverflow.com/a/54795960/1621156
        List<object> objects = data.Observations.Cast<object>().ToList();
        csv.WriteRecords(objects);
    }

    using (var writer = new StreamWriter(
        Path.Combine(dirPath, dictFileName), false, Encoding.UTF8))
    using (var csv = new CsvWriter(writer))
    {
        csv.WriteRecords(data.Metadata.Variables);
    }

    return dataFileName;
}
/// <summary>
/// Maps each observation in the tidy data to a document sample, stamping the
/// transformer's identity fields and attaching one MeasurementV2 per variable
/// that can be created. Observations the mapper cannot handle are skipped.
/// </summary>
public List<U> Transform(TidyData tidyData)
{
    var samples = new List<U>();

    foreach (T obs in tidyData.Observations)
    {
        // Maps class specific data, returns null if not able to map
        U sample = Map.GetSample(obs);
        if (sample == null)
        {
            continue;
        }

        sample.Type = DocumentType;
        sample.Project = Project;
        sample.AreaOfInterest = AreaOfInterest;
        sample.PartitionKey = $"{sample.Type}_{sample.AreaOfInterest}_{sample.Name}";
        sample.Schema = Schema;
        sample.Measurements = new List<MeasurementV2>();

        foreach (Variable v in tidyData.Metadata.Variables)
        {
            // Create MeasurementV2s
            // Use a mapper that defines the variables to keep
            // if(variable in mappers.VariablesToKeep)
            MeasurementV2 m = CreateMeasurementFromVariable(v, obs, tidyData.Metadata);
            if (m != null)
            {
                sample.Measurements.Add(m);
            }
        }

        samples.Add(sample);
    }

    return samples;
}
/// <summary>
/// Integration test: extracts ParDensityTsAvg MeasurementV2 documents from
/// Cosmos DB, pivots them to tidy data, and writes the data + dictionary CSVs.
/// Requires cosmosUri/cosmosKey app settings and network access.
/// </summary>
// Changed async void -> async Task: the test runner can only await and observe
// failures/exceptions from a Task-returning test; async void exceptions are lost.
public async Task ExtractMeasurementV2FromCosmosTransformToTidyWriteToCsv()
{
    // Arrange
    string uri = System.Configuration.ConfigurationManager.AppSettings["cosmosUri"];
    string key = System.Configuration.ConfigurationManager.AppSettings["cosmosKey"];
    string dirPath = @"Output";
    string fileName = "test";

    var extractor = new CafDbExtractor(uri, key);
    var transformer = new ManualTidyDataTransformer();
    var loader = new TidyDataCsvLoader();

    // Act
    List<MeasurementV2> measurements = await extractor.ExtractMeasurementsV2(
        "CafMeteorologyEcTower",
        "CookEast",
        "ParDensityTsAvg",
        "2019-12-12",
        "2019-12-13");

    TidyData tidyData = transformer.Transform<ParDataTable>(measurements);
    loader.LoadToFile(tidyData, dirPath, fileName);

    var files = Directory.GetFiles(dirPath, $"{fileName}*");
    try
    {
        // Assert: expect one data file and one dictionary file
        Assert.Equal(2, files.Length);
    }
    finally
    {
        // Cleanup runs even when the assertion fails, so reruns start clean
        foreach (var file in files)
        {
            File.Delete(file);
        }
    }
}
/// <summary>
/// Transforming tidy data containing null values should throw ArgumentNullException.
/// </summary>
public void Transform_NullValuesHandHarvestYieldV1_ThrowsArgumentNullException()
{
    // Arrange
    var sut = new CosmosDBSqlApiSampleV2Transformer<HandHarvestYieldV1, VegetationSample>(
        new MapFromHandHarvestYieldV1ToVegetationSample(),
        "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
        "",
        "CookEastCropHandHarvest",
        "CookEast",
        "VegetationSample");
    TidyData input = ManualArranger.GetTidyDataWithNullsV1();

    // Act
    Action act = () => sut.Transform(input);

    // Assert
    Assert.Throws<ArgumentNullException>(act);
}
/// <summary>
/// Integration test: runs the full extract/transform/load pipeline for the
/// soil grid point survey data and checks the expected number of records.
/// Requires a reachable Cosmos DB 'cafdb/items' collection.
/// </summary>
public async Task SoilGridPointSurveyV1ToCosmos_ActualData_CreatesExpectedRecords()
{
    // Arrange
    var extractor = new TidyDataCsvExtractor(
        pathToFileWithValidSoilGridPointSurveyV1Data,
        pathToFileWithValidSoilGridPointSurveyV1Dictionary);

    var etlEvent = new EtlEvent(
        "EtlEvent",
        "LocalProcess",
        "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
        "CookEastSoilGridPointSurvey",
        "0.1",
        "",
        DateTime.UtcNow);

    var transformer = new CosmosDBSqlApiSampleV2Transformer<SoilGridPointSurveyV1, SoilSample>(
        new MapFromSoilGridPointSurveyToSoilSample(),
        "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
        etlEvent.Id,
        "CookEastSoilGridPointSurvey",
        "CookEast",
        "SoilSample");

    var loader = new DocumentLoader(client, "cafdb", "items");

    // Act
    TidyData extracted = extractor.Extract<SoilGridPointSurveyV1>();
    List<SoilSample> transformed = transformer.Transform(extracted);
    StoredProcedureResponse<bool>[] results = await loader.LoadBulk(transformed);

    // Assert
    Assert.Equal(30, transformed.Count);
    Assert.NotEmpty(results);
}
/// <summary>
/// Arranges a minimal TidyData fixture: a single DateTimeUtc variable and a
/// single DateTimeObservation at 2019-12-01 08:15:00 UTC.
/// </summary>
public static TidyData GetTidyDataDerivedFromMockDateTimeObservation()
{
    return new TidyData
    {
        Metadata = new Metadata
        {
            Variables = new List<Variable>
            {
                new Variable
                {
                    FieldName = "DateTimeUtc",
                    Units = "unitless",
                    Description = ""
                }
            }
        },
        Observations = new List<IObservation>
        {
            new DateTimeObservation
            {
                DateTimeUtc = new DateTimeOffset(
                    new DateTime(2019, 12, 01, 8, 15, 00, DateTimeKind.Utc))
            }
        }
    };
}
/// <summary>
/// ETL entry point: extracts soil grid point survey rows from CSV, transforms
/// them to SoilSample documents, and loads them one-by-one into Cosmos DB.
/// An EtlEvent document describing the run is always written in the finally block.
/// </summary>
/// <exception cref="FileNotFoundException">Thrown when the configured data or dictionary file is missing</exception>
/// <exception cref="Exception">Wraps DocumentClient creation or pipeline failures (original in InnerException)</exception>
static async Task MainAsync()
{
    EtlEvent etlEvent = new EtlEvent(
        "EtlEvent",
        "LocalProcess",
        "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
        "CookEastSoilGridPointSurvey",
        "1.0",
        "CookEastSoilGridPointSurvey_DotNet_SoilGridPointToCosmosDB",
        DateTime.UtcNow);

    var builder = new ConfigurationBuilder()
        .SetBasePath(Directory.GetCurrentDirectory())
        .AddJsonFile("appsettings.json");
    var configuration = builder.Build();

    JsonSerializerSettings serializerSettings = new JsonSerializerSettings
    {
        NullValueHandling = NullValueHandling.Ignore
    };

    string data = configuration["PathToData"];
    string dict = configuration["PathToDictionary"];

    // Short-circuit '||' (the original used bitwise '|', which always
    // evaluated both operands; same outcome here, but '||' is the intent)
    if (!File.Exists(data) || !File.Exists(dict))
    {
        throw new FileNotFoundException();
    }

    etlEvent.Inputs.Add(data);
    etlEvent.Inputs.Add(dict);

    DocumentClient client;
    try
    {
        client = new DocumentClient(
            new Uri(configuration["CosmosServiceEndpoint"]),
            configuration["CosmosAuthKey"],
            serializerSettings);
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error creating DocumentClient: {e.Message}");
        throw new Exception("Error creating DocumentClient", e);
    }

    var extractor = new TidyDataCsvExtractor(
        configuration["PathToData"],
        configuration["PathToDictionary"]);

    var transformer = new CosmosDBSqlApiSampleV2Transformer<SoilGridPointSurveyV1, SoilSample>(
        new MapFromSoilGridPointSurveyToSoilSample(),
        "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
        etlEvent.Id,
        "CookEastSoilGridPointSurvey",
        "CookEast",
        "SoilSample");

    var loader = new DocumentLoader(client, "cafdb", "items");

    try
    {
        TidyData extracted = extractor.Extract<SoilGridPointSurveyV1>();
        etlEvent.Logs.Add(
            $"Extracted TidyData with {extracted.Observations.Count} observations");

        List<SoilSample> transformed = transformer.Transform(extracted);
        etlEvent.Logs.Add(
            $"Transformed TidyData to {transformed.Count} SoilSamples");

        int docsLoaded = 0;
        int docsError = 0;

        foreach (SoilSample sample in transformed)
        {
            ResourceResponse<Document> result = await loader.LoadNoReplace(sample);
            if (result.StatusCode == HttpStatusCode.Created)
            {
                etlEvent.Outputs.Add(result.Resource.Id);
                docsLoaded++;
            }
            else
            {
                docsError++;
            }

            // Notify data written then pause to conserve Cosmos RU.
            // await Task.Delay instead of Thread.Sleep so the thread
            // isn't blocked inside this async method.
            Console.Write(".");
            await Task.Delay(40);
        }

        etlEvent.Logs.Add(
            $"Loaded {docsLoaded.ToString()} SoilSamples");
        etlEvent.Logs.Add(
            $"Error loading {docsError.ToString()} SoilSamples");
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error in ETL pipeline: {e.Message}");
        throw new Exception("Error in ETL pipeline", e);
    }
    finally
    {
        // Always record the run, even on failure
        etlEvent.DateTimeEnd = DateTime.UtcNow;
        ResourceResponse<Document> result = await loader.LoadNoReplace(etlEvent);
    }
}
// https://stackoverflow.com/a/10445840/1621156
/// <summary>
/// Transforms a list of MeasurementsV2 to TidyData
///
/// Assumes measurements are timeseries and spreads on DateTime. For spreading on other keys, use overloaded functions
/// Assumes DateTime is in UTC
/// </summary>
/// <typeparam name="T">Observation type whose public properties select the variables to keep; must declare a 'DateTimeUtc' property</typeparam>
/// <param name="measurements">Measurements to pivot into one observation per distinct timestamp</param>
/// <returns>TidyData with a Variable per matching measurement name and one observation per timestamp</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="measurements"/> is empty</exception>
/// <exception cref="ArgumentException">Thrown when <typeparamref name="T"/> lacks a 'DateTimeUtc' property</exception>
public TidyData Transform<T>(List<MeasurementV2> measurements) where T : IObservation
{
    if (measurements.Count == 0)
    {
        // Keep the exception type (callers/tests expect ArgumentNullException),
        // but pass the parameter name first: the original passed the message as
        // the paramName argument, producing misleading exception text.
        throw new ArgumentNullException(nameof(measurements), "No measurements to convert");
    }

    // Hoisted: resolved once instead of once per timestamp group below
    var dateTimeUtcProp = typeof(T).GetProperty("DateTimeUtc");
    if (dateTimeUtcProp == null)
    {
        throw new ArgumentException("Generic class needs a 'DateTimeUtc' property");
    }

    TidyData tidyData = new TidyData();
    tidyData.Metadata = new Metadata()
    {
        Variables = new List<Variable>()
        {
            new Variable()
            {
                FieldName = "DateTimeUtc",
                Units = "unitless",
                Description = "Date and time when measurement was collected, in UTC"
            }
        }
    };

    // Build metadata
    // TODO: Add m.Description, once it's implemented in schema
    var variables = measurements
        .Select(m => new { m.Name, m.PhysicalQuantities.First().Unit })
        .Distinct();

    foreach (var variable in variables)
    {
        // Only keep variables that T actually declares as properties
        if (typeof(T).GetProperty(variable.Name) != null)
        {
            tidyData.Metadata.Variables.Add(
                new Variable()
                {
                    FieldName = variable.Name,
                    Units = variable.Unit
                });
        }
    }

    // Build observations: one T instance per distinct timestamp
    var groupedTypes = measurements
        .GroupBy(m => m.DateTime)
        .ToList();

    List<IObservation> observations = new List<IObservation>();

    foreach (var group in groupedTypes)
    {
        var observation = (T)Activator.CreateInstance(typeof(T));

        // Null timestamps collapse to DateTime.MinValue rather than throwing
        DateTime dateTimeUtc = group.Key ?? DateTime.MinValue;
        dateTimeUtc = DateTime.SpecifyKind(dateTimeUtc, DateTimeKind.Utc);

        dateTimeUtcProp.SetValue(observation, new DateTimeOffset(dateTimeUtc));

        foreach (var dataPoint in group)
        {
            // Properties not declared on T are silently skipped
            var prop = observation
                .GetType()
                .GetProperty(dataPoint.Name);

            if (prop != null)
            {
                prop.SetValue(observation, dataPoint.PhysicalQuantities.First().Value);
            }
        }

        observations.Add(observation);
    }

    tidyData.Observations = observations;

    return tidyData;
}