/// <summary>
/// Transforms tidy data derived from actual hand-harvest data and checks that
/// the first resulting <c>VegetationSample</c> is roughly equal to the
/// expected sample list supplied by <c>ManualArranger</c>.
/// </summary>
public void Transform_ValidDataHandHarvestYieldV1_ReturnsExpected()
{
    // Arrange
    var transformer =
        new CosmosDBSqlApiSampleV2Transformer<HandHarvestYieldV1, VegetationSample>(
            new MapFromHandHarvestYieldV1ToVegetationSample(),
            "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
            "",
            "CookEastCropHandHarvest",
            "CookEast",
            "VegetationSample");
    TidyData input = ManualArranger.GetTidyDataDerivedFromActualDataV1();
    List<VegetationSample> expectedSamples =
        ManualArranger.GeHandHarvestSampleDerivedFromActualDataV1();

    // Act
    // Only the first transformed sample is compared against the expectation.
    VegetationSample firstSample = transformer.Transform(input).First();
    var actualSamples = new List<VegetationSample> { firstSample };

    // Assert
    Assert.Equal(expectedSamples.Count, actualSamples.Count);
    bool roughlyEqual = ComparerUtil.AreVegetationSamplesRoughlyEqual(
        expectedSamples,
        actualSamples);
    Assert.True(roughlyEqual);
}
/// <summary>
/// Checks that transforming the tidy data returned by
/// <c>ManualArranger.GetTidyDataWithNullsV1</c> throws an
/// <see cref="ArgumentNullException"/>.
/// </summary>
public void Transform_NullValuesHandHarvestYieldV1_ThrowsArgumentNullException()
{
    // Arrange
    var transformer =
        new CosmosDBSqlApiSampleV2Transformer<HandHarvestYieldV1, VegetationSample>(
            new MapFromHandHarvestYieldV1ToVegetationSample(),
            "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
            "",
            "CookEastCropHandHarvest",
            "CookEast",
            "VegetationSample");
    TidyData dataWithNulls = ManualArranger.GetTidyDataWithNullsV1();

    // Act
    // The call is deferred in a delegate so the assertion can observe the exception.
    Action transformNulls = () => { transformer.Transform(dataWithNulls); };

    // Assert
    Assert.Throws<ArgumentNullException>(transformNulls);
}
/// <summary>
/// Runs extract, transform and bulk load for the soil grid point survey
/// files, then checks that 30 samples were transformed and that the bulk
/// load returned a non-empty result set.
/// </summary>
public async Task SoilGridPointSurveyV1ToCosmos_ActualData_CreatesExpectedRecords()
{
    // Arrange
    TidyDataCsvExtractor csvExtractor = new TidyDataCsvExtractor(
        pathToFileWithValidSoilGridPointSurveyV1Data,
        pathToFileWithValidSoilGridPointSurveyV1Dictionary);

    var etlEvent = new EtlEvent(
        "EtlEvent",
        "LocalProcess",
        "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
        "CookEastSoilGridPointSurvey",
        "0.1",
        "",
        DateTime.UtcNow);

    CosmosDBSqlApiSampleV2Transformer<SoilGridPointSurveyV1, SoilSample> sampleTransformer =
        new CosmosDBSqlApiSampleV2Transformer<SoilGridPointSurveyV1, SoilSample>(
            new MapFromSoilGridPointSurveyToSoilSample(),
            "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
            etlEvent.Id,
            "CookEastSoilGridPointSurvey",
            "CookEast",
            "SoilSample");

    DocumentLoader documentLoader = new DocumentLoader(
        client,
        "cafdb",
        "items");

    // Act
    TidyData tidyData = csvExtractor.Extract<SoilGridPointSurveyV1>();
    List<SoilSample> samples = sampleTransformer.Transform(tidyData);
    StoredProcedureResponse<bool>[] loadResults =
        await documentLoader.LoadBulk(samples);

    // Assert
    Assert.Equal(30, samples.Count);
    Assert.NotEmpty(loadResults);
}
/// <summary>
/// Runs the soil grid point survey ETL: reads the data and dictionary paths
/// and the Cosmos connection settings from <c>appsettings.json</c>, extracts
/// tidy data from the CSV files, transforms it to <c>SoilSample</c> documents,
/// loads each document individually, and finally stores the
/// <c>EtlEvent</c> that records inputs, outputs and log messages.
/// </summary>
/// <exception cref="FileNotFoundException">
/// Thrown when the configured data file or dictionary file does not exist.
/// </exception>
/// <exception cref="Exception">
/// Thrown (wrapping the original exception) when the document client cannot
/// be created or when any step of the extract/transform/load pipeline fails.
/// </exception>
static async Task MainAsync()
{
    EtlEvent etlEvent = new EtlEvent(
        "EtlEvent",
        "LocalProcess",
        "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
        "CookEastSoilGridPointSurvey",
        "1.0",
        "CookEastSoilGridPointSurvey_DotNet_SoilGridPointToCosmosDB",
        DateTime.UtcNow);

    var builder = new ConfigurationBuilder()
        .SetBasePath(Directory.GetCurrentDirectory())
        .AddJsonFile("appsettings.json");
    var configuration = builder.Build();

    JsonSerializerSettings serializerSettings = new JsonSerializerSettings
    {
        NullValueHandling = NullValueHandling.Ignore
    };

    string data = configuration["PathToData"];
    string dict = configuration["PathToDictionary"];

    // Fail fast, and say which input is missing so the operator does not
    // have to guess. File.Exists returns false for a null path, so a missing
    // configuration key is reported the same way.
    if (!File.Exists(data))
    {
        throw new FileNotFoundException(
            $"Data file not found: {data}", data);
    }
    if (!File.Exists(dict))
    {
        throw new FileNotFoundException(
            $"Dictionary file not found: {dict}", dict);
    }

    etlEvent.Inputs.Add(data);
    etlEvent.Inputs.Add(dict);

    DocumentClient client;
    try
    {
        client = new DocumentClient(
            new Uri(configuration["CosmosServiceEndpoint"]),
            configuration["CosmosAuthKey"],
            serializerSettings);
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error creating DocumentClient: {e.Message}");
        throw new Exception("Error creating DocumentClient", e);
    }

    // Reuse the paths that were validated above instead of re-reading them
    // from configuration.
    var extractor = new TidyDataCsvExtractor(data, dict);

    var transformer = new CosmosDBSqlApiSampleV2Transformer<SoilGridPointSurveyV1, SoilSample>(
        new MapFromSoilGridPointSurveyToSoilSample(),
        "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
        etlEvent.Id,
        "CookEastSoilGridPointSurvey",
        "CookEast",
        "SoilSample");

    var loader = new DocumentLoader(
        client,
        "cafdb",
        "items");

    try
    {
        TidyData extracted = extractor.Extract<SoilGridPointSurveyV1>();
        etlEvent.Logs.Add(
            $"Extracted TidyData with {extracted.Observations.Count} observations");

        List<SoilSample> transformed = transformer.Transform(extracted);
        etlEvent.Logs.Add(
            $"Transformed TidyData to {transformed.Count} SoilSamples");

        int docsLoaded = 0;
        int docsError = 0;
        foreach (SoilSample sample in transformed)
        {
            ResourceResponse<Document> result =
                await loader.LoadNoReplace(sample);

            if (result.StatusCode == HttpStatusCode.Created)
            {
                etlEvent.Outputs.Add(result.Resource.Id);
                docsLoaded++;
            }
            else
            {
                docsError++;
            }

            // Notify data written, then pause to conserve Cosmos RU.
            // Task.Delay yields the thread instead of blocking it the way
            // Thread.Sleep would inside an async method.
            Console.Write(".");
            await Task.Delay(40);
        }

        etlEvent.Logs.Add(
            $"Loaded {docsLoaded} SoilSamples");
        etlEvent.Logs.Add(
            $"Error loading {docsError} SoilSamples");
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error in ETL pipeline: {e.Message}");
        throw new Exception("Error in ETL pipeline", e);
    }
    finally
    {
        // Always persist the event record, whether the pipeline succeeded
        // or failed. NOTE: if this call itself throws, that exception
        // replaces any exception propagating from the pipeline above.
        etlEvent.DateTimeEnd = DateTime.UtcNow;
        await loader.LoadNoReplace(etlEvent);
    }
}