/// <summary>
/// Checks that constructing a <see cref="TidyDataCsvExtractor"/> with the data
/// path <c>Nodes/Manual/Assets/foo.csv</c> (paired with the dictionary path held in
/// <c>pathToFileWithValidDictionaryV1</c>) raises an <see cref="ArgumentException"/>.
/// </summary>
public void TidyExtractor_InvalidFilePaths_ThrowsArgumentException()
{
    // Arrange
    const string dataFilePath = @"Nodes/Manual/Assets/foo.csv";
    TidyDataCsvExtractor extractor;

    // Act and Assert
    // The constructor call itself is the action under test; it is expected to throw.
    Assert.Throws<ArgumentException>(
        () => extractor = new TidyDataCsvExtractor(
            dataFilePath,
            pathToFileWithValidDictionaryV1));
}
/// <summary>
/// Extracts <c>HandHarvestYieldV1</c> data from the file referenced by
/// <c>pathToFileWithNullsV1</c> and checks that the result holds 10 observations.
/// </summary>
public void Extract_Nulls_ReturnsAllValues()
{
    // Arrange
    var extractor = new TidyDataCsvExtractor(
        pathToFileWithNullsV1,
        pathToFileWithValidDictionaryV1);
    const int expectedObservationCount = 10;

    // Act
    TidyData extracted = extractor.Extract<HandHarvestYieldV1>();

    // Assert
    var observationCount = extracted.Observations.Count;
    Assert.Equal(expectedObservationCount, observationCount);
}
/// <summary>
/// Extracts <c>HandHarvestYieldV1</c> data from the file referenced by
/// <c>pathToFileWithValidDataSlimV1</c> and checks that it equals the
/// <see cref="TidyData"/> produced by
/// <c>ManualArranger.GetTidyDataDerivedFromActualDataV1</c>.
/// </summary>
public void Extract_ValidData_ReturnsExpected()
{
    // Arrange
    var extractor = new TidyDataCsvExtractor(
        pathToFileWithValidDataSlimV1,
        pathToFileWithValidDictionaryV1);
    TidyData expectedTidyData =
        ManualArranger.GetTidyDataDerivedFromActualDataV1();

    // Act
    TidyData extractedTidyData = extractor.Extract<HandHarvestYieldV1>();

    // Assert
    Assert.Equal(expectedTidyData, extractedTidyData);
}
/// <summary>
/// Runs the extract, transform and bulk-load steps for SoilGridPointSurveyV1 data
/// against the <c>cafdb</c>/<c>items</c> target and checks that 30 samples were
/// produced by the transform and that the bulk load returned a non-empty result set.
/// </summary>
/// <returns>A task representing the asynchronous test.</returns>
public async Task SoilGridPointSurveyV1ToCosmos_ActualData_CreatesExpectedRecords()
{
    // Arrange
    const string etlEventSchema =
        "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json";
    const string sampleSchema =
        "http://files.cafltar.org/data/schema/documentDb/v2/sample.json";
    const string projectName = "CookEastSoilGridPointSurvey";

    var csvExtractor = new TidyDataCsvExtractor(
        pathToFileWithValidSoilGridPointSurveyV1Data,
        pathToFileWithValidSoilGridPointSurveyV1Dictionary);

    var etlEvent = new EtlEvent(
        "EtlEvent",
        "LocalProcess",
        etlEventSchema,
        projectName,
        "0.1",
        "",
        DateTime.UtcNow);

    // The transformer receives the id of the ETL event created above.
    var sampleTransformer =
        new CosmosDBSqlApiSampleV2Transformer<SoilGridPointSurveyV1, SoilSample>(
            new MapFromSoilGridPointSurveyToSoilSample(),
            sampleSchema,
            etlEvent.Id,
            projectName,
            "CookEast",
            "SoilSample");

    var documentLoader = new DocumentLoader(client, "cafdb", "items");

    // Act
    TidyData tidyData = csvExtractor.Extract<SoilGridPointSurveyV1>();
    List<SoilSample> soilSamples = sampleTransformer.Transform(tidyData);
    StoredProcedureResponse<bool>[] loadResults =
        await documentLoader.LoadBulk(soilSamples);

    // Assert
    Assert.Equal(30, soilSamples.Count);
    Assert.NotEmpty(loadResults);
}
/// <summary>
/// Runs the SoilGridPointSurvey ETL pipeline: reads settings from
/// <c>appsettings.json</c> in the current directory, extracts tidy data from the
/// configured CSV and dictionary files, transforms it into <c>SoilSample</c>
/// documents, and loads them one at a time into the <c>cafdb</c>/<c>items</c> target.
/// An <c>EtlEvent</c> describing the run is loaded at the end of the pipeline,
/// whether or not the pipeline succeeded.
/// </summary>
/// <returns>A task that completes when the pipeline and the event write finish.</returns>
/// <exception cref="FileNotFoundException">
/// The file named by the <c>PathToData</c> or <c>PathToDictionary</c> setting does not exist.
/// </exception>
/// <exception cref="Exception">
/// Creating the <c>DocumentClient</c> failed, or a step of the pipeline failed;
/// the original error is available as the inner exception.
/// </exception>
static async Task MainAsync()
{
    // Pause between document writes, in milliseconds, to conserve Cosmos RU.
    const int delayBetweenWritesMs = 40;

    EtlEvent etlEvent = new EtlEvent(
        "EtlEvent",
        "LocalProcess",
        "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
        "CookEastSoilGridPointSurvey",
        "1.0",
        "CookEastSoilGridPointSurvey_DotNet_SoilGridPointToCosmosDB",
        DateTime.UtcNow);

    var builder = new ConfigurationBuilder()
        .SetBasePath(Directory.GetCurrentDirectory())
        .AddJsonFile("appsettings.json");
    var configuration = builder.Build();

    JsonSerializerSettings serializerSettings = new JsonSerializerSettings
    {
        NullValueHandling = NullValueHandling.Ignore
    };

    string data = configuration["PathToData"];
    string dict = configuration["PathToDictionary"];

    // Fail fast, and say which input is missing, before touching the database.
    if (!File.Exists(data))
    {
        throw new FileNotFoundException(
            "The data file configured as 'PathToData' was not found.",
            data);
    }

    if (!File.Exists(dict))
    {
        throw new FileNotFoundException(
            "The dictionary file configured as 'PathToDictionary' was not found.",
            dict);
    }

    etlEvent.Inputs.Add(data);
    etlEvent.Inputs.Add(dict);

    DocumentClient client;
    try
    {
        client = new DocumentClient(
            new Uri(configuration["CosmosServiceEndpoint"]),
            configuration["CosmosAuthKey"],
            serializerSettings);
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error creating DocumentClient: {e.Message}");
        throw new Exception("Error creating DocumentClient", e);
    }

    // Reuse the paths validated above rather than re-reading the configuration.
    var extractor = new TidyDataCsvExtractor(data, dict);

    var transformer =
        new CosmosDBSqlApiSampleV2Transformer<SoilGridPointSurveyV1, SoilSample>(
            new MapFromSoilGridPointSurveyToSoilSample(),
            "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
            etlEvent.Id,
            "CookEastSoilGridPointSurvey",
            "CookEast",
            "SoilSample");

    var loader = new DocumentLoader(
        client,
        "cafdb",
        "items");

    try
    {
        TidyData extracted = extractor.Extract<SoilGridPointSurveyV1>();
        etlEvent.Logs.Add(
            $"Extracted TidyData with {extracted.Observations.Count} observations");

        List<SoilSample> transformed = transformer.Transform(extracted);
        etlEvent.Logs.Add(
            $"Transformed TidyData to {transformed.Count} SoilSamples");

        int docsLoaded = 0;
        int docsError = 0;
        foreach (SoilSample sample in transformed)
        {
            ResourceResponse<Document> result =
                await loader.LoadNoReplace(sample);

            if (result.StatusCode == HttpStatusCode.Created)
            {
                etlEvent.Outputs.Add(result.Resource.Id);
                docsLoaded++;
            }
            else
            {
                docsError++;
            }

            // Notify data written, then pause without blocking the thread
            // to conserve Cosmos RU.
            Console.Write(".");
            await Task.Delay(delayBetweenWritesMs);
        }

        etlEvent.Logs.Add(
            $"Loaded {docsLoaded} SoilSamples");
        etlEvent.Logs.Add(
            $"Error loading {docsError} SoilSamples");
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error in ETL pipeline: {e.Message}");
        throw new Exception("Error in ETL pipeline", e);
    }
    finally
    {
        // Record the run whether or not the pipeline succeeded.
        // NOTE(review): if this write throws while another exception is propagating,
        // it replaces that exception — confirm whether that is acceptable.
        etlEvent.DateTimeEnd = DateTime.UtcNow;
        await loader.LoadNoReplace(etlEvent);
    }
}