public async Task LoadNoReplace_RecordExists_ReturnsOldRecord()
{
    // ARRANGE
    // Note: client is assumed to be a DocumentClient shared at the test class level
    var e = CosmosDBSqlApiArranger.GetEtlEventMock("EtlEvent_2018-05-22T01:00:00.000000Z");
    var datetime = DateTime.UtcNow;

    DocumentLoader sut = new DocumentLoader(
        client,
        "cafdb",
        "items");

    // ACT
    ResourceResponse<Document> result = await sut.LoadNoReplace(e);

    // ASSERT
    Assert.True(result.StatusCode == HttpStatusCode.OK);
    Assert.True(result.Resource.Timestamp < datetime);
}
public async Task LoadNoReplace_RecordDoesNotExist_ReturnsNewRecord()
{
    // ARRANGE
    var datetime = DateTime.UtcNow;

    DocumentClient client = new DocumentClient(
        new Uri("https://localhost:8081"),
        "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==");

    var e = CosmosDBSqlApiArranger.GetEtlEventMock($"EtlEvent_{DateTime.UtcNow.ToString("o")}");

    DocumentLoader sut = new DocumentLoader(
        client,
        "cafdb",
        "items");

    // ACT
    ResourceResponse<Document> result = await sut.LoadNoReplace(e);

    // ASSERT
    Assert.True(result.StatusCode == HttpStatusCode.Created);
    Assert.True(result.Resource.Timestamp > datetime);
}
public async Task LoadNoReplace_RecordExists_ReturnsOldRecord()
{
    // ARRANGE
    DocumentClient client = new DocumentClient(
        new Uri("https://localhost:8081"),
        "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==");

    var e = CosmosDBSqlApiArranger.GetEtlEventMock("EtlEvent_2018-05-22T01:00:00.000000Z");
    var datetime = DateTime.UtcNow;

    DocumentLoader sut = new DocumentLoader(
        client,
        "cafdb",
        "items");

    // ACT
    ResourceResponse<Document> result = await sut.LoadNoReplace(e);

    // ASSERT
    Assert.True(result.StatusCode == HttpStatusCode.OK);
    Assert.True(result.Resource.Timestamp < datetime);
}
public async Task LoadNoReplace_RecordDoesNotExist_ReturnsNewRecord()
{
    // ARRANGE
    // Note: client is assumed to be a DocumentClient shared at the test class level
    var datetime = DateTime.UtcNow;
    var e = CosmosDBSqlApiArranger.GetEtlEventMock($"EtlEvent_{DateTime.UtcNow.ToString("o")}");

    DocumentLoader sut = new DocumentLoader(
        client,
        "cafdb",
        "items");

    // ACT
    ResourceResponse<Document> result = await sut.LoadNoReplace(e);

    // ASSERT
    Assert.True(result.StatusCode == HttpStatusCode.Created);
    Assert.InRange<DateTime>(
        result.Resource.Timestamp,
        datetime.Add(new TimeSpan(0, -1, 0)),
        datetime.Add(new TimeSpan(0, 1, 0)));
}
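These tests pin down the contract of LoadNoReplace: an existing document comes back untouched (HTTP 200 with an old timestamp), while a missing one is created (HTTP 201 with a fresh timestamp). The implementation itself isn't shown here, so the following is only a minimal sketch consistent with those assertions; it assumes the entities expose an Id string used as the document id, and it elides the partition-key handling a partitioned collection would need:

using System;
using System.Net;
using System.Threading.Tasks;
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Client;

public class DocumentLoader
{
    private readonly DocumentClient client;
    private readonly string database;
    private readonly string collection;

    public DocumentLoader(DocumentClient client, string database, string collection)
    {
        this.client = client;
        this.database = database;
        this.collection = collection;
    }

    // Reads the document first, so an existing record is returned untouched
    // (HTTP 200); only a missing record is created (HTTP 201).
    public async Task<ResourceResponse<Document>> LoadNoReplace(dynamic entity)
    {
        try
        {
            return await client.ReadDocumentAsync(
                UriFactory.CreateDocumentUri(database, collection, (string)entity.Id));
        }
        catch (DocumentClientException e) when (e.StatusCode == HttpStatusCode.NotFound)
        {
            return await client.CreateDocumentAsync(
                UriFactory.CreateDocumentCollectionUri(database, collection),
                entity);
        }
    }
}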
public static async Task Run(
    [BlobTrigger("ectower-cookeast/raw/Flux/{name}", Connection = "ltarcafdatastreamConnectionString")] Stream myBlob,
    string name,
    TraceWriter log,
    ExecutionContext context)
{
    log.Info($"C# Blob trigger function Processed blob\n Name:{name} \n Size: {myBlob.Length} Bytes");

    //var config = new ConfigurationBuilder()
    //    .SetBasePath(context.FunctionAppDirectory)
    //    .AddJsonFile("local.settings.json", optional: true, reloadOnChange: true)
    //    .AddEnvironmentVariables()
    //    .Build();

    EtlEvent etlEvent = new EtlEvent(
        "EtlEvent",
        "AzureFunction",
        "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
        "CafMeteorologyEcTower",
        "1.1",
        "LoggerNetFluxToCosmosDBSqlApiMeasurementCookEast",
        DateTime.UtcNow);

    etlEvent.Outputs = null;
    etlEvent.Inputs.Add($"ectower-cookeast/raw/Flux/{name}");
    etlEvent.Logs.Add($"C# Blob trigger function Processed blob\n Name:{name} \n Size: {myBlob.Length} Bytes");

    StreamReader reader = new StreamReader(myBlob);
    string contents = "";

    log.Info("About to read contents");
    try
    {
        contents = reader.ReadToEnd();
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error reading Blob: {e.Message}");
    }

    //DocumentClient client = new DocumentClient(
    //    new Uri(
    //        config["Values:AzureCosmosDBUri"]),
    //    config["Values:AzureCosmosDBKey"]);

    DocumentClient client;
    try
    {
        client = new DocumentClient(
            new Uri(
                ConfigurationManager.AppSettings["AzureCosmosDBUri"]),
            ConfigurationManager.AppSettings["AzureCosmosDBKey"]);
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error creating DocumentClient: {e.Message}");
        log.Error($"Error creating DocumentClient: {e.Message}");
        throw new Exception("Error creating DocumentClient", e);
    }

    DocumentLoader loader = new DocumentLoader(
        client,
        "cafdb",
        "items");

    log.Info("Created client and loader");

    if (!String.IsNullOrEmpty(contents))
    {
        try
        {
            log.Info("Attempting extract and transform");
            TOA5Extractor extractor = new TOA5Extractor(
                name,
                contents,
                -8);

            TOA5 fluxTable = extractor.GetTOA5<Flux>();

            // TODO: Move strings and such to settings file
            DocumentDbMeasurementV2Transformer transformer =
                new DocumentDbMeasurementV2Transformer(
                    new MapFromFluxDataTableToCafStandards(),
                    "http://files.cafltar.org/data/schema/documentDb/v2/measurement.json",
                    etlEvent.Id,
                    "Measurement",
                    "CafMeteorologyEcTower",
                    1800);

            List<MeasurementV2> measurements =
                transformer.ToMeasurements(fluxTable);

            log.Info("Attempting load");
            // Using the bulkImport sproc doesn't provide much benefit since
            // most data tables will only have a few measurements with the
            // same partition key. But it's better than nothing.
            StoredProcedureResponse<bool>[] results =
                await loader.LoadBulk(measurements);
            log.Info($"Loaded {results.Length.ToString()} measurements");
        }
        catch (Exception e)
        {
            etlEvent.Logs.Add(
                $"Error in ETL pipeline: {e.Message}");
            log.Error($"Error in ETL pipeline: {e.Message}");
            throw new Exception("Error in ETL pipeline", e);
        }
        finally
        {
            log.Info("Loading etlEvent to db");
            etlEvent.DateTimeEnd = DateTime.UtcNow;
            ResourceResponse<Document> result =
                await loader.LoadNoReplace(etlEvent);
            log.Info($"Result of writing EtlEvent: {result.StatusCode.ToString()}");
        }

        log.Info("Function completed");
    }
}
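LoadBulk isn't shown in these listings either. Assuming a conventional bulkImport stored procedure is deployed to the collection, a sketch added to the DocumentLoader class above (System.Linq assumed) might look like this; the PartitionKey property on MeasurementV2 is hypothetical and stands in for whatever field the collection is actually partitioned on:

// A sproc executes inside a single partition, so documents must be
// batched per partition key, one call per batch.
public async Task<StoredProcedureResponse<bool>[]> LoadBulk(List<MeasurementV2> measurements)
{
    Uri sprocUri = UriFactory.CreateStoredProcedureUri(database, collection, "bulkImport");
    var responses = new List<StoredProcedureResponse<bool>>();

    foreach (var batch in measurements.GroupBy(m => m.PartitionKey))
    {
        responses.Add(await client.ExecuteStoredProcedureAsync<bool>(
            sprocUri,
            new RequestOptions { PartitionKey = new PartitionKey(batch.Key) },
            new object[] { batch.ToArray() }));
    }

    return responses.ToArray();
}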
public async Task PipeItAsync()
{
    EtlEvent etlEvent = new EtlEvent(
        "EtlEvent",
        "AzureFunction",
        "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
        "CafMeteorologyEcTower",
        version,
        functionName,
        DateTime.UtcNow);

    etlEvent.Inputs.Add(blobPath);

    StreamReader reader = new StreamReader(myBlob);
    string contents = "";

    log.LogInformation("About to read contents");
    try
    {
        contents = reader.ReadToEnd();
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error reading Blob: {e.Message}");
    }

    DocumentLoader loader = new DocumentLoader(
        client,
        "cafdb",
        "items");

    log.LogInformation("Created loader");

    if (!String.IsNullOrEmpty(contents))
    {
        try
        {
            log.LogInformation("Attempting extract and transform");
            TOA5Extractor extractor = new TOA5Extractor(
                name,
                contents,
                -8);

            TOA5 toa5 = extractor.GetTOA5(observation);

            CosmosDBSqlApiV2Transformer transformer =
                new CosmosDBSqlApiV2Transformer(
                    new MapFromToa5DataTableToCafStandards(),
                    "http://files.cafltar.org/data/schema/documentDb/v2/measurement.json",
                    etlEvent.Id,
                    "Measurement",
                    "CafMeteorologyEcTower",
                    timestep);

            List<MeasurementV2> measurements =
                transformer.ToMeasurements(toa5);

            log.LogInformation("Attempting load");
            int docsLoaded = 0;
            int docsError = 0;
            foreach (MeasurementV2 measurement in measurements)
            {
                try
                {
                    ResourceResponse<Document> result =
                        await loader.LoadNoReplace(measurement);
                    if (result.StatusCode == HttpStatusCode.Created ||
                        result.StatusCode == HttpStatusCode.OK)
                    {
                        etlEvent.Outputs.Add(result.Resource.Id);
                        docsLoaded++;
                    }
                    else
                    {
                        etlEvent.Logs.Add(
                            $"StatusCode: {result.StatusCode} on MeasurementV2: {measurement.Id.ToString()}");
                        docsError++;
                    }
                }
                catch (Exception e)
                {
                    etlEvent.Logs.Add(
                        $"Error loading {measurement.Id.ToString()} MeasurementV2: {e.Message}");
                    log.LogError($"Error loading MeasurementV2: {e.Message}");
                    docsError++;
                }
            }

            log.LogInformation(
                $"Loaded {docsLoaded.ToString()} MeasurementV2s.");
            log.LogInformation(
                $"Error loading {docsError.ToString()} MeasurementV2s.");
            etlEvent.Logs.Add(
                $"Loaded {docsLoaded.ToString()} MeasurementV2s");
            etlEvent.Logs.Add(
                $"Error loading {docsError.ToString()} MeasurementV2s");
        }
        catch (Exception e)
        {
            etlEvent.Logs.Add(
                $"Error in ETL pipeline: {e.Message}");
            log.LogError($"Error in ETL pipeline: {e.Message}");
            throw new Exception("Error in ETL pipeline", e);
        }
        finally
        {
            log.LogInformation("Loading etlEvent to db");
            etlEvent.DateTimeEnd = DateTime.UtcNow;
            ResourceResponse<Document> result =
                await loader.LoadNoReplace(etlEvent);
            log.LogInformation($"Result of writing EtlEvent: {result.StatusCode.ToString()}");
        }
    }
}
static async Task MainAsync()
{
    EtlEvent etlEvent = new EtlEvent(
        "EtlEvent",
        "LocalProcess",
        "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
        "CookEastSoilGridPointSurvey",
        "1.0",
        "CookEastSoilGridPointSurvey_DotNet_SoilGridPointToCosmosDB",
        DateTime.UtcNow);

    var builder = new ConfigurationBuilder()
        .SetBasePath(Directory.GetCurrentDirectory())
        .AddJsonFile("appsettings.json");

    var configuration = builder.Build();

    JsonSerializerSettings serializerSettings = new JsonSerializerSettings
    {
        NullValueHandling = NullValueHandling.Ignore
    };

    string data = configuration["PathToData"];
    string dict = configuration["PathToDictionary"];

    if (!File.Exists(data) || !File.Exists(dict))
    {
        throw new FileNotFoundException();
    }

    etlEvent.Inputs.Add(data);
    etlEvent.Inputs.Add(dict);

    DocumentClient client;
    try
    {
        client = new DocumentClient(
            new Uri(
                configuration["CosmosServiceEndpoint"]),
            configuration["CosmosAuthKey"],
            serializerSettings);
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error creating DocumentClient: {e.Message}");
        throw new Exception("Error creating DocumentClient", e);
    }

    var extractor = new TidyDataCsvExtractor(
        configuration["PathToData"],
        configuration["PathToDictionary"]);

    var transformer = new CosmosDBSqlApiSampleV2Transformer<SoilGridPointSurveyV1, SoilSample>(
        new MapFromSoilGridPointSurveyToSoilSample(),
        "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
        etlEvent.Id,
        "CookEastSoilGridPointSurvey",
        "CookEast",
        "SoilSample");

    var loader = new DocumentLoader(
        client,
        "cafdb",
        "items");

    try
    {
        TidyData extracted = extractor.Extract<SoilGridPointSurveyV1>();
        etlEvent.Logs.Add(
            $"Extracted TidyData with {extracted.Observations.Count} observations");

        List<SoilSample> transformed = transformer.Transform(extracted);
        etlEvent.Logs.Add(
            $"Transformed TidyData to {transformed.Count} SoilSamples");

        int docsLoaded = 0;
        int docsError = 0;
        foreach (SoilSample sample in transformed)
        {
            ResourceResponse<Document> result =
                await loader.LoadNoReplace(sample);
            if (result.StatusCode == HttpStatusCode.Created)
            {
                etlEvent.Outputs.Add(result.Resource.Id);
                docsLoaded++;
            }
            else
            {
                docsError++;
            }

            // Notify data written then sleep to conserve Cosmos RU
            Console.Write(".");
            Thread.Sleep(40);
        }

        etlEvent.Logs.Add(
            $"Loaded {docsLoaded.ToString()} SoilSamples");
        etlEvent.Logs.Add(
            $"Error loading {docsError.ToString()} SoilSamples");
    }
    catch (Exception e)
    {
        etlEvent.Logs.Add(
            $"Error in ETL pipeline: {e.Message}");
        throw new Exception("Error in ETL pipeline", e);
    }
    finally
    {
        etlEvent.DateTimeEnd = DateTime.UtcNow;
        ResourceResponse<Document> result =
            await loader.LoadNoReplace(etlEvent);
    }
}
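The fixed Thread.Sleep(40) keeps writes under the collection's provisioned throughput. An alternative, not in the original code, is to react to throttling directly: the SDK surfaces HTTP 429 (Too Many Requests) as a DocumentClientException carrying a server-suggested RetryAfter interval. A hypothetical helper along those lines, assuming the DocumentLoader sketched earlier:

using System.Net;
using System.Threading.Tasks;
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Client;

// Retries a single write only when Cosmos DB signals throttling,
// waiting exactly as long as the service suggests.
static async Task<ResourceResponse<Document>> LoadWithBackoff(
    DocumentLoader loader, SoilSample sample)
{
    while (true)
    {
        try
        {
            return await loader.LoadNoReplace(sample);
        }
        catch (DocumentClientException e)
            when (e.StatusCode == (HttpStatusCode)429)
        {
            await Task.Delay(e.RetryAfter);
        }
    }
}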