Example #1
0
        /// <summary>
        /// Builds a fully-populated EtlEvent to serve as a test double.
        /// </summary>
        /// <param name="id">Identifier assigned to the mocked event.</param>
        /// <returns>An EtlEvent with fixed timestamps, inputs, outputs, and logs.</returns>
        public static EtlEvent GetEtlEventMock(string id)
        {
            // Name the list arguments up front so the constructor call below
            // reads as a flat parameter list.
            var inputs = new List<string> { @"C:\Files\foo.csv" };
            var outputs = new List<string> { @"C:\Files\Output\bah.csv" };
            var logs = new List<string>
            {
                "Operation message",
                "Error: yo momma",
                "2018-05-22T01:01:00.000000Z | Message"
            };

            return new EtlEvent(
                "EtlEvent_AzureFunction",
                id,
                "EtlEvent",
                "AzureFunction",
                "http://files.cafltar.org/data/schema/documentDb/v2/EtlEvent.json",
                "CafMeteorologyEcTower",
                "", "", "", "", null,
                "0.1",
                "AzureFunction",
                new DateTime(2018, 5, 22, 5, 0, 0).ToUniversalTime(),
                new DateTime(2018, 5, 22, 5, 2, 0).ToUniversalTime(),
                inputs,
                outputs,
                logs);
        }
        /// <summary>
        /// Wires up the measurement pipeline with its event context, raw file
        /// contents, and Cosmos DB client.
        /// </summary>
        /// <param name="etlEvent">Event record that accumulates pipeline logs.</param>
        /// <param name="contents">Raw TOA5 file contents to process.</param>
        /// <param name="client">DocumentDB client used for subsequent loading.</param>
        public LoggerNetToa5ToCosmosDBSqlApiMeasurement(
            EtlEvent etlEvent,
            string contents,
            DocumentClient client)
        {
            this.etlEvent = etlEvent;
            this.contents = contents;
            this.client   = client;

            // NOTE(review): the original constructor also built a DocumentLoader
            // ("cafdb"/"items") here and immediately discarded it — it was never
            // assigned to a field or used. That dead allocation has been removed;
            // if a loader field was intended, assign it explicitly instead.
        }
        /// <summary>
        /// End-to-end check: extracts actual soil grid point survey data,
        /// transforms it into SoilSamples, and bulk-loads the results.
        /// Expects the fixture data to yield exactly 30 samples.
        /// </summary>
        public async Task SoilGridPointSurveyV1ToCosmos_ActualData_CreatesExpectedRecords()
        {
            // Arrange
            var extractor = new TidyDataCsvExtractor(
                pathToFileWithValidSoilGridPointSurveyV1Data,
                pathToFileWithValidSoilGridPointSurveyV1Dictionary);

            var etlEvent = new EtlEvent(
                "EtlEvent",
                "LocalProcess",
                "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
                "CookEastSoilGridPointSurvey",
                "0.1",
                "",
                DateTime.UtcNow);

            var transformer = new CosmosDBSqlApiSampleV2Transformer
                              <SoilGridPointSurveyV1, SoilSample>(
                new MapFromSoilGridPointSurveyToSoilSample(),
                "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
                etlEvent.Id,
                "CookEastSoilGridPointSurvey",
                "CookEast",
                "SoilSample");

            var loader = new DocumentLoader(
                client,
                "cafdb",
                "items");

            // Act
            TidyData extracted = extractor.Extract<SoilGridPointSurveyV1>();
            List<SoilSample> transformed = transformer.Transform(extracted);
            StoredProcedureResponse<bool>[] results = await loader.LoadBulk(transformed);

            // Assert
            Assert.Equal(30, transformed.Count);
            Assert.NotEmpty(results);
        }
        /// <summary>
        /// Azure Function entry point: triggered by a new raw Flux blob from the
        /// CookEast EC tower, extracts TOA5 data, transforms it into MeasurementV2
        /// documents, and bulk-loads them into Cosmos DB. The EtlEvent record is
        /// written in a finally block so the run is logged even when the pipeline
        /// throws.
        /// </summary>
        /// <param name="myBlob">Stream over the triggering blob's contents.</param>
        /// <param name="name">Blob name supplied by the trigger binding.</param>
        /// <param name="log">Function runtime logger.</param>
        /// <param name="context">Function execution context (currently unused).</param>
        /// <exception cref="Exception">
        /// Wraps failures creating the DocumentClient or running the ETL pipeline.
        /// </exception>
        public static async Task Run(
            [BlobTrigger("ectower-cookeast/raw/Flux/{name}", Connection = "ltarcafdatastreamConnectionString")] Stream myBlob,
            string name,
            TraceWriter log,
            ExecutionContext context)
        {
            log.Info($"C# Blob trigger function Processed blob\n Name:{name} \n Size: {myBlob.Length} Bytes");

            EtlEvent etlEvent = new EtlEvent(
                "EtlEvent",
                "AzureFunction",
                "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
                "CafMeteorologyEcTower",
                "1.1", "LoggerNetFluxToCosmosDBSqlApiMeasurementCookEast",
                DateTime.UtcNow);

            etlEvent.Outputs = null;
            etlEvent.Inputs.Add($"ectower-cookeast/raw/Flux/{name}");
            etlEvent.Logs.Add($"C# Blob trigger function Processed blob\n Name:{name} \n Size: {myBlob.Length} Bytes");

            string contents = "";

            log.Info("About to read contents");
            // using block disposes the reader (and underlying blob stream); the
            // original leaked the StreamReader.
            using (StreamReader reader = new StreamReader(myBlob))
            {
                try
                {
                    contents = reader.ReadToEnd();
                }
                catch (Exception e)
                {
                    // Best-effort: record the failure and continue; the
                    // empty-contents guard below skips the pipeline.
                    etlEvent.Logs.Add(
                        $"Error reading Blob: {e.Message}");
                }
            }

            DocumentClient client;

            try
            {
                client = new DocumentClient(
                    new Uri(
                        ConfigurationManager.AppSettings["AzureCosmosDBUri"]),
                    ConfigurationManager.AppSettings["AzureCosmosDBKey"]);
            }
            catch (Exception e)
            {
                etlEvent.Logs.Add(
                    $"Error creating DocumentClient: {e.Message}");
                log.Error($"Error creating DocumentClient: {e.Message}");
                throw new Exception("Error creating DocumentClient", e);
            }

            DocumentLoader loader = new DocumentLoader(
                client,
                "cafdb",
                "items");

            log.Info("Created client and loader");
            if (!String.IsNullOrEmpty(contents))
            {
                try
                {
                    log.Info("Attempting extract and transform");
                    TOA5Extractor extractor = new TOA5Extractor(
                        name,
                        contents,
                        -8);

                    TOA5 fluxTable = extractor.GetTOA5 <Flux>();

                    // TODO: Move strings and such to settings file
                    DocumentDbMeasurementV2Transformer transformer =
                        new DocumentDbMeasurementV2Transformer(
                            new MapFromFluxDataTableToCafStandards(),
                            "http://files.cafltar.org/data/schema/documentDb/v2/measurement.json",
                            etlEvent.Id,
                            "Measurement",
                            "CafMeteorologyEcTower",
                            1800);

                    List <MeasurementV2> measurements =
                        transformer.ToMeasurements(fluxTable);
                    log.Info("Attempting load");
                    // Using the bulkImport sproc doesn't provide much benefit since
                    // most data tables will only have a few measurements with the
                    // same partition key.  But it's better than nothing.
                    StoredProcedureResponse <bool>[] results = await loader.LoadBulk(measurements);

                    log.Info($"Loaded {results.Length.ToString()} measurements");
                }
                catch (Exception e)
                {
                    etlEvent.Logs.Add(
                        $"Error in ETL pipeline: {e.Message}");
                    log.Error($"Error in ETL pipeline: {e.Message}");
                    throw new Exception("Error in ETL pipeline", e);
                }
                finally
                {
                    // Always persist the EtlEvent record, even on failure.
                    log.Info("Loading etlEvent to db");
                    etlEvent.DateTimeEnd = DateTime.UtcNow;
                    ResourceResponse <Document> result = await loader.LoadNoReplace(etlEvent);

                    log.Info($"Result of writing EtlEvent: {result.StatusCode.ToString()}");
                }

                log.Info("Function completed");
            }
        }
        /// <summary>
        /// Runs the full ETL pipeline for one blob: reads its contents, extracts
        /// a TOA5 table, transforms it into MeasurementV2 documents, and loads
        /// each document individually while counting successes and failures. The
        /// EtlEvent is persisted in a finally block so every run is recorded.
        /// </summary>
        /// <exception cref="Exception">Wraps any failure in the ETL pipeline.</exception>
        public async Task PipeItAsync()
        {
            EtlEvent etlEvent = new EtlEvent(
                "EtlEvent",
                "AzureFunction",
                "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
                "CafMeteorologyEcTower",
                version, functionName,
                DateTime.UtcNow);

            etlEvent.Inputs.Add(blobPath);

            string contents = "";

            log.LogInformation("About to read contents");
            // using block disposes the reader (and underlying stream); the
            // original leaked the StreamReader.
            using (StreamReader reader = new StreamReader(myBlob))
            {
                try
                {
                    contents = reader.ReadToEnd();
                }
                catch (Exception e)
                {
                    // Best-effort: log and continue; empty contents skip the
                    // pipeline below.
                    etlEvent.Logs.Add(
                        $"Error reading Blob: {e.Message}");
                }
            }

            DocumentLoader loader = new DocumentLoader(
                client,
                "cafdb",
                "items");

            log.LogInformation("Created loader");
            if (!String.IsNullOrEmpty(contents))
            {
                try
                {
                    log.LogInformation("Attempting extract and transform");
                    TOA5Extractor extractor = new TOA5Extractor(
                        name,
                        contents,
                        -8);

                    TOA5 toa5 = extractor.GetTOA5(observation);

                    CosmosDBSqlApiV2Transformer transformer =
                        new CosmosDBSqlApiV2Transformer(
                            new MapFromToa5DataTableToCafStandards(),
                            "http://files.cafltar.org/data/schema/documentDb/v2/measurement.json",
                            etlEvent.Id,
                            "Measurement",
                            "CafMeteorologyEcTower",
                            timestep);

                    List <MeasurementV2> measurements =
                        transformer.ToMeasurements(toa5);
                    log.LogInformation("Attempting load");

                    // Load one document at a time so a single failure doesn't
                    // abort the batch; tally outcomes for the EtlEvent record.
                    int docsLoaded = 0;
                    int docsError  = 0;
                    foreach (MeasurementV2 measurement in measurements)
                    {
                        try
                        {
                            ResourceResponse <Document> result =
                                await loader.LoadNoReplace(measurement);

                            if (
                                result.StatusCode == HttpStatusCode.Created ||
                                result.StatusCode == HttpStatusCode.OK)
                            {
                                etlEvent.Outputs.Add(result.Resource.Id);
                                docsLoaded++;
                            }
                            else
                            {
                                etlEvent.Logs.Add(
                                    $"StatusCode: {result.StatusCode} on MeasurementV2: {measurement.Id.ToString()}");
                                docsError++;
                            }
                        }
                        catch (Exception e)
                        {
                            etlEvent.Logs.Add(
                                $"Error loading {measurement.Id.ToString()} MeasurementV2: {e.Message}");
                            log.LogError($"Error loading MeasurementV2: {e.Message}");
                            docsError++;
                        }
                    }
                    log.LogInformation(
                        $"Loaded {docsLoaded.ToString()} MeasurementV2s.");
                    log.LogInformation(
                        $"Error loading {docsError.ToString()} MeasurementV2s.");
                    etlEvent.Logs.Add(
                        $"Loaded {docsLoaded.ToString()} MeasurementV2s");
                    etlEvent.Logs.Add(
                        $"Error loading {docsError.ToString()} MeasurementV2s");
                }
                catch (Exception e)
                {
                    etlEvent.Logs.Add(
                        $"Error in ETL pipeline: {e.Message}");
                    log.LogError($"Error in ETL pipeline: {e.Message}");
                    throw new Exception("Error in ETL pipeline", e);
                }
                finally
                {
                    // Always persist the EtlEvent record, even on failure.
                    log.LogInformation("Loading etlEvent to db");
                    etlEvent.DateTimeEnd = DateTime.UtcNow;
                    ResourceResponse <Document> result = await loader.LoadNoReplace(etlEvent);

                    log.LogInformation($"Result of writing EtlEvent: {result.StatusCode.ToString()}");
                }
            }
        }
Example #6
0
        /// <summary>
        /// Console ETL driver: reads data/dictionary paths and Cosmos credentials
        /// from appsettings.json, extracts soil grid point survey data, transforms
        /// it into SoilSamples, and loads each one into Cosmos DB with a small
        /// delay between writes to conserve RUs. The EtlEvent is persisted in a
        /// finally block so every run is recorded.
        /// </summary>
        /// <exception cref="FileNotFoundException">Data or dictionary file missing.</exception>
        /// <exception cref="Exception">
        /// Wraps failures creating the DocumentClient or running the ETL pipeline.
        /// </exception>
        static async Task MainAsync()
        {
            EtlEvent etlEvent = new EtlEvent(
                "EtlEvent",
                "LocalProcess",
                "http://files.cafltar.org/data/schema/documentDb/v2/etlEvent.json",
                "CookEastSoilGridPointSurvey",
                "1.0",
                "CookEastSoilGridPointSurvey_DotNet_SoilGridPointToCosmosDB",
                DateTime.UtcNow);

            var builder = new ConfigurationBuilder()
                          .SetBasePath(Directory.GetCurrentDirectory())
                          .AddJsonFile("appsettings.json");

            var configuration = builder.Build();

            JsonSerializerSettings serializerSettings = new JsonSerializerSettings
            {
                NullValueHandling = NullValueHandling.Ignore
            };

            string data = configuration["PathToData"];
            string dict = configuration["PathToDictionary"];

            // Short-circuit || (the original used bitwise |, which always
            // evaluated both File.Exists calls).
            if (!File.Exists(data) ||
                !File.Exists(dict))
            {
                throw new FileNotFoundException(
                    "PathToData or PathToDictionary does not exist");
            }

            etlEvent.Inputs.Add(data);
            etlEvent.Inputs.Add(dict);

            DocumentClient client;

            try
            {
                client = new DocumentClient(
                    new Uri(
                        configuration["CosmosServiceEndpoint"]),
                    configuration["CosmosAuthKey"],
                    serializerSettings);
            }
            catch (Exception e)
            {
                etlEvent.Logs.Add(
                    $"Error creating DocumentClient: {e.Message}");
                throw new Exception("Error creating DocumentClient", e);
            }

            var extractor = new TidyDataCsvExtractor(
                configuration["PathToData"],
                configuration["PathToDictionary"]);

            var transformer = new CosmosDBSqlApiSampleV2Transformer
                              <SoilGridPointSurveyV1, SoilSample>(
                new MapFromSoilGridPointSurveyToSoilSample(),
                "http://files.cafltar.org/data/schema/documentDb/v2/sample.json",
                etlEvent.Id,
                "CookEastSoilGridPointSurvey",
                "CookEast",
                "SoilSample");

            var loader = new DocumentLoader(
                client,
                "cafdb",
                "items");

            try
            {
                TidyData extracted = extractor.Extract <SoilGridPointSurveyV1>();

                etlEvent.Logs.Add(
                    $"Extracted TidyData with {extracted.Observations.Count} observations");

                List <SoilSample> transformed = transformer.Transform(extracted);

                etlEvent.Logs.Add(
                    $"Transformed TidyData to {transformed.Count} SoilSamples");

                int docsLoaded = 0;
                int docsError  = 0;
                foreach (SoilSample sample in transformed)
                {
                    ResourceResponse <Document> result =
                        await loader.LoadNoReplace(sample);

                    if (result.StatusCode == HttpStatusCode.Created)
                    {
                        etlEvent.Outputs.Add(result.Resource.Id);
                        docsLoaded++;
                    }
                    else
                    {
                        docsError++;
                    }

                    // Notify data written then pause to conserve Cosmos RU.
                    // Task.Delay instead of Thread.Sleep: don't block the thread
                    // inside an async method.
                    Console.Write(".");
                    await Task.Delay(40);
                }

                etlEvent.Logs.Add(
                    $"Loaded {docsLoaded.ToString()} SoilSamples");
                etlEvent.Logs.Add(
                    $"Error loading {docsError.ToString()} SoilSamples");
            }
            catch (Exception e)
            {
                etlEvent.Logs.Add(
                    $"Error in ETL pipeline: {e.Message}");
                throw new Exception("Error in ETL pipeline", e);
            }
            finally
            {
                // Always persist the EtlEvent record, even on failure.
                etlEvent.DateTimeEnd = DateTime.UtcNow;
                ResourceResponse <Document> result = await loader.LoadNoReplace(etlEvent);
            }
        }