/// <summary>
        /// Submits the given SQL through <c>StartQuery</c> and returns whatever that call yields.
        /// </summary>
        /// <param name="athenaApi">Athena API wrapper the query is submitted through.</param>
        /// <param name="sql">SQL text handed to <c>StartQuery</c> unchanged.</param>
        /// <returns>
        /// The string returned by <c>StartQuery</c>
        /// (presumably the query execution id — TODO confirm against AWSAthenaAPI).
        /// </returns>
        public static async Task<string> StartSampleDataBySQL(this AWSAthenaAPI athenaApi, string sql)
        {
            // No sample objects are built here: this method only starts the query.
            return await athenaApi.StartQuery(sql);
        }
        /// <summary>
        /// Runs the configured Athena SQL for a date relative to today and stores the
        /// resulting field mappings and sample rows on <paramref name="etlSettings"/>.
        /// </summary>
        /// <param name="etlSettings">ETL settings providing the Athena source; receives <c>Mappings</c> and <c>Sample</c>.</param>
        /// <param name="lines">Row limit appended to the query as a <c>limit</c> clause.</param>
        /// <returns>A task that completes once the sample has been stored.</returns>
        /// <exception cref="InvalidOperationException">The settings have no Athena query source.</exception>
        public static async Task GetAthenaQueryResultSampleByDate(this EtlSettings etlSettings, int lines)
        {
            var athena = etlSettings.AthenaQuerySource;

            if (athena == null)
            {
                throw new InvalidOperationException("The ETL has an empty Athena source setting.");
            }
            var athenaApi = etlSettings.CreateSourceAthenaAPI();

            // The dates end up inside SQL text, so they must not vary with the
            // machine's culture (calendar, digits, sign characters).
            var culture = System.Globalization.CultureInfo.InvariantCulture;

            var today = DateTime.Now;
            var date  = today.AddDays(-athena.DaysAgo);

            // "{date}" is the base date; every rgxDateOffset match is replaced by the
            // base date shifted by the day offset captured in group 1.
            var query = athena.AthenaSQL.Replace("{date}", date.ToString(athena.DateFormat, culture));
            query = rgxDateOffset.Replace(query, m =>
            {
                var offset = int.Parse(m.Groups[1].Value, culture);
                return date.AddDays(offset).ToString(athena.DateFormat, culture);
            });
            query += "\nlimit " + lines.ToString(culture);

            var getResultRequest = await athenaApi.ExecuteQuery(query);
            var response         = await athenaApi.ReadOneResult(getResultRequest);

            // load data schema to the etlsetting schema
            etlSettings.Mappings = response.ToFieldMapping();

            var sample = new DataSample()
            {
                Rows = new List<DataRow>()
            };

            // Every cell is stored as its string representation.
            foreach (var row in response.ReadData())
            {
                sample.Rows.Add(new DataRow()
                {
                    Items = row.Select(item => item.ToString()).ToList()
                });
            }
            etlSettings.Sample = sample;
        }
        /// <summary>
        /// Checks whether the given execution has completed and, if so, reads one result
        /// page and converts it into field mappings plus string-valued sample rows.
        /// </summary>
        /// <param name="athenaApi">Athena API wrapper used to check completion and read the result.</param>
        /// <param name="executionId">Identifier placed in the <c>QueryExecutionId</c> of the result request.</param>
        /// <returns>
        /// The schema and sample when <c>IsExecutionCompleted</c> reports true; otherwise <c>null</c>.
        /// </returns>
        public static async Task<DataSampleWithSchema> TryObtainSampleDataResult(this AWSAthenaAPI athenaApi, string executionId)
        {
            var finished = await athenaApi.IsExecutionCompleted(executionId);
            if (!finished)
            {
                // Not completed yet: the caller gets null.
                return null;
            }

            var request     = new GetQueryResultsRequest { QueryExecutionId = executionId };
            var queryResult = await athenaApi.ReadOneResult(request);

            // Data is read before the schema, same order as the result is consumed below.
            var rawRows = queryResult.ReadData();
            var sample  = new DataSample { Rows = new List<DataRow>() };
            var output  = new DataSampleWithSchema
            {
                FieldMappings = queryResult.ToFieldMapping(),
                DataSample    = sample,
            };

            foreach (var rawRow in rawRows)
            {
                var cells = rawRow.Select(cell => cell.ToString()).ToList();
                sample.Rows.Add(new DataRow { Items = cells });
            }

            return output;
        }
Esempio n. 4
0
        /// <summary>
        /// Runs the configured BigQuery SQL for a date relative to today and stores the
        /// resulting field mappings and sample rows on <paramref name="etlSettings"/>.
        /// </summary>
        /// <param name="etlSettings">ETL settings providing the Google Analytics source; receives <c>Mappings</c> and <c>Sample</c>.</param>
        /// <param name="lines">Row limit appended to the query as a <c>limit</c> clause.</param>
        /// <returns>A task that completes once the sample has been stored.</returns>
        /// <exception cref="InvalidOperationException">The settings have no Google Analytics query source.</exception>
        public static async Task GetBigQueryResultSampleByDate(this EtlSettings etlSettings, int lines)
        {
            // NOTE(review): the S3 API instance is never used in this method; the call is kept
            // in case creating it validates the target settings — confirm and remove.
            var awsS3Api = etlSettings.CreateTargetS3API();
            var ga       = etlSettings.GoogleAnalyticsQuerySource;

            if (ga == null)
            {
                throw new InvalidOperationException("The ETL has an empty Google Analytics source setting.");
            }

            // The Google client library reads its credentials file location from this variable.
            Environment.SetEnvironmentVariable("GOOGLE_APPLICATION_CREDENTIALS", $"{AppContext.BaseDirectory}/{ga.GoogleAnalyticsSettingFile}");

            BigQueryClient client = BigQueryClient.Create(ga.GoogleAnalyticsProjectId);

            // The date is embedded in SQL text, so format it independently of the machine culture.
            string dateQueryKey = DateTime.Now
                .AddDays(-ga.DaysAgo)
                .ToString(ga.DateFormat, System.Globalization.CultureInfo.InvariantCulture);

            // cap the number of returned rows with the caller-supplied limit
            string sql = ga.BigQuerySQL.Replace("{date}", dateQueryKey) + $"\nlimit {lines}";

            var job = await client.CreateQueryJobAsync(sql, new List<BigQueryParameter>());

            BigQueryResults results = await client.GetQueryResultsAsync(job.Reference, new GetQueryResultsOptions()
            {
                StartIndex = 0,
                PageSize   = 20000,
            });

            var sample = new DataSample()
            {
                Rows = new List<DataRow>()
            };

            // convert big query data to sample data; foreach disposes the enumerator for us
            foreach (var row in results)
            {
                sample.Rows.Add(new DataRow()
                {
                    Items = row.RawRow.F.Select(cell => FormatBigQueryCellValue(cell.V)).ToList()
                });
            }

            // map schema to athena types
            etlSettings.Mappings = results.ToFieldMappings();
            etlSettings.Sample   = sample;
        }

        /// <summary>
        /// Converts one raw BigQuery cell value to the string stored in a sample row.
        /// </summary>
        /// <param name="value">Raw cell value; may be <c>null</c>.</param>
        /// <returns>
        /// An empty string for <c>null</c>, round-trip ("o") text for a <see cref="DateTime"/>,
        /// Base64 for a byte array, and <c>ToString()</c> for anything else.
        /// </returns>
        private static string FormatBigQueryCellValue(object value)
        {
            if (value == null)
            {
                return "";
            }

            if (value is DateTime)
            {
                return ((DateTime)value).ToString("o");
            }

            var bytes = value as byte[];
            if (bytes != null)
            {
                return Convert.ToBase64String(bytes);
            }

            return value.ToString();
        }
        /// <summary>
        /// Compiles the Athena pipes SQL into <c>ParsedQuery</c> (collecting problems in
        /// <c>ParseErrors</c>) and, when a definition query is present, runs it to fill
        /// the field mappings and sample rows.
        /// </summary>
        /// <param name="etlSettings">
        /// ETL settings to process; the method returns immediately unless
        /// <c>SourceType</c> is <c>EtlSourceEnum.AmazonAthenaPipes</c>.
        /// </param>
        /// <returns>A task that completes once parsing and the optional definition query are done.</returns>
        public static async Task ParseAthenaQueryPipes(this EtlSettings etlSettings)
        {
            if (etlSettings.SourceType != EtlSourceEnum.AmazonAthenaPipes)
            {
                return;
            }

            var pipesSource = etlSettings.AthenaQueryPipesSource;
            AthenaParserSetting parserLogger = new AthenaParserSetting();

            parserLogger.DefaultExportPath = $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}".FixPathEnd();
            parserLogger.DefaultTableName  = $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`";
            parserLogger.Date          = DateTime.UtcNow.AddDays(-pipesSource.DaysAgo);
            parserLogger.DateFormat    = pipesSource.DateFormat;
            parserLogger.TempDatabase  = pipesSource.TempDatabase;
            parserLogger.TempTablePath = pipesSource.TempDataPath.FixPathEnd();

            // Start from a clean slate; both stages below append to this.
            pipesSource.ParseErrors = "";

            if (!string.IsNullOrWhiteSpace(pipesSource.Caches))
            {
                try
                {
                    var caches = JsonConvert.DeserializeObject<List<CacheSetting>>(pipesSource.Caches);
                    foreach (var cache in caches)
                    {
                        // Cache paths are registered with a trailing slash.
                        if (!cache.S3Path.EndsWith("/", StringComparison.Ordinal))
                        {
                            cache.S3Path += "/";
                        }
                        parserLogger.Caches.Add(cache.Key, cache);
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a bad cache definition is reported but does not stop parsing.
                    pipesSource.ParseErrors += ex.Message;
                    pipesSource.ParseErrors += "\n";
                }
            }

            try
            {
                var parsed = pipesSource.AthenaSQL.ParseAthenaPipes(parserLogger);
                pipesSource.ParsedQuery = parsed.ToQueryString();
            }
            catch (Exception ex)
            {
                // Append rather than assign so cache errors recorded above are not lost.
                pipesSource.ParseErrors += parserLogger.ToString();
                pipesSource.ParseErrors += "\n";
                pipesSource.ParseErrors += ex.Message;
                pipesSource.ParsedQuery  = "";
            }

            // run if there is definition query (null or blank means there is none)
            if (!string.IsNullOrWhiteSpace(pipesSource.AthenaDefinitionSQL))
            {
                var athenaApi        = etlSettings.CreatePipesSourceAthenaAPI();
                var getResultRequest = await athenaApi.ExecuteQuery(pipesSource.AthenaDefinitionSQL);

                var response = await athenaApi.ReadOneResult(getResultRequest);

                // load data schema to the etlsetting schema
                etlSettings.Mappings = response.ToFieldMapping();

                var sample = new DataSample()
                {
                    Rows = new List<DataRow>()
                };

                // Every cell is stored as its string representation.
                foreach (var row in response.ReadData())
                {
                    sample.Rows.Add(new DataRow()
                    {
                        Items = row.Select(item => item.ToString()).ToList()
                    });
                }

                etlSettings.Sample = sample;
            }
        }