/// <summary>
/// Passes <paramref name="sql"/> to <c>athenaApi.StartQuery</c> and returns its result.
/// </summary>
/// <param name="athenaApi">Athena client the query is started on.</param>
/// <param name="sql">SQL text handed unchanged to <c>StartQuery</c>.</param>
/// <returns>
/// The string produced by <c>StartQuery</c>. NOTE(review): presumably the query execution id
/// that <c>TryObtainSampleDataResult</c> expects - confirm against AWSAthenaAPI.
/// </returns>
public static async Task<string> StartSampleDataBySQL(this AWSAthenaAPI athenaApi, string sql)
{
    return await athenaApi.StartQuery(sql);
}
/// <summary>
/// Runs the ETL's Athena source query for its configured date, with a row limit appended,
/// and stores the resulting field mappings and sample rows on <paramref name="etlSettings"/>.
/// </summary>
/// <param name="etlSettings">
/// ETL settings. <c>AthenaQuerySource</c> supplies the SQL, <c>DaysAgo</c> and <c>DateFormat</c>;
/// <c>Mappings</c> and <c>Sample</c> are overwritten with the query outcome.
/// </param>
/// <param name="lines">Number appended to the query as <c>limit {lines}</c>.</param>
/// <exception cref="Exception">Thrown when <c>AthenaQuerySource</c> is null.</exception>
public static async Task GetAthenaQueryResultSampleByDate(this EtlSettings etlSettings, int lines)
{
    var source = etlSettings.AthenaQuerySource;
    if (source == null)
    {
        throw new Exception("The ETL has an empty Athena source setting.");
    }

    var api = etlSettings.CreateSourceAthenaAPI();

    // Reference date: local "now" moved back by the configured number of days.
    var referenceDate = DateTime.Now.AddDays(-source.DaysAgo);

    // "{date}" becomes the formatted reference date.
    var sql = source.AthenaSQL.Replace("{date}", referenceDate.ToString(source.DateFormat));

    // Each rgxDateOffset match becomes the reference date shifted by the day count in capture group 1.
    sql = rgxDateOffset.Replace(
        sql,
        match => referenceDate.AddDays(int.Parse(match.Groups[1].Value)).ToString(source.DateFormat));

    sql += $"\nlimit {lines}";

    var request = await api.ExecuteQuery(sql);
    var queryResult = await api.ReadOneResult(request);

    // Load the result schema into the ETL settings before reading the rows.
    etlSettings.Mappings = queryResult.ToFieldMapping();

    // Every cell is stored as its ToString() text.
    var sampleRows = queryResult.ReadData()
        .Select(row => new DataRow() { Items = row.Select(cell => cell.ToString()).ToList() })
        .ToList();

    etlSettings.Sample = new DataSample() { Rows = sampleRows };
}
/// <summary>
/// Reads the result of an Athena query execution if it has completed.
/// </summary>
/// <param name="athenaApi">Athena client used to check the execution and read its result.</param>
/// <param name="executionId">Value used both for <c>IsExecutionCompleted</c> and as <c>QueryExecutionId</c> of the result request.</param>
/// <returns>
/// <c>null</c> when <c>IsExecutionCompleted</c> reports false; otherwise the field mappings
/// and the rows of the result, with every cell converted to its <c>ToString()</c> text.
/// </returns>
public static async Task<DataSampleWithSchema> TryObtainSampleDataResult(this AWSAthenaAPI athenaApi, string executionId)
{
    // Not finished yet: callers receive null and are expected to ask again.
    if (!await athenaApi.IsExecutionCompleted(executionId))
    {
        return null;
    }

    var response = await athenaApi.ReadOneResult(new GetQueryResultsRequest()
    {
        QueryExecutionId = executionId
    });

    var sample = new DataSample()
    {
        Rows = new List<DataRow>()
    };

    foreach (var row in response.ReadData())
    {
        sample.Rows.Add(new DataRow()
        {
            Items = row.Select(item => item.ToString()).ToList()
        });
    }

    // Build the result once, with the real mappings, instead of a placeholder list that is replaced later.
    return new DataSampleWithSchema()
    {
        FieldMappings = response.ToFieldMapping(),
        DataSample = sample
    };
}
/// <summary>
/// Runs the ETL's BigQuery source query for its configured date, with a row limit appended,
/// and stores the resulting field mappings and sample rows on <paramref name="etlSettings"/>.
/// </summary>
/// <param name="etlSettings">
/// ETL settings. <c>GoogleAnalyticsQuerySource</c> supplies the credential file name, project id,
/// SQL, <c>DaysAgo</c> and <c>DateFormat</c>; <c>Mappings</c> and <c>Sample</c> are overwritten.
/// </param>
/// <param name="lines">Number appended to the query as <c>limit {lines}</c>.</param>
/// <exception cref="Exception">Thrown when <c>GoogleAnalyticsQuerySource</c> is null.</exception>
public static async Task GetBigQueryResultSampleByDate(this EtlSettings etlSettings, int lines)
{
    var ga = etlSettings.GoogleAnalyticsQuerySource;
    if (ga == null)
    {
        // Fail with a descriptive message instead of a NullReferenceException further down.
        throw new Exception("The ETL has an empty Google Analytics source setting.");
    }

    // Sets the variable for the current process; the credential file path is built relative to the application base directory.
    Environment.SetEnvironmentVariable("GOOGLE_APPLICATION_CREDENTIALS", $"{AppContext.BaseDirectory}/{ga.GoogleAnalyticsSettingFile}");

    BigQueryClient client = BigQueryClient.Create(ga.GoogleAnalyticsProjectId);

    string dateQueryKey = DateTime.Now.AddDays(-ga.DaysAgo).ToString(ga.DateFormat);

    // Substitute the date placeholder and cap the query at the requested number of rows.
    string sql = ga.BigQuerySQL.Replace("{date}", dateQueryKey) + $"\nlimit {lines}";

    var job = await client.CreateQueryJobAsync(sql, new List<BigQueryParameter>());
    BigQueryResults results = await client.GetQueryResultsAsync(job.Reference, new GetQueryResultsOptions()
    {
        StartIndex = 0,
        PageSize = 20000,
    });

    // ToList() enumerates the results and disposes the enumerator, which the manual MoveNext loop did not.
    List<BigQueryRow> rows = results.ToList();

    // Map the BigQuery schema to field mappings.
    etlSettings.Mappings = results.ToFieldMappings();

    var sample = new DataSample()
    {
        Rows = new List<DataRow>()
    };

    // Convert each BigQuery row to a sample row of strings.
    foreach (var row in rows)
    {
        sample.Rows.Add(new DataRow()
        {
            Items = row.RawRow.F.Select(cell =>
            {
                object value = cell.V;
                if (value == null)
                {
                    return "";
                }

                if (value is DateTime)
                {
                    // Round-trip ("o") format.
                    return ((DateTime)value).ToString("o");
                }

                if (value is byte[])
                {
                    return Convert.ToBase64String((byte[])value);
                }

                return value.ToString();
            }).ToList()
        });
    }

    etlSettings.Sample = sample;
}
/// <summary>
/// Compiles the Athena pipes query of the ETL and, when a definition query is present,
/// runs it to load the field mappings and a data sample.
/// </summary>
/// <remarks>
/// Does nothing unless <c>SourceType</c> is <c>EtlSourceEnum.AmazonAthenaPipes</c>.
/// Failures while reading the cache settings or parsing the pipes are not thrown; their
/// messages are collected in <c>AthenaQueryPipesSource.ParseErrors</c>, and a parse failure
/// leaves <c>ParsedQuery</c> empty. Exceptions from running the definition query propagate.
/// </remarks>
/// <param name="etlSettings">
/// ETL settings. <c>AthenaQueryPipesSource</c> is updated (<c>ParseErrors</c>, <c>ParsedQuery</c>);
/// <c>Mappings</c> and <c>Sample</c> are overwritten when the definition query runs.
/// </param>
/// <returns>A task that completes once parsing and the optional definition query have finished.</returns>
public static async Task ParseAthenaQueryPipes(this EtlSettings etlSettings)
{
    if (etlSettings.SourceType != EtlSourceEnum.AmazonAthenaPipes)
    {
        return;
    }

    var pipesSource = etlSettings.AthenaQueryPipesSource;

    AthenaParserSetting parserLogger = new AthenaParserSetting();
    parserLogger.DefaultExportPath = $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}".FixPathEnd();
    parserLogger.DefaultTableName = $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`";
    parserLogger.Date = DateTime.UtcNow.AddDays(-pipesSource.DaysAgo);
    parserLogger.DateFormat = pipesSource.DateFormat;
    parserLogger.TempDatabase = pipesSource.TempDatabase;
    parserLogger.TempTablePath = pipesSource.TempDataPath.FixPathEnd();

    // Start from a clean slate; every failure below appends to this text.
    pipesSource.ParseErrors = "";

    if (!string.IsNullOrWhiteSpace(pipesSource.Caches))
    {
        try
        {
            var caches = JsonConvert.DeserializeObject<List<CacheSetting>>(pipesSource.Caches);
            foreach (var cache in caches)
            {
                // Make sure every cache path ends with a slash; ordinal check, not culture-dependent.
                if (!cache.S3Path.EndsWith("/", StringComparison.Ordinal))
                {
                    cache.S3Path += "/";
                }

                parserLogger.Caches.Add(cache.Key, cache);
            }
        }
        catch (Exception ex)
        {
            pipesSource.ParseErrors += ex.Message;
            pipesSource.ParseErrors += "\n";
        }
    }

    try
    {
        var parsed = pipesSource.AthenaSQL.ParseAthenaPipes(parserLogger);
        pipesSource.ParsedQuery = parsed.ToQueryString();
    }
    catch (Exception ex)
    {
        // Append rather than assign, so cache errors recorded above are not lost.
        pipesSource.ParseErrors += parserLogger.ToString();
        pipesSource.ParseErrors += "\n";
        pipesSource.ParseErrors += ex.Message;
        pipesSource.ParsedQuery = "";
    }

    // Run the definition query only if it has non-whitespace content; a null value is treated as absent.
    if (!string.IsNullOrWhiteSpace(pipesSource.AthenaDefinitionSQL))
    {
        var athenaApi = etlSettings.CreatePipesSourceAthenaAPI();
        var getResultRequest = await athenaApi.ExecuteQuery(pipesSource.AthenaDefinitionSQL);
        var response = await athenaApi.ReadOneResult(getResultRequest);

        // Load the result schema into the ETL settings.
        etlSettings.Mappings = response.ToFieldMapping();

        var sample = new DataSample()
        {
            Rows = new List<DataRow>()
        };

        foreach (var row in response.ReadData())
        {
            sample.Rows.Add(new DataRow()
            {
                Items = row.Select(item => item.ToString()).ToList()
            });
        }

        etlSettings.Sample = sample;
    }
}