/// <summary>
/// Parses the sample files <c>query1.sql</c> through <c>query5.sql</c> found under
/// <see cref="AppContext.BaseDirectory"/> and writes diagnostics for each one to the debug output.
/// </summary>
/// <remarks>
/// Exceptions raised while parsing, serializing or printing a file are caught and reported
/// through <see cref="Debug"/>, so the loop continues with the next file. Reading the file
/// happens outside the <c>try</c> block, so an unreadable file still propagates to the caller.
/// </remarks>
public void ParseQuereis()
{
    for (int i = 1; i < 6; i++)
    {
        // A fresh setting per file; its ToString() is dumped when parsing fails.
        AthenaParserSetting athenaParserLogger = new AthenaParserSetting();
        Debug.WriteLine($"****** Begin File {i} ******");

        var filename = $"{AppContext.BaseDirectory}/query{i}.sql";
        Debug.WriteLine($"File {i}: {filename}");
        var query = File.ReadAllText(filename);

        try
        {
            var pipes = query.ParseAthenaPipes(athenaParserLogger);
            Debug.WriteLine($"****** End File {i} ******");

            Debug.WriteLine($"****** Json File {i} ******");
            var tree = JsonConvert.SerializeObject(pipes, Formatting.Indented);
            // Emit the serialized tree under its header; it was previously computed and discarded.
            Debug.WriteLine(tree);

            Debug.WriteLine($"****** Parsed File {i} ******");
            Debug.WriteLine(pipes.ToQueryString().StripEmptyLines());
        }
        catch (Exception ex)
        {
            Debug.WriteLine(athenaParserLogger.ToString());
            // WriteLine keeps the message from running into the next file's "Begin" header.
            Debug.WriteLine(ex.Message);
        }
    }
}
/// <summary>
/// Creates an <see cref="AthenaParserSetting"/> from the settings stored on a state machine query context.
/// </summary>
/// <param name="context">Context whose <c>settings</c> member supplies the values to copy.</param>
/// <returns>
/// A new parser setting carrying the export path, table name, date, date format, temp database
/// and temp table path of the context. No other member is assigned here.
/// </returns>
public static AthenaParserSetting BuildParserSetting(this StateMachineQueryContext context)
{
    StateMachineSettings source = context.settings;

    return new AthenaParserSetting
    {
        DefaultExportPath = source.DefaultExportPath,
        DefaultTableName = source.DefaultTableName,
        Date = source.Date,
        DateFormat = source.DateFormat,
        TempDatabase = source.TempDatabase,
        TempTablePath = source.TempTablePath
    };
}
/// <summary>
/// Parses the Athena query pipes configured on the ETL settings, executes the resulting
/// control flow, and then loads every partition recorded on the parser setting.
/// </summary>
/// <param name="etlSettings">
/// ETL settings to run. Nothing happens unless <c>SourceType</c> is
/// <c>EtlSourceEnum.AmazonAthenaPipes</c>.
/// </param>
/// <param name="useDate">
/// Optional base date. When omitted, <see cref="DateTime.UtcNow"/> is used. In both cases
/// the pipes source's <c>DaysAgo</c> is subtracted from it.
/// </param>
/// <returns>A task that completes once the control flow has run and the partitions have been loaded.</returns>
public static async Task RunAthenaQueryPipes(this EtlSettings etlSettings, DateTime? useDate = null)
{
    if (etlSettings.SourceType != EtlSourceEnum.AmazonAthenaPipes)
    {
        return;
    }

    var pipesSource = etlSettings.AthenaQueryPipesSource;
    DateTime baseDate = useDate ?? DateTime.UtcNow;

    AthenaParserSetting parserSetting = new AthenaParserSetting();
    parserSetting.DefaultExportPath = $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}".FixPathEnd();
    parserSetting.DefaultTableName = $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`";
    parserSetting.Date = baseDate.AddDays(-pipesSource.DaysAgo);
    parserSetting.DateFormat = pipesSource.DateFormat;
    parserSetting.TempDatabase = pipesSource.TempDatabase;
    parserSetting.TempTablePath = pipesSource.TempDataPath.FixPathEnd();

    // A blank cache definition means "no caches". Deserializing it would either throw
    // (null) or yield a null list (empty string), so it is skipped explicitly.
    if (!string.IsNullOrWhiteSpace(pipesSource.Caches))
    {
        var caches = JsonConvert.DeserializeObject<List<CacheSetting>>(pipesSource.Caches);
        if (caches != null)
        {
            foreach (var cache in caches)
            {
                // Make sure every cache path ends with a single trailing slash.
                if (!cache.S3Path.EndsWith("/", StringComparison.Ordinal))
                {
                    cache.S3Path += "/";
                }

                parserSetting.Caches.Add(cache.Key, cache);
            }
        }
    }

    var parsed = pipesSource.AthenaSQL.ParseAthenaPipes(parserSetting);
    await etlSettings.ExecuteControlFlow(parsed, parserSetting);

    // Load each partition found on the parser setting after the control flow has run.
    var athenaApi = etlSettings.CreatePipesSourceAthenaAPI();
    foreach (var kvp in parserSetting.Partitions)
    {
        await athenaApi.LoadPartitionIfNotExists(parserSetting.DefaultTableName, kvp.Key, kvp.Value);
    }
}
/// <summary>
/// Parses <c>query1.sql</c> from <see cref="AppContext.BaseDirectory"/> and calls
/// <c>LoadNextStateMachineQuery</c> twice against the same context: first with
/// <c>seeking</c> set to <c>false</c>, then with <c>seeking</c> set to <c>true</c>.
/// </summary>
/// <remarks>
/// The method contains no awaits, so it is a plain synchronous method; this lets an
/// exception raised outside the <c>try</c> block (for example while reading the file)
/// reach the caller. Exceptions raised inside the <c>try</c> block are written to the
/// debug output and swallowed.
/// </remarks>
public void StateQueryTests()
{
    int i = 1;
    StateMachineExecutionResult result;
    StateMachineQueryContext context;

    // Its ToString() is dumped when parsing fails.
    AthenaParserSetting athenaParserLogger = new AthenaParserSetting();
    Debug.WriteLine($"****** Begin File {i} ******");

    var filename = $"{AppContext.BaseDirectory}/query{i}.sql";
    Debug.WriteLine($"File {i}: {filename}");
    var query = File.ReadAllText(filename);

    try
    {
        var pipes = query.ParseAthenaPipes(athenaParserLogger);
        Debug.WriteLine($"****** End File {i} ******");

        Debug.WriteLine($"****** Json File {i} ******");
        var tree = JsonConvert.SerializeObject(pipes, Formatting.Indented);
        // Emit the serialized tree under its header; it was previously computed and discarded.
        Debug.WriteLine(tree);

        Debug.WriteLine($"****** Parsed File {i} ******");

        bool seeking = false;
        context = new StateMachineQueryContext();

        // First call: not seeking.
        result = pipes.LoadNextStateMachineQuery(new AthenaParserSetting(), new LinkedList<int>(), context, ref seeking);

        // Second call: same context, now seeking.
        seeking = true;
        result = pipes.LoadNextStateMachineQuery(new AthenaParserSetting(), new LinkedList<int>(), context, ref seeking);

        Debug.WriteLine(pipes.ToQueryString().StripEmptyLines());
    }
    catch (Exception ex)
    {
        Debug.WriteLine(athenaParserLogger.ToString());
        Debug.WriteLine(ex.Message);
    }
}
/// <summary>
/// Builds a <see cref="StateMachineQueryContext"/> holding the raw Athena SQL of the pipes
/// source together with the settings derived from the ETL settings.
/// </summary>
/// <param name="etlSettings">
/// ETL settings to read. When <c>SourceType</c> is not <c>EtlSourceEnum.AmazonAthenaPipes</c>
/// the method returns <c>null</c>.
/// </param>
/// <param name="useDate">
/// Optional base date. When omitted, <see cref="DateTime.UtcNow"/> is used. In both cases
/// the pipes source's <c>DaysAgo</c> is subtracted from it.
/// </param>
/// <returns>The populated context, or <c>null</c> for a non-Athena-pipes source.</returns>
public static StateMachineQueryContext BuildStateMachineQueryContext(this EtlSettings etlSettings, DateTime? useDate = null)
{
    if (etlSettings.SourceType != EtlSourceEnum.AmazonAthenaPipes)
    {
        return null;
    }

    var pipesSource = etlSettings.AthenaQueryPipesSource;
    DateTime baseDate = useDate ?? DateTime.UtcNow;

    AthenaParserSetting parserSetting = new AthenaParserSetting();
    parserSetting.DefaultExportPath = $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}".FixPathEnd();
    parserSetting.DefaultTableName = $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`";
    parserSetting.Date = baseDate.AddDays(-pipesSource.DaysAgo);
    parserSetting.DateFormat = pipesSource.DateFormat;
    parserSetting.TempDatabase = pipesSource.TempDatabase;
    parserSetting.TempTablePath = pipesSource.TempDataPath.FixPathEnd();

    // A blank cache definition means "no caches". Deserializing it would either throw
    // (null) or yield a null list (empty string), so it is skipped explicitly.
    if (!string.IsNullOrWhiteSpace(pipesSource.Caches))
    {
        var caches = JsonConvert.DeserializeObject<List<CacheSetting>>(pipesSource.Caches);
        if (caches != null)
        {
            foreach (var cache in caches)
            {
                // Make sure every cache path ends with a single trailing slash.
                if (!cache.S3Path.EndsWith("/", StringComparison.Ordinal))
                {
                    cache.S3Path += "/";
                }

                parserSetting.Caches.Add(cache.Key, cache);
            }
        }
    }

    // The SQL is not parsed here: the context carries the raw text plus a snapshot of the
    // parser setting, with its dictionaries flattened into key/value lists.
    StateMachineQueryContext context = new StateMachineQueryContext();
    context.raw = pipesSource.AthenaSQL;
    context.settings = new StateMachineSettings()
    {
        DefaultExportPath = parserSetting.DefaultExportPath,
        DefaultTableName = parserSetting.DefaultTableName,
        Date = parserSetting.Date,
        DateFormat = parserSetting.DateFormat,
        TempDatabase = parserSetting.TempDatabase,
        TempTablePath = parserSetting.TempTablePath,
        Caches = parserSetting.Caches.Values.ToList(),
        Clearings = parserSetting.Clearings.Select(kvp => new KeyValueEntry() { Key = kvp.Key, Value = kvp.Value }).ToList(),
        Commands = parserSetting.Commands,
        DroppingTables = parserSetting.DroppingTables,
        Partitions = parserSetting.Partitions.Select(kvp => new KeyValueEntry() { Key = kvp.Key, Value = kvp.Value }).ToList(),
        Variables = parserSetting.Variables.Select(kvp => new KeyValueEntry() { Key = kvp.Key, Value = kvp.Value }).ToList()
    };

    return context;
}
/// <summary>
/// Compiles the Athena query pipes of the ETL settings and, when a definition query is
/// present, runs it to populate the field mappings and the data sample.
/// </summary>
/// <param name="etlSettings">
/// ETL settings to process. Nothing happens unless <c>SourceType</c> is
/// <c>EtlSourceEnum.AmazonAthenaPipes</c>.
/// </param>
/// <returns>A task that completes when parsing and the optional definition query have finished.</returns>
/// <remarks>
/// Cache and parse failures do not throw: their messages are collected in
/// <c>ParseErrors</c> on the pipes source, and <c>ParsedQuery</c> is set to an empty
/// string when parsing fails. Failures of the definition query are not caught here.
/// </remarks>
public static async Task ParseAthenaQueryPipes(this EtlSettings etlSettings)
{
    if (etlSettings.SourceType != EtlSourceEnum.AmazonAthenaPipes)
    {
        return;
    }

    var pipesSource = etlSettings.AthenaQueryPipesSource;

    AthenaParserSetting parserLogger = new AthenaParserSetting();
    parserLogger.DefaultExportPath = $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}".FixPathEnd();
    parserLogger.DefaultTableName = $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`";
    parserLogger.Date = DateTime.UtcNow.AddDays(-pipesSource.DaysAgo);
    parserLogger.DateFormat = pipesSource.DateFormat;
    parserLogger.TempDatabase = pipesSource.TempDatabase;
    parserLogger.TempTablePath = pipesSource.TempDataPath.FixPathEnd();

    pipesSource.ParseErrors = "";

    if (!string.IsNullOrWhiteSpace(pipesSource.Caches))
    {
        try
        {
            var caches = JsonConvert.DeserializeObject<List<CacheSetting>>(pipesSource.Caches);

            // A JSON literal "null" deserializes to a null list; treat it as "no caches".
            if (caches != null)
            {
                foreach (var cache in caches)
                {
                    // Make sure every cache path ends with a single trailing slash.
                    if (!cache.S3Path.EndsWith("/", StringComparison.Ordinal))
                    {
                        cache.S3Path += "/";
                    }

                    parserLogger.Caches.Add(cache.Key, cache);
                }
            }
        }
        catch (Exception ex)
        {
            pipesSource.ParseErrors += ex.Message;
            pipesSource.ParseErrors += "\n";
        }
    }

    try
    {
        var parsed = pipesSource.AthenaSQL.ParseAthenaPipes(parserLogger);
        pipesSource.ParsedQuery = parsed.ToQueryString();
    }
    catch (Exception ex)
    {
        // Append rather than assign so cache errors collected above are not lost.
        pipesSource.ParseErrors += parserLogger.ToString();
        pipesSource.ParseErrors += "\n";
        pipesSource.ParseErrors += ex.Message;
        pipesSource.ParsedQuery = "";
    }

    // Only run the definition query when one is configured (null-safe check).
    if (string.IsNullOrWhiteSpace(pipesSource.AthenaDefinitionSQL))
    {
        return;
    }

    var athenaApi = etlSettings.CreatePipesSourceAthenaAPI();
    var getResultRequest = await athenaApi.ExecuteQuery(pipesSource.AthenaDefinitionSQL);
    var response = await athenaApi.ReadOneResult(getResultRequest);
    etlSettings.Mappings = response.ToFieldMapping();

    // Copy the returned rows into the sample stored on the ETL settings, one string per cell.
    var sample = new DataSample()
    {
        Rows = new List<DataRow>()
    };
    foreach (var row in response.ReadData())
    {
        var dataRow = new DataRow()
        {
            Items = row.Select(item => item.ToString()).ToList()
        };
        sample.Rows.Add(dataRow);
    }

    etlSettings.Sample = sample;
}