Exemple #1
0
 /// <summary>
 /// Runs the Athena pipe parser over the files query1.sql through query5.sql located in the
 /// application base directory and writes progress, plus the re-rendered query, to the debug output.
 /// </summary>
 /// <remarks>
 /// Parser and serialization failures are caught per file and reported to the debug output;
 /// a failure to read a file is not caught and propagates to the caller.
 /// </remarks>
 public void ParseQuereis()
 {
     for (int fileNumber = 1; fileNumber <= 5; fileNumber++)
     {
         // A fresh setting object per file; it is dumped via ToString() if parsing fails.
         var parserLog = new AthenaParserSetting();

         Debug.WriteLine($"****** Begin File {fileNumber} ******");
         string path = $"{AppContext.BaseDirectory}/query{fileNumber}.sql";
         Debug.WriteLine($"File {fileNumber}: {path}");

         string sql = File.ReadAllText(path);

         try
         {
             var parsedPipes = sql.ParseAthenaPipes(parserLog);
             Debug.WriteLine($"****** End File {fileNumber} ******");

             Debug.WriteLine($"****** Json File {fileNumber} ******");
             // The JSON is not printed; the call is kept so serialization problems still surface here.
             var json = JsonConvert.SerializeObject(parsedPipes, Formatting.Indented);

             Debug.WriteLine($"****** Parsed File {fileNumber} ******");
             Debug.WriteLine(parsedPipes.ToQueryString().StripEmptyLines());
         }
         catch (Exception failure)
         {
             Debug.WriteLine(parserLog.ToString());
             Debug.Write(failure.Message);
         }
     }
 }
        /// <summary>
        /// Creates a new <see cref="AthenaParserSetting"/> and copies the export path, table name,
        /// date, date format, temp database and temp table path from the context's settings.
        /// </summary>
        /// <param name="context">The state machine query context whose <c>settings</c> are read.</param>
        /// <returns>The newly created parser setting.</returns>
        public static AthenaParserSetting BuildParserSetting(this StateMachineQueryContext context)
        {
            StateMachineSettings source = context.settings;

            return new AthenaParserSetting
            {
                DefaultExportPath = source.DefaultExportPath,
                DefaultTableName  = source.DefaultTableName,
                Date              = source.Date,
                DateFormat        = source.DateFormat,
                TempDatabase      = source.TempDatabase,
                TempTablePath     = source.TempTablePath
            };
        }
        /// <summary>
        /// Parses the Athena query pipes configured on the ETL settings, executes the resulting
        /// control flow, and then loads every partition collected in the parser setting.
        /// </summary>
        /// <remarks>
        /// Returns immediately when <c>SourceType</c> is not <c>EtlSourceEnum.AmazonAthenaPipes</c>.
        /// A missing or blank cache definition is treated as "no caches", matching the guard used by
        /// <c>ParseAthenaQueryPipes</c>.
        /// </remarks>
        /// <param name="etlSettings">The ETL settings holding the pipes source and target locations.</param>
        /// <param name="useDate">
        /// Optional base date. When null the current UTC time is used. In both cases
        /// <c>DaysAgo</c> from the pipes source is subtracted.
        /// </param>
        /// <returns>A task that completes after the control flow has run and the partitions were loaded.</returns>
        public static async Task RunAthenaQueryPipes(this EtlSettings etlSettings, DateTime? useDate = null)
        {
            if (etlSettings.SourceType != EtlSourceEnum.AmazonAthenaPipes)
            {
                return;
            }

            var pipesSource = etlSettings.AthenaQueryPipesSource;

            DateTime baseDate = useDate ?? DateTime.UtcNow;

            AthenaParserSetting parserSetting = new AthenaParserSetting();

            parserSetting.DefaultExportPath = $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}".FixPathEnd();
            parserSetting.DefaultTableName  = $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`";
            parserSetting.Date          = baseDate.AddDays(-pipesSource.DaysAgo);
            parserSetting.DateFormat    = pipesSource.DateFormat;
            parserSetting.TempDatabase  = pipesSource.TempDatabase;
            parserSetting.TempTablePath = pipesSource.TempDataPath.FixPathEnd();

            // Deserializing a null string throws and an empty/"null" payload yields null,
            // so only touch the cache list when there is actually something to read.
            if (!string.IsNullOrWhiteSpace(pipesSource.Caches))
            {
                var caches = JsonConvert.DeserializeObject<List<CacheSetting>>(pipesSource.Caches);

                if (caches != null)
                {
                    foreach (var cache in caches)
                    {
                        // Cache paths are registered with a trailing slash.
                        if (!cache.S3Path.EndsWith("/", StringComparison.Ordinal))
                        {
                            cache.S3Path += "/";
                        }
                        parserSetting.Caches.Add(cache.Key, cache);
                    }
                }
            }

            var parsed = pipesSource.AthenaSQL.ParseAthenaPipes(parserSetting);

            await etlSettings.ExecuteControlFlow(parsed, parserSetting);

            var athenaApi = etlSettings.CreatePipesSourceAthenaAPI();

            // Partitions are read from the parser setting only after the control flow has run.
            foreach (var kvp in parserSetting.Partitions)
            {
                await athenaApi.LoadPartitionIfNotExists(parserSetting.DefaultTableName, kvp.Key, kvp.Value);
            }
        }
Exemple #4
0
        /// <summary>
        /// Manual smoke test: parses query1.sql from the application base directory and calls
        /// <c>LoadNextStateMachineQuery</c> twice on the parsed pipes, first with <c>seeking</c>
        /// set to false and then with it set to true, writing progress to the debug output.
        /// </summary>
        /// <remarks>
        /// The method is synchronous: it contains no awaits, and an <c>async void</c> signature
        /// would route any uncaught exception (for example a missing query file) to the
        /// synchronization context instead of the caller.
        /// </remarks>
        public void StateQueryTests()
        {
            int i = 1;

            AthenaParserSetting athenaParserLogger = new AthenaParserSetting();
            Debug.WriteLine($"****** Begin File {i} ******");
            var filename = $"{AppContext.BaseDirectory}/query{i}.sql";
            Debug.WriteLine($"File {i}: {filename}");
            var query = File.ReadAllText(filename);

            try
            {
                var pipes = query.ParseAthenaPipes(athenaParserLogger);
                Debug.WriteLine($"****** End File {i} ******");
                Debug.WriteLine($"****** Json File {i} ******");
                // The JSON is not printed; the call is kept so serialization problems surface here.
                var tree = JsonConvert.SerializeObject(pipes, Formatting.Indented);
                Debug.WriteLine($"****** Parsed File {i} ******");

                bool seeking = false;
                StateMachineQueryContext context = new StateMachineQueryContext();

                // result is not asserted on; it is kept so it can be inspected in a debugger.
                StateMachineExecutionResult result;
                result  = pipes.LoadNextStateMachineQuery(new AthenaParserSetting(), new LinkedList<int>(), context, ref seeking);
                seeking = true;
                result  = pipes.LoadNextStateMachineQuery(new AthenaParserSetting(), new LinkedList<int>(), context, ref seeking);

                Debug.WriteLine(pipes.ToQueryString().StripEmptyLines());
            }
            catch (Exception ex)
            {
                Debug.WriteLine(athenaParserLogger.ToString());
                Debug.Write(ex.Message);
            }
        }
        /// <summary>
        /// Builds a <see cref="StateMachineQueryContext"/> from the ETL settings: the raw Athena SQL
        /// is stored in <c>raw</c>, and a <see cref="StateMachineSettings"/> snapshot of a freshly
        /// populated parser setting is stored in <c>settings</c>.
        /// </summary>
        /// <remarks>
        /// The SQL is stored unparsed; this method neither parses the pipes nor loads partitions.
        /// A missing or blank cache definition is treated as "no caches", matching the guard used by
        /// <c>ParseAthenaQueryPipes</c>.
        /// </remarks>
        /// <param name="etlSettings">The ETL settings holding the pipes source and target locations.</param>
        /// <param name="useDate">
        /// Optional base date. When null the current UTC time is used. In both cases
        /// <c>DaysAgo</c> from the pipes source is subtracted.
        /// </param>
        /// <returns>
        /// The new context, or null when <c>SourceType</c> is not <c>EtlSourceEnum.AmazonAthenaPipes</c>.
        /// </returns>
        public static StateMachineQueryContext BuildStateMachineQueryContext(this EtlSettings etlSettings, DateTime? useDate = null)
        {
            if (etlSettings.SourceType != EtlSourceEnum.AmazonAthenaPipes)
            {
                return null;
            }

            var pipesSource = etlSettings.AthenaQueryPipesSource;

            DateTime baseDate = useDate ?? DateTime.UtcNow;

            AthenaParserSetting parserSetting = new AthenaParserSetting();

            parserSetting.DefaultExportPath = $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}".FixPathEnd();
            parserSetting.DefaultTableName  = $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`";
            parserSetting.Date          = baseDate.AddDays(-pipesSource.DaysAgo);
            parserSetting.DateFormat    = pipesSource.DateFormat;
            parserSetting.TempDatabase  = pipesSource.TempDatabase;
            parserSetting.TempTablePath = pipesSource.TempDataPath.FixPathEnd();

            // Deserializing a null string throws and an empty/"null" payload yields null,
            // so only touch the cache list when there is actually something to read.
            if (!string.IsNullOrWhiteSpace(pipesSource.Caches))
            {
                var caches = JsonConvert.DeserializeObject<List<CacheSetting>>(pipesSource.Caches);

                if (caches != null)
                {
                    foreach (var cache in caches)
                    {
                        // Cache paths are registered with a trailing slash.
                        if (!cache.S3Path.EndsWith("/", StringComparison.Ordinal))
                        {
                            cache.S3Path += "/";
                        }
                        parserSetting.Caches.Add(cache.Key, cache);
                    }
                }
            }

            StateMachineQueryContext context = new StateMachineQueryContext();

            context.raw = pipesSource.AthenaSQL;

            // Keyed collections of the parser setting are flattened into KeyValueEntry lists.
            context.settings = new StateMachineSettings()
            {
                DefaultExportPath = parserSetting.DefaultExportPath,
                DefaultTableName  = parserSetting.DefaultTableName,
                Date          = parserSetting.Date,
                DateFormat    = parserSetting.DateFormat,
                TempDatabase  = parserSetting.TempDatabase,
                TempTablePath = parserSetting.TempTablePath,
                Caches        = parserSetting.Caches.Values.ToList(),
                Clearings     = parserSetting.Clearings.Select(kvp => new KeyValueEntry()
                {
                    Key = kvp.Key, Value = kvp.Value
                }).ToList(),
                Commands       = parserSetting.Commands,
                DroppingTables = parserSetting.DroppingTables,
                Partitions     = parserSetting.Partitions.Select(kvp => new KeyValueEntry()
                {
                    Key = kvp.Key, Value = kvp.Value
                }).ToList(),
                Variables = parserSetting.Variables.Select(kvp => new KeyValueEntry()
                {
                    Key = kvp.Key, Value = kvp.Value
                }).ToList()
            };

            return context;
        }
        /// <summary>
        /// Compiles the Athena query pipes and, when a definition query is present, runs it to
        /// populate the field mappings and a data sample on the ETL settings.
        /// </summary>
        /// <remarks>
        /// Returns immediately when <c>SourceType</c> is not <c>EtlSourceEnum.AmazonAthenaPipes</c>.
        /// <c>ParseErrors</c> is reset at the start; cache-loading and parse failures are both
        /// appended to it rather than thrown. On a parse failure <c>ParsedQuery</c> is set to an
        /// empty string. Failures while executing the definition query are not caught here.
        /// </remarks>
        /// <param name="etlSettings">The ETL settings holding the pipes source and target locations.</param>
        /// <returns>A task that completes when parsing and the optional definition query are done.</returns>
        public static async Task ParseAthenaQueryPipes(this EtlSettings etlSettings)
        {
            if (etlSettings.SourceType != EtlSourceEnum.AmazonAthenaPipes)
            {
                return;
            }

            var pipesSource = etlSettings.AthenaQueryPipesSource;
            AthenaParserSetting parserLogger = new AthenaParserSetting();

            parserLogger.DefaultExportPath = $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}".FixPathEnd();
            parserLogger.DefaultTableName  = $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`";
            parserLogger.Date          = DateTime.UtcNow.AddDays(-pipesSource.DaysAgo);
            parserLogger.DateFormat    = pipesSource.DateFormat;
            parserLogger.TempDatabase  = pipesSource.TempDatabase;
            parserLogger.TempTablePath = pipesSource.TempDataPath.FixPathEnd();

            pipesSource.ParseErrors = "";

            if (!string.IsNullOrWhiteSpace(pipesSource.Caches))
            {
                try
                {
                    var caches = JsonConvert.DeserializeObject<List<CacheSetting>>(pipesSource.Caches);
                    foreach (var cache in caches)
                    {
                        // Cache paths are registered with a trailing slash.
                        if (!cache.S3Path.EndsWith("/", StringComparison.Ordinal))
                        {
                            cache.S3Path += "/";
                        }
                        parserLogger.Caches.Add(cache.Key, cache);
                    }
                }
                catch (Exception ex)
                {
                    pipesSource.ParseErrors += ex.Message;
                    pipesSource.ParseErrors += "\n";
                }
            }

            try
            {
                var parsed = pipesSource.AthenaSQL.ParseAthenaPipes(parserLogger);
                pipesSource.ParsedQuery = parsed.ToQueryString();
            }
            catch (Exception ex)
            {
                // Append instead of assign so a cache error recorded above is not lost.
                pipesSource.ParseErrors += parserLogger.ToString();
                pipesSource.ParseErrors += "\n";
                pipesSource.ParseErrors += ex.Message;
                pipesSource.ParsedQuery  = "";
            }

            // Run the definition query only if it contains at least one non-whitespace character.
            // The null check avoids an ArgumentNullException from Regex.IsMatch.
            if (pipesSource.AthenaDefinitionSQL != null && Regex.IsMatch(pipesSource.AthenaDefinitionSQL, @"\S+"))
            {
                var athenaApi        = etlSettings.CreatePipesSourceAthenaAPI();
                var getResultRequest = await athenaApi.ExecuteQuery(pipesSource.AthenaDefinitionSQL);

                var response = await athenaApi.ReadOneResult(getResultRequest);

                // Load the result's schema into the ETL settings.
                etlSettings.Mappings = response.ToFieldMapping();

                var sample = new DataSample()
                {
                    Rows = new List<DataRow>()
                };

                // Every returned row is stored in the sample as a list of strings.
                var data = response.ReadData();
                foreach (var row in data)
                {
                    var dataRow = new DataRow()
                    {
                        Items = row.Select(item => item.ToString()).ToList()
                    };
                    sample.Rows.Add(dataRow);
                }

                etlSettings.Sample = sample;
            }
        }