/// <summary>
        /// Executes an Athena query and packages the rows it yields as sample data, together
        /// with the field mappings derived from the response.
        /// </summary>
        /// <param name="athenaApi">Athena API wrapper used to execute the query and read its result.</param>
        /// <param name="sql">SQL text handed to ExecuteQuery unchanged.</param>
        /// <returns>
        /// A DataSampleWithSchema whose FieldMappings come from the response and whose rows
        /// hold the string form of every item read from it.
        /// </returns>
        public static async Task <DataSampleWithSchema> GetSampleDataBySQL(this AWSAthenaAPI athenaApi, string sql)
        {
            var sampleData = new DataSample()
            {
                Rows = new List <DataRow>()
            };
            var output = new DataSampleWithSchema()
            {
                FieldMappings = new List <FieldMapping>(),
                DataSample    = sampleData
            };

            // run the query, then read one result for the request it produced
            var queryRequest  = await athenaApi.ExecuteQuery(sql);
            var queryResponse = await athenaApi.ReadOneResult(queryRequest);

            var records = queryResponse.ReadData();

            output.FieldMappings = queryResponse.ToFieldMapping();

            // every record becomes one DataRow of stringified items
            sampleData.Rows.AddRange(records.Select(record => new DataRow()
            {
                Items = record.Select(cell => cell.ToString()).ToList()
            }));

            return(output);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the SQL for a formatted query, executes it on Athena and returns the flat result.
        /// </summary>
        /// <param name="athena">Athena API wrapper that executes the query and reads the result.</param>
        /// <param name="query">Query object whose BuildQuerySQL() output is executed.</param>
        /// <returns>The value produced by GetFlatResult for the executed query.</returns>
        public static async Task <AthenaQueryFlatResult> GetQueryData(this AWSAthenaAPI athena, FormatedQuery query)
        {
            var queryRequest = await athena.ExecuteQuery(query.BuildQuerySQL());
            var flatResult   = await athena.GetFlatResult(queryRequest);

            return(flatResult);
        }
 /// <summary>
 /// Resolves the S3 API, Athena API and client options from the application's service
 /// container, initializes the window, binds the item sources and starts loading S3.
 /// </summary>
 public MainWindow()
 {
     var application = App.Current as App;

     componentContext = application.Services;

     // resolution order is kept exactly as the services were originally requested
     s3      = componentContext.Resolve <AWSS3API>();
     athena  = componentContext.Resolve <AWSAthenaAPI>();
     options = componentContext.Resolve <AthenaClientOptions>();

     InitializeComponent();

     // the named controls only exist after InitializeComponent has run
     s3Tree.ItemsSource     = S3ItemsSource;
     tabQueries.ItemsSource = FormatedQuerySource;

     LoadS3();
 }
        /// <summary>
        /// Issues <c>DROP TABLE IF EXISTS</c> for the given table and waits until Athena
        /// reports that the execution has completed.
        /// </summary>
        /// <param name="athenaApi">Athena API wrapper used to start the query and poll its state.</param>
        /// <param name="tableName">
        /// Table name, interpolated into the statement as-is (no quoting or escaping is applied here),
        /// so it must come from a trusted source.
        /// </param>
        /// <returns>A task that completes once IsExecutionCompleted returns true.</returns>
        public static async Task DropAthenaTable(this AWSAthenaAPI athenaApi, string tableName)
        {
            string query = $"DROP TABLE IF EXISTS {tableName}";

            Console.WriteLine(query);
            var executionId = await athenaApi.StartQuery(query);

            // Poll every two seconds. Task.Delay releases the thread while waiting, whereas
            // Thread.Sleep would block it for the whole interval inside an async method.
            while (!await athenaApi.IsExecutionCompleted(executionId))
            {
                await Task.Delay(2000);
            }
        }
        /// <summary>
        /// Starts an Athena query without waiting for it to finish.
        /// </summary>
        /// <param name="athenaApi">Athena API wrapper used to start the query.</param>
        /// <param name="sql">SQL text handed to StartQuery unchanged.</param>
        /// <returns>
        /// The string returned by StartQuery; sibling helpers pass that value to
        /// IsExecutionCompleted as the execution id.
        /// </returns>
        public static async Task <string> StartSampleDataBySQL(this AWSAthenaAPI athenaApi, string sql)
        {
            // The sample/result objects that used to be built here were never read or returned,
            // so only the query start remains.
            return(await athenaApi.StartQuery(sql));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Process S3 Etl Event: looks up the handler matching the event source object, loads the
        /// ETL settings that handler names and runs the transfer for the object.
        /// </summary>
        /// <param name="reportingAwsS3Api">The S3 bucket access for the reporting settings</param>
        /// <param name="listKey">s3 event list handler json key</param>
        /// <param name="etlPrefix">etl settings prefix; the settings key is built as prefix + EtlName + ".json"</param>
        /// <param name="awsAthenaAPI">The target athena</param>
        /// <param name="bucketName">event source bucket name</param>
        /// <param name="s3FileKey">event source s3 key</param>
        /// <param name="logger">optional logger; when supplied it receives the handler json and the ETL settings key</param>
        /// <returns>
        /// The transfer results joined with new lines, or a message explaining why nothing was
        /// processed (missing handler list, no matching handler, missing ETL settings).
        /// </returns>
        public static async Task <string> ProcessS3EtlEvent(this AWSS3API reportingAwsS3Api, string listKey, string etlPrefix,
                                                            AWSAthenaAPI awsAthenaAPI, string bucketName, string s3FileKey, GenericLogger logger = null)
        {
            if (!await reportingAwsS3Api.FileExists(listKey))
            {
                return($"event handler setting does not exist: '{listKey}'");
            }

            var json = await reportingAwsS3Api.ReadAsString(listKey);

            logger?.Log?.Invoke(json);
            var list = JsonConvert.DeserializeObject <List <S3EventHandler> >(json);

            // DeserializeObject returns null for an empty or literal "null" document; treat that
            // the same as a list without any matching handler instead of failing on a null reference.
            var found = list?.FirstOrDefault(handler => handler.BucketName == bucketName && Regex.IsMatch(s3FileKey, handler.PathRegex));

            if (found == null)
            {
                return($"event handler not found for object: 's3://{bucketName}/{s3FileKey}'");
            }

            var etlkey = $"{etlPrefix}{found.EtlName}.json";

            logger?.Log?.Invoke($"Find ETL setting: {etlkey}");
            if (!await reportingAwsS3Api.FileExists(etlkey))
            {
                return($"etl setting does not exist: '{etlkey}'");
            }

            var jsonEtl = await reportingAwsS3Api.ReadAsString(etlkey);

            var etlSettings = JsonConvert.DeserializeObject <EtlSettings>(jsonEtl);

            // the transfer reads the object to process from S3EventSource.ExamplePath,
            // so point it at the key that raised this event
            etlSettings.S3EventSource.ExamplePath = s3FileKey;

            var results = await etlSettings.TransferData(awsAthenaAPI);

            return(string.Join("\n", results));
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Loads the decrypted options, creates the S3 and Athena APIs from them, initializes the
        /// window and binds its item sources. When the options cannot be loaded the application is
        /// shut down and the window is left uninitialized.
        /// </summary>
        public MainWindow()
        {
            componentContext = (App.Current as App).Services;

            DecryptedOptions plainOptions = LoadOptions(componentContext.Resolve <EncryptedOptions>());

            // without options there is nothing to connect to: shut down before any UI is built
            if (plainOptions == null)
            {
                Application.Current.Shutdown();
                return;
            }

            s3      = new AWSS3API(plainOptions.AWSS3Options);
            athena  = new AWSAthenaAPI(plainOptions.AWSAthenaOptions);
            options = plainOptions.AthenaClientOptions;

            InitializeComponent();

            // the named controls only exist after InitializeComponent has run
            s3Tree.ItemsSource     = S3ItemsSource;
            tabQueries.ItemsSource = FormatedQuerySource;
            dgJobList.ItemsSource  = QueryTasks;

            LoadS3();
        }
        /// <summary>
        /// Re-creates a single partition of an Athena table: first drops the partition if it
        /// exists, then adds it again pointing at <paramref name="location"/>. Each statement is
        /// awaited until Athena reports the execution as completed.
        /// </summary>
        /// <param name="athenaApi">Athena API wrapper used to start the statements and poll their state.</param>
        /// <param name="tableName">Table name, interpolated into both statements as-is.</param>
        /// <param name="key">Partition specification placed inside <c>PARTITION (...)</c> as-is.</param>
        /// <param name="location">Location placed between single quotes in the <c>LOCATION</c> clause.</param>
        /// <returns>A task that completes once the ADD PARTITION execution has completed.</returns>
        public static async Task LoadAthenaParition(this AWSAthenaAPI athenaApi, string tableName, string key, string location)
        {
            string dropQuery = $"ALTER TABLE {tableName} DROP IF EXISTS PARTITION ({key})";

            Console.WriteLine(dropQuery);
            var dropExecutionId = await athenaApi.StartQuery(dropQuery);

            // Task.Delay keeps the polling loop asynchronous; Thread.Sleep would block the
            // calling thread for every interval.
            while (!await athenaApi.IsExecutionCompleted(dropExecutionId))
            {
                await Task.Delay(500);
            }

            string addQuery = $"ALTER TABLE {tableName} ADD IF NOT EXISTS PARTITION ({key}) LOCATION '{location}'";

            Console.WriteLine(addQuery);
            var addExecutionId = await athenaApi.StartQuery(addQuery);

            while (!await athenaApi.IsExecutionCompleted(addExecutionId))
            {
                await Task.Delay(500);
            }
        }
        /// <summary>
        /// Drops an Athena table and then deletes the S3 files listed under the table's S3 path.
        /// </summary>
        /// <param name="athenaApi">Athena API wrapper used to start the drop and poll its state.</param>
        /// <param name="awsS3Api">S3 API wrapper used to list and delete the files.</param>
        /// <param name="tableName">Table name, interpolated into the DROP statement as-is.</param>
        /// <param name="s3Path">
        /// S3 URI of the table data. When ParseS3URI does not yield an S3Object for it,
        /// the S3 clean-up is skipped.
        /// </param>
        /// <returns>A task that completes after the drop has finished and the files were deleted.</returns>
        public static async Task ClearAthenaTable(this AWSAthenaAPI athenaApi, AWSS3API awsS3Api, string tableName, string s3Path)
        {
            string dropQuery = $"DROP TABLE IF EXISTS {tableName}";

            Console.WriteLine(dropQuery);
            var executionId = await athenaApi.StartQuery(dropQuery);

            // Task.Delay keeps the polling loop asynchronous; Thread.Sleep would block the
            // calling thread for every interval.
            while (!await athenaApi.IsExecutionCompleted(executionId))
            {
                await Task.Delay(2000);
            }
            var s3Object = s3Path.ParseS3URI();

            if (s3Object is S3Object)
            {
                Console.WriteLine($"Delete S3: {s3Path}");
                var files = await awsS3Api.ListFiles(s3Object.Key, "/", s3Object.BucketName);

                if (files.Any())
                {
                    // the delete call is given each listed name prefixed with the parsed key
                    await awsS3Api.Delete(files.Select(key => $"{s3Object.Key}{key}"), s3Object.BucketName);
                }
                Console.WriteLine($"{s3Path}: {files.Count} S3 Files Deleted");
            }
        }
        /// <summary>
        /// Returns the sample data of a previously started query when its execution has completed.
        /// </summary>
        /// <param name="athenaApi">Athena API wrapper used to check the execution and read its result.</param>
        /// <param name="executionId">Query execution id to check and read.</param>
        /// <returns>
        /// The sample data with field mappings when IsExecutionCompleted reports true;
        /// otherwise <c>null</c>.
        /// </returns>
        public static async Task <DataSampleWithSchema> TryObtainSampleDataResult(this AWSAthenaAPI athenaApi, string executionId)
        {
            var completed = await athenaApi.IsExecutionCompleted(executionId);

            // nothing to read yet
            if (!completed)
            {
                return(null);
            }

            var sampleData = new DataSample()
            {
                Rows = new List <DataRow>()
            };
            var output = new DataSampleWithSchema()
            {
                FieldMappings = new List <FieldMapping>(),
                DataSample    = sampleData
            };

            var request = new GetQueryResultsRequest()
            {
                QueryExecutionId = executionId
            };
            var response = await athenaApi.ReadOneResult(request);

            var records = response.ReadData();

            output.FieldMappings = response.ToFieldMapping();

            // every record becomes one DataRow of stringified items
            sampleData.Rows.AddRange(records.Select(record => new DataRow()
            {
                Items = record.Select(cell => cell.ToString()).ToList()
            }));

            return(output);
        }
        /// <summary>
        /// Runs the configured Athena source query for a single day, writes the returned rows to
        /// the target S3 bucket in chunks of NumberOfItemsPerParquet rows, and then loads the
        /// day's partition into the target Athena table.
        /// </summary>
        /// <param name="etlSettings">
        /// ETL settings providing the Athena source (query, DaysAgo, DateFormat), the chunk size
        /// and the target bucket, prefix, database, table and partition key.
        /// </param>
        /// <param name="awsAthenaAPI">Target Athena API on which the partition is loaded.</param>
        /// <returns>The <c>s3://</c> URIs of the files written by this run.</returns>
        /// <exception cref="Exception">Thrown when the settings carry no Athena query source.</exception>
        public static async Task <List <string> > TransferAthenaQueryResultByDate(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI)
        {
            var result = new List <string>();
            var athena = etlSettings.AthenaQuerySource;

            if (athena == null)
            {
                throw new Exception("The ETL has an empty Athena source setting.");
            }
            var athenaApi = etlSettings.CreateSourceAthenaAPI();

            // the query uses the configured date format, the partition key is always yyyyMMdd
            var date    = DateTime.Now.AddDays(-athena.DaysAgo);
            var query   = athena.AthenaSQL.Replace("{date}", date.ToString(athena.DateFormat));
            var dateKey = date.ToString("yyyyMMdd");

            var getResultRequest = await athenaApi.ExecuteQuery(query);

            // filled in by the callback handed to EnumerateRows
            ResultSetMetadata resultSetMetadata = null;

            List <Row> rows = new List <Row>();

            int parquetIndex = 0;

            var targetS3 = etlSettings.CreateTargetS3API();

            // foreach disposes the enumerator, which the manual MoveNext loop never did
            bool isFirstRow = true;

            foreach (var row in athenaApi.EnumerateRows(getResultRequest, res => resultSetMetadata = res.ResultSet.ResultSetMetadata))
            {
                // skip first row (presumably the column-header row of the result set - confirm)
                if (isFirstRow)
                {
                    isFirstRow = false;
                    continue;
                }

                rows.Add(row);
                if (rows.Count >= etlSettings.NumberOfItemsPerParquet)
                {
                    var s3key = etlSettings.MakeTargetS3Key(dateKey, "", false, parquetIndex);
                    await targetS3.WriteResultRowsToS3Bucket(rows, resultSetMetadata, etlSettings, s3key);

                    result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
                    parquetIndex += 1;

                    // start a fresh chunk; without this every later row would rewrite all
                    // accumulated rows into yet another file
                    rows.Clear();
                }
            }

            // write whatever is left over from the last full chunk
            if (rows.Count > 0)
            {
                var s3key = etlSettings.MakeTargetS3Key(dateKey, "", false, parquetIndex);
                await targetS3.WriteResultRowsToS3Bucket(rows, resultSetMetadata, etlSettings, s3key);

                result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
                parquetIndex += 1;
            }

            {
                // load partition to athena table (done even when no file was written)
                await awsAthenaAPI.LoadPartition(
                    $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`",
                    $"`{etlSettings.DatePartitionKey}` = '{dateKey}'",
                    $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}/{dateKey}/");
            }

            return(result);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Runs the configured BigQuery source query for a single day, writes the returned rows
        /// to the target S3 bucket in chunks of NumberOfItemsPerParquet rows, and then loads the
        /// day's partition into the target Athena table.
        /// </summary>
        /// <param name="etlSettings">
        /// ETL settings providing the Google Analytics source (credentials file, project id, query,
        /// DaysAgo, DateFormat), the chunk size and the target bucket, prefix, database, table and
        /// partition key.
        /// </param>
        /// <param name="awsAthenaAPI">Target Athena API on which the partition is loaded.</param>
        /// <returns>The <c>s3://</c> URIs of the files written by this run.</returns>
        public static async Task <List <string> > TransferBigQueryResultByDate(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI)
        {
            var result = new List <string>();

            var targetS3 = etlSettings.CreateTargetS3API();
            var ga       = etlSettings.GoogleAnalyticsQuerySource;

            // the credentials file is picked up from this process-wide environment variable
            Environment.SetEnvironmentVariable("GOOGLE_APPLICATION_CREDENTIALS", $"{AppContext.BaseDirectory}/{ga.GoogleAnalyticsSettingFile}");

            BigQueryClient client = BigQueryClient.Create(ga.GoogleAnalyticsProjectId);

            // Sample the clock once so both keys describe the same day, even around midnight.
            var date = DateTime.Now.AddDays(-ga.DaysAgo);

            // The query placeholder uses the configured date format (as the Athena source transfer
            // does); the partition key is always yyyyMMdd. Previously dateQueryKey was computed
            // but the partition key was substituted into the query instead.
            string dateQueryKey = date.ToString(ga.DateFormat);
            string dateKey      = date.ToString("yyyyMMdd");

            string sql = ga.BigQuerySQL.Replace("{date}", dateQueryKey);

            var job = await client.CreateQueryJobAsync(sql, new List <BigQueryParameter>());

            BigQueryResults results = await client.GetQueryResultsAsync(job.Reference, new GetQueryResultsOptions()
            {
                StartIndex = 0,
                PageSize   = 20000,
            });

            List <BigQueryRow> rows = new List <BigQueryRow>();

            int parquetIndex = 0;

            // foreach disposes the enumerator, which the manual MoveNext loop never did
            foreach (var row in results)
            {
                rows.Add(row);
                if (rows.Count >= etlSettings.NumberOfItemsPerParquet)
                {
                    var s3key = etlSettings.MakeTargetS3Key(dateKey, "", false, parquetIndex);
                    await targetS3.WriteResultRowsToS3Bucket(rows, results, etlSettings, s3key);

                    result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
                    parquetIndex += 1;

                    // start a fresh chunk; without this every later row would rewrite all
                    // accumulated rows into yet another file
                    rows.Clear();
                }
            }

            // write whatever is left over from the last full chunk
            if (rows.Count > 0)
            {
                var s3key = etlSettings.MakeTargetS3Key(dateKey, "", false, parquetIndex);
                await targetS3.WriteResultRowsToS3Bucket(rows, results, etlSettings, s3key);

                result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
                parquetIndex += 1;
            }

            {
                // load partition to athena table (done even when no file was written)
                await awsAthenaAPI.LoadPartition(
                    $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`",
                    $"`{etlSettings.DatePartitionKey}` = '{dateKey}'",
                    $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}/{dateKey}/");
            }

            return(result);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Reads CSV records from <paramref name="stream"/>, uploads them to the target S3 bucket as
        /// parquet files (a file is flushed whenever the buffered row count reaches
        /// NumberOfItemsPerParquet), loads the date partition into Athena and finally uploads a
        /// flag file for <paramref name="filename"/>.
        /// </summary>
        /// <param name="etlSettings">ETL settings: CSV options, header flag, mappings, chunk size and target names.</param>
        /// <param name="awsAthenaAPI">Target Athena API on which the partition is loaded.</param>
        /// <param name="stream">Source stream; wrapped in a GZip decompression stream when CsvSourceOptoins.GZip is set.</param>
        /// <param name="dateKey">Partition value; also passed to MakeTargetS3Key and used in the partition location.</param>
        /// <param name="filename">Source file name, passed to MakeTargetS3Key and TargetFlagFile.</param>
        /// <param name="keepOriginalName">Passed through to MakeTargetS3Key.</param>
        /// <returns>The <c>s3://</c> URIs of every parquet file written, followed by the flag file URI.</returns>
        public static async Task <List <string> > TransferCsvStream(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI, Stream stream, string dateKey, string filename, bool keepOriginalName)
        {
            var result = new List <string>();
            var config = new CsvConfiguration(CultureInfo.InvariantCulture)
            {
                Delimiter = etlSettings.CsvSourceOptoins.Delimiter
            };

            var csvStream = stream;

            // transparently decompress gzip sources
            if (etlSettings.CsvSourceOptoins.GZip)
            {
                csvStream = new GZipStream(stream, CompressionMode.Decompress);
            }

            using (var csvStreamReader = new StreamReader(csvStream))
            {
                using (var csvReader = new CsvReader(csvStreamReader, config))
                {
                    var headers      = new List <string>();
                    int parquetIndex = 0;

                    var targetS3 = etlSettings.CreateTargetS3API();

                    // consume the first record as the header line when the settings say there is one
                    if (etlSettings.HasHeader)
                    {
                        csvReader.Read();
                        string header = null;
                        int    index  = 0;
                        while (csvReader.TryGetField(index, out header))
                        {
                            headers.Add(header);
                            index++;
                        }
                    }
                    // NOTE(review): 'mappings' is never read below; ToDictionary still throws on
                    // duplicate or null SourceFieldName values, so removing it would change behavior.
                    var mappings = etlSettings.Mappings.ToDictionary(m => m.SourceFieldName, m => m);
                    List <List <string> > data = new List <List <string> >();
                    while (csvReader.Read())
                    {
                        int    index = 0;
                        string value = null;
                        var    row   = new List <string>();
                        while (csvReader.TryGetField(index, out value))
                        {
                            // grow the header list with a generated name when a record has more
                            // fields than headers seen so far
                            // NOTE(review): 'headers' is only appended to in this method, never read
                            if (headers.Count == index)
                            {
                                headers.Add($"Col{index}");
                            }
                            row.Add(value);
                            index++;
                        }
                        data.Add(row);
                        // flush a full chunk to S3 and start collecting the next one
                        if (data.Count >= etlSettings.NumberOfItemsPerParquet)
                        {
                            var s3key = etlSettings.MakeTargetS3Key(dateKey, filename, keepOriginalName, parquetIndex);
                            using (var bufferStream = new MemoryStream())
                            {
                                bufferStream.WriteParquet(etlSettings.Mappings.Select(m => m.ToParquetField()).ToList(), data);
                                // upload a copy of the written bytes through a fresh stream
                                await targetS3.Upload(s3key, new MemoryStream(bufferStream.ToArray()));
                            }
                            data.Clear();
                            result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
                            parquetIndex++;
                        }
                    }
                    {
                        // write the remaining rows
                        // NOTE(review): this runs unconditionally, so a file is written even when
                        // 'data' is empty (empty input or an exact multiple of the chunk size) -
                        // confirm that is intended
                        var s3key = etlSettings.MakeTargetS3Key(dateKey, filename, keepOriginalName, parquetIndex);
                        using (var bufferStream = new MemoryStream())
                        {
                            bufferStream.WriteParquet(etlSettings.Mappings.Select(m => m.ToParquetField()).ToList(), data);
                            await targetS3.Upload(s3key, new MemoryStream(bufferStream.ToArray()));
                        }
                        data.Clear();
                        result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
                        parquetIndex++;
                    }
                    {
                        // load partition to athena table
                        await awsAthenaAPI.LoadPartition(
                            $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`",
                            $"`{etlSettings.DatePartitionKey}` = '{dateKey}'",
                            $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}/{dateKey}/");
                    }
                    {
                        // upload the flag file (content "OK") as the last step
                        var s3key = etlSettings.TargetFlagFile(filename);
                        await targetS3.Upload(s3key, new MemoryStream(Encoding.UTF8.GetBytes("OK")));

                        result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
                    }
                }
            }
            return(result);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Loads one Athena partition for every path listed directly under the target S3 prefix.
        /// </summary>
        /// <param name="etlSettings">ETL settings providing the target bucket, prefix, database, table and partition key.</param>
        /// <param name="awsAthenaAPI">Athena API on which the partitions are loaded.</param>
        /// <returns>The <c>s3://</c> location of every partition that was loaded, in listing order.</returns>
        public static async Task <List <string> > LoadAllPartitions(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI)
        {
            var loadedLocations = new List <string>();
            var s3Api           = etlSettings.CreateTargetS3API();

            foreach (var listedPath in await s3Api.ListPaths($"{etlSettings.TargetS3Prefix}/", "/"))
            {
                // the partition value is the listed path with every slash removed
                var partitionValue = listedPath.Replace("/", "");
                var location       = $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}/{partitionValue}/";

                await awsAthenaAPI.LoadPartition(
                    $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`",
                    $"`{etlSettings.DatePartitionKey}` = '{partitionValue}'",
                    location);

                loadedLocations.Add(location);
            }

            return(loadedLocations);
        }
 /// <summary>
 /// Drops, one after another, every table listed in the context's DroppingTables setting.
 /// </summary>
 /// <param name="context">State machine context whose settings name the tables to drop.</param>
 /// <param name="athenaApi">Athena API on which the tables are dropped.</param>
 public static async Task DropAthenaTables(this StateMachineQueryContext context, AWSAthenaAPI athenaApi)
 {
     var tablesToDrop = context.settings.DroppingTables;

     foreach (var tableName in tablesToDrop)
     {
         // sequential on purpose: each drop is awaited before the next one starts
         await DropAthenaTable(athenaApi, tableName);
     }
 }
 /// <summary>
 /// Creates the table setup helper and stores the APIs it works with.
 /// </summary>
 /// <param name="awsS3API">S3 API kept in the awsS3API field.</param>
 /// <param name="awsAthenaAPI">Athena API kept in the awsAthenaAPI field.</param>
 public AthenaTableSetup(AWSS3API awsS3API, AWSAthenaAPI awsAthenaAPI)
 {
     this.awsAthenaAPI = awsAthenaAPI;
     this.awsS3API     = awsS3API;
 }
Ejemplo n.º 17
0
        /// <summary>
        /// Validates the Athena names in the settings, creates the database when missing, drops
        /// any existing table of the same name and creates the external parquet table partitioned
        /// by the date partition key.
        /// </summary>
        /// <param name="etlSettings">ETL settings providing database, table, field mappings, partition key and S3 location.</param>
        /// <param name="awsAthenaAPI">Athena API on which the statements are executed.</param>
        /// <returns>Always <c>true</c> once all statements were executed.</returns>
        /// <exception cref="Exception">
        /// Thrown when the database or table name is null or rejected by regexAthena, or when the
        /// settings contain no field mappings.
        /// </exception>
        public static async Task <bool> CreateAthenaTable(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI)
        {
            // validate everything before the first statement is sent

            if (etlSettings.AthenaDatabaseName == null || !regexAthena.IsMatch(etlSettings.AthenaDatabaseName))
            {
                throw new Exception($@"Invalid Athena Database Name '{etlSettings.AthenaDatabaseName}'");
            }
            if (etlSettings.AthenaTableName == null || !regexAthena.IsMatch(etlSettings.AthenaTableName))
            {
                // report the table name that failed validation (the database name was printed here before)
                throw new Exception($@"Invalid Athena Table Name '{etlSettings.AthenaTableName}'");
            }
            if (etlSettings.Mappings == null || etlSettings.Mappings.Count == 0)
            {
                throw new Exception($@"No Fields found for ETL Setting '{etlSettings.Name}'");
            }

            //create athena database if not exists
            await awsAthenaAPI.ExecuteQuery($@"create database if not exists `{etlSettings.AthenaDatabaseName}`");

            // drop the table if it exists
            await awsAthenaAPI.ExecuteQuery($@"drop table if exists `{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`");

            // NOTE(review): DatePartitionKey, TargetS3BucketName and TargetS3Prefix are interpolated
            // without any validation in this method - confirm they are validated elsewhere
            var query = $@"CREATE EXTERNAL TABLE IF NOT EXISTS `{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`(
{etlSettings.MapAthenaFields()}
)
PARTITIONED BY (
    `{etlSettings.DatePartitionKey}` string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
WITH SERDEPROPERTIES (
  'serialization.format' = '1'
)
LOCATION 's3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}/'
";

            await awsAthenaAPI.ExecuteQuery(query);

            return(true);
        }
        /// <summary>
        /// Parses the context's raw pipes with a freshly built parser setting and clears every
        /// table listed in that setting's Clearings.
        /// </summary>
        /// <param name="context">State machine context providing the raw pipes text and the parser setting.</param>
        /// <param name="athenaApi">Athena API on which the tables are dropped.</param>
        /// <param name="awsS3Api">S3 API used to delete the tables' files.</param>
        public static async Task ClearAthenaTables(this StateMachineQueryContext context, AWSAthenaAPI athenaApi, AWSS3API awsS3Api)
        {
            var setting = context.BuildParserSetting();

            // The parse result itself is not used here; the call is kept because it runs before
            // Clearings is read (presumably parsing is what fills Clearings - confirm).
            context.raw.ParseAthenaPipes(setting);

            foreach (var entry in setting.Clearings)
            {
                // Key and Value are passed as ClearAthenaTable's tableName and s3Path
                await athenaApi.ClearAthenaTable(awsS3Api, entry.Key, entry.Value);
            }
        }
 /// <summary>
 /// Loads every partition listed in the context's settings into the default table.
 /// </summary>
 /// <param name="context">State machine context whose settings provide the partitions and the default table name.</param>
 /// <param name="athenaApi">Athena API on which the partitions are loaded.</param>
 public static async Task LoadPartitions(this StateMachineQueryContext context, AWSAthenaAPI athenaApi)
 {
     foreach (var partition in context.settings.Partitions)
     {
         // Key is passed as the partition specification, Value as its location
         var partitionKey = partition.Key;
         var location     = partition.Value;

         await LoadAthenaParition(athenaApi, context.settings.DefaultTableName, partitionKey, location);
     }
 }
Ejemplo n.º 20
0
        /// <summary>
        /// Runs one ETL transfer for the given settings, dispatching on etlSettings.SourceType:
        /// SFTP (transfers a single not-yet-flagged file), S3BucketEvent (transfers the object in
        /// S3EventSource.ExamplePath), GoogleAnalytics, AmazonAthena and AmazonAthenaPipes.
        /// S3BucketCheck is currently a no-op.
        /// </summary>
        /// <param name="etlSettings">ETL settings describing the source and the target.</param>
        /// <param name="awsAthenaAPI">Target Athena API handed to the individual transfer methods.</param>
        /// <param name="logger">Optional logger; in this method it only receives the "ETL Mode" line.</param>
        /// <param name="useDate">Optional date passed on to the GoogleAnalytics, AmazonAthena and AmazonAthenaPipes transfers.</param>
        /// <returns>
        /// The list returned by the selected transfer; an empty list when nothing was transferred
        /// and for the S3BucketCheck and AmazonAthenaPipes modes.
        /// </returns>
        public static async Task <List <string> > TransferData(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI, GenericLogger logger = null, DateTime?useDate = null)
        {
            var result = new List <string>();

            logger?.Log?.Invoke($"ETL Mode: {etlSettings.SourceType}");

            switch (etlSettings.SourceType)
            {
            case EtlSourceEnum.SFTP:
            {
                var sftp      = etlSettings.SFTPSource;
                var nameRegex = new Regex(sftp.PathRegex);
                var dateRegex = new Regex(sftp.DateKeyRegex);
                using (var sftpClient = new SftpClient(sftp.Host, sftp.Username, sftp.Password))
                {
                    sftpClient.Connect();
                    var files = sftpClient.ListDirectory(sftp.BasePath);
                    // keep files matching both regexes, ordered by name descending
                    files = files
                            .Where(f => nameRegex.IsMatch(f.FullName) && dateRegex.IsMatch(f.Name))
                            .OrderByDescending(f => f.Name)
                            .ToList();
                    // find the first file (in that order) whose flag file does not exist in the target yet
                    var      targetS3 = etlSettings.CreateTargetS3API();
                    SftpFile first    = null;
                    foreach (var file in files)
                    {
                        Console.WriteLine($"Check File: {file.FullName}");
                        var s3Key = etlSettings.TargetFlagFile(file.Name);
                        if (!await targetS3.FileExists(s3Key))
                        {
                            first = file;
                            break;
                        }
                    }
                    // transfer that file; only one file is handled per invocation
                    if (first != null)
                    {
                        Console.WriteLine($"Transfer File: {first.FullName}");
                        var dateKey = first.Name.MakeRegexExtraction(dateRegex);
                        using (var sftpStream = sftpClient.OpenRead(first.FullName))
                        {
                            result = await etlSettings.TransferCsvStream(awsAthenaAPI, sftpStream, dateKey, first.Name, false);
                        }
                    }
                    sftpClient.Disconnect();
                }
            }
            break;

            case EtlSourceEnum.S3BucketCheck:
            {
                // intentionally empty: nothing is transferred for this source type
            }
            break;

            case EtlSourceEnum.S3BucketEvent:
            {
                var sourceAwsS3Api = new AWSS3API(new AWSS3Options()
                    {
                        Key    = etlSettings.S3EventSource.Key,
                        Secret = etlSettings.S3EventSource.Secret,
                        Bucket = etlSettings.S3EventSource.BucketName,
                        Region = etlSettings.S3EventSource.Region
                    });
                var s3Event   = etlSettings.S3EventSource;
                var nameRegex = new Regex(s3Event.PathRegex);
                var keyRegex  = new Regex(s3Event.FileNameRegex);
                // do nothing if it does not match the path pattern or the file name pattern
                if (!nameRegex.IsMatch(s3Event.ExamplePath) || (!keyRegex.IsMatch(s3Event.ExamplePath)))
                {
                    return(result);
                }

                // generate dateKey: the current UTC date unless it must be extracted from the path
                var dateKey = DateTime.UtcNow.ToString("yyyyMMdd");

                Regex dateRegex = null;
                if (!s3Event.UseEventDateAsDateKey)
                {
                    dateRegex = new Regex(s3Event.DateKeyRegex);
                    // do nothing when the path carries no date key
                    if (!dateRegex.IsMatch(s3Event.ExamplePath))
                    {
                        return(result);
                    }
                    dateKey = s3Event.ExamplePath.MakeRegexExtraction(dateRegex);
                }

                // generate file name

                var filename = s3Event.ExamplePath.MakeRegexExtraction(keyRegex);

                // existing output is overwritten by default, so the date key has to be known
                // before the target prefix can be built
                var prefixUpToDate = etlSettings.MakeTargetS3Prefix(dateKey, filename, true);

                // check files that should be deleted
                var targetAwsS3Api = etlSettings.CreateTargetS3API();
                var oldObjects     = await targetAwsS3Api.ListAllObjectsInBucket(prefix : prefixUpToDate);

                // delete the files under that prefix, one at a time
                foreach (var oldObj in oldObjects)
                {
                    await targetAwsS3Api.Delete(oldObj.Key);
                }

                // open file stream and transfer data
                using (var awsS3Stream = await sourceAwsS3Api.OpenReadAsync(s3Event.ExamplePath))
                {
                    result = await etlSettings.TransferCsvStream(awsAthenaAPI, awsS3Stream, dateKey, filename, true);
                }
            }
            break;

            case EtlSourceEnum.GoogleAnalytics:
            {
                result = await etlSettings.TransferBigQueryResultByDate(awsAthenaAPI, useDate);
            }
            break;

            case EtlSourceEnum.AmazonAthena:
            {
                result = await etlSettings.TransferAthenaQueryResultByDate(awsAthenaAPI, useDate);
            }
            break;

            case EtlSourceEnum.AmazonAthenaPipes:
            {
                // the pipes run reports nothing back here, so 'result' stays empty
                await etlSettings.RunAthenaQueryPipes(useDate);
            }
            break;
            }
            return(result);
        }
Ejemplo n.º 21
0
 /// <summary>
 /// Creates the logic object and stores the APIs it works with.
 /// </summary>
 /// <param name="awsAthenaAPI">Athena API kept in the awsAthenaAPI field.</param>
 /// <param name="awsBatchAPI">Batch API kept in the awsBatchAPI field.</param>
 public AsyncLogic(AWSAthenaAPI awsAthenaAPI, AWSBatchAPI awsBatchAPI)
 {
     this.awsBatchAPI  = awsBatchAPI;
     this.awsAthenaAPI = awsAthenaAPI;
 }
 /// <summary>
 /// Clears every table listed in the context's Clearings setting and then empties that list.
 /// </summary>
 /// <param name="context">State machine context whose settings hold the pending clearings.</param>
 /// <param name="athenaApi">Athena API on which the tables are dropped.</param>
 /// <param name="awsS3Api">S3 API used to delete the tables' files.</param>
 public static async Task ClearTempTables(this StateMachineQueryContext context, AWSAthenaAPI athenaApi, AWSS3API awsS3Api)
 {
     foreach (var entry in context.settings.Clearings)
     {
         // Key and Value are passed as ClearAthenaTable's tableName and s3Path
         await athenaApi.ClearAthenaTable(awsS3Api, entry.Key, entry.Value);
     }

     // only reached when every clearing succeeded; the list is then emptied
     context.settings.Clearings.Clear();
 }