/// <summary>
        /// Executes a SQL statement through Athena, reads one result for it, and returns
        /// the rows (every cell converted with <c>ToString()</c>) together with the
        /// field mappings derived from that result.
        /// </summary>
        /// <param name="athenaApi">Athena API wrapper used to execute the query and read its result.</param>
        /// <param name="sql">SQL text handed to <c>ExecuteQuery</c> as-is.</param>
        /// <returns>
        /// A <see cref="DataSampleWithSchema"/> whose <c>FieldMappings</c> come from the result's
        /// <c>ToFieldMapping()</c> and whose <c>DataSample.Rows</c> holds one entry per row of <c>ReadData()</c>.
        /// </returns>
        public static async Task <DataSampleWithSchema> GetSampleDataBySQL(this AWSAthenaAPI athenaApi, string sql)
        {
            var sampleWithSchema = new DataSampleWithSchema { FieldMappings = new List<FieldMapping>() };
            var rowsHolder = new DataSample { Rows = new List<DataRow>() };
            sampleWithSchema.DataSample = rowsHolder;

            // Submit the query first, then read one result for the handle it returned.
            var queryHandle = await athenaApi.ExecuteQuery(sql);
            var queryResult = await athenaApi.ReadOneResult(queryHandle);

            var records = queryResult.ReadData();
            sampleWithSchema.FieldMappings = queryResult.ToFieldMapping();

            // Each record becomes one DataRow; cells are stored as their string form.
            foreach (var record in records)
            {
                rowsHolder.Rows.Add(new DataRow { Items = record.Select(cell => cell.ToString()).ToList() });
            }

            return sampleWithSchema;
        }
Example #2
0
        /// <summary>
        /// Builds the SQL for <paramref name="query"/>, executes it through Athena, and
        /// returns the flat result obtained for that execution.
        /// </summary>
        /// <param name="athena">Athena API wrapper used to execute the query and fetch its result.</param>
        /// <param name="query">Query definition; its SQL text is produced by <c>BuildQuerySQL()</c>.</param>
        /// <returns>The value produced by <c>GetFlatResult</c> for the executed query.</returns>
        public static async Task <AthenaQueryFlatResult> GetQueryData(this AWSAthenaAPI athena, FormatedQuery query)
        {
            var pendingQuery = await athena.ExecuteQuery(query.BuildQuerySQL());
            var flatResult = await athena.GetFlatResult(pendingQuery);

            return flatResult;
        }
        /// <summary>
        /// Recreates the <c>`productsdb`.`products`</c> Athena table: issues a
        /// <c>DROP TABLE IF EXISTS</c> followed by a <c>CREATE EXTERNAL TABLE</c> whose columns
        /// are named after the <c>ProductItem</c> properties and whose location is the
        /// <c>products/</c> prefix of the configured S3 bucket.
        /// </summary>
        /// <returns>A task that completes once both statements have been passed to <c>ExecuteQuery</c> and awaited.</returns>
        public async Task CreateProductsTable()
        {
            const string tableName = "`productsdb`.`products`";

            // Drop first so the CREATE below is not skipped by its IF NOT EXISTS clause.
            await awsAthenaAPI.ExecuteQuery($@"DROP TABLE IF EXISTS {tableName}");

            var createTableSql = $@"CREATE EXTERNAL TABLE IF NOT EXISTS {tableName}(
`{nameof(ProductItem.ItemKey)}` STRING,
`{nameof(ProductItem.ItemName)}` STRING,
`{nameof(ProductItem.Description)}` STRING,
`{nameof(ProductItem.UnitPrice)}` DOUBLE,
`{nameof(ProductItem.InventoryQuantity)}` DOUBLE
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
WITH SERDEPROPERTIES (
  'serialization.format' = '1'
)
LOCATION 's3://{awsS3API.Options.Bucket}/products/'";
            await awsAthenaAPI.ExecuteQuery(createTableSql);
        }
Example #4
0
        /// <summary>
        /// Creates (or recreates) the Athena table described by an ETL setting.
        /// </summary>
        /// <remarks>
        /// Steps, in order: validate the database and table names against <c>regexAthena</c> and
        /// require at least one field mapping; create the database if it does not exist; drop the
        /// table if it exists; create an external table using the Parquet SerDe, partitioned by
        /// <c>DatePartitionKey</c> (string), located at
        /// <c>s3://{TargetS3BucketName}/{TargetS3Prefix}/</c>.
        /// </remarks>
        /// <param name="etlSettings">ETL setting supplying the names, field mappings, partition key and S3 target.</param>
        /// <param name="awsAthenaAPI">Athena API wrapper used to run each statement.</param>
        /// <returns>Always <c>true</c> when the method completes.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="etlSettings"/> or <paramref name="awsAthenaAPI"/> is null.
        /// </exception>
        /// <exception cref="Exception">
        /// The database name or table name is null or rejected by <c>regexAthena</c>, or the setting has no mappings.
        /// </exception>
        public static async Task <bool> CreateAthenaTable(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI)
        {
            // Fail with a descriptive error instead of a NullReferenceException further down.
            if (etlSettings == null)
            {
                throw new ArgumentNullException(nameof(etlSettings));
            }
            if (awsAthenaAPI == null)
            {
                throw new ArgumentNullException(nameof(awsAthenaAPI));
            }

            // Both names are interpolated into SQL below, so they are checked before any statement runs.
            if (etlSettings.AthenaDatabaseName == null || !regexAthena.IsMatch(etlSettings.AthenaDatabaseName))
            {
                throw new Exception($@"Invalid Athena Database Name '{etlSettings.AthenaDatabaseName}'");
            }
            if (etlSettings.AthenaTableName == null || !regexAthena.IsMatch(etlSettings.AthenaTableName))
            {
                // Report the table name itself (the message previously echoed the database name).
                throw new Exception($@"Invalid Athena Table Name '{etlSettings.AthenaTableName}'");
            }
            if (etlSettings.Mappings == null || etlSettings.Mappings.Count == 0)
            {
                throw new Exception($@"No Fields found for ETL Setting '{etlSettings.Name}'");
            }

            // create athena database if not exists
            await awsAthenaAPI.ExecuteQuery($@"create database if not exists `{etlSettings.AthenaDatabaseName}`");

            // drop the table if it exists
            await awsAthenaAPI.ExecuteQuery($@"drop table if exists `{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`");

            var query = $@"CREATE EXTERNAL TABLE IF NOT EXISTS `{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`(
{etlSettings.MapAthenaFields()}
)
PARTITIONED BY (
    `{etlSettings.DatePartitionKey}` string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
WITH SERDEPROPERTIES (
  'serialization.format' = '1'
)
LOCATION 's3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}/'
";

            await awsAthenaAPI.ExecuteQuery(query);

            return(true);
        }