/// <summary>
/// Executes <paramref name="sql"/> through Athena, reads the result via
/// <c>ReadOneResult</c>, and packages it as sample data: the field mappings are
/// taken from the response and every cell is converted with <c>ToString()</c>.
/// </summary>
/// <param name="athenaApi">Athena client used to execute the query and read its result.</param>
/// <param name="sql">SQL text handed unchanged to <c>ExecuteQuery</c>.</param>
/// <returns>
/// A <see cref="DataSampleWithSchema"/> holding the response's field mappings and
/// one <see cref="DataRow"/> per row returned by <c>ReadData()</c>.
/// </returns>
public static async Task<DataSampleWithSchema> GetSampleDataBySQL(this AWSAthenaAPI athenaApi, string sql)
{
    var getResultRequest = await athenaApi.ExecuteQuery(sql);
    var response = await athenaApi.ReadOneResult(getResultRequest);

    // Same call order as before: data first, then the schema, then the row walk.
    var data = response.ReadData();
    var fieldMappings = response.ToFieldMapping();

    var sample = new DataSample { Rows = new List<DataRow>() };
    foreach (var row in data)
    {
        // NOTE(review): item.ToString() throws if a cell is null — confirm whether
        // ReadData() can yield null items.
        sample.Rows.Add(new DataRow
        {
            Items = row.Select(item => item.ToString()).ToList(),
        });
    }

    // The mappings come straight from the response, so no placeholder list is
    // allocated up front any more.
    return new DataSampleWithSchema
    {
        FieldMappings = fieldMappings,
        DataSample = sample,
    };
}
/// <summary>
/// Builds the SQL for <paramref name="query"/>, executes it through Athena, and
/// returns whatever <c>GetFlatResult</c> produces for that execution.
/// </summary>
/// <param name="athena">Athena client used to run the query and fetch its result.</param>
/// <param name="query">Query definition; its SQL text comes from <c>BuildQuerySQL()</c>.</param>
/// <returns>The flat result obtained for the executed query.</returns>
public static async Task<AthenaQueryFlatResult> GetQueryData(this AWSAthenaAPI athena, FormatedQuery query)
{
    var pendingResult = await athena.ExecuteQuery(query.BuildQuerySQL());
    var flatResult = await athena.GetFlatResult(pendingResult);
    return flatResult;
}
/// <summary>
/// Recreates the <c>productsdb.products</c> Athena table: issues a
/// <c>DROP TABLE IF EXISTS</c> and then a <c>CREATE EXTERNAL TABLE IF NOT EXISTS</c>
/// whose columns are named after the <c>ProductItem</c> members and whose location
/// is the <c>products/</c> prefix of the configured S3 bucket.
/// </summary>
public async Task CreateProductsTable()
{
    const string productsTableName = "`productsdb`.`products`";

    // Drop first so the CREATE below always applies the current column list.
    await awsAthenaAPI.ExecuteQuery($@"DROP TABLE IF EXISTS {productsTableName}");

    // Column names are derived with nameof so they track renames of ProductItem members.
    var createTableSql = $@"CREATE EXTERNAL TABLE IF NOT EXISTS {productsTableName}( `{nameof(ProductItem.ItemKey)}` STRING, `{nameof(ProductItem.ItemName)}` STRING, `{nameof(ProductItem.Description)}` STRING, `{nameof(ProductItem.UnitPrice)}` DOUBLE, `{nameof(ProductItem.InventoryQuantity)}` DOUBLE ) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' WITH SERDEPROPERTIES ( 'serialization.format' = '1' ) LOCATION 's3://{awsS3API.Options.Bucket}/products/'";
    await awsAthenaAPI.ExecuteQuery(createTableSql);
}
/// <summary>
/// Validates the Athena identifiers on <paramref name="etlSettings"/>, creates the
/// database if it does not exist, drops any existing table of the same name, and
/// creates the external Parquet table partitioned by the settings' date partition key.
/// </summary>
/// <param name="etlSettings">
/// ETL settings supplying the database name, table name, field mappings, partition
/// key and target S3 location.
/// </param>
/// <param name="awsAthenaAPI">Athena client used to execute the DDL statements.</param>
/// <returns><c>true</c> once all three statements have been issued.</returns>
/// <exception cref="Exception">
/// Thrown when the database or table name is null or does not match
/// <c>regexAthena</c>, or when the settings contain no field mappings.
/// </exception>
public static async Task<bool> CreateAthenaTable(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI)
{
    // Both names are interpolated into DDL below, so reject anything that does not
    // match the allowed identifier pattern before building any statement.
    if (etlSettings.AthenaDatabaseName == null || !regexAthena.IsMatch(etlSettings.AthenaDatabaseName))
    {
        throw new Exception($@"Invalid Athena Database Name '{etlSettings.AthenaDatabaseName}'");
    }

    if (etlSettings.AthenaTableName == null || !regexAthena.IsMatch(etlSettings.AthenaTableName))
    {
        // Report the offending table name (this message previously echoed the database name).
        throw new Exception($@"Invalid Athena Table Name '{etlSettings.AthenaTableName}'");
    }

    if (etlSettings.Mappings == null || etlSettings.Mappings.Count == 0)
    {
        throw new Exception($@"No Fields found for ETL Setting '{etlSettings.Name}'");
    }

    // Create the Athena database if it does not exist.
    await awsAthenaAPI.ExecuteQuery($@"create database if not exists `{etlSettings.AthenaDatabaseName}`");

    // Drop the table if it exists so the definition below replaces it.
    await awsAthenaAPI.ExecuteQuery($@"drop table if exists `{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`");

    // NOTE(review): DatePartitionKey, TargetS3BucketName and TargetS3Prefix are
    // interpolated here without any validation in this method — confirm they are
    // checked upstream.
    var query = $@"CREATE EXTERNAL TABLE IF NOT EXISTS `{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`( {etlSettings.MapAthenaFields()} ) PARTITIONED BY ( `{etlSettings.DatePartitionKey}` string ) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' WITH SERDEPROPERTIES ( 'serialization.format' = '1' ) LOCATION 's3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}/' ";
    await awsAthenaAPI.ExecuteQuery(query);

    return true;
}