Esempio n. 1
0
        /// <summary>
        /// Creates the Athena database (if missing) and then drops and re-creates the
        /// external Parquet table described by <paramref name="etlSettings"/>.
        /// </summary>
        /// <param name="etlSettings">
        /// ETL settings supplying the Athena database/table names, the field mappings,
        /// the date partition key and the target S3 bucket/prefix used as table location.
        /// </param>
        /// <param name="awsAthenaAPI">Athena client used to execute the DDL statements.</param>
        /// <returns><c>true</c> once all three statements have been executed.</returns>
        /// <exception cref="Exception">
        /// Thrown when the database name or table name is null or does not match
        /// <c>regexAthena</c>, or when the settings contain no field mappings.
        /// </exception>
        public static async Task<bool> CreateAthenaTable(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI)
        {
            // Validate everything up front so no DDL is issued for a half-configured setting.
            // The names are interpolated directly into the DDL below, so the regex check
            // is also what keeps the generated statements well-formed.
            if (etlSettings.AthenaDatabaseName == null || !regexAthena.IsMatch(etlSettings.AthenaDatabaseName))
            {
                throw new Exception($@"Invalid Athena Database Name '{etlSettings.AthenaDatabaseName}'");
            }

            if (etlSettings.AthenaTableName == null || !regexAthena.IsMatch(etlSettings.AthenaTableName))
            {
                // Report the offending table name (previously the database name was printed here).
                throw new Exception($@"Invalid Athena Table Name '{etlSettings.AthenaTableName}'");
            }

            if (etlSettings.Mappings == null || etlSettings.Mappings.Count == 0)
            {
                throw new Exception($@"No Fields found for ETL Setting '{etlSettings.Name}'");
            }

            // Create the Athena database if it does not exist yet.
            await awsAthenaAPI.ExecuteQuery($@"create database if not exists `{etlSettings.AthenaDatabaseName}`");

            // Drop the table if it exists so the definition below always reflects the current mappings.
            await awsAthenaAPI.ExecuteQuery($@"drop table if exists `{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`");

            // External Parquet table, partitioned by the date partition key (stored as string),
            // located under the target bucket/prefix.
            var query = $@"CREATE EXTERNAL TABLE IF NOT EXISTS `{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`(
{etlSettings.MapAthenaFields()}
)
PARTITIONED BY (
    `{etlSettings.DatePartitionKey}` string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
WITH SERDEPROPERTIES (
  'serialization.format' = '1'
)
LOCATION 's3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}/'
";

            await awsAthenaAPI.ExecuteQuery(query);

            return true;
        }