Exemplo n.º 1
0
 /// <summary>
 /// Returns the result set metadata, asking the underlying result set for it only when
 /// no value has been cached in <c>_metadata</c> yet and storing the answer for later calls.
 /// The work is run through <c>ExecuteHelper.WithErrorTranslationAndProfiling</c> under the
 /// label "SpannerDataReader.GetMetadata".
 /// </summary>
 /// <param name="cancellationToken">Token forwarded to the metadata request.</param>
 /// <returns>A task that produces the (possibly cached) metadata.</returns>
 internal Task<ResultSetMetadata> PopulateMetadataAsync(CancellationToken cancellationToken)
 {
     return ExecuteHelper.WithErrorTranslationAndProfiling(
         async () =>
         {
             // Only hit the result set when nothing has been cached so far.
             if (_metadata == null)
             {
                 _metadata = await _resultSet
                     .GetMetadataAsync(cancellationToken)
                     .ConfigureAwait(false);
             }

             return _metadata;
         },
         "SpannerDataReader.GetMetadata");
 }
Exemplo n.º 2
0
        /// <summary>
        /// Writes <paramref name="rows"/> to <paramref name="stream"/> as Parquet: one column is
        /// built per entry of <paramref name="tableSchema"/>.ColumnInfo and all columns are
        /// written into a single row group using Snappy compression.
        /// </summary>
        /// <param name="stream">Destination stream handed to the Parquet writer.</param>
        /// <param name="tableSchema">Result set metadata whose ColumnInfo entries define the columns.</param>
        /// <param name="mappings">Field mappings forwarded to each column conversion.</param>
        /// <param name="rows">
        /// Rows to convert. A sequence that is not already a collection is copied into a list
        /// once, so it is enumerated a single time no matter how many columns there are.
        /// </param>
        public static void WriteAthenaRowsAsParquet(this Stream stream, ResultSetMetadata tableSchema, List <FieldMapping> mappings, IEnumerable <Row> rows)
        {
            // Every column conversion below receives the row sequence, so a lazily evaluated
            // sequence would presumably be re-run once per column. Snapshot it up front; inputs
            // that are already collections (List, array, ...) and null are passed through as-is.
            IEnumerable<Row> rowSnapshot = rows;
            if (rows != null && !(rows is ICollection<Row>))
            {
                rowSnapshot = rows.ToList();
            }

            List<DataColumn> columns = new List<DataColumn>();

            // The running index tells the conversion which position in the schema it is handling.
            int index = 0;
            foreach (var column in tableSchema.ColumnInfo)
            {
                columns.Add(column.ToParquetColumn(mappings, index, rowSnapshot));
                index++;
            }

            Schema schema = new Schema(new ReadOnlyCollection<Field>(columns.Select(column => column.Field).ToArray()));

            using (ParquetWriter writer = new ParquetWriter(schema, stream))
            {
                writer.CompressionMethod = CompressionMethod.Snappy;
                using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
                {
                    foreach (var column in columns)
                    {
                        rowGroupWriter.WriteColumn(column);
                    }
                }
            }
        }
 /// <summary>
 /// Serializes <paramref name="rows"/> to Parquet in memory, uploads the resulting bytes to
 /// S3 under <paramref name="s3Key"/>, and then empties <paramref name="rows"/>.
 /// </summary>
 /// <param name="awsS3Api">S3 API used for the upload.</param>
 /// <param name="rows">Rows to write; the list is cleared once the upload has completed.</param>
 /// <param name="metadata">Result set metadata describing the columns of the rows.</param>
 /// <param name="etlSettings">ETL settings; its Mappings are passed to the Parquet conversion.</param>
 /// <param name="s3Key">Key the Parquet file is uploaded under.</param>
 public static async Task WriteResultRowsToS3Bucket(this AWSS3API awsS3Api, List <Row> rows, ResultSetMetadata metadata, EtlSettings etlSettings, string s3Key)
 {
     // Build the Parquet payload first and keep only its bytes.
     byte[] parquetBytes;
     using (var parquetBuffer = new MemoryStream())
     {
         parquetBuffer.WriteAthenaRowsAsParquet(metadata, etlSettings.Mappings, rows);
         parquetBytes = parquetBuffer.ToArray();
     }

     // Upload from a fresh stream over those bytes so reading starts at position 0.
     using (var uploadStream = new MemoryStream(parquetBytes))
     {
         await awsS3Api.Upload(s3Key, uploadStream);
     }

     // Callers reuse the same list for the next batch.
     rows.Clear();
 }
        /// <summary>
        /// Runs the ETL's Athena source query for one date (today minus the configured DaysAgo),
        /// writes the returned rows to the target S3 bucket as Parquet files of at most
        /// NumberOfItemsPerParquet rows each, and finally loads the date partition into the
        /// Athena table.
        /// </summary>
        /// <param name="etlSettings">ETL settings providing the source query, target bucket and table names.</param>
        /// <param name="awsAthenaAPI">Athena API used to load the partition after the files are written.</param>
        /// <returns>The s3:// URLs of the Parquet files that were written.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="etlSettings"/> or <paramref name="awsAthenaAPI"/> is null.
        /// </exception>
        /// <exception cref="InvalidOperationException">The settings have no Athena query source.</exception>
        public static async Task <List <string> > TransferAthenaQueryResultByDate(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI)
        {
            if (etlSettings == null)
            {
                throw new ArgumentNullException(nameof(etlSettings));
            }
            // Fail before any query or upload happens rather than after all files are written.
            if (awsAthenaAPI == null)
            {
                throw new ArgumentNullException(nameof(awsAthenaAPI));
            }

            var result = new List<string>();
            var athena = etlSettings.AthenaQuerySource;

            if (athena == null)
            {
                throw new InvalidOperationException("The ETL has an empty Athena source setting.");
            }
            var athenaApi = etlSettings.CreateSourceAthenaAPI();

            var today = DateTime.Now;
            var date  = today.AddDays(-athena.DaysAgo);

            // NOTE(review): the query date is formatted with the current culture, exactly as
            // before; confirm whether athena.DateFormat is meant to be culture-independent.
            var query = athena.AthenaSQL.Replace("{date}", date.ToString(athena.DateFormat));

            // The partition key / S3 prefix must not depend on the machine's culture or calendar.
            var dateKey = date.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture);

            var getResultRequest = await athenaApi.ExecuteQuery(query);

            // Filled in by the callback passed to EnumerateRows below.
            ResultSetMetadata resultSetMetadata = null;

            List<Row> rows = new List<Row>();
            int parquetIndex = 0;
            var targetS3 = etlSettings.CreateTargetS3API();

            // Dispose the enumerator deterministically, including when an upload throws.
            using (var rowEnumerator = athenaApi.EnumerateRows(getResultRequest, res => resultSetMetadata = res.ResultSet.ResultSetMetadata).GetEnumerator())
            {
                // Skip the first row (presumably the column-header row - confirm against the API).
                rowEnumerator.MoveNext();

                while (rowEnumerator.MoveNext())
                {
                    rows.Add(rowEnumerator.Current);
                    if (rows.Count >= etlSettings.NumberOfItemsPerParquet)
                    {
                        var s3key = etlSettings.MakeTargetS3Key(dateKey, "", false, parquetIndex);

                        // WriteResultRowsToS3Bucket clears 'rows' after uploading.
                        await targetS3.WriteResultRowsToS3Bucket(rows, resultSetMetadata, etlSettings, s3key);

                        result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
                        parquetIndex += 1;
                    }
                }
            }

            // Write whatever is left over (fewer rows than NumberOfItemsPerParquet).
            if (rows.Count > 0)
            {
                var s3key = etlSettings.MakeTargetS3Key(dateKey, "", false, parquetIndex);
                await targetS3.WriteResultRowsToS3Bucket(rows, resultSetMetadata, etlSettings, s3key);

                result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
            }

            // Load the partition for this date into the Athena table.
            await awsAthenaAPI.LoadPartition(
                $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`",
                $"`{etlSettings.DatePartitionKey}` = '{dateKey}'",
                $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}/{dateKey}/");

            return result;
        }
 /// <summary>
 /// Builds a <c>ReliableStreamReader</c> over the given partial result sets. The first
 /// element of <paramref name="results"/> is modified in place: its Metadata is set to
 /// <paramref name="metadata"/>, or to <c>CreateSingleBytesFieldMetadata()</c> when none is given.
 /// </summary>
 /// <param name="results">Partial result sets to stream; must contain at least one element.</param>
 /// <param name="metadata">Metadata for the first result, or null to use the default.</param>
 /// <returns>A reader that streams <paramref name="results"/> using the default logger.</returns>
 private static ReliableStreamReader CreateReader(PartialResultSet[] results, ResultSetMetadata metadata = null)
 {
     // Only the first partial result carries the metadata.
     var firstResult = results[0];
     firstResult.Metadata = metadata != null ? metadata : CreateSingleBytesFieldMetadata();

     var asyncResults = results.ToAsyncEnumerable().GetAsyncEnumerator();
     var adapter = new AsyncStreamAdapter<PartialResultSet>(asyncResults);
     return new ReliableStreamReader(adapter, Logger.DefaultLogger);
 }
 /// <summary>
 /// Builds a <c>SpannerDataReader</c> on top of a reader created by <c>CreateReader</c> for
 /// the given partial result sets, using the default logger, the current UTC time as the
 /// timestamp argument and the default conversion options.
 /// </summary>
 /// <param name="results">Partial result sets the reader will stream.</param>
 /// <param name="metadata">Metadata passed through to <c>CreateReader</c>; may be null.</param>
 /// <returns>The configured data reader.</returns>
 private static SpannerDataReader CreateSpannerDataReader(
     PartialResultSet[] results,
     ResultSetMetadata metadata = null)
 {
     var logger = Logger.DefaultLogger;
     var streamReader = CreateReader(results, metadata);
     var timestamp = Timestamp.FromDateTime(DateTime.UtcNow);

     // NOTE(review): the meaning of the trailing null / false / 120 arguments is defined by
     // the SpannerDataReader constructor, which is not visible here.
     return new SpannerDataReader(
         logger, streamReader, timestamp,
         null, SpannerConversionOptions.Default, false, 120);
 }