/// <summary>
/// Returns the result set metadata, fetching it from <c>_resultSet</c> when <c>_metadata</c>
/// is still null and storing the fetched value in <c>_metadata</c> for later calls.
/// </summary>
/// <param name="cancellationToken">Token passed through to <c>GetMetadataAsync</c>.</param>
/// <returns>
/// The task returned by <c>ExecuteHelper.WithErrorTranslationAndProfiling</c>, which runs the
/// lookup under the name "SpannerDataReader.GetMetadata".
/// </returns>
internal Task<ResultSetMetadata> PopulateMetadataAsync(CancellationToken cancellationToken)
{
    return ExecuteHelper.WithErrorTranslationAndProfiling(
        async () =>
        {
            // Read the field once so the value that was checked is the value returned.
            ResultSetMetadata cached = _metadata;
            if (cached != null)
            {
                return cached;
            }

            ResultSetMetadata fetched = await _resultSet
                .GetMetadataAsync(cancellationToken)
                .ConfigureAwait(false);
            _metadata = fetched;
            return fetched;
        },
        "SpannerDataReader.GetMetadata");
}
/// <summary>
/// Writes the given Athena rows to <paramref name="stream"/> as Parquet, using Snappy
/// compression and a single row group.
/// </summary>
/// <param name="stream">Destination stream handed to the <c>ParquetWriter</c>.</param>
/// <param name="tableSchema">
/// Result set metadata; one Parquet column is built for each entry of its <c>ColumnInfo</c>, in order.
/// </param>
/// <param name="mappings">Field mappings forwarded to <c>ToParquetColumn</c> for every column.</param>
/// <param name="rows">
/// Rows forwarded to <c>ToParquetColumn</c> for every column. A sequence that is not already a
/// collection is buffered once up front so it is not re-enumerated per column.
/// </param>
public static void WriteAthenaRowsAsParquet(this Stream stream, ResultSetMetadata tableSchema, List<FieldMapping> mappings, IEnumerable<Row> rows)
{
    // The same sequence is handed to every column conversion; buffering a lazy source here
    // avoids running it once per column. Null and already-materialized inputs pass through as-is.
    IEnumerable<Row> bufferedRows = rows;
    if (rows != null && !(rows is ICollection<Row>))
    {
        bufferedRows = rows.ToList();
    }

    List<DataColumn> columns = new List<DataColumn>();
    int index = 0;
    foreach (var column in tableSchema.ColumnInfo)
    {
        columns.Add(column.ToParquetColumn(mappings, index, bufferedRows));
        index++;
    }

    Schema schema = new Schema(new ReadOnlyCollection<Field>(columns.Select(column => column.Field).ToArray()));

    using (ParquetWriter writer = new ParquetWriter(schema, stream))
    {
        writer.CompressionMethod = CompressionMethod.Snappy;

        using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
        {
            foreach (var column in columns)
            {
                rowGroupWriter.WriteColumn(column);
            }
        }
    }
}
/// <summary>
/// Serializes <paramref name="rows"/> to Parquet in memory, uploads the bytes under
/// <paramref name="s3Key"/>, and then empties <paramref name="rows"/>.
/// </summary>
/// <param name="awsS3Api">S3 API whose <c>Upload</c> method receives the Parquet bytes.</param>
/// <param name="rows">Rows to write; the list is cleared once the upload has completed.</param>
/// <param name="metadata">Result set metadata passed to <c>WriteAthenaRowsAsParquet</c>.</param>
/// <param name="etlSettings">Settings whose <c>Mappings</c> are passed to <c>WriteAthenaRowsAsParquet</c>.</param>
/// <param name="s3Key">Key passed to <c>Upload</c>.</param>
public static async Task WriteResultRowsToS3Bucket(this AWSS3API awsS3Api, List<Row> rows, ResultSetMetadata metadata, EtlSettings etlSettings, string s3Key)
{
    byte[] parquetBytes;
    using (var parquetBuffer = new MemoryStream())
    {
        parquetBuffer.WriteAthenaRowsAsParquet(metadata, etlSettings.Mappings, rows);

        // Snapshot the written bytes; the upload reads from a fresh stream over this copy.
        parquetBytes = parquetBuffer.ToArray();
    }

    using (var uploadStream = new MemoryStream(parquetBytes))
    {
        await awsS3Api.Upload(s3Key, uploadStream);
    }

    // Callers reuse the same list for the next batch, so hand it back empty.
    rows.Clear();
}
/// <summary>
/// Runs the ETL's Athena source query for one date, writes the returned rows to the target
/// S3 bucket as Parquet files, and then loads the date partition into the target Athena table.
/// </summary>
/// <param name="etlSettings">
/// ETL settings supplying the Athena query source, the source Athena API, the target S3 API,
/// the rows-per-file limit and the target table/partition names.
/// </param>
/// <param name="awsAthenaAPI">Athena API used for the final <c>LoadPartition</c> call.</param>
/// <returns>The <c>s3://</c> URIs of the Parquet files written, in the order they were uploaded.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>etlSettings.AthenaQuerySource</c> is null.
/// </exception>
public static async Task<List<string>> TransferAthenaQueryResultByDate(this EtlSettings etlSettings, AWSAthenaAPI awsAthenaAPI)
{
    var result = new List<string>();

    var athena = etlSettings.AthenaQuerySource;
    if (athena == null)
    {
        throw new InvalidOperationException("The ETL has an empty Athena source setting.");
    }

    var athenaApi = etlSettings.CreateSourceAthenaAPI();

    // The query targets the day that lies DaysAgo days before the current local time.
    var today = DateTime.Now;
    var date = today.AddDays(-athena.DaysAgo);
    var query = athena.AthenaSQL.Replace("{date}", date.ToString(athena.DateFormat));
    var dateKey = date.ToString("yyyyMMdd");

    var getResultRequest = await athenaApi.ExecuteQuery(query);

    // The callback captures the result set metadata as result pages are read during enumeration.
    ResultSetMetadata resultSetMetadata = null;
    var sourceRows = athenaApi.EnumerateRows(getResultRequest, res => resultSetMetadata = res.ResultSet.ResultSetMetadata);

    List<Row> rows = new List<Row>();
    int parquetIndex = 0;
    var targetS3 = etlSettings.CreateTargetS3API();

    // foreach disposes the underlying enumerator even when an upload throws part-way through.
    bool firstRowSkipped = false;
    foreach (var row in sourceRows)
    {
        // skip first row
        if (!firstRowSkipped)
        {
            firstRowSkipped = true;
            continue;
        }

        rows.Add(row);

        if (rows.Count >= etlSettings.NumberOfItemsPerParquet)
        {
            var s3key = etlSettings.MakeTargetS3Key(dateKey, "", false, parquetIndex);
            // WriteResultRowsToS3Bucket is awaited before the next row is added to the same list.
            await targetS3.WriteResultRowsToS3Bucket(rows, resultSetMetadata, etlSettings, s3key);
            result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
            parquetIndex += 1;
        }
    }

    // Write whatever is left over (fewer rows than NumberOfItemsPerParquet).
    if (rows.Count > 0)
    {
        var s3key = etlSettings.MakeTargetS3Key(dateKey, "", false, parquetIndex);
        await targetS3.WriteResultRowsToS3Bucket(rows, resultSetMetadata, etlSettings, s3key);
        result.Add($"s3://{etlSettings.TargetS3BucketName}/{s3key}");
    }

    // load partition to athena table
    await awsAthenaAPI.LoadPartition(
        $"`{etlSettings.AthenaDatabaseName}`.`{etlSettings.AthenaTableName}`",
        $"`{etlSettings.DatePartitionKey}` = '{dateKey}'",
        $"s3://{etlSettings.TargetS3BucketName}/{etlSettings.TargetS3Prefix}/{dateKey}/");

    return result;
}
/// <summary>
/// Builds a <c>ReliableStreamReader</c> over the given partial result sets.
/// </summary>
/// <param name="results">
/// Partial result sets to stream; the first element's <c>Metadata</c> is overwritten.
/// </param>
/// <param name="metadata">
/// Metadata to assign to the first element; when null, the value returned by
/// <c>CreateSingleBytesFieldMetadata</c> is used instead.
/// </param>
/// <returns>A reader wrapping the results and using <c>Logger.DefaultLogger</c>.</returns>
private static ReliableStreamReader CreateReader(PartialResultSet[] results, ResultSetMetadata metadata = null)
{
    // Access the first element before resolving the metadata, matching the original evaluation order.
    var firstResult = results[0];
    firstResult.Metadata = metadata ?? CreateSingleBytesFieldMetadata();

    var asyncEnumerator = results.ToAsyncEnumerable().GetAsyncEnumerator();
    var adapter = new AsyncStreamAdapter<PartialResultSet>(asyncEnumerator);
    return new ReliableStreamReader(adapter, Logger.DefaultLogger);
}
/// <summary>
/// Builds a <c>SpannerDataReader</c> over the given partial result sets.
/// </summary>
/// <param name="results">Partial result sets forwarded to <c>CreateReader</c>.</param>
/// <param name="metadata">Optional metadata forwarded to <c>CreateReader</c>.</param>
/// <returns>
/// A reader constructed with <c>Logger.DefaultLogger</c>, the stream reader from
/// <c>CreateReader</c>, a timestamp taken from the current UTC time, and the fixed remaining
/// arguments <c>null</c>, <c>SpannerConversionOptions.Default</c>, <c>false</c> and <c>120</c>.
/// </returns>
private static SpannerDataReader CreateSpannerDataReader(PartialResultSet[] results, ResultSetMetadata metadata = null)
{
    var logger = Logger.DefaultLogger;
    var streamReader = CreateReader(results, metadata);
    var timestamp = Timestamp.FromDateTime(DateTime.UtcNow);

    return new SpannerDataReader(
        logger,
        streamReader,
        timestamp,
        null,
        SpannerConversionOptions.Default,
        false,
        120);
}