/// <summary>
/// Creates one <see cref="StripeStreamReader"/> for every stream listed in the stripe footer
/// and stores it in the underlying collection.
/// </summary>
/// <param name="inputStream">Stream handed to every reader that is created.</param>
/// <param name="stripeFooter">Footer supplying the stream list and the per-column encodings.</param>
/// <param name="stripeOffset">Offset assigned to the first stream in the footer.</param>
/// <param name="compressionKind">Compression kind handed to every reader that is created.</param>
/// <exception cref="InvalidOperationException">
/// The footer lists more than one stream with the same column and stream kind.
/// </exception>
internal StripeStreamReaderCollection(Stream inputStream, StripeFooter stripeFooter, long stripeOffset, CompressionKind compressionKind)
{
    // Each stream's offset is the stripe offset plus the lengths of all streams listed before it.
    var nextStreamOffset = stripeOffset;

    foreach (var footerStream in stripeFooter.Streams)
    {
        // A (column, kind) pair may only be registered once.
        var alreadyRegistered = _underlyingCollection.Exists(
            existing => existing.ColumnId == footerStream.Column && existing.StreamKind == footerStream.Kind);
        if (alreadyRegistered)
        {
            throw new InvalidOperationException(
                $"More than one stream matching {nameof(footerStream.Column)} ({footerStream.Column}) and {nameof(footerStream.Kind)} ({footerStream.Kind}) found in {nameof(StripeFooter)}");
        }

        // The stream's column id doubles as the index into the footer's column encodings.
        var columnEncoding = stripeFooter.Columns[(int)footerStream.Column];

        var reader = new StripeStreamReader(
            inputStream,
            footerStream.Column,
            footerStream.Kind,
            columnEncoding.Kind,
            nextStreamOffset,
            footerStream.Length,
            compressionKind);
        _underlyingCollection.Add(reader);

        nextStreamOffset += (long)footerStream.Length;
    }
}
/// <summary>
/// Appends a new <see cref="ColumnEncoding"/> entry to the footer's column list.
/// </summary>
/// <param name="footer">Footer whose <c>Columns</c> list receives the new entry.</param>
/// <param name="columnEncodingKind">Encoding kind stored on the new entry.</param>
/// <param name="dictionarySize">Dictionary size stored on the new entry; defaults to 0.</param>
public static void AddColumn(this StripeFooter footer, ColumnEncodingKind columnEncodingKind, uint dictionarySize = 0)
{
    var encoding = new ColumnEncoding();
    encoding.Kind = columnEncodingKind;
    encoding.DictionarySize = dictionarySize;

    footer.Columns.Add(encoding);
}
/// <summary>
/// Appends a stream entry describing <paramref name="buffer"/> to the footer's stream list.
/// </summary>
/// <param name="footer">Footer whose <c>Streams</c> list receives the new entry.</param>
/// <param name="columnId">Column id stored on the new entry.</param>
/// <param name="buffer">Buffer whose <c>StreamKind</c> and <c>Length</c> are copied onto the new entry.</param>
public static void AddDataStream(this StripeFooter footer, uint columnId, OrcCompressedBuffer buffer)
{
    var entry = new Protocol.Stream();
    entry.Column = columnId;
    entry.Kind = buffer.StreamKind;
    entry.Length = (ulong)buffer.Length;

    footer.Streams.Add(entry);
}
/// <summary>
/// Finishes the stripe currently being written. In order, it flushes every column writer and
/// records its encoding, writes the row-index buffers, writes the data buffers, writes the
/// serialized stripe footer, records the stripe's offset and section lengths, and finally
/// resets the per-stripe row counter and every column writer.
/// </summary>
private void CompleteStripe()
{
    var footer = new StripeFooter();
    var statsForStripe = new StripeStatistics();

    // Column encodings: flush each writer, then register its encoding in the footer.
    foreach (var column in _columnWriters)
    {
        column.ColumnWriter.FlushBuffers();

        // Only a StringWriter supplies a dictionary length; any other writer contributes 0.
        var stringWriter = column.ColumnWriter as StringWriter;
        uint dictionaryLength = stringWriter != null ? stringWriter.DictionaryLength : 0;
        footer.AddColumn(column.ColumnWriter.ColumnEncoding, dictionaryLength);
    }

    // The stripe starts at the current output position; all section lengths are measured from it.
    var stripeStart = (ulong)_outputStream.Position;
    var info = new StripeInformation();
    info.Offset = stripeStart;
    info.NumberOfRows = (ulong)_rowsInStripe;

    // Index section: one row-index buffer per column, written in column order.
    foreach (var column in _columnWriters)
    {
        var rowIndex = _bufferFactory.CreateBuffer(StreamKind.RowIndex);
        column.ColumnWriter.Statistics.WriteToBuffer(rowIndex);
        rowIndex.CopyTo(_outputStream);
        footer.AddDataStream(column.ColumnWriter.ColumnId, rowIndex);

        // Fold every statistics entry into both the stripe-level and the file-level summary.
        var columnSummary = new ColumnStatistics();
        foreach (var entry in column.ColumnWriter.Statistics)
        {
            entry.FillColumnStatistics(columnSummary);
            entry.FillColumnStatistics(column.FileStatistics);
        }
        statsForStripe.ColStats.Add(columnSummary);
    }
    _stripeStats.Add(statsForStripe);

    var indexLength = (ulong)_outputStream.Position - stripeStart;
    info.IndexLength = indexLength;

    // Data section: copy out every buffer that is flagged as required and list it in the footer.
    foreach (var column in _columnWriters)
    {
        foreach (var dataBuffer in column.ColumnWriter.Buffers)
        {
            if (dataBuffer.MustBeIncluded)
            {
                dataBuffer.CopyTo(_outputStream);
                footer.AddDataStream(column.ColumnWriter.ColumnId, dataBuffer);
            }
        }
    }

    // Whatever was written after the index section is the data section.
    info.DataLength = (ulong)_outputStream.Position - indexLength - stripeStart;

    // Footer section: serialize the footer to the output and record how many bytes it took.
    long footerLength;
    _bufferFactory.SerializeAndCompressTo(_outputStream, footer, out footerLength);
    info.FooterLength = (ulong)footerLength;
    _stripeInformations.Add(info);

    // Roll the stripe's rows into the file total and prepare the writers for the next stripe.
    _rowsInFile += _rowsInStripe;
    _rowsInStripe = 0;
    foreach (var column in _columnWriters)
    {
        column.ColumnWriter.Reset();
    }
}