private List<PageTag> WriteColumn(DataColumn column,
   Thrift.SchemaElement tse,
   IDataTypeHandler dataTypeHandler,
   int maxRepetitionLevel,
   int maxDefinitionLevel)
{
   var pages = new List<PageTag>();

   /*
    * Page header must precede the actual data (compressed or not); however, it contains both
    * the uncompressed and compressed data sizes, which we don't know yet. This somewhat limits
    * write efficiency: the page body has to be buffered before the header can be written.
    */

   using (var ms = new MemoryStream())
   {
      Thrift.PageHeader dataPageHeader = _footer.CreateDataPage(column.TotalCount);

      //chain streams together so we have real streaming instead of wasting un-defraggable LOH memory
      using (GapStream pageStream = DataStreamFactory.CreateWriter(ms, _compressionMethod, true))
      {
         using (var writer = new BinaryWriter(pageStream, Encoding.UTF8, true))
         {
            if (maxRepetitionLevel > 0)
            {
               WriteLevels(writer, column.RepetitionLevels, maxRepetitionLevel);
            }

            if (maxDefinitionLevel > 0)
            {
               WriteLevels(writer, column.DefinitionLevels, maxDefinitionLevel);
            }

            dataTypeHandler.Write(tse, writer, column.DefinedData);

            writer.Flush();
         }

         pageStream.Flush();   //extremely important to flush the stream as some compression algorithms don't finish writing
         dataPageHeader.Uncompressed_page_size = (int)pageStream.Position;
      }
      dataPageHeader.Compressed_page_size = (int)ms.Position;

      //write the header in
      int headerSize = _thriftStream.Write(dataPageHeader);
      ms.Position = 0;
      ms.CopyTo(_stream);

      var dataTag = new PageTag
      {
         HeaderMeta = dataPageHeader,
         HeaderSize = headerSize
      };

      pages.Add(dataTag);
   }

   return pages;
}
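The method delegates level encoding to a WriteLevels helper that isn't shown here. As a rough illustration only, below is a minimal, RLE-only sketch of what such a helper could look like; the helper names (WriteLevelsSketch, GetBitWidth, WriteUnsignedVarInt) are assumptions, not the library's API. What is grounded in the Parquet format itself: data page v1 repetition/definition levels use the RLE/bit-packing hybrid encoding, prefixed with a 4-byte little-endian length, and an RLE run is a varint header (run length shifted left by one, LSB 0) followed by the value in ceil(bitWidth/8) little-endian bytes.

//sketch only; assumes using System.IO and System.Text are in scope
private static void WriteLevelsSketch(BinaryWriter writer, int[] levels, int count, int maxLevel)
{
   int bitWidth = GetBitWidth(maxLevel);   //bits needed to represent maxLevel
   int byteWidth = (bitWidth + 7) / 8;     //each RLE value is stored in this many bytes

   using (var body = new MemoryStream())
   {
      int i = 0;
      while (i < count)
      {
         //measure the current run of identical levels
         int runStart = i;
         while (i < count && levels[i] == levels[runStart]) i++;
         int runLength = i - runStart;

         WriteUnsignedVarInt(body, runLength << 1);   //LSB = 0 marks an RLE run
         int value = levels[runStart];
         for (int b = 0; b < byteWidth; b++)          //value in little-endian byte order
         {
            body.WriteByte((byte)(value >> (8 * b)));
         }
      }

      writer.Write((int)body.Length);   //4-byte little-endian length prefix (v1 data pages)
      writer.Flush();
      body.Position = 0;
      body.CopyTo(writer.BaseStream);
   }
}

private static int GetBitWidth(int maxValue)
{
   int width = 0;
   while (maxValue > 0) { width++; maxValue >>= 1; }
   return width;
}

private static void WriteUnsignedVarInt(Stream s, int value)
{
   while (value > 127)
   {
      s.WriteByte((byte)((value & 0x7F) | 0x80));
      value >>= 7;
   }
   s.WriteByte((byte)value);
}

A production encoder would also emit bit-packed runs for stretches of non-repeating levels; pure RLE is simply the smallest correct subset of the hybrid encoding.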
private List<PageTag> WriteColumn(DataColumn column,
   Thrift.SchemaElement tse,
   IDataTypeHandler dataTypeHandler,
   int maxRepetitionLevel,
   int maxDefinitionLevel)
{
   var pages = new List<PageTag>();

   /*
    * Page header must precede the actual data (compressed or not); however, it contains both
    * the uncompressed and compressed data sizes, which we don't know yet. This somewhat limits
    * write efficiency: the page body has to be buffered before the header can be written.
    */

   using (var ms = new MemoryStream())
   {
      Thrift.PageHeader dataPageHeader = _footer.CreateDataPage(column.Data.Length);

      //chain streams together so we have real streaming instead of wasting un-defraggable LOH memory
      using (GapStream pageStream = DataStreamFactory.CreateWriter(ms, _compressionMethod, _compressionLevel, true))
      {
         using (var writer = new BinaryWriter(pageStream, Encoding.UTF8, true))
         {
            if (column.RepetitionLevels != null)
            {
               WriteLevels(writer, column.RepetitionLevels, column.RepetitionLevels.Length, maxRepetitionLevel);
            }

            Array data = column.Data;
            if (maxDefinitionLevel > 0)
            {
               data = column.PackDefinitions(maxDefinitionLevel, out int[] definitionLevels, out int definitionLevelsLength, out int nullCount);

               //last chance to capture the null count, as null data is compacted away now
               column.Statistics.NullCount = nullCount;

               try
               {
                  WriteLevels(writer, definitionLevels, definitionLevelsLength, maxDefinitionLevel);
               }
               finally
               {
                  if (definitionLevels != null)
                  {
                     ArrayPool<int>.Shared.Return(definitionLevels);
                  }
               }
            }
            else
            {
               //no definitions means no nulls
               column.Statistics.NullCount = 0;
            }

            dataTypeHandler.Write(tse, writer, data, column.Statistics);

            writer.Flush();
         }

         pageStream.Flush();   //extremely important to flush the stream as some compression algorithms don't finish writing
         pageStream.MarkWriteFinished();
         dataPageHeader.Uncompressed_page_size = (int)pageStream.Position;
      }
      dataPageHeader.Compressed_page_size = (int)ms.Position;

      //write the header in
      dataPageHeader.Data_page_header.Statistics = column.Statistics.ToThriftStatistics(dataTypeHandler, _schemaElement);
      int headerSize = _thriftStream.Write(dataPageHeader);
      ms.Position = 0;
      ms.CopyTo(_stream);

      var dataTag = new PageTag
      {
         HeaderMeta = dataPageHeader,
         HeaderSize = headerSize
      };

      pages.Add(dataTag);
   }

   return pages;
}
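The key change in this revision is that nulls are packed out of the data before the values are written: PackDefinitions produces the definition levels (rented from ArrayPool, hence the try/finally Return) plus a compacted array of non-null values, and counts the nulls while it still can. The sketch below illustrates those semantics for a flat nullable column; it is an assumption-laden illustration, not the library's actual implementation, and the name PackDefinitionsSketch is invented for it. For a flat column the rule is simple: a present value gets definition level maxDefinitionLevel, a null gets 0, and only the non-null values are written to the page.

//sketch only; assumes using System, System.Buffers and System.Collections.Generic
private static Array PackDefinitionsSketch(
   Array data, int maxDefinitionLevel,
   out int[] definitionLevels, out int definitionLevelsLength, out int nullCount)
{
   definitionLevelsLength = data.Length;
   definitionLevels = ArrayPool<int>.Shared.Rent(definitionLevelsLength);   //caller must Return this
   nullCount = 0;

   var packed = new List<object>(data.Length);
   for (int i = 0; i < data.Length; i++)
   {
      object value = data.GetValue(i);
      if (value == null)
      {
         definitionLevels[i] = 0;   //null: not defined at the deepest level
         nullCount++;
      }
      else
      {
         definitionLevels[i] = maxDefinitionLevel;   //value is fully defined
         packed.Add(value);
      }
   }

   //only the non-null values go into the page; nulls are implied by the definition levels
   //(the real code compacts into the column's element type rather than boxing like this)
   Array result = Array.CreateInstance(data.GetType().GetElementType(), packed.Count);
   for (int i = 0; i < packed.Count; i++) result.SetValue(packed[i], i);
   return result;
}

This split is also why the null count has to be captured at this point: once the page body is compressed, the nulls exist only as zero definition levels, so the statistics would otherwise have nothing to count.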