/// <summary>
/// Creates a column chunk for <paramref name="column"/>, hands the data to <c>WriteColumn</c>,
/// and fills in the chunk's value count and size totals from the pages that were produced.
/// </summary>
/// <param name="tse">Schema element; its <c>Type</c> is recorded in the chunk and it is forwarded to <c>WriteColumn</c>.</param>
/// <param name="path">Path passed to the footer when the chunk is created.</param>
/// <param name="column">Column data forwarded to <c>WriteColumn</c>.</param>
/// <param name="dataTypeHandler">Data type handler forwarded to <c>WriteColumn</c>.</param>
/// <returns>The created chunk with <c>Num_values</c> and both size totals populated.</returns>
private Thrift.ColumnChunk WriteColumnChunk(Thrift.SchemaElement tse, List<string> path, DataColumn column, IDataTypeHandler dataTypeHandler)
{
    Thrift.ColumnChunk chunk = _footer.CreateColumnChunk(_compressionMethod, _stream, tse.Type, path, 0);
    Thrift.PageHeader ph = _footer.CreateDataPage(_rowCount);
    _footer.GetLevels(chunk, out int maxRepetitionLevel, out int maxDefinitionLevel);

    List<PageTag> pages = WriteColumn(column, tse, dataTypeHandler, maxRepetitionLevel, maxDefinitionLevel);

    // the value count is taken from the data page header created for _rowCount above
    chunk.Meta_data.Num_values = ph.Data_page_header.Num_values;

    // the following counters must include both data size and header size.
    // The selector is widened to long on purpose: the int overload of Enumerable.Sum is
    // checked and throws OverflowException once the pages add up to more than int.MaxValue bytes.
    chunk.Meta_data.Total_compressed_size = pages.Sum(p => (long)p.HeaderMeta.Compressed_page_size + p.HeaderSize);
    chunk.Meta_data.Total_uncompressed_size = pages.Sum(p => (long)p.HeaderMeta.Uncompressed_page_size + p.HeaderSize);

    return chunk;
}
/// <summary>
/// Creates a column chunk for <paramref name="column"/>, hands the data to <c>WriteColumn</c>
/// using this writer's schema element, and fills in the chunk's value count and size totals.
/// </summary>
/// <param name="path">Path passed to the footer when the chunk is created.</param>
/// <param name="column">Column data to write; its <c>TotalCount</c> is used to create the data page header.</param>
/// <param name="dataTypeHandler">Data type handler forwarded to <c>WriteColumn</c>.</param>
/// <returns>The created chunk with <c>Num_values</c> and both size totals populated.</returns>
public Thrift.ColumnChunk Write(List<string> path, DataColumn column, IDataTypeHandler dataTypeHandler)
{
    Thrift.ColumnChunk chunk = _footer.CreateColumnChunk(_compressionMethod, _stream, _schemaElement.Type, path, 0);
    Thrift.PageHeader ph = _footer.CreateDataPage(column.TotalCount);
    _footer.GetLevels(chunk, out int maxRepetitionLevel, out int maxDefinitionLevel);

    List<PageTag> pages = WriteColumn(column, _schemaElement, dataTypeHandler, maxRepetitionLevel, maxDefinitionLevel);

    // this count must be set to the number of all values in the column, including nulls.
    // for hierarchy/repeated columns this is the count of the flattened list, including nulls.
    chunk.Meta_data.Num_values = ph.Data_page_header.Num_values;

    // the following counters must include both data size and header size.
    // The selector is widened to long on purpose: the int overload of Enumerable.Sum is
    // checked and throws OverflowException once the pages add up to more than int.MaxValue bytes.
    chunk.Meta_data.Total_compressed_size = pages.Sum(p => (long)p.HeaderMeta.Compressed_page_size + p.HeaderSize);
    chunk.Meta_data.Total_uncompressed_size = pages.Sum(p => (long)p.HeaderMeta.Uncompressed_page_size + p.HeaderSize);

    return chunk;
}
/// <summary>
/// Initializes a writer for a single column: stores the supplied collaborators and options,
/// resolves the data type handler for the schema element, and asks the footer for the column
/// chunk, an initial data page header, and the chunk's maximum repetition/definition levels.
/// </summary>
/// <param name="output">Stream stored by the writer and passed to the footer when creating the column chunk.</param>
/// <param name="thriftStream">Thrift stream stored by the writer.</param>
/// <param name="footer">Footer used to create the column chunk and data page header and to look up levels.</param>
/// <param name="tse">Schema element of the column; used to match the data type handler and to supply the chunk type.</param>
/// <param name="path">Path passed to the footer when creating the column chunk.</param>
/// <param name="compressionMethod">Compression method stored by the writer and passed to the footer.</param>
/// <param name="formatOptions">Format options stored by the writer and used when matching the data type handler.</param>
/// <param name="writerOptions">Writer options stored by the writer.</param>
public ColumnarWriter(
    Stream output,
    ThriftStream thriftStream,
    ThriftFooter footer,
    Thrift.SchemaElement tse,
    List<string> path,
    CompressionMethod compressionMethod,
    ParquetOptions formatOptions,
    WriterOptions writerOptions)
{
    // plain state captured from the caller
    _output = output;
    _thriftStream = thriftStream;
    _footer = footer;
    _tse = tse;
    _compressionMethod = compressionMethod;
    _formatOptions = formatOptions;
    _writerOptions = writerOptions;

    // derived state: handler first, then the footer-created structures in their original order
    _dataTypeHandler = DataTypeFactory.Match(tse, formatOptions);
    _chunk = footer.CreateColumnChunk(compressionMethod, output, tse.Type, path, 0);
    _ph = footer.CreateDataPage(0);

    footer.GetLevels(_chunk, out int repetitionLevelLimit, out int definitionLevelLimit);
    _maxRepetitionLevel = repetitionLevelLimit;
    _maxDefinitionLevel = definitionLevelLimit;
}