/// <summary>
/// Builds the column chunk metadata for <paramref name="column"/>: a chunk and a data page
/// header are requested from the footer, the pages are obtained from <c>WriteColumn</c>,
/// and the value count and size totals are then recorded on the chunk.
/// </summary>
/// <param name="tse">Schema element; its <c>Type</c> is used when creating the chunk and it is forwarded to <c>WriteColumn</c>.</param>
/// <param name="path">Path handed to the footer when the chunk is created.</param>
/// <param name="column">Column forwarded to <c>WriteColumn</c>.</param>
/// <param name="dataTypeHandler">Handler forwarded to <c>WriteColumn</c>.</param>
/// <returns>The column chunk with its metadata counters filled in.</returns>
private Thrift.ColumnChunk WriteColumnChunk(Thrift.SchemaElement tse, List<string> path, DataColumn column, IDataTypeHandler dataTypeHandler)
        {
            Thrift.ColumnChunk result = _footer.CreateColumnChunk(_compressionMethod, _stream, tse.Type, path, 0);

            // The data page header is created from the row count; its Num_values is copied to the chunk below.
            Thrift.PageHeader dataPageHeader = _footer.CreateDataPage(_rowCount);

            _footer.GetLevels(result, out int repetitionLimit, out int definitionLimit);

            List<PageTag> writtenPages = WriteColumn(column, tse, dataTypeHandler, repetitionLimit, definitionLimit);

            result.Meta_data.Num_values = dataPageHeader.Data_page_header.Num_values;

            // Both totals cover the page payload plus the size of each page header.
            var compressedTotal = writtenPages.Sum(page => page.HeaderMeta.Compressed_page_size + page.HeaderSize);
            result.Meta_data.Total_compressed_size = compressedTotal;

            var uncompressedTotal = writtenPages.Sum(page => page.HeaderMeta.Uncompressed_page_size + page.HeaderSize);
            result.Meta_data.Total_uncompressed_size = uncompressedTotal;

            return result;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the column chunk metadata for <paramref name="column"/>: a chunk and a data page
        /// header are requested from the footer, the pages are obtained from <c>WriteColumn</c>,
        /// and the value count and size totals are then recorded on the chunk.
        /// </summary>
        /// <param name="path">Path handed to the footer when the chunk is created.</param>
        /// <param name="column">Column forwarded to <c>WriteColumn</c>; its <c>TotalCount</c> seeds the data page header.</param>
        /// <param name="dataTypeHandler">Handler forwarded to <c>WriteColumn</c>.</param>
        /// <returns>The column chunk with its metadata counters filled in.</returns>
        public Thrift.ColumnChunk Write(List<string> path, DataColumn column, IDataTypeHandler dataTypeHandler)
        {
            Thrift.ColumnChunk result = _footer.CreateColumnChunk(_compressionMethod, _stream, _schemaElement.Type, path, 0);
            Thrift.PageHeader dataPageHeader = _footer.CreateDataPage(column.TotalCount);

            _footer.GetLevels(result, out int repetitionLimit, out int definitionLimit);

            List<PageTag> writtenPages = WriteColumn(column, _schemaElement, dataTypeHandler, repetitionLimit, definitionLimit);

            // Num_values has to be the number of all values in the column, nulls included.
            // For hierarchy/repeated columns that means the length of the flattened list, nulls included.
            result.Meta_data.Num_values = dataPageHeader.Data_page_header.Num_values;

            // Both totals cover the page payload plus the size of each page header.
            var compressedTotal = writtenPages.Sum(page => page.HeaderMeta.Compressed_page_size + page.HeaderSize);
            result.Meta_data.Total_compressed_size = compressedTotal;

            var uncompressedTotal = writtenPages.Sum(page => page.HeaderMeta.Uncompressed_page_size + page.HeaderSize);
            result.Meta_data.Total_uncompressed_size = uncompressedTotal;

            return result;
        }
        /// <summary>
        /// Stores the supplied collaborators, resolves the data type handler for
        /// <paramref name="tse"/>, and asks the footer for the column chunk, an initial
        /// data page header and the maximum repetition/definition levels.
        /// </summary>
        /// <param name="output">Stream stored on the writer and passed to the footer when the chunk is created.</param>
        /// <param name="thriftStream">Thrift stream stored on the writer.</param>
        /// <param name="footer">Footer used to create the chunk and data page header and to obtain the levels.</param>
        /// <param name="tse">Schema element; used to match the data type handler and to supply the chunk type.</param>
        /// <param name="path">Path handed to the footer when the chunk is created.</param>
        /// <param name="compressionMethod">Compression method stored on the writer and passed to the footer.</param>
        /// <param name="formatOptions">Format options stored on the writer and used when matching the data type handler.</param>
        /// <param name="writerOptions">Writer options stored on the writer.</param>
        public ColumnarWriter(
            Stream output,
            ThriftStream thriftStream,
            ThriftFooter footer,
            Thrift.SchemaElement tse,
            List<string> path,
            CompressionMethod compressionMethod,
            ParquetOptions formatOptions,
            WriterOptions writerOptions)
        {
            // Destinations and thrift plumbing.
            _output = output;
            _thriftStream = thriftStream;
            _footer = footer;

            // Column description and options.
            _tse = tse;
            _compressionMethod = compressionMethod;
            _formatOptions = formatOptions;
            _writerOptions = writerOptions;

            _dataTypeHandler = DataTypeFactory.Match(tse, _formatOptions);

            // Footer-provided structures; the data page header is created with a count of 0.
            _chunk = _footer.CreateColumnChunk(_compressionMethod, _output, _tse.Type, path, 0);
            _ph = _footer.CreateDataPage(0);

            _footer.GetLevels(_chunk, out int repetitionLimit, out int definitionLimit);
            _maxRepetitionLevel = repetitionLimit;
            _maxDefinitionLevel = definitionLimit;
        }