/// <summary>
/// Records the payload sizes on <paramref name="ph"/> and returns the bytes that should be
/// written after the header: <paramref name="data"/> itself when no compression is requested,
/// otherwise the output of the writer that DataFactory supplies for the compression method.
/// </summary>
/// <param name="ph">Page header whose uncompressed/compressed size fields are updated.</param>
/// <param name="data">Uncompressed page payload.</param>
/// <param name="compression">Compression method to apply.</param>
/// <returns>The (possibly compressed) page payload.</returns>
private byte[] Compress(Thrift.PageHeader ph, byte[] data, CompressionMethod compression)
        {
            //by spec, the page size numbers do not include the size of the header itself
            ph.Uncompressed_page_size = data.Length;

            //nothing to compress: both sizes are equal and the input is passed through untouched
            if (compression == CompressionMethod.None)
            {
                ph.Compressed_page_size = ph.Uncompressed_page_size;
                return data;
            }

            IDataWriter compressor = DataFactory.GetWriter(compression);
            byte[] packed;

            using (var buffer = new MemoryStream())
            {
                compressor.Write(data, buffer);
                packed = buffer.ToArray();
            }

            ph.Compressed_page_size = packed.Length;
            return packed;
        }
        /// <summary>
        /// Writes a page: first the header through ThriftStream, then the payload bytes to the output.
        /// </summary>
        /// <param name="ph">Page header to write.</param>
        /// <param name="data">Page payload written immediately after the header.</param>
        /// <returns>The value returned by ThriftStream.Write for the header (used by callers as the header size).</returns>
        private int Write(Thrift.PageHeader ph, byte[] data)
        {
            //header goes first, the payload follows it
            int headerLength = ThriftStream.Write(ph);
            _output.Write(data, 0, data.Length);

            return headerLength;
        }
        /// <summary>
        /// Writes the pages for one column and fills in the total size counters of its chunk metadata.
        /// </summary>
        /// <param name="schema">Schema element of the column being written.</param>
        /// <param name="values">Column values.</param>
        /// <param name="compression">Compression method applied to the pages.</param>
        /// <param name="stats">Column statistics, forwarded to WriteValues.</param>
        /// <returns>The column chunk created for this column.</returns>
        private Thrift.ColumnChunk Write(SchemaElement schema, IList values,
                                         CompressionMethod compression,
                                         ColumnStats stats)
        {
            int valueCount = values.Count;

            Thrift.ColumnChunk chunk = _meta.AddColumnChunk(compression, _output, schema, valueCount);
            Thrift.PageHeader dataPageHeader = _meta.CreateDataPage(valueCount);

            List<PageTag> writtenPages = WriteValues(schema, values, dataPageHeader, compression, stats);

            //both totals must account for the page payload and for the page header
            chunk.Meta_data.Total_compressed_size = writtenPages
                .Select(page => page.HeaderMeta.Compressed_page_size + page.HeaderSize)
                .Sum();
            chunk.Meta_data.Total_uncompressed_size = writtenPages
                .Select(page => page.HeaderMeta.Uncompressed_page_size + page.HeaderSize)
                .Sum();

            return chunk;
        }
        /// <summary>
        /// Encodes the column values into a single page payload, optionally compresses it,
        /// updates the size fields on <paramref name="ph"/>, and writes the header followed by the payload.
        /// </summary>
        /// <param name="schema">Schema element of the column being written.</param>
        /// <param name="values">Column values; replaced by the list CreateDefinitions produces when nulls are present.</param>
        /// <param name="ph">Page header that receives the page sizes and is written before the payload.</param>
        /// <param name="compression">Compression method applied to the payload.</param>
        /// <param name="stats">Column statistics; a positive NullCount triggers writing definitions.</param>
        private void WriteValues(SchemaElement schema, IList values, Thrift.PageHeader ph, CompressionMethod compression, ColumnStats stats)
        {
            byte[] pageBytes;

            using (var pageStream = new MemoryStream())
            using (var pageWriter = new BinaryWriter(pageStream))
            {
                //definitions are only written when the column actually contains nulls
                if (stats.NullCount > 0)
                {
                    CreateDefinitions(values, schema, out IList definedValues, out List<int> definitionLevels);
                    values = definedValues;

                    _rleWriter.Write(pageWriter, schema, definitionLevels);
                }

                _plainWriter.Write(pageWriter, schema, values);

                pageBytes = pageStream.ToArray();
            }

            ph.Uncompressed_page_size = pageBytes.Length;

            if (compression == CompressionMethod.None)
            {
                //no compression: both sizes are the same
                ph.Compressed_page_size = ph.Uncompressed_page_size;
            }
            else
            {
                IDataWriter compressor = DataFactory.GetWriter(compression);
                using (var compressedStream = new MemoryStream())
                {
                    compressor.Write(pageBytes, compressedStream);
                    pageBytes = compressedStream.ToArray();
                }
                ph.Compressed_page_size = pageBytes.Length;
            }

            //header first, then the page payload
            _thrift.Write(ph);
            _output.Write(pageBytes, 0, pageBytes.Length);
        }
        /// <summary>
        /// Registers a column chunk for the column, builds a data page header for it and writes the values.
        /// </summary>
        /// <param name="schema">Schema element of the column being written.</param>
        /// <param name="values">Column values.</param>
        /// <param name="compression">Compression method applied to the page.</param>
        /// <param name="stats">Column statistics, forwarded to WriteValues.</param>
        /// <returns>The column chunk created for this column.</returns>
        private Thrift.ColumnChunk Write(SchemaElement schema, IList values,
                                         CompressionMethod compression,
                                         ColumnStats stats)
        {
            int valueCount = values.Count;

            Thrift.ColumnChunk chunk = _meta.AddColumnChunk(compression, _output, schema, valueCount);

            //page sizes start at zero here and are passed on to WriteValues together with the header
            var dataPageHeader = new Thrift.PageHeader(Thrift.PageType.DATA_PAGE, 0, 0)
            {
                Data_page_header = new Thrift.DataPageHeader
                {
                    Encoding = Thrift.Encoding.PLAIN,
                    Definition_level_encoding = Thrift.Encoding.RLE,
                    Repetition_level_encoding = Thrift.Encoding.BIT_PACKED,
                    Num_values = valueCount
                }
            };

            WriteValues(schema, values, dataPageHeader, compression, stats);

            return chunk;
        }
        /// <summary>
        /// Encodes the column values into a data page (and a dictionary page when dictionary
        /// encoding is enabled and the dictionary writer accepts the values), compresses and writes
        /// each page, and reports what was written.
        /// </summary>
        /// <param name="schema">Schema element of the column being written.</param>
        /// <param name="values">Column values; flattened first when the column is repeated.</param>
        /// <param name="ph">Header of the data page; its encoding and size fields are updated here.</param>
        /// <param name="compression">Compression method applied to every page.</param>
        /// <param name="stats">Column statistics (not read by this method).</param>
        /// <returns>One PageTag per written page, the dictionary page (if any) first, then the data page.</returns>
        private List<PageTag> WriteValues(SchemaElement schema, IList values, Thrift.PageHeader ph, CompressionMethod compression, ColumnStats stats)
        {
            var writtenPages = new List<PageTag>();

            byte[] dictionaryBytes = null;
            int dictionaryEntryCount = 0;
            byte[] dataBytes;

            //repeatable fields are flattened before anything is encoded
            if (schema.IsRepeated)
            {
                values = FlattenRepeatables(values, schema);
            }

            using (var pageStream = new MemoryStream())
            using (var pageWriter = new BinaryWriter(pageStream))
            {
                //repetition levels come first
                if (schema.IsRepeated)
                {
                    List<int> repetitionLevels = CreateRepetitions(values, schema);
                    _rleWriter.Write(pageWriter, _definitionsSchema, repetitionLevels, out IList unusedRepetitionExtra);
                }

                //then definition levels
                if (schema.HasNulls || schema.IsRepeated)
                {
                    CreateDefinitions(values, schema, out IList definedValues, out List<int> definitionLevels);
                    values = definedValues;

                    _rleWriter.Write(pageWriter, _definitionsSchema, definitionLevels, out IList unusedDefinitionExtra);
                }

                //finally the data itself: dictionary encoded when enabled and the dictionary writer
                //reports success, plain encoded otherwise
                if (_writerOptions.UseDictionaryEncoding && _dicWriter.Write(pageWriter, schema, values, out IList dictionaryValues))
                {
                    dictionaryEntryCount = dictionaryValues.Count;
                    ph.Data_page_header.Encoding = Thrift.Encoding.PLAIN_DICTIONARY;

                    //the dictionary entries are plain encoded into a buffer of their own
                    using (var dictionaryStream = new MemoryStream())
                    {
                        using (var dictionaryWriter = new BinaryWriter(dictionaryStream))
                        {
                            _plainWriter.Write(dictionaryWriter, schema, dictionaryValues, out IList unusedDictionaryExtra);
                            dictionaryBytes = dictionaryStream.ToArray();
                        }
                    }
                }
                else
                {
                    _plainWriter.Write(pageWriter, schema, values, out IList unusedPlainExtra);
                }

                dataBytes = pageStream.ToArray();
            }

            //the dictionary page, when there is one, is written ahead of the data page
            if (dictionaryBytes != null)
            {
                Thrift.PageHeader dictionaryHeader = _meta.CreateDictionaryPage(dictionaryEntryCount);
                dictionaryBytes = Compress(dictionaryHeader, dictionaryBytes, compression);
                int dictionaryHeaderLength = Write(dictionaryHeader, dictionaryBytes);

                writtenPages.Add(new PageTag
                {
                    HeaderSize = dictionaryHeaderLength,
                    HeaderMeta = dictionaryHeader
                });
            }

            dataBytes = Compress(ph, dataBytes, compression);
            int dataHeaderLength = Write(ph, dataBytes);

            writtenPages.Add(new PageTag
            {
                HeaderSize = dataHeaderLength,
                HeaderMeta = ph
            });

            return writtenPages;
        }