Exemplo n.º 1
0
        /// <summary>
        /// Reads levels, suitable for both repetition levels and definition levels
        /// </summary>
        /// <param name="reader"></param>
        /// <param name="maxLevel">Maximum level value, depends on level type</param>
        /// <returns></returns>
        private List <int> ReadLevels(BinaryReader reader, int maxLevel)
        {
            int bitWidth = PEncoding.GetWidthFromMaxInt(maxLevel);
            var result   = new List <int>();

            //todo: there might be more data on larger files, therefore line below need to be called in a loop until valueCount is satisfied
            RunLengthBitPackingHybridValuesReader.ReadRleBitpackedHybrid(reader, bitWidth, 0, result);

            return(result);
        }
Exemplo n.º 2
0
        private List <int> ReadDefinitionLevels(BinaryReader reader, int valueCount)
        {
            const int maxDefinitionLevel = 1; //todo: for nested columns this needs to be calculated properly
            int       bitWidth           = PEncoding.GetWidthFromMaxInt(maxDefinitionLevel);
            var       result             = new List <int>();

            //todo: there might be more data on larger files, therefore line below need to be called in a loop until valueCount is satisfied
            RunLengthBitPackingHybridValuesReader.ReadRleBitpackedHybrid(reader, bitWidth, 0, result, valueCount);

            int maxLevel  = _schema.GetMaxDefinitionLevel(_thriftChunk);
            int nullCount = valueCount - result.Count(r => r == maxLevel);

            if (nullCount == 0)
            {
                return(null);
            }

            return(result);
        }
Exemplo n.º 3
0
        private List <PageTag> WriteValues(SchemaElement schema, IList values, Thrift.PageHeader ph, CompressionMethod compression)
        {
            var result = new List <PageTag>();

            byte[] dictionaryPageBytes = null;
            int    dictionaryPageCount = 0;

            byte[]     dataPageBytes;
            List <int> repetitions = null;
            List <int> definitions = null;

            //flatten values and create repetitions list if the field is repeatable
            if (schema.MaxRepetitionLevel > 0)
            {
                var rpack = new RepetitionPack();
                values = rpack.Unpack(_schema, values, out repetitions);
                ph.Data_page_header.Num_values = values.Count;
            }

            if (schema.IsNullable || schema.MaxDefinitionLevel > 0)
            {
                var dpack = new DefinitionPack();
                values = dpack.Unpack(values, _schema, out definitions);
            }

            using (var ms = new MemoryStream())
            {
                using (var writer = new BinaryWriter(ms))
                {
                    //write repetitions
                    if (repetitions != null)
                    {
                        int bitWidth = PEncoding.GetWidthFromMaxInt(_schema.MaxRepetitionLevel);
                        RunLengthBitPackingHybridValuesWriter.Write(writer, bitWidth, repetitions);
                    }

                    //write definitions
                    if (definitions != null)
                    {
                        int bitWidth = PEncoding.GetWidthFromMaxInt(_schema.MaxDefinitionLevel);
                        RunLengthBitPackingHybridValuesWriter.Write(writer, bitWidth, definitions);
                    }

                    //write data
                    if (!_writerOptions.UseDictionaryEncoding || !_dicWriter.Write(writer, schema, values, out IList dicValues))
                    {
                        _plainWriter.Write(writer, schema, values, out IList plainExtra);
                    }
                    else
                    {
                        dictionaryPageCount          = dicValues.Count;
                        ph.Data_page_header.Encoding = Thrift.Encoding.PLAIN_DICTIONARY;
                        using (var dms = new MemoryStream())
                            using (var dwriter = new BinaryWriter(dms))
                            {
                                _plainWriter.Write(dwriter, schema, dicValues, out IList t0);
                                dictionaryPageBytes = dms.ToArray();
                            }
                    }

                    dataPageBytes = ms.ToArray();
                }
            }

            if (dictionaryPageBytes != null)
            {
                Thrift.PageHeader dph = _meta.CreateDictionaryPage(dictionaryPageCount);
                dictionaryPageBytes = Compress(dph, dictionaryPageBytes, compression);
                int dictionaryHeaderSize = Write(dph, dictionaryPageBytes);
                result.Add(new PageTag {
                    HeaderSize = dictionaryHeaderSize, HeaderMeta = dph
                });
            }

            dataPageBytes = Compress(ph, dataPageBytes, compression);
            int dataHeaderSize = Write(ph, dataPageBytes);

            result.Add(new PageTag {
                HeaderSize = dataHeaderSize, HeaderMeta = ph
            });

            return(result);
        }