Beispiel #1
0
        /// <summary>
        /// Post-processes the values held by this column using the supplied dictionary,
        /// definition levels, repetition levels and dictionary indexes.
        /// </summary>
        /// <param name="dictionary">Dictionary values, forwarded to <c>ApplyDictionary</c>.</param>
        /// <param name="definitions">Definition levels, forwarded to <c>DefinitionPack.InsertDefinitions</c>.</param>
        /// <param name="repetitions">Repetition levels, forwarded to <c>RepetitionPack.FlatToHierarchy</c>.</param>
        /// <param name="indexes">Dictionary indexes, forwarded to <c>ApplyDictionary</c>.</param>
        /// <param name="maxValues">Forwarded to <c>ApplyDictionary</c> unchanged.</param>
        /// <returns>
        /// The column's value list. When all four list arguments are null the values are
        /// returned exactly as they are, without any processing.
        /// </returns>
        public IList Apply(IList dictionary, List <int> definitions, List <int> repetitions, List <int> indexes, int maxValues)
        {
            bool nothingToApply = dictionary == null
                                  && definitions == null
                                  && indexes == null
                                  && repetitions == null;

            if (!nothingToApply)
            {
                ApplyDictionary(dictionary, indexes, maxValues);

                // The flags produced by the definition step are consumed by the hierarchy step.
                List<bool> valueFlags = DefinitionPack.InsertDefinitions(_values, _maxDefinitionLevel, definitions);

                _values = RepetitionPack.FlatToHierarchy(
                    _maxRepetitionLevel,
                    _createEmptyListFunc,
                    _values,
                    repetitions,
                    valueFlags);
            }

            return _values;
        }
        /// <summary>
        /// Builds the payload of one data page (repetition levels, then definition levels, then
        /// the values), runs it through <c>Compress</c> and hands the result to <c>Write</c>.
        /// </summary>
        /// <param name="values">
        /// Column values to encode. When null, nothing is written into the page buffer before
        /// it is compressed.
        /// </param>
        /// <returns>
        /// A list with a single <c>PageTag</c> carrying the header size returned by <c>Write</c>
        /// and the page header <c>_ph</c>.
        /// </returns>
        private List <PageTag> WriteValues(IList values)
        {
            List<int> repetitionLevels = null;
            List<int> definitionLevels = null;
            List<bool> valueFlags = null;

            // Repeatable field: flatten the values and collect the repetition levels.
            if (values != null && _maxRepetitionLevel > 0)
            {
                repetitionLevels = new List<int>();
                IList flattened = _dataTypeHandler.CreateEmptyList(_tse.IsNullable(), false, 0);
                valueFlags = new List<bool>();

                RepetitionPack.HierarchyToFlat(_maxRepetitionLevel, values, flattened, repetitionLevels, valueFlags);

                values = flattened;
                // The page header must carry the count of the flattened list.
                _ph.Data_page_header.Num_values = values.Count;
            }

            if (values != null && _maxDefinitionLevel > 0)
            {
                definitionLevels = DefinitionPack.RemoveNulls(values, _maxDefinitionLevel, valueFlags);
            }

            byte[] pageBytes;

            using (var buffer = new MemoryStream())
            using (var pageWriter = new BinaryWriter(buffer))
            {
                if (values != null)
                {
                    // Order inside the page: repetition levels, definition levels, data.
                    if (repetitionLevels != null)
                    {
                        WriteLevels(pageWriter, repetitionLevels, _maxRepetitionLevel);
                    }

                    if (definitionLevels != null)
                    {
                        WriteLevels(pageWriter, definitionLevels, _maxDefinitionLevel);
                    }

                    _dataTypeHandler.Write(_tse, pageWriter, values);
                }

                pageBytes = buffer.ToArray();
            }

            pageBytes = Compress(pageBytes);
            int headerSize = Write(pageBytes);

            return new List<PageTag>
            {
                new PageTag { HeaderSize = headerSize, HeaderMeta = _ph }
            };
        }
Beispiel #3
0
        /// <summary>
        /// Encodes the values of a column into a data page and, when dictionary encoding is
        /// enabled and the dictionary writer accepts the values, an additional dictionary page.
        /// Each page is passed through <c>Compress</c> and then <c>Write</c>.
        /// </summary>
        /// <param name="schema">Schema element used for the level checks and passed to the value writers.</param>
        /// <param name="values">Column values to encode.</param>
        /// <param name="ph">
        /// Header of the data page; its <c>Num_values</c> and <c>Encoding</c> may be updated here.
        /// </param>
        /// <param name="compression">Compression method forwarded to <c>Compress</c>.</param>
        /// <returns>
        /// One <c>PageTag</c> per written page: the dictionary page first (if any), then the data page.
        /// </returns>
        private List <PageTag> WriteValues(SchemaElement schema, IList values, Thrift.PageHeader ph, CompressionMethod compression)
        {
            // NOTE(review): the guards below read the 'schema' parameter, while the packers and
            // the bit-width lookups read the '_schema' field - confirm both always refer to the
            // same element.
            List<int> repetitionLevels = null;
            List<int> definitionLevels = null;

            // Repeatable field: flatten the values and collect the repetition levels.
            if (schema.MaxRepetitionLevel > 0)
            {
                values = new RepetitionPack().Unpack(_schema, values, out repetitionLevels);
                ph.Data_page_header.Num_values = values.Count;
            }

            if (schema.IsNullable || schema.MaxDefinitionLevel > 0)
            {
                values = new DefinitionPack().Unpack(values, _schema, out definitionLevels);
            }

            byte[] dataPageBytes;
            byte[] dictionaryPageBytes = null;
            int dictionaryPageCount = 0;

            using (var dataStream = new MemoryStream())
            using (var dataWriter = new BinaryWriter(dataStream))
            {
                // Order inside the page: repetition levels, definition levels, data.
                if (repetitionLevels != null)
                {
                    int repetitionBitWidth = PEncoding.GetWidthFromMaxInt(_schema.MaxRepetitionLevel);
                    RunLengthBitPackingHybridValuesWriter.Write(dataWriter, repetitionBitWidth, repetitionLevels);
                }

                if (definitionLevels != null)
                {
                    int definitionBitWidth = PEncoding.GetWidthFromMaxInt(_schema.MaxDefinitionLevel);
                    RunLengthBitPackingHybridValuesWriter.Write(dataWriter, definitionBitWidth, definitionLevels);
                }

                if (_writerOptions.UseDictionaryEncoding
                    && _dicWriter.Write(dataWriter, schema, values, out IList dictionaryValues))
                {
                    // The dictionary writer has already written into the data page; the
                    // dictionary values themselves go to a separate buffer.
                    dictionaryPageCount = dictionaryValues.Count;
                    ph.Data_page_header.Encoding = Thrift.Encoding.PLAIN_DICTIONARY;

                    using (var dictionaryStream = new MemoryStream())
                    using (var dictionaryWriter = new BinaryWriter(dictionaryStream))
                    {
                        _plainWriter.Write(dictionaryWriter, schema, dictionaryValues, out IList _);
                        dictionaryPageBytes = dictionaryStream.ToArray();
                    }
                }
                else
                {
                    // Dictionary encoding is disabled or was declined: write the values plainly.
                    _plainWriter.Write(dataWriter, schema, values, out IList _);
                }

                dataPageBytes = dataStream.ToArray();
            }

            var pageTags = new List<PageTag>();

            // The dictionary page, when present, is written ahead of the data page.
            if (dictionaryPageBytes != null)
            {
                Thrift.PageHeader dictionaryHeader = _meta.CreateDictionaryPage(dictionaryPageCount);
                dictionaryPageBytes = Compress(dictionaryHeader, dictionaryPageBytes, compression);
                int dictionaryHeaderSize = Write(dictionaryHeader, dictionaryPageBytes);

                pageTags.Add(new PageTag { HeaderSize = dictionaryHeaderSize, HeaderMeta = dictionaryHeader });
            }

            dataPageBytes = Compress(ph, dataPageBytes, compression);
            int dataHeaderSize = Write(ph, dataPageBytes);

            pageTags.Add(new PageTag { HeaderSize = dataHeaderSize, HeaderMeta = ph });

            return pageTags;
        }
Beispiel #4
0
        /// <summary>
        /// Replaces the current values with the result of <c>RepetitionPack.Pack</c> applied to
        /// the schema, the current values and the given repetition levels.
        /// </summary>
        /// <param name="repetitions">Repetition levels forwarded to the packer.</param>
        private void ApplyRepetitions(List <int> repetitions)
        {
            _values = new RepetitionPack().Pack(_schema, _values, repetitions);
        }