/// <summary>
/// Applies the dictionary (with its indexes), the definition levels and the repetition levels
/// directly over the values held by this column.
/// </summary>
/// <param name="dictionary">Dictionary values, forwarded to <c>ApplyDictionary</c>.</param>
/// <param name="definitions">Definition levels, forwarded to <c>DefinitionPack.InsertDefinitions</c>.</param>
/// <param name="repetitions">Repetition levels, forwarded to <c>RepetitionPack.FlatToHierarchy</c>.</param>
/// <param name="indexes">Dictionary indexes, forwarded to <c>ApplyDictionary</c>.</param>
/// <param name="maxValues">Value limit, forwarded to <c>ApplyDictionary</c>.</param>
/// <returns>
/// The column values. They are returned untouched when <paramref name="dictionary"/>,
/// <paramref name="definitions"/>, <paramref name="indexes"/> and <paramref name="repetitions"/>
/// are all null.
/// </returns>
public IList Apply(IList dictionary, List<int> definitions, List<int> repetitions, List<int> indexes, int maxValues)
{
   // With no dictionary, indexes or levels supplied, the values are just values.
   bool nothingToApply =
      dictionary == null &&
      definitions == null &&
      indexes == null &&
      repetitions == null;

   if (!nothingToApply)
   {
      ApplyDictionary(dictionary, indexes, maxValues);

      // The flags produced by the definition step are consumed by the repetition step.
      List<bool> valuePresence = DefinitionPack.InsertDefinitions(_values, _maxDefinitionLevel, definitions);

      _values = RepetitionPack.FlatToHierarchy(
         _maxRepetitionLevel,
         _createEmptyListFunc,
         _values,
         repetitions,
         valuePresence);
   }

   return _values;
}
/// <summary>
/// Writes the supplied values as a single data page: repetition levels first (if any),
/// then definition levels (if any), then the data itself. The page bytes are passed through
/// <c>Compress</c> and <c>Write</c>.
/// </summary>
/// <param name="values">
/// Values to write. When null, no levels or data are written and the page buffer stays empty.
/// </param>
/// <returns>
/// A list with one <c>PageTag</c> holding the size returned by <c>Write</c> and the page header.
/// </returns>
private List<PageTag> WriteValues(IList values)
{
   List<int> repetitionLevels = null;
   List<int> definitionLevels = null;
   List<bool> valuePresence = null;

   bool hasValues = values != null;

   // Repeatable field: flatten the hierarchy and collect the repetition levels.
   if (hasValues && _maxRepetitionLevel > 0)
   {
      IList flattened = _dataTypeHandler.CreateEmptyList(_tse.IsNullable(), false, 0);
      repetitionLevels = new List<int>();
      valuePresence = new List<bool>();

      RepetitionPack.HierarchyToFlat(_maxRepetitionLevel, values, flattened, repetitionLevels, valuePresence);

      values = flattened;

      // The header must carry the count of the flattened list.
      _ph.Data_page_header.Num_values = values.Count;
   }

   if (hasValues && _maxDefinitionLevel > 0)
   {
      definitionLevels = DefinitionPack.RemoveNulls(values, _maxDefinitionLevel, valuePresence);
   }

   byte[] pageBytes;

   using (var pageStream = new MemoryStream())
   using (var pageWriter = new BinaryWriter(pageStream))
   {
      if (hasValues)
      {
         if (repetitionLevels != null)
         {
            WriteLevels(pageWriter, repetitionLevels, _maxRepetitionLevel);
         }

         if (definitionLevels != null)
         {
            WriteLevels(pageWriter, definitionLevels, _maxDefinitionLevel);
         }

         _dataTypeHandler.Write(_tse, pageWriter, values);
      }

      pageBytes = pageStream.ToArray();
   }

   pageBytes = Compress(pageBytes);
   int headerSize = Write(pageBytes);

   return new List<PageTag>
   {
      new PageTag { HeaderSize = headerSize, HeaderMeta = _ph }
   };
}
/// <summary>
/// Writes the supplied values as a data page. When dictionary encoding is enabled in the writer
/// options and the dictionary writer reports success, a dictionary page is written before it.
/// </summary>
/// <param name="schema">Schema element used for the level checks and passed to the value writers.</param>
/// <param name="values">Values to write; replaced internally by the unpacked lists.</param>
/// <param name="ph">
/// Data page header. Its value count is updated after repetition unpacking and its encoding is
/// set to PLAIN_DICTIONARY when the dictionary writer is used.
/// </param>
/// <param name="compression">Compression method forwarded to <c>Compress</c>.</param>
/// <returns>
/// One <c>PageTag</c> per page written: the dictionary page first (when present), then the data page.
/// </returns>
/// <remarks>
/// NOTE(review): the branching below reads the <paramref name="schema"/> parameter while the
/// unpacking and bit-width calculations read the <c>_schema</c> field. This is preserved exactly
/// as found; confirm that both always refer to the same element.
/// </remarks>
private List<PageTag> WriteValues(SchemaElement schema, IList values, Thrift.PageHeader ph, CompressionMethod compression)
{
   var pageTags = new List<PageTag>();

   List<int> repetitionLevels = null;
   List<int> definitionLevels = null;

   // Repeatable field: flatten the values and collect the repetition levels.
   if (schema.MaxRepetitionLevel > 0)
   {
      values = new RepetitionPack().Unpack(_schema, values, out repetitionLevels);
      ph.Data_page_header.Num_values = values.Count;
   }

   if (schema.IsNullable || schema.MaxDefinitionLevel > 0)
   {
      values = new DefinitionPack().Unpack(values, _schema, out definitionLevels);
   }

   byte[] dictionaryBytes = null;
   int dictionaryCount = 0;
   byte[] dataBytes;

   using (var dataStream = new MemoryStream())
   using (var dataWriter = new BinaryWriter(dataStream))
   {
      if (repetitionLevels != null)
      {
         int repetitionWidth = PEncoding.GetWidthFromMaxInt(_schema.MaxRepetitionLevel);
         RunLengthBitPackingHybridValuesWriter.Write(dataWriter, repetitionWidth, repetitionLevels);
      }

      if (definitionLevels != null)
      {
         int definitionWidth = PEncoding.GetWidthFromMaxInt(_schema.MaxDefinitionLevel);
         RunLengthBitPackingHybridValuesWriter.Write(dataWriter, definitionWidth, definitionLevels);
      }

      // The dictionary writer is only attempted when the option is on; its return value
      // decides whether the plain writer is used for the data instead.
      IList dictionaryValues = null;
      bool dictionaryEncoded =
         _writerOptions.UseDictionaryEncoding &&
         _dicWriter.Write(dataWriter, schema, values, out dictionaryValues);

      if (dictionaryEncoded)
      {
         dictionaryCount = dictionaryValues.Count;
         ph.Data_page_header.Encoding = Thrift.Encoding.PLAIN_DICTIONARY;

         // The dictionary values go into their own buffer via the plain writer.
         using (var dictionaryStream = new MemoryStream())
         using (var dictionaryWriter = new BinaryWriter(dictionaryStream))
         {
            _plainWriter.Write(dictionaryWriter, schema, dictionaryValues, out IList _);
            dictionaryBytes = dictionaryStream.ToArray();
         }
      }
      else
      {
         _plainWriter.Write(dataWriter, schema, values, out IList _);
      }

      dataBytes = dataStream.ToArray();
   }

   // The dictionary page, when there is one, is written ahead of the data page.
   if (dictionaryBytes != null)
   {
      Thrift.PageHeader dictionaryHeader = _meta.CreateDictionaryPage(dictionaryCount);
      dictionaryBytes = Compress(dictionaryHeader, dictionaryBytes, compression);
      int dictionaryHeaderSize = Write(dictionaryHeader, dictionaryBytes);

      pageTags.Add(new PageTag { HeaderSize = dictionaryHeaderSize, HeaderMeta = dictionaryHeader });
   }

   dataBytes = Compress(ph, dataBytes, compression);
   int dataHeaderSize = Write(ph, dataBytes);

   pageTags.Add(new PageTag { HeaderSize = dataHeaderSize, HeaderMeta = ph });

   return pageTags;
}
/// <summary>
/// Replaces the current values with the result of packing them against the given
/// repetition levels.
/// </summary>
/// <param name="repetitions">Repetition levels handed to <c>RepetitionPack.Pack</c>.</param>
private void ApplyRepetitions(List<int> repetitions)
{
   _values = new RepetitionPack().Pack(_schema, _values, repetitions);
}