/// <summary>
/// Reads a run of levels from the page stream; usable for both repetition
/// and definition levels, which share the RLE/bit-packed hybrid encoding.
/// </summary>
/// <param name="reader">Positioned at the start of the encoded level run.</param>
/// <param name="maxLevel">Maximum level value, depends on level type; determines the bit width.</param>
/// <returns>Decoded level values in read order.</returns>
private List<int> ReadLevels(BinaryReader reader, int maxLevel)
{
   int width = PEncoding.GetWidthFromMaxInt(maxLevel);
   var levels = new List<int>();

   //todo: there might be more data on larger files, therefore line below need to be called in a loop until valueCount is satisfied
   RunLengthBitPackingHybridValuesReader.ReadRleBitpackedHybrid(reader, width, 0, levels);

   return levels;
}
/// <summary>
/// Reads the definition levels for this column chunk and decides whether the
/// column contains any nulls.
/// </summary>
/// <param name="reader">Positioned at the start of the encoded definition levels.</param>
/// <param name="valueCount">Total number of values (including nulls) expected on the page.</param>
/// <returns>
/// The decoded definition levels, or null when every value is defined
/// (no nulls), so callers can skip null reconciliation entirely.
/// </returns>
private List<int> ReadDefinitionLevels(BinaryReader reader, int valueCount)
{
   // Derive the bit width from the schema-computed maximum definition level.
   // Previously this used a hard-coded constant of 1 (flagged by a TODO),
   // which produced a wrong bit width for nested columns where max level > 1.
   int maxLevel = _schema.GetMaxDefinitionLevel(_thriftChunk);
   int bitWidth = PEncoding.GetWidthFromMaxInt(maxLevel);
   var result = new List<int>();

   //todo: there might be more data on larger files, therefore line below need to be called in a loop until valueCount is satisfied
   RunLengthBitPackingHybridValuesReader.ReadRleBitpackedHybrid(reader, bitWidth, 0, result, valueCount);

   // A value is non-null exactly when its definition level equals the maximum.
   int nullCount = valueCount - result.Count(r => r == maxLevel);
   if (nullCount == 0)
   {
      return null;
   }

   return result;
}
/// <summary>
/// Serializes one page of column values — optionally preceded by repetition levels,
/// definition levels and a dictionary page — compresses the page bodies and writes
/// them out, returning a tag for each page written.
/// </summary>
/// <param name="schema">Schema element for the column being written; its max repetition/definition levels drive level emission.</param>
/// <param name="values">Values for the page; may be re-packed (flattened / null-stripped) before encoding.</param>
/// <param name="ph">Pre-built data page header; Num_values and Encoding are mutated here when re-packing or dictionary encoding applies.</param>
/// <param name="compression">Compression method passed to Compress for each page body.</param>
/// <returns>PageTags (header size + header metadata), dictionary page tag first when one was produced, then the data page tag.</returns>
private List<PageTag> WriteValues(SchemaElement schema, IList values, Thrift.PageHeader ph, CompressionMethod compression)
{
   var result = new List<PageTag>();
   byte[] dictionaryPageBytes = null;   // stays null unless dictionary encoding succeeds below
   int dictionaryPageCount = 0;
   byte[] dataPageBytes;
   List<int> repetitions = null;
   List<int> definitions = null;

   //flatten values and create repetitions list if the field is repeatable
   if (schema.MaxRepetitionLevel > 0)
   {
      var rpack = new RepetitionPack();
      values = rpack.Unpack(_schema, values, out repetitions);
      // flattening changes the physical value count, so the page header must be updated to match
      ph.Data_page_header.Num_values = values.Count;
   }

   // nullable (or nested-optional) fields additionally need definition levels
   if (schema.IsNullable || schema.MaxDefinitionLevel > 0)
   {
      var dpack = new DefinitionPack();
      values = dpack.Unpack(values, _schema, out definitions);
   }

   using (var ms = new MemoryStream())
   {
      using (var writer = new BinaryWriter(ms))
      {
         //write repetitions
         if (repetitions != null)
         {
            int bitWidth = PEncoding.GetWidthFromMaxInt(_schema.MaxRepetitionLevel);
            RunLengthBitPackingHybridValuesWriter.Write(writer, bitWidth, repetitions);
         }

         //write definitions
         if (definitions != null)
         {
            int bitWidth = PEncoding.GetWidthFromMaxInt(_schema.MaxDefinitionLevel);
            RunLengthBitPackingHybridValuesWriter.Write(writer, bitWidth, definitions);
         }

         // write data: attempt dictionary encoding when enabled; if the dictionary
         // writer declines (returns false), fall back to plain encoding inline
         if (!_writerOptions.UseDictionaryEncoding || !_dicWriter.Write(writer, schema, values, out IList dicValues))
         {
            _plainWriter.Write(writer, schema, values, out IList plainExtra);
         }
         else
         {
            dictionaryPageCount = dicValues.Count;
            ph.Data_page_header.Encoding = Thrift.Encoding.PLAIN_DICTIONARY;
            // the dictionary values themselves are plain-encoded into a separate buffer
            // that becomes the dictionary page body
            using (var dms = new MemoryStream())
               using (var dwriter = new BinaryWriter(dms))
               {
                  _plainWriter.Write(dwriter, schema, dicValues, out IList t0);
                  dictionaryPageBytes = dms.ToArray();
               }
         }

         dataPageBytes = ms.ToArray();
      }
   }

   // emit the dictionary page (if any) before the data page
   if (dictionaryPageBytes != null)
   {
      Thrift.PageHeader dph = _meta.CreateDictionaryPage(dictionaryPageCount);
      dictionaryPageBytes = Compress(dph, dictionaryPageBytes, compression);
      int dictionaryHeaderSize = Write(dph, dictionaryPageBytes);
      result.Add(new PageTag { HeaderSize = dictionaryHeaderSize, HeaderMeta = dph });
   }

   dataPageBytes = Compress(ph, dataPageBytes, compression);
   int dataHeaderSize = Write(ph, dataPageBytes);
   result.Add(new PageTag { HeaderSize = dataHeaderSize, HeaderMeta = ph });

   return(result);
}