コード例 #1
0
        /// <summary>
        /// Writes <paramref name="test"/> with the RLE v2 writer, reads it back with the RLE v2 reader
        /// and asserts that the decoded values equal the input.
        /// </summary>
        /// <param name="test">Values to encode and decode again.</param>
        /// <param name="isSigned">Passed to both the writer and the reader.</param>
        /// <param name="aligned">Passed to the writer.</param>
        /// <param name="expectedEncodeLength">When supplied, the encoded stream must have exactly this many bytes.</param>
        void TestRoundTrip(long[] test, bool isSigned, bool aligned, int? expectedEncodeLength = null)
        {
            var encoded = new MemoryStream();
            new IntegerRunLengthEncodingV2Writer(encoded).Write(test, isSigned, aligned);

            //An expected size was given, so the encoded byte count has to match it
            if (expectedEncodeLength != null)
            {
                Assert.Equal(expectedEncodeLength.Value, encoded.Length);
            }

            //Rewind so the reader starts at the first encoded byte
            encoded.Position = 0;
            var decoded = new IntegerRunLengthEncodingV2Reader(encoded, isSigned).Read().ToArray();

            //The reader must have consumed every byte that was written
            Assert.Equal(encoded.Length, encoded.Position);

            //Compare element by element
            Assert.Equal(test.Length, decoded.Length);
            var index = 0;
            foreach (var expectedValue in test)
            {
                Assert.Equal(expectedValue, decoded[index]);
                index++;
            }
        }
コード例 #2
0
        /// <summary>
        /// Encodes one block of decimal values. Registers a new statistics object for the block,
        /// annotates the current position of every buffer, and then writes the present bits
        /// (nullable columns only), the rescaled whole parts as var-ints and the scales as signed RLE v2.
        /// </summary>
        /// <param name="values">
        /// The values of the block. For a non-nullable column, a null entry makes the read of
        /// <c>.Value</c> throw.
        /// </param>
        public void AddBlock(IList<decimal?> values)
        {
            var blockStats = new DecimalWriterStatistics();
            Statistics.Add(blockStats);
            foreach (var buffer in Buffers)
            {
                buffer.AnnotatePosition(blockStats, 0);
            }

            var capacity = values.Count;
            var wholeParts = new List<long>(capacity);
            var scales = new List<long>(capacity);
            //Only a nullable column tracks which entries are present
            var presence = _isNullable ? new List<bool>(capacity) : null;

            foreach (var item in values)
            {
                blockStats.AddValue(item);

                if (presence != null)
                {
                    presence.Add(item.HasValue);
                    if (!item.HasValue)
                    {
                        continue; //Nothing to encode for a null entry
                    }
                }

                var rescaled = item.Value.ToLongAndScale().Rescale(_scale, truncateIfNecessary: false);
                wholeParts.Add(rescaled.Item1);
                scales.Add(rescaled.Item2);
            }

            if (presence != null)
            {
                new BitWriter(_presentBuffer).Write(presence);
                if (blockStats.HasNull)
                {
                    _presentBuffer.MustBeIncluded = true;
                }
            }

            new VarIntWriter(_dataBuffer).Write(wholeParts);
            new IntegerRunLengthEncodingV2Writer(_secondaryBuffer).Write(scales, true, _shouldAlignEncodedValues);
        }
コード例 #3
0
ファイル: DateWriter.cs プロジェクト: valxv/ApacheOrcDotNet
        /// <summary>
        /// Encodes one block of dates. Registers a new statistics object for the block, annotates the
        /// current buffer positions, converts every date to the number of whole days since
        /// <c>_unixEpoch</c> and writes the day numbers as signed RLE v2. For a nullable column the
        /// present bits are written as well.
        /// </summary>
        /// <param name="values">
        /// The values of the block. For a non-nullable column, a null entry makes the read of
        /// <c>.Value</c> throw.
        /// </param>
        public void AddBlock(IList<DateTime?> values)
        {
            var stats = new DateWriterStatistics();

            Statistics.Add(stats);
            if (_isNullable)
            {
                _presentBuffer.AnnotatePosition(stats, rleValuesToConsume: 0, bitsToConsume: 0);
            }
            _dataBuffer.AnnotatePosition(stats, rleValuesToConsume: 0);

            var datesList = new List<long>(values.Count);

            if (_isNullable)
            {
                var presentList = new List<bool>(values.Count);

                foreach (var value in values)
                {
                    if (!value.HasValue)
                    {
                        stats.AddValue(null);
                        presentList.Add(false);
                    }
                    else
                    {
                        //Floor rather than truncate: a plain (int) cast rounds toward zero, which would move a
                        //pre-epoch value carrying a time-of-day (e.g. 1969-12-31 12:00) onto the following day
                        var daysSinceEpoch = (int)System.Math.Floor((value.Value - _unixEpoch).TotalDays);
                        stats.AddValue(daysSinceEpoch);
                        presentList.Add(true);
                        datesList.Add(daysSinceEpoch);
                    }
                }

                var presentEncoder = new BitWriter(_presentBuffer);
                presentEncoder.Write(presentList);
                if (stats.HasNull)
                {
                    _presentBuffer.MustBeIncluded = true; //A null occurred, so the present stream has to be written
                }
            }
            else
            {
                foreach (var value in values)
                {
                    //Same floor-based conversion as in the nullable branch
                    var daysSinceEpoch = (int)System.Math.Floor((value.Value - _unixEpoch).TotalDays);
                    stats.AddValue(daysSinceEpoch);
                    datesList.Add(daysSinceEpoch);
                }
            }

            var datesEncoder = new IntegerRunLengthEncodingV2Writer(_dataBuffer);

            datesEncoder.Write(datesList, true, _shouldAlignEncodedValues);
        }
コード例 #4
0
        /// <summary>
        /// Encodes <paramref name="input"/> with the RLE v2 writer and asserts that the produced bytes
        /// are exactly <paramref name="expected"/>.
        /// </summary>
        /// <param name="expected">The byte sequence the writer is expected to produce.</param>
        /// <param name="input">Values to encode.</param>
        /// <param name="isSigned">Passed to the writer.</param>
        /// <param name="aligned">Passed to the writer.</param>
        void TestWrite(byte[] expected, long[] input, bool isSigned, bool aligned)
        {
            var sink = new MemoryStream();
            new IntegerRunLengthEncodingV2Writer(sink).Write(input, isSigned, aligned);
            var produced = sink.ToArray();

            //Same length first, then byte-for-byte equality
            Assert.Equal(expected.Length, produced.Length);
            var offset = 0;
            while (offset < expected.Length)
            {
                Assert.Equal(expected[offset], produced[offset]);
                offset++;
            }
        }
コード例 #5
0
        /// <summary>
        /// Encodes one block of integer values. Registers a new statistics object for the block,
        /// annotates the current buffer positions and writes the non-null values as signed RLE v2.
        /// For a nullable column the present bits are written as well.
        /// </summary>
        /// <param name="values">
        /// The values of the block. For a non-nullable column, a null entry makes the read of
        /// <c>.Value</c> throw.
        /// </param>
        public void AddBlock(IList<long?> values)
        {
            var blockStats = new LongWriterStatistics();
            Statistics.Add(blockStats);

            if (_isNullable)
            {
                _presentBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0, bitsToConsume: 0);
            }
            _dataBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0);

            var capacity = values.Count;
            var encodable = new List<long>(capacity);
            //Only a nullable column tracks which entries are present
            var presence = _isNullable ? new List<bool>(capacity) : null;

            foreach (var item in values)
            {
                blockStats.AddValue(item);

                if (presence != null)
                {
                    presence.Add(item.HasValue);
                    if (!item.HasValue)
                    {
                        continue; //Nothing to encode for a null entry
                    }
                }

                encodable.Add(item.Value);
            }

            if (presence != null)
            {
                new BitWriter(_presentBuffer).Write(presence);
                if (blockStats.HasNull)
                {
                    //A null occurred.  Make sure to write this stream
                    _presentBuffer.MustBeIncluded = true;
                }
            }

            new IntegerRunLengthEncodingV2Writer(_dataBuffer).Write(encodable, true, _shouldAlignEncodedValues);
        }
コード例 #6
0
        /// <summary>
        /// Encodes one block of binary values. Registers a new statistics object for the block,
        /// annotates the current position of every buffer, then writes the present bits, the raw
        /// bytes of every non-null entry and the entry lengths as unsigned RLE v2.
        /// </summary>
        /// <param name="values">The values of the block; entries may be null.</param>
        public void AddBlock(IList<byte[]> values)
        {
            var stats = new BinaryWriterStatistics();

            Statistics.Add(stats);
            foreach (var buffer in Buffers)
            {
                buffer.AnnotatePosition(stats, 0);
            }

            var bytesList   = new List<byte[]>(values.Count);
            var presentList = new List<bool>(values.Count);
            var lengthList  = new List<long>(values.Count);

            foreach (var bytes in values)
            {
                stats.AddValue(bytes);
                //Guard on the element, not on the list: testing 'values' here let null entries
                //through and crashed on bytes.Length
                if (bytes != null)
                {
                    bytesList.Add(bytes);
                    lengthList.Add(bytes.Length);
                }
                presentList.Add(bytes != null);
            }

            var presentEncoder = new BitWriter(_presentBuffer);

            presentEncoder.Write(presentList);
            if (stats.HasNull)
            {
                _presentBuffer.MustBeIncluded = true; //A null occurred, so the present stream has to be written
            }

            foreach (var bytes in bytesList)
            {
                _dataBuffer.Write(bytes, 0, bytes.Length);
            }

            var lengthEncoder = new IntegerRunLengthEncodingV2Writer(_lengthBuffer);

            lengthEncoder.Write(lengthList, false, _shouldAlignLengths);
        }
コード例 #7
0
        /// <summary>
        /// Encodes one block of timestamps. Registers a new statistics object for the block, annotates
        /// the current buffer positions and splits every non-null value via <c>GetValues</c> into a
        /// seconds part and a fraction part. Seconds are written as signed RLE v2 to the data buffer,
        /// fractions as unsigned RLE v2 to the secondary buffer. For a nullable column the present bits
        /// are written as well.
        /// </summary>
        /// <param name="values">
        /// The values of the block. For a non-nullable column, a null entry makes the read of
        /// <c>.Value</c> throw.
        /// </param>
        public void AddBlock(IList<DateTime?> values)
        {
            var blockStats = new TimestampWriterStatistics();
            Statistics.Add(blockStats);

            if (_isNullable)
            {
                _presentBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0, bitsToConsume: 0);
            }
            _dataBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0);
            _secondaryBuffer.AnnotatePosition(blockStats, rleValuesToConsume: 0);

            var capacity = values.Count;
            var secondsParts = new List<long>(capacity);
            var fractionParts = new List<long>(capacity);
            //Only a nullable column tracks which entries are present
            var presence = _isNullable ? new List<bool>(capacity) : null;

            foreach (var timestamp in values)
            {
                if (presence != null)
                {
                    presence.Add(timestamp.HasValue);
                    if (!timestamp.HasValue)
                    {
                        blockStats.AddValue(null);
                        continue; //Nothing to encode for a null entry
                    }
                }

                long millisecondsSinceUnixEpoch;
                long fraction;
                var seconds = GetValues(timestamp.Value, out millisecondsSinceUnixEpoch, out fraction);

                //Statistics receive the millisecond value; the streams receive seconds and fraction
                blockStats.AddValue(millisecondsSinceUnixEpoch);
                secondsParts.Add(seconds);
                fractionParts.Add(fraction);
            }

            if (presence != null)
            {
                new BitWriter(_presentBuffer).Write(presence);
                if (blockStats.HasNull)
                {
                    _presentBuffer.MustBeIncluded = true;
                }
            }

            new IntegerRunLengthEncodingV2Writer(_dataBuffer).Write(secondsParts, true, _shouldAlignEncodedValues);
            new IntegerRunLengthEncodingV2Writer(_secondaryBuffer).Write(fractionParts, false, _shouldAlignEncodedValues);
        }
コード例 #8
0
ファイル: StringWriter.cs プロジェクト: sindhudweep/Orcneas
        /// <summary>
        /// Writes the dictionary-encoded form of the collected strings: the dictionary entries in
        /// ordinal sort order (UTF-8 bytes plus unsigned RLE v2 lengths), followed by the present bits
        /// and dictionary ids of every collected value. A new statistics object is started every
        /// <c>_strideLength</c> values, and the buffers are flushed at the end of each stride.
        /// </summary>
        private void WriteDictionaryEncodedData()
        {
            //Sort the dictionary and assign every entry its position in the sorted order as Id
            var sortedDictionary = new List<string>();
            var i = 0;

            foreach (var dictEntry in _unsortedDictionary.OrderBy(d => d.Key, StringComparer.Ordinal))
            {
                sortedDictionary.Add(dictEntry.Key);
                dictEntry.Value.Id = i++;
            }

            //Write the dictionary
            var dictionaryLengthList = new List<long>();

            foreach (var dictEntry in sortedDictionary)
            {
                var bytes = Encoding.UTF8.GetBytes(dictEntry);
                dictionaryLengthList.Add(bytes.Length);              //Save the length
                _dictionaryDataBuffer.Write(bytes, 0, bytes.Length); //Write to the buffer
            }

            //Write the dictionary lengths
            var dictionaryLengthEncoder = new IntegerRunLengthEncodingV2Writer(_lengthBuffer);

            dictionaryLengthEncoder.Write(dictionaryLengthList, false, _shouldAlignLengths);

            //Write the lookup values
            var presentList              = new List<bool>(_dictionaryLookupValues.Count);
            var presentEncoder           = new BitWriter(_presentBuffer);
            var lookupList               = new List<long>(_dictionaryLookupValues.Count);
            var lookupEncoder            = new IntegerRunLengthEncodingV2Writer(_dataBuffer);
            var hasNull                  = false;
            var strideCount              = 0;
            StringWriterStatistics stats = null;

            foreach (var value in _dictionaryLookupValues)
            {
                if (stats == null)
                {
                    //First value of a stride: start new statistics and note where the stride begins
                    stats = new StringWriterStatistics();
                    Statistics.Add(stats);
                    foreach (var buffer in Buffers)
                    {
                        buffer.AnnotatePosition(stats, 0);
                    }
                }

                //Look up the string value for this Id so we can notate statistics.
                //A null entry has no Id, so it is reported to the statistics as null.
                var stringValue = value != null ? sortedDictionary[value.Id] : null;
                stats.AddValue(stringValue);
                presentList.Add(value != null);
                if (value != null)
                {
                    lookupList.Add(value.Id);
                }
                else
                {
                    hasNull = true;
                }

                if (++strideCount == _strideLength) //If it's time for new statistics
                {
                    FlushDictionaryStride(presentEncoder, presentList, lookupEncoder, lookupList, hasNull);

                    strideCount = 0;
                    stats       = null;
                }
            }

            //The last stride is usually shorter than _strideLength; without this flush its
            //present bits and lookup ids would never reach the buffers
            if (strideCount > 0)
            {
                FlushDictionaryStride(presentEncoder, presentList, lookupEncoder, lookupList, hasNull);
            }
        }

        /// <summary>
        /// Writes the pending present bits and lookup ids of one stride to their buffers and clears
        /// both lists so they can collect the next stride.
        /// </summary>
        private void FlushDictionaryStride(BitWriter presentEncoder, List<bool> presentList,
                                           IntegerRunLengthEncodingV2Writer lookupEncoder, List<long> lookupList,
                                           bool hasNull)
        {
            presentEncoder.Write(presentList);
            presentList.Clear();
            if (hasNull)
            {
                _presentBuffer.MustBeIncluded = true; //A null occurred, so the present stream has to be written
            }
            lookupEncoder.Write(lookupList, false, _shouldAlignDictionaryLookup);
            lookupList.Clear();
        }
コード例 #9
0
ファイル: StringWriter.cs プロジェクト: sindhudweep/Orcneas
        /// <summary>
        /// Adds one block of strings according to the current column encoding.
        /// With DirectV2 the block is encoded right away: statistics, present bits, UTF-8 bytes and
        /// unsigned RLE v2 lengths. With DictionaryV2 the values are only collected into the dictionary
        /// and the lookup list.
        /// </summary>
        /// <param name="values">The values of the block; entries may be null.</param>
        /// <exception cref="ArgumentException">The column encoding is neither DirectV2 nor DictionaryV2.</exception>
        public void AddBlock(IList<string> values)
        {
            EnsureEncodingKindIsSet(values);

            var encoding = ColumnEncoding;

            if (encoding == ColumnEncodingKind.DictionaryV2)
            {
                foreach (var text in values)
                {
                    if (text == null)
                    {
                        _dictionaryLookupValues.Add(null);
                        continue;
                    }

                    //Reuse the entry of a string seen before, otherwise register a new one
                    DictionaryEntry known;
                    if (!_unsortedDictionary.TryGetValue(text, out known))
                    {
                        known = new DictionaryEntry();
                        _unsortedDictionary.Add(text, known);
                    }
                    _dictionaryLookupValues.Add(known);
                }
                return;
            }

            if (encoding != ColumnEncodingKind.DirectV2)
            {
                throw new ArgumentException();
            }

            var blockStats = new StringWriterStatistics();
            Statistics.Add(blockStats);
            foreach (var buffer in Buffers)
            {
                //Our implementation always ends the RLE at the stride
                buffer.AnnotatePosition(blockStats, 0);
            }

            var capacity = values.Count;
            var encodedStrings = new List<byte[]>(capacity);
            var presence = new List<bool>(capacity);
            var lengths = new List<long>(capacity);

            foreach (var text in values)
            {
                blockStats.AddValue(text);

                var isPresent = text != null;
                presence.Add(isPresent);
                if (isPresent)
                {
                    var utf8 = Encoding.UTF8.GetBytes(text);
                    encodedStrings.Add(utf8);
                    lengths.Add(utf8.Length);
                }
            }

            new BitWriter(_presentBuffer).Write(presence);
            if (blockStats.HasNull)
            {
                _presentBuffer.MustBeIncluded = true;
            }

            foreach (var utf8 in encodedStrings)
            {
                _dataBuffer.Write(utf8, 0, utf8.Length);
            }

            new IntegerRunLengthEncodingV2Writer(_lengthBuffer).Write(lengths, false, _shouldAlignLengths);
        }