/// <summary>
        /// Writes a DIRECT run: a two-byte header followed by every value bit-packed at one fixed width.
        /// </summary>
        /// <param name="values">Values to pack. Enumerated when written, and once more beforehand when no width is supplied.</param>
        /// <param name="numValues">Number of values in the run; the header stores <c>numValues - 1</c>.</param>
        /// <param name="aligned">When true the width is rounded with FindNearestAlignedDirectWidth, otherwise with FindNearestDirectWidth.</param>
        /// <param name="precalculatedFixedBitWidth">Bit width already known to the caller, or null to derive it from a histogram of the values.</param>
        private void DirectEncode(IEnumerable <long> values, int numValues, bool aligned,
                                  int?precalculatedFixedBitWidth)
        {
            //Only build the histogram when the caller did not hand us a width
            int requestedBitWidth = precalculatedFixedBitWidth
                                    ?? BitManipulation.GetBitsRequiredForPercentile(values.GenerateHistogramOfBitWidths(), 1.0);

            int packedBitWidth;
            if (aligned)
            {
                packedBitWidth = BitManipulation.FindNearestAlignedDirectWidth(requestedBitWidth);
            }
            else
            {
                packedBitWidth = BitManipulation.FindNearestDirectWidth(requestedBitWidth);
            }

            int widthCode   = packedBitWidth.EncodeDirectWidth();
            int lengthField = numValues - 1;

            //Header byte 1: 7..6 encoding type (1), 5..1 encoded width, 0 MSB of length
            int headerHigh = (0x1 << 6) | ((widthCode & 0x1f) << 1) | (lengthField >> 8);
            //Header byte 2: 7..0 LSBs of length
            int headerLow = lengthField & 0xff;

            _outputStream.WriteByte((byte)headerHigh);
            _outputStream.WriteByte((byte)headerLow);
            _outputStream.WriteBitpackedIntegers(values, packedBitWidth);
        }
        /// <summary>
        /// Writes a patched-base run: a four-byte header, the base value, the bit-packed reduced values,
        /// and the bit-packed gap/patch list.
        /// </summary>
        /// <param name="baseValue">Value that was subtracted from every entry; stored as magnitude plus a sign bit.</param>
        /// <param name="baseReducedValues">Values with the base already removed. GeneratePatchList receives this array before it is written.</param>
        /// <param name="originalBitWidth">Bit width needed for all of the reduced values.</param>
        /// <param name="reducedBitWidth">Bit width the unpatched values are stored at.</param>
        private void PatchEncode(long baseValue, long[] baseReducedValues, int originalBitWidth, int reducedBitWidth)
        {
            //The base is stored sign-and-magnitude, so work with the magnitude first
            bool negativeBase  = baseValue < 0;
            long baseMagnitude = negativeBase ? -baseValue : baseValue;

            //One extra bit is reserved for the sign, then round up to whole bytes
            int baseBitCount  = BitManipulation.NumBits((ulong)baseMagnitude) + 1;
            int baseByteCount = baseBitCount / 8 + (baseBitCount % 8 != 0 ? 1 : 0);

            //A negative base is flagged by setting the most significant bit of the stored bytes
            long storedBase = negativeBase
                ? baseMagnitude | (1L << (baseByteCount * 8 - 1))
                : baseMagnitude;

            int patchBitWidth = BitManipulation.FindNearestDirectWidth(originalBitWidth - reducedBitWidth);
            if (patchBitWidth == 64)
            {
                //Swap in a 56-bit patch over an 8-bit value width
                patchBitWidth   = 56;
                reducedBitWidth = 8;
            }

            int patchWidthCode = patchBitWidth.EncodeDirectWidth();
            int valueBitWidth  = BitManipulation.FindNearestDirectWidth(reducedBitWidth);
            int valueWidthCode = valueBitWidth.EncodeDirectWidth();

            int    gapBitWidth;
            long[] patchGapList      = GeneratePatchList(baseReducedValues, patchBitWidth, reducedBitWidth, out gapBitWidth);
            int    patchListBitWidth = BitManipulation.FindNearestDirectWidth(gapBitWidth + patchBitWidth);

            int lengthField = baseReducedValues.Length - 1;

            //Byte 1: 7..6 encoding type (2), 5..1 value bit width, 0 MSB of length
            int header1 = (0x2 << 6) | ((valueWidthCode & 0x1f) << 1) | (lengthField >> 8);
            //Byte 2: 7..0 LSBs of length
            int header2 = lengthField & 0xff;
            //Byte 3: 7..5 base value byte width, 4..0 encoded patch bit width
            int header3 = ((baseByteCount - 1) << 5) | (patchWidthCode & 0x1f);
            //Byte 4: 7..5 gap bit width, 4..0 patch/gap list length
            int header4 = ((gapBitWidth - 1) << 5) | (patchGapList.Length & 0x1f);

            _outputStream.WriteByte((byte)header1);
            _outputStream.WriteByte((byte)header2);
            _outputStream.WriteByte((byte)header3);
            _outputStream.WriteByte((byte)header4);
            _outputStream.WriteLongBE(baseByteCount, storedBase);
            _outputStream.WriteBitpackedIntegers(baseReducedValues, valueBitWidth);
            _outputStream.WriteBitpackedIntegers(patchGapList, patchListBitWidth);
        }
// Example #3
// 0
 /// <summary>
 /// Lazily yields big integers read from the input stream, one per ReadBigVarInt call,
 /// and stops the first time that call returns no value.
 /// </summary>
 public IEnumerable <BigInteger> Read()
 {
     while (true)
     {
         var next = BitManipulation.ReadBigVarInt(_inputStream);
         if (!next.HasValue)
         {
             //Nothing more to read
             yield break;
         }

         yield return next.Value;
     }
 }
        /// <summary>
        /// Writes a SHORT_REPEAT run: one header byte followed by the repeated value in big-endian bytes.
        /// </summary>
        /// <param name="value">The value being repeated.</param>
        /// <param name="repeatCount">How many times the value repeats; stored in the header as <c>repeatCount - 3</c>.</param>
        private void ShortRepeatEncode(long value, int repeatCount)
        {
            //Round the significant bit count up to whole bytes
            //NOTE(review): if NumBits can return 0 (e.g. for value 0) the byte width becomes 0 — confirm against BitManipulation
            int significantBits = BitManipulation.NumBits((ulong)value);
            int byteWidth       = significantBits / 8 + (significantBits % 8 != 0 ? 1 : 0);

            //7..6 encoding type (0), 5..3 byte width - 1, 2..0 repeat count - 3
            int header = (0x0 << 6) | ((byteWidth - 1) << 3) | (repeatCount - 3);

            _outputStream.WriteByte((byte)header);
            _outputStream.WriteLongBE(byteWidth, value);
        }
        /// <summary>
        /// Writes the run with patched-base encoding when trimming the largest values saves bits.
        /// </summary>
        /// <param name="zigZagValues">Zig-zag encoded values, used only to measure bit widths.</param>
        /// <param name="values">Original values; each has <paramref name="minValue"/> subtracted before encoding.</param>
        /// <param name="minValue">Base value removed from every entry.</param>
        /// <param name="fixedBitWidth">Always set to the 100th percentile width of the zig-zag values so the caller can reuse it for DIRECT encoding.</param>
        /// <param name="length">Number of values written on success, otherwise 0.</param>
        /// <returns>True when the run was written via PatchEncode; false when patching offers no saving.</returns>
        private bool TryPatchEncoding(IEnumerable <long> zigZagValues, IList <long> values, long minValue,
                                      ref int?fixedBitWidth, out int length)
        {
            var zigZagHistogram = zigZagValues.GenerateHistogramOfBitWidths();
            int zigZagFullBits  = BitManipulation.GetBitsRequiredForPercentile(zigZagHistogram, 1.0);

            //Hand the full width back; it is reused if we end up DIRECT encoding
            fixedBitWidth = zigZagFullBits;

            int zigZagNinetiethBits = BitManipulation.GetBitsRequiredForPercentile(zigZagHistogram, 0.9);
            if (zigZagFullBits == zigZagNinetiethBits)
            {
                //Dropping the widest 10% of values would not shrink the width at all
                length = 0;
                return false;
            }

            //Remove the base from every value
            var reduced = new long[values.Count];
            for (var index = 0; index < reduced.Length; index++)
            {
                reduced[index] = values[index] - minValue;
            }

            var reducedHistogram       = reduced.GenerateHistogramOfBitWidths();
            int reducedFullBits        = BitManipulation.GetBitsRequiredForPercentile(reducedHistogram, 1.0);
            int reducedNinetyFifthBits = BitManipulation.GetBitsRequiredForPercentile(reducedHistogram, 0.95);

            if (reducedFullBits == reducedNinetyFifthBits)
            {
                //Even after base reduction, patching would not save any bits
                length = 0;
                return false;
            }

            PatchEncode(minValue, reduced, reducedFullBits, reducedNinetyFifthBits);
            length = values.Count;
            return true;
        }
        /// <summary>
        /// Decodes a patched-base run whose first header byte has already been read, yielding each
        /// value with the base added back and any patch bits applied.
        /// </summary>
        /// <param name="firstByte">First header byte: bits 5..1 carry the encoded value width, bit 0 the MSB of the length.</param>
        /// <returns>The decoded values, produced lazily.</returns>
        /// <exception cref="InvalidDataException">The gap width plus the patch width exceeds 64 bits.</exception>
        IEnumerable <long> ReadPatchedBaseValues(int firstByte)
        {
            //Byte 1 (already read) plus byte 2: value width and run length (stored as length - 1)
            int valueWidth = ((firstByte >> 1) & 0x1f).DecodeDirectWidth();
            int runLength  = (((firstByte & 0x1) << 8) | _inputStream.CheckedReadByte()) + 1;

            //Byte 3: 7..5 base value byte width - 1, 4..0 encoded patch width
            var headerByte3   = _inputStream.CheckedReadByte();
            int baseByteCount = ((headerByte3 >> 5) & 0x7) + 1;
            int patchWidth    = (headerByte3 & 0x1f).DecodeDirectWidth();

            //Byte 4: 7..5 gap width - 1, 4..0 patch list length
            var headerByte4 = _inputStream.CheckedReadByte();
            int gapWidth    = ((headerByte4 >> 5) & 0x7) + 1;
            int patchCount  = headerByte4 & 0x1f;

            //The base is stored sign-and-magnitude: a set top bit means negative
            long baseValue = _inputStream.ReadLongBE(baseByteCount);
            long signBit   = 1L << (baseByteCount * 8 - 1);
            if ((baseValue & signBit) != 0)
            {
                baseValue = -(baseValue & ~signBit);
            }

            //Buffer every value up front so patches can be merged in while iterating
            var unpatched = _inputStream.ReadBitpackedIntegers(valueWidth, runLength).ToArray();

            if (gapWidth + patchWidth > 64)
            {
                throw new InvalidDataException($"{nameof(patchGapWidth)} ({gapWidth}) + {nameof(patchWidth)} ({patchWidth}) > 64");
            }

            int patchEntryWidth = BitManipulation.FindNearestDirectWidth(patchWidth + gapWidth);
            var patchEntries    = _inputStream.ReadBitpackedIntegers(patchEntryWidth, patchCount).ToArray();

            long patchMask    = (1L << patchWidth) - 1;
            int  nextEntry    = 0;
            long patchedIndex = 0;
            long patchBits;

            //GetNextPatch is defined elsewhere; it is handed the entry cursor, the running index and the patch output
            GetNextPatch(patchEntries, ref nextEntry, ref patchedIndex, out patchBits, patchWidth, patchMask);

            for (int position = 0; position < runLength; position++)
            {
                if (position != patchedIndex)
                {
                    yield return baseValue + unpatched[position];
                    continue;
                }

                //Patch bits sit directly above the stored low bits
                yield return baseValue + (unpatched[position] | (patchBits << valueWidth));

                if (nextEntry < patchCount)
                {
                    GetNextPatch(patchEntries, ref nextEntry, ref patchedIndex, out patchBits, patchWidth, patchMask);
                }
            }
        }

        //Names referenced by nameof() in the exception message above; kept so the message text is unchanged
        private const int patchGapWidth = 0;
        /// <summary>
        /// Strips the bits above <paramref name="reducedBitWidth"/> from every oversized value (in place)
        /// and returns them as a list of packed gap/patch entries.
        /// </summary>
        /// <param name="baseReducedValues">Base-reduced values; entries larger than the mask are truncated to their low bits.</param>
        /// <param name="patchBitWidth">Number of bits a patch occupies; the gap is stored directly above it in each entry.</param>
        /// <param name="reducedBitWidth">Number of low bits that stay in the value array.</param>
        /// <param name="gapBitWidth">Receives the bit width used for gaps, capped at 8.</param>
        /// <returns>One entry per patch, plus filler entries (gap 255, patch 0) wherever a gap exceeds 255.</returns>
        private long[] GeneratePatchList(long[] baseReducedValues, int patchBitWidth, int reducedBitWidth,
                                         out int gapBitWidth)
        {
            long lowBitsMask = (1L << reducedBitWidth) - 1;

            //Roughly 5% of the values are expected to need a patch; this only sizes the lists
            int expectedPatches = (int)(baseReducedValues.Length * 0.05 + .5);
            var gaps            = new List <int>(expectedPatches);
            var patches         = new List <long>(expectedPatches);

            int lastPatchedIndex = 0;
            int largestGap       = 0;

            for (var index = 0; index < baseReducedValues.Length; index++)
            {
                long current = baseReducedValues[index];
                if (current <= lowBitsMask)
                {
                    continue; //Fits in the reduced width, no patch needed
                }

                int distance = index - lastPatchedIndex;
                if (distance > largestGap)
                {
                    largestGap = distance;
                }

                gaps.Add(distance);
                patches.Add((long)((ulong)current >> reducedBitWidth));

                //Keep only the low bits in the value array
                baseReducedValues[index] = current & lowBitsMask;
                lastPatchedIndex         = index;
            }

            int entryCount = gaps.Count;

            //A gap of zero still needs one bit when there is at least one patch
            gapBitWidth = (largestGap == 0 && entryCount != 0)
                ? 1
                : BitManipulation.FindNearestDirectWidth(BitManipulation.NumBits((ulong)largestGap));

            if (gapBitWidth > 8)
            {
                //Gaps are capped at 8 bits; a longer gap is split into filler entries of 255.
                //A gap of 511 needs two fillers, anything else one.
                gapBitWidth = 8;
                entryCount += largestGap == 511 ? 2 : 1;
            }

            var packed = new long[entryCount];
            int cursor = 0;

            for (var n = 0; n < gaps.Count; n++)
            {
                long remainingGap = gaps[n];
                while (remainingGap > 255)
                {
                    packed[cursor++] = 255L << patchBitWidth;
                    remainingGap    -= 255;
                }
                packed[cursor++] = (remainingGap << patchBitWidth) | patches[n];
            }

            return packed;
        }
        /// <summary>
        /// Attempts to write <paramref name="values"/> as a delta-encoded run. This succeeds when all values
        /// are identical, when consecutive values differ by a constant amount, or when the sequence never
        /// reverses direction (it only rises or only falls).
        /// </summary>
        /// <param name="values">Values to encode. At least two are required: <c>values[1]</c> is read unconditionally.</param>
        /// <param name="areSigned">Passed through to DeltaEncode.</param>
        /// <param name="aligned">When true the delta width is rounded with FindNearestAlignedDirectWidth, otherwise with FindNearestDirectWidth.</param>
        /// <param name="length">Number of values written on success, otherwise 0.</param>
        /// <param name="minValue">Receives the smallest value seen, whether or not encoding succeeds.</param>
        /// <returns>
        /// <c>Success</c> when the run was written, <c>Overflow</c> when max - min would overflow,
        /// or <c>NonMonotonic</c> when the sequence changes direction.
        /// </returns>
        private DeltaEncodingResult TryDeltaEncoding(IList <long> values, bool areSigned, bool aligned, out int length,
                                                     out long minValue)
        {
            var deltas       = new long[values.Count - 1];
            var initialValue = values[0];

            minValue = initialValue; //This gets saved for the patch base if things don't work out here
            var maxValue     = initialValue;
            var initialDelta = values[1] - initialValue;
            var curDelta     = initialDelta;

            //This is different from the java implementation.  I believe their implementation may be a bug.
            //The first delta value is not considered for the delta bit width, so don't include it in the max value calculation
            long deltaMax = 0;

            var isIncreasing    = initialDelta > 0;
            var isDecreasing    = initialDelta < 0;
            var isConstantDelta = true;

            deltas[0] = initialDelta;

            var previousValue = initialValue;
            var i             = 1;

            foreach (var value in values.Skip(1))
            {
                curDelta = value - previousValue;
                if (value < minValue)
                {
                    minValue = value;
                }
                if (value > maxValue)
                {
                    maxValue = value;
                }

                if (value < previousValue) //A drop breaks an increasing run
                {
                    isIncreasing = false;
                }
                if (value > previousValue) //A rise breaks a decreasing run
                {
                    isDecreasing = false;
                }

                if (curDelta != initialDelta)
                {
                    isConstantDelta = false;
                }

                if (i > 1) //Don't rewrite the first value because it holds the sign of the remaining values
                {
                    var absCurrDelta = Math.Abs(curDelta);
                    deltas[i - 1] = absCurrDelta;
                    if (absCurrDelta > deltaMax)
                    {
                        deltaMax = absCurrDelta;
                    }
                }

                i++;
                previousValue = value;
            }

            if (BitManipulation.SubtractionWouldOverflow(maxValue, minValue))
            {
                length = 0;
                return(DeltaEncodingResult.Overflow);
            }

            if (maxValue == minValue) //All values were identical
            {
                DeltaEncode(minValue, areSigned, values.Count);
                length = values.Count;
                return(DeltaEncodingResult.Success);
            }

            if (isConstantDelta) //All values changed by set amount
            {
                DeltaEncode(initialValue, areSigned, curDelta, values.Count);
                length = values.Count;
                return(DeltaEncodingResult.Success);
            }

            if (isIncreasing || isDecreasing)
            {
                //Width is sized from the largest delta magnitude after the first delta
                var deltaBits = BitManipulation.NumBits((ulong)deltaMax);
                deltaBits = aligned
                    ? BitManipulation.FindNearestAlignedDirectWidth(deltaBits)
                    : BitManipulation.FindNearestDirectWidth(deltaBits);

                DeltaEncode(initialValue, areSigned, values.Count, deltas, deltaBits);
                length = values.Count;
                return(DeltaEncodingResult.Success);
            }

            length = 0;
            return(DeltaEncodingResult.NonMonotonic);
        }
// Example #9
// 0
        /// <summary>
        /// Attempts to write <paramref name="values"/> as a delta-encoded run: either a run of identical
        /// values, a run with a constant step, or a run that only rises or only falls.
        /// </summary>
        /// <param name="values">Values to encode. At least two are required: <c>values[1]</c> is read unconditionally.</param>
        /// <param name="areSigned">Passed through to DeltaEncode.</param>
        /// <param name="aligned">When true the delta width is rounded with FindNearestAlignedDirectWidth, otherwise with FindNearestDirectWidth.</param>
        /// <param name="length">Number of values written on success, otherwise 0.</param>
        /// <param name="minValue">Receives the smallest value seen; it is saved for the patch base if delta encoding does not work out.</param>
        /// <returns><c>Success</c>, <c>Overflow</c> (max - min would overflow) or <c>NonMonotonic</c>.</returns>
        DeltaEncodingResult TryDeltaEncoding(IList <long> values, bool areSigned, bool aligned, out int length, out long minValue)
        {
            var  deltas = new long[values.Count - 1];
            long first  = values[0];

            minValue = first;
            long maxValue = first;

            long second     = values[1];
            long firstDelta = second - first;

            //The first delta keeps its sign and is stored as-is
            deltas[0] = firstDelta;

            if (second < minValue)
            {
                minValue = second;
            }
            if (second > maxValue)
            {
                maxValue = second;
            }

            bool stillRising   = firstDelta > 0;
            bool stillFalling  = firstDelta < 0;
            bool constantDelta = true;

            //This is different from the java implementation: the first delta is not considered for the
            //delta bit width, so it is left out of the maximum
            long largestMagnitude = 0;
            long lastDelta        = firstDelta;
            long previous         = second;

            //The first two values are already accounted for, so start with the third
            for (int index = 2; index < values.Count; index++)
            {
                long current = values[index];
                lastDelta = current - previous;

                if (current < minValue)
                {
                    minValue = current;
                }
                if (current > maxValue)
                {
                    maxValue = current;
                }

                if (current < previous)
                {
                    stillRising = false;
                }
                else if (current > previous)
                {
                    stillFalling = false;
                }

                constantDelta &= lastDelta == firstDelta;

                //Later deltas are stored as magnitudes
                long magnitude = Math.Abs(lastDelta);
                deltas[index - 1] = magnitude;
                if (magnitude > largestMagnitude)
                {
                    largestMagnitude = magnitude;
                }

                previous = current;
            }

            if (BitManipulation.SubtractionWouldOverflow(maxValue, minValue))
            {
                length = 0;
                return DeltaEncodingResult.Overflow;
            }

            if (maxValue == minValue)
            {
                //Every value was identical
                DeltaEncode(minValue, areSigned, values.Count);
                length = values.Count;
                return DeltaEncodingResult.Success;
            }

            if (constantDelta)
            {
                //Every value changed by the same amount
                DeltaEncode(first, areSigned, lastDelta, values.Count);
                length = values.Count;
                return DeltaEncodingResult.Success;
            }

            if (!stillRising && !stillFalling)
            {
                length = 0;
                return DeltaEncodingResult.NonMonotonic;
            }

            int rawDeltaBits = BitManipulation.NumBits((ulong)largestMagnitude);
            int deltaBits    = aligned
                ? BitManipulation.FindNearestAlignedDirectWidth(rawDeltaBits)
                : BitManipulation.FindNearestDirectWidth(rawDeltaBits);

            DeltaEncode(first, areSigned, values.Count, deltas, deltaBits);
            length = values.Count;
            return DeltaEncodingResult.Success;
        }