/// <summary>
/// Writes a run using the direct encoding: a two-byte header followed by every value bit-packed
/// at one shared width.
/// </summary>
/// <param name="values">Values to bit-pack; also scanned for a bit-width histogram when no width is supplied.</param>
/// <param name="numValues">Number of values in the run; stored in the header as <c>numValues - 1</c>.</param>
/// <param name="aligned">When true the width is rounded with the aligned helper, otherwise with the unaligned one.</param>
/// <param name="precalculatedFixedBitWidth">Width to start from, or null to derive it from <paramref name="values"/>.</param>
private void DirectEncode(IEnumerable<long> values, int numValues, bool aligned, int? precalculatedFixedBitWidth)
{
    //Start from the caller's width when given, otherwise from the 1.0 percentile of the value bit widths
    int requestedWidth = precalculatedFixedBitWidth.HasValue
        ? precalculatedFixedBitWidth.Value
        : BitManipulation.GetBitsRequiredForPercentile(values.GenerateHistogramOfBitWidths(), 1.0);

    int packedWidth;
    if (aligned)
    {
        packedWidth = BitManipulation.FindNearestAlignedDirectWidth(requestedWidth);
    }
    else
    {
        packedWidth = BitManipulation.FindNearestDirectWidth(requestedWidth);
    }

    var widthCode = packedWidth.EncodeDirectWidth();
    int lengthField = numValues - 1;

    int headerHigh = (0x1 << 6)                 //7..6 Encoding Type
                     | ((widthCode & 0x1f) << 1) //5..1 Fixed Width
                     | (lengthField >> 8);       //0    MSB of length
    int headerLow = lengthField & 0xff;         //7..0 LSBs of length

    _outputStream.WriteByte((byte)headerHigh);
    _outputStream.WriteByte((byte)headerLow);
    _outputStream.WriteBitpackedIntegers(values, packedWidth);
}
/// <summary>
/// Writes a patched-base run: a four-byte header, the base value, the low bits of every
/// base-reduced value, and finally the gap/patch list carrying the high bits that did not fit.
/// </summary>
/// <param name="baseValue">Base that was subtracted from the values; written as magnitude plus a sign bit.</param>
/// <param name="baseReducedValues">Values with the base already removed. Passed to GeneratePatchList before being written.</param>
/// <param name="originalBitWidth">Bit width before reduction; the difference to <paramref name="reducedBitWidth"/> sizes the patches.</param>
/// <param name="reducedBitWidth">Bit width kept for the packed values.</param>
private void PatchEncode(long baseValue, long[] baseReducedValues, int originalBitWidth, int reducedBitWidth)
{
    //The base is stored as a magnitude; the top bit of its most significant byte carries the sign
    bool negativeBase = baseValue < 0;
    long storedBase = negativeBase ? -baseValue : baseValue;

    var baseBits = BitManipulation.NumBits((ulong)storedBase) + 1; //One extra bit for the sign
    var baseBytes = baseBits / 8 + (baseBits % 8 != 0 ? 1 : 0);    //Round up to whole bytes
    if (negativeBase)
    {
        storedBase |= 1L << (baseBytes * 8 - 1);
    }

    var patchWidth = BitManipulation.FindNearestDirectWidth(originalBitWidth - reducedBitWidth);
    if (patchWidth == 64)
    {
        patchWidth = 56;
        reducedBitWidth = 8;
    }
    var patchWidthCode = patchWidth.EncodeDirectWidth();

    var valueWidth = BitManipulation.FindNearestDirectWidth(reducedBitWidth);
    var valueWidthCode = valueWidth.EncodeDirectWidth();

    //Must run before the values are written: the same array is bit-packed afterwards
    int gapBitWidth;
    var patchEntries = GeneratePatchList(baseReducedValues, patchWidth, reducedBitWidth, out gapBitWidth);
    var patchEntryWidth = BitManipulation.FindNearestDirectWidth(gapBitWidth + patchWidth);

    int lengthField = baseReducedValues.Length - 1;

    int header1 = (0x2 << 6)                       //7..6 Encoding Type
                  | ((valueWidthCode & 0x1f) << 1) //5..1 Value Bit Width
                  | (lengthField >> 8);            //0    MSB of length
    int header2 = lengthField & 0xff;              //7..0 LSBs of length
    int header3 = ((baseBytes - 1) << 5)           //7..5 Base Value Byte Width
                  | (patchWidthCode & 0x1f);       //4..0 Encoded Patch Bit Width
    int header4 = ((gapBitWidth - 1) << 5)         //7..5 Gap Bit Width
                  | (patchEntries.Length & 0x1f);  //4..0 Patch/Gap List Length

    _outputStream.WriteByte((byte)header1);
    _outputStream.WriteByte((byte)header2);
    _outputStream.WriteByte((byte)header3);
    _outputStream.WriteByte((byte)header4);
    _outputStream.WriteLongBE(baseBytes, storedBase);
    _outputStream.WriteBitpackedIntegers(baseReducedValues, valueWidth);
    _outputStream.WriteBitpackedIntegers(patchEntries, patchEntryWidth);
}
/// <summary>
/// Writes a short-repeat run: one header byte followed by the repeated value in big-endian order.
/// </summary>
/// <param name="value">The repeated value; its bit pattern (viewed as unsigned) determines the byte width.</param>
/// <param name="repeatCount">Number of repetitions; stored in the header as <c>repeatCount - 3</c>.</param>
void ShortRepeatEncode(long value, int repeatCount)
{
    var bitWidth = BitManipulation.FindNearestDirectWidth(BitManipulation.NumBits((ulong)value));
    var byteWidth = bitWidth / 8 + (bitWidth % 8 != 0 ? 1 : 0); //Round up to whole bytes

    //Encoding type 0 occupies bits 7..6, so only the width and the count contribute set bits
    int header = ((byteWidth - 1) << 3) | (repeatCount - 3);

    _outputStream.WriteByte((byte)header);
    _outputStream.WriteLongBE(byteWidth, value);
}
/// <summary>
/// Decodes a patched-base run whose first header byte has already been consumed, lazily yielding
/// each value with the base added and any patch bits restored.
/// </summary>
/// <param name="firstByte">First header byte: value width code in bits 5..1, MSB of the length in bit 0.</param>
/// <returns>The decoded values, in stream order.</returns>
/// <exception cref="InvalidDataException">The gap width plus the patch width exceeds 64 bits.</exception>
IEnumerable<long> ReadPatchedBaseValues(int firstByte)
{
    var valueWidth = ((firstByte >> 1) & 0x1f).DecodeDirectWidth();

    //9-bit length field: one bit from the first byte, eight from the second; stored minus one
    int valueCount = ((firstByte & 0x1) << 8) | _inputStream.CheckedReadByte();
    valueCount += 1;

    var header3 = _inputStream.CheckedReadByte();
    var baseByteCount = ((header3 >> 5) & 0x7) + 1;
    var patchWidth = (header3 & 0x1f).DecodeDirectWidth();

    var header4 = _inputStream.CheckedReadByte();
    var gapWidth = ((header4 >> 5) & 0x7) + 1;
    var patchListLength = header4 & 0x1f;

    //The base is stored as a magnitude with the sign in the top bit of its most significant byte
    long baseValue = _inputStream.ReadLongBE(baseByteCount);
    long signBit = 1L << ((baseByteCount * 8) - 1);
    if ((baseValue & signBit) != 0)
    {
        baseValue = -(baseValue & ~signBit);
    }

    //Buffer all the values so we can patch them
    var unpatched = _inputStream.ReadBitpackedIntegers(valueWidth, valueCount).ToArray();

    if (gapWidth + patchWidth > 64)
    {
        throw new InvalidDataException($"{nameof(patchGapWidth)} ({gapWidth}) + {nameof(patchWidth)} ({patchWidth}) > 64");
    }

    var patchEntryWidth = BitManipulation.FindNearestDirectWidth(patchWidth + gapWidth);
    var patchEntries = _inputStream.ReadBitpackedIntegers(patchEntryWidth, patchListLength).ToArray();

    long patchMask = (1L << patchWidth) - 1;
    int patchCursor = 0;
    long patchPosition = 0;
    long patchBits;
    GetNextPatch(patchEntries, ref patchCursor, ref patchPosition, out patchBits, patchWidth, patchMask);

    for (int index = 0; index < valueCount; index++)
    {
        if (index != patchPosition)
        {
            yield return baseValue + unpatched[index];
            continue;
        }

        //Patch bits sit directly above the packed low bits
        yield return baseValue + (unpatched[index] | (patchBits << valueWidth));

        if (patchCursor < patchListLength)
        {
            GetNextPatch(patchEntries, ref patchCursor, ref patchPosition, out patchBits, patchWidth, patchMask);
        }
    }
}
/// <summary>
/// Strips the bits above <paramref name="reducedBitWidth"/> from every value that has them and
/// returns those bits as a gap/patch list. The input array is modified in place.
/// </summary>
/// <param name="baseReducedValues">Values to scan; each patched element is masked down to its low bits.</param>
/// <param name="patchBitWidth">Number of bits each gap is shifted left to make room for its patch.</param>
/// <param name="reducedBitWidth">Number of low bits that stay in the value.</param>
/// <param name="gapBitWidth">Receives the bit width chosen for gaps; never more than 8.</param>
/// <returns>One entry per patch, <c>(gap &lt;&lt; patchBitWidth) | patch</c>, with filler entries of gap 255 and patch 0 before any gap above 255.</returns>
private long[] GeneratePatchList(long[] baseReducedValues, int patchBitWidth, int reducedBitWidth, out int gapBitWidth)
{
    long lowBitsMask = (1L << reducedBitWidth) - 1;

    //We're patching 5% of the values (round up)
    int expectedPatches = (int)(baseReducedValues.Length * 0.05 + .5);
    var gaps = new List<int>(expectedPatches);
    var patches = new List<long>(expectedPatches);

    int lastPatchedIndex = 0;
    int largestGap = 0;
    for (int index = 0; index < baseReducedValues.Length; index++)
    {
        long candidate = baseReducedValues[index];
        if (candidate <= lowBitsMask)
        {
            continue;
        }

        int distance = index - lastPatchedIndex;
        if (distance > largestGap)
        {
            largestGap = distance;
        }

        gaps.Add(distance);
        patches.Add((long)((ulong)candidate >> reducedBitWidth));
        baseReducedValues[index] = candidate & lowBitsMask;
        lastPatchedIndex = index;
    }

    int entryCount = gaps.Count;
    if (largestGap == 0 && entryCount != 0)
    {
        gapBitWidth = 1;
    }
    else
    {
        gapBitWidth = BitManipulation.FindNearestDirectWidth(BitManipulation.NumBits((ulong)largestGap));
    }

    if (gapBitWidth > 8)
    {
        //Prepare for the special case of 511 and 256
        gapBitWidth = 8;
        entryCount += largestGap == 511 ? 2 : 1;
    }

    var entries = new long[entryCount];
    int writeAt = 0;
    for (int p = 0; p < gaps.Count; p++)
    {
        long remaining = gaps[p];
        for (; remaining > 255; remaining -= 255)
        {
            entries[writeAt++] = 255L << patchBitWidth;
        }
        entries[writeAt++] = (remaining << patchBitWidth) | patches[p];
    }

    return entries;
}
/// <summary>
/// Attempts to write <paramref name="values"/> as a delta-encoded run. The run is written when all
/// values are equal, when every step equals the first step, or when the sequence never reverses
/// the direction set by its first step.
/// </summary>
/// <param name="values">The run to encode. Must contain at least two elements: index 1 is read unconditionally.</param>
/// <param name="areSigned">Passed through to the DeltaEncode overloads.</param>
/// <param name="aligned">When true the delta width is rounded with the aligned helper, otherwise with the unaligned one.</param>
/// <param name="length">Receives <c>values.Count</c> on success and 0 otherwise.</param>
/// <param name="minValue">Receives the smallest value in the run, whatever the outcome.</param>
/// <returns>
/// <c>Success</c> when the run was written, <c>Overflow</c> when
/// <c>BitManipulation.SubtractionWouldOverflow(maxValue, minValue)</c> reports true, and
/// <c>NonMonotonic</c> when the sequence changes direction or starts with a zero step without
/// being constant.
/// </returns>
private DeltaEncodingResult TryDeltaEncoding(IList<long> values, bool areSigned, bool aligned, out int length, out long minValue)
{
    var deltas = new long[values.Count - 1];
    var initialValue = values[0];
    minValue = initialValue; //This gets saved for the patch base if things don't work out here
    var maxValue = initialValue;
    var initialDelta = values[1] - initialValue;
    var curDelta = initialDelta;

    //This is different from the java implementation. I believe their implementation may be a bug.
    //The first delta value is not considered for the delta bit width, so don't include it in the max value calculation
    long deltaMax = 0;

    var isIncreasing = initialDelta > 0;
    var isDecreasing = initialDelta < 0;
    var isConstantDelta = true;
    deltas[0] = initialDelta;

    var previousValue = initialValue;
    for (var i = 1; i < values.Count; i++)
    {
        var value = values[i];
        curDelta = value - previousValue;

        if (value < minValue)
        {
            minValue = value;
        }
        if (value > maxValue)
        {
            maxValue = value;
        }

        if (value < previousValue)
        {
            isIncreasing = false;
        }
        //A step upwards is what breaks a decreasing run. The previous check here duplicated the
        //one above, so every descending step cleared both flags and decreasing runs were rejected.
        if (value > previousValue)
        {
            isDecreasing = false;
        }

        if (curDelta != initialDelta)
        {
            isConstantDelta = false;
        }

        if (i > 1) //Don't rewrite the first value because it holds the sign of the remaining values
        {
            var absCurrDelta = Math.Abs(curDelta);
            deltas[i - 1] = absCurrDelta;
            if (absCurrDelta > deltaMax)
            {
                deltaMax = absCurrDelta;
            }
        }

        previousValue = value;
    }

    if (BitManipulation.SubtractionWouldOverflow(maxValue, minValue))
    {
        length = 0;
        return DeltaEncodingResult.Overflow;
    }

    if (maxValue == minValue) //All values were identical
    {
        DeltaEncode(minValue, areSigned, values.Count);
        length = values.Count;
        return DeltaEncodingResult.Success;
    }

    if (isConstantDelta) //All values changed by set amount
    {
        DeltaEncode(initialValue, areSigned, curDelta, values.Count);
        length = values.Count;
        return DeltaEncodingResult.Success;
    }

    if (isIncreasing || isDecreasing)
    {
        var deltaBits = BitManipulation.NumBits((ulong)deltaMax);
        deltaBits = aligned
            ? BitManipulation.FindNearestAlignedDirectWidth(deltaBits)
            : BitManipulation.FindNearestDirectWidth(deltaBits);
        DeltaEncode(initialValue, areSigned, values.Count, deltas, deltaBits);
        length = values.Count;
        return DeltaEncodingResult.Success;
    }

    length = 0;
    return DeltaEncodingResult.NonMonotonic;
}
/// <summary>
/// Attempts to write <paramref name="values"/> as a delta-encoded run, reporting why it could not
/// when the data does not fit that encoding.
/// </summary>
/// <param name="values">The run to encode. Must contain at least two elements: index 1 is read unconditionally.</param>
/// <param name="areSigned">Passed through to the DeltaEncode overloads.</param>
/// <param name="aligned">When true the delta width is rounded with the aligned helper, otherwise with the unaligned one.</param>
/// <param name="length">Receives <c>values.Count</c> on success and 0 otherwise.</param>
/// <param name="minValue">Receives the smallest value in the run, whatever the outcome.</param>
/// <returns><c>Success</c>, <c>Overflow</c> or <c>NonMonotonic</c>.</returns>
DeltaEncodingResult TryDeltaEncoding(IList<long> values, bool areSigned, bool aligned, out int length, out long minValue)
{
    var deltas = new long[values.Count - 1];

    long initialValue = values[0];
    long secondValue = values[1];
    long initialDelta = secondValue - initialValue;
    deltas[0] = initialDelta; //Kept signed: it carries the direction of the whole run

    //Seed the range with the first two values
    minValue = initialValue; //This gets saved for the patch base if things don't work out here
    long maxValue = initialValue;
    if (secondValue < minValue)
    {
        minValue = secondValue;
    }
    if (secondValue > maxValue)
    {
        maxValue = secondValue;
    }

    bool isIncreasing = initialDelta > 0;
    bool isDecreasing = initialDelta < 0;
    bool isConstantDelta = true;

    //This is different from the java implementation. I believe their implementation may be a bug.
    //The first delta value is not considered for the delta bit width, so don't include it in the max value calculation
    long deltaMax = 0;
    long curDelta = initialDelta;
    long previousValue = secondValue;

    //The first two values are already accounted for, so start with the third
    for (int index = 2; index < values.Count; index++)
    {
        long current = values[index];
        curDelta = current - previousValue;

        minValue = Math.Min(minValue, current);
        maxValue = Math.Max(maxValue, current);

        isIncreasing &= current >= previousValue;
        isDecreasing &= current <= previousValue;
        isConstantDelta &= curDelta == initialDelta;

        long magnitude = Math.Abs(curDelta);
        deltas[index - 1] = magnitude;
        deltaMax = Math.Max(deltaMax, magnitude);

        previousValue = current;
    }

    if (BitManipulation.SubtractionWouldOverflow(maxValue, minValue))
    {
        length = 0;
        return DeltaEncodingResult.Overflow;
    }

    if (maxValue == minValue)
    {
        //Smallest and largest coincide, so every value in the run is the same
        DeltaEncode(minValue, areSigned, values.Count);
        length = values.Count;
        return DeltaEncodingResult.Success;
    }

    if (isConstantDelta)
    {
        //Every step equals the first one
        DeltaEncode(initialValue, areSigned, curDelta, values.Count);
        length = values.Count;
        return DeltaEncodingResult.Success;
    }

    if (!isIncreasing && !isDecreasing)
    {
        length = 0;
        return DeltaEncodingResult.NonMonotonic;
    }

    var deltaBits = BitManipulation.NumBits((ulong)deltaMax);
    deltaBits = aligned
        ? BitManipulation.FindNearestAlignedDirectWidth(deltaBits)
        : BitManipulation.FindNearestDirectWidth(deltaBits);
    DeltaEncode(initialValue, areSigned, values.Count, deltas, deltaBits);
    length = values.Count;
    return DeltaEncodingResult.Success;
}