/// <summary>
/// Decodes exactly one tag (literal or copy) starting at <paramref name="ipIndex"/>.
/// This is a second copy of the inner loop of <c>DecompressAllTags</c>, used when near
/// the end of the input. The key difference is how the trailer bytes are read: this
/// method assembles the trailer byte-by-byte, touching only the <c>trailerBytes</c>
/// bytes the tag actually declares, so that the array is not overrun. According to the
/// original author's notes, the two paths are kept separate because choosing between
/// the two read strategies with a condition inside the main loop cost roughly 10-20%
/// of throughput; the duplication is accepted as maintenance pain for that gain.
/// </summary>
/// <param name="input">Buffer holding the compressed data.</param>
/// <param name="ipIndex">Index in <paramref name="input"/> of the tag's op code byte.</param>
/// <param name="output">Buffer receiving the decompressed data.</param>
/// <param name="outputLimit">Exclusive upper bound of the writable region of <paramref name="output"/>.</param>
/// <param name="outputOffset">Index in <paramref name="output"/> where the decompressed data begins.</param>
/// <param name="opIndex">Index in <paramref name="output"/> where the next byte will be written.</param>
/// <returns>
/// A two-element array: element 0 is the input index just past the decoded tag (and its
/// literal payload, if any); element 1 is the updated output index.
/// </returns>
/// <exception cref="CorruptionException">
/// The tag is a copy whose offset is not positive or reaches back before
/// <paramref name="outputOffset"/>.
/// </exception>
private static int[] DecompressTagSlow(byte[] input, int ipIndex, byte[] output, int outputLimit, int outputOffset, int opIndex)
{
    // Read the op code and look up its table entry. The number of trailer bytes is
    // stored in the bits above bit 10 of the entry.
    int opCode = SnappyInternalUtils.LoadByte(input, ipIndex++);
    int entry = SnappyInternalUtils.LookupShort(_opLookupTable, opCode);
    var trailerBytes = (int)((uint)entry >> 11);

    //
    // Key difference here: build the little-endian trailer one byte at a time, reading
    // only the bytes that belong to this tag. A C# byte is unsigned, so it widens to
    // int without needing a mask.
    //
    int trailer = 0;
    switch (trailerBytes)
    {
        case 4:
            trailer = input[ipIndex + 3] << 24;
            goto case 3;
        case 3:
            trailer |= input[ipIndex + 2] << 16;
            goto case 2;
        case 2:
            trailer |= input[ipIndex + 1] << 8;
            goto case 1;
        case 1:
            trailer |= input[ipIndex];
            break;
    }

    // Advance the ipIndex past the op code's trailer bytes.
    ipIndex += trailerBytes;
    int length = entry & 0xff;

    if ((opCode & 0x3) == Literal)
    {
        int literalLength = length + trailer;
        CopyLiteral(input, ipIndex, output, opIndex, literalLength);
        ipIndex += literalLength;
        opIndex += literalLength;
    }
    else
    {
        // copyOffset/256 is encoded in bits 8..10. By just fetching
        // those bits, we get copyOffset (since the bit-field starts at
        // bit 8).
        int copyOffset = entry & 0x700;
        copyOffset += trailer;

        // The copy logic is deliberately kept inline here (the original author did so
        // to keep the JIT from splitting it out of the hot path).
        {
            int spaceLeft = outputLimit - opIndex;
            int srcIndex = opIndex - copyOffset;

            // A valid back-reference has a strictly positive offset and stays inside
            // the data produced so far. A zero offset, or a negative one (possible when
            // a four-byte trailer has its top bit set), would make the copy read bytes
            // that have not been written yet, so treat it as corruption.
            if (copyOffset <= 0 || srcIndex < outputOffset)
            {
                throw new CorruptionException("Invalid copy offset for opcode starting at " + (ipIndex - trailerBytes - 1));
            }

            if (length <= 16 && copyOffset >= 8 && spaceLeft >= 16)
            {
                // Fast path, used for the majority (70-80%) of dynamic invocations.
                SnappyInternalUtils.CopyLong(output, srcIndex, output, opIndex);
                SnappyInternalUtils.CopyLong(output, srcIndex + 8, output, opIndex + 8);
            }
            else if (spaceLeft >= length + MaxIncrementCopyOverflow)
            {
                IncrementalCopyFastPath(output, srcIndex, opIndex, length);
            }
            else
            {
                IncrementalCopy(output, srcIndex, output, opIndex, length);
            }
        }
        opIndex += length;
    }

    return new[] { ipIndex, opIndex };
}
/// <summary>
/// Decodes every tag in the given slice of <paramref name="input"/>, writing the
/// decompressed bytes into <paramref name="output"/> starting at
/// <paramref name="outputOffset"/>.
/// Tags that start more than five bytes before the end of the input are decoded in the
/// main loop, which obtains the trailer through <c>ReadTrailer</c>; the remaining tags
/// are handed one at a time to <c>DecompressTagSlow</c>, which reads the trailer
/// byte-by-byte.
/// </summary>
/// <param name="input">Buffer holding the compressed data.</param>
/// <param name="inputOffset">Index in <paramref name="input"/> of the first tag.</param>
/// <param name="inputSize">Number of compressed bytes to consume, starting at <paramref name="inputOffset"/>.</param>
/// <param name="output">Buffer receiving the decompressed data; its full length is used as the write limit.</param>
/// <param name="outputOffset">Index in <paramref name="output"/> where the decompressed data begins.</param>
/// <returns>The number of bytes written to <paramref name="output"/>.</returns>
/// <exception cref="CorruptionException">
/// A copy tag has an offset that is not positive or reaches back before
/// <paramref name="outputOffset"/>.
/// </exception>
private static int DecompressAllTags(
    byte[] input,
    int inputOffset,
    int inputSize,
    byte[] output,
    int outputOffset)
{
    int outputLimit = output.Length;

    int ipLimit = inputOffset + inputSize;
    int opIndex = outputOffset;
    int ipIndex = inputOffset;

    // Main loop: only entered while the tag starts more than five bytes before the end
    // of the input, leaving the tail to the slow path below.
    while (ipIndex < ipLimit - 5)
    {
        int opCode = SnappyInternalUtils.LoadByte(input, ipIndex++);
        int entry = SnappyInternalUtils.LookupShort(_opLookupTable, opCode);
        var trailerBytes = (int)((uint)entry >> 11);
        int trailer = ReadTrailer(input, ipIndex, trailerBytes);

        // Advance the ipIndex past the op code's trailer bytes.
        ipIndex += trailerBytes;
        int length = entry & 0xff;

        if ((opCode & 0x3) == Literal)
        {
            int literalLength = length + trailer;
            CopyLiteral(input, ipIndex, output, opIndex, literalLength);
            ipIndex += literalLength;
            opIndex += literalLength;
        }
        else
        {
            // copyOffset/256 is encoded in bits 8..10. By just fetching
            // those bits, we get copyOffset (since the bit-field starts at
            // bit 8).
            int copyOffset = entry & 0x700;
            copyOffset += trailer;

            // The copy logic is deliberately kept inline here (the original author did
            // so to keep the JIT from splitting it out of the hot path).
            //
            // Notes carried over from the original source about the fast incremental
            // copy:
            //
            // Equivalent to incrementalCopy (below) except that it can write up to ten extra
            // bytes after the end of the copy, and that it is faster.
            //
            // The main part of this loop is a simple copy of eight bytes at a time until
            // we've copied (at least) the requested amount of bytes.  However, if op and
            // src are less than eight bytes apart (indicating a repeating pattern of
            // length < 8), we first need to expand the pattern in order to get the correct
            // results. For instance, if the buffer looks like this, with the eight-byte
            // <src> and <op> patterns marked as intervals:
            //
            //    abxxxxxxxxxxxx
            //    [------]           src
            //      [------]         op
            //
            // a single eight-byte copy from <src> to <op> will repeat the pattern once,
            // after which we can move <op> two bytes without moving <src>:
            //
            //    ababxxxxxxxxxx
            //    [------]           src
            //        [------]       op
            //
            // and repeat the exercise until the two no longer overlap.
            //
            // This allows us to do very well in the special case of one single byte
            // repeated many times, without taking a big hit for more general cases.
            //
            // The worst case of extra writing past the end of the match occurs when
            // op - src == 1 and len == 1; the last copy will read from byte positions
            // [0..7] and write to [4..11], whereas it was only supposed to write to
            // position 1. Thus, ten excess bytes.
            {
                int spaceLeft = outputLimit - opIndex;
                int srcIndex = opIndex - copyOffset;

                // A valid back-reference has a strictly positive offset and stays inside
                // the data produced so far. A zero offset, or a negative one (possible
                // when a four-byte trailer has its top bit set), would make the copy
                // read bytes that have not been written yet, so treat it as corruption.
                if (copyOffset <= 0 || srcIndex < outputOffset)
                {
                    throw new CorruptionException("Invalid copy offset for opcode starting at " + (ipIndex - trailerBytes - 1));
                }

                if (length <= 16 && copyOffset >= 8 && spaceLeft >= 16)
                {
                    // Fast path, used for the majority (70-80%) of dynamic invocations.
                    SnappyInternalUtils.CopyLong(output, srcIndex, output, opIndex);
                    SnappyInternalUtils.CopyLong(output, srcIndex + 8, output, opIndex + 8);
                }
                else if (spaceLeft >= length + MaxIncrementCopyOverflow)
                {
                    IncrementalCopyFastPath(output, srcIndex, opIndex, length);
                }
                else
                {
                    IncrementalCopy(output, srcIndex, output, opIndex, length);
                }
            }
            opIndex += length;
        }
    }

    // Tail of the input: decode the remaining tags one at a time with the variant that
    // reads trailer bytes individually.
    while (ipIndex < ipLimit)
    {
        int[] result = DecompressTagSlow(input, ipIndex, output, outputLimit, outputOffset, opIndex);
        ipIndex = result[0];
        opIndex = result[1];
    }

    return opIndex - outputOffset;
}