/// <summary>
        ///  This is a second copy of the inner loop of DecompressAllTags used when near the
        ///  end of the input. The key difference is the reading of the trailer bytes. The
        ///  fast code does a blind read of the next 4 bytes as an int, and this code
        ///  assembles the int byte-by-byte to ensure that the array is not overrun. The
        ///  reason this code path is separate is that the if condition needed to choose
        ///  between these two seemingly small differences costs about 10-20% of the
        ///  throughput. I'm hoping that in future versions of hot-spot this code can be
        ///  integrated into the main loop, but for now it is worth the extra maintenance
        ///  pain to get the extra 10-20%.
        /// </summary>
        /// <param name="input">Buffer holding the compressed data.</param>
        /// <param name="ipIndex">Index in <paramref name="input"/> of the op code byte of the tag to decode.</param>
        /// <param name="output">Buffer receiving the decompressed bytes; copy tags also read back from it.</param>
        /// <param name="outputLimit">Upper bound used to compute how much space is left in <paramref name="output"/>.</param>
        /// <param name="outputOffset">Lowest index of <paramref name="output"/> that a copy tag may read from.</param>
        /// <param name="opIndex">Index in <paramref name="output"/> at which this tag's bytes are written.</param>
        /// <returns>A two-element array: the advanced input index followed by the advanced output index.</returns>
        /// <exception cref="CorruptionException">
        ///  The tag is a copy whose offset is not positive or reaches back before <paramref name="outputOffset"/>.
        /// </exception>
        private static int[] DecompressTagSlow(byte[] input, int ipIndex, byte[] output, int outputLimit, int outputOffset, int opIndex)
        {
            // Read the op code and its lookup-table entry. As decoded below, the entry
            // carries the trailer byte count in bits 11 and up, the high copy-offset
            // bits in bits 8..10 and the base length in bits 0..7.
            int opCode       = SnappyInternalUtils.LoadByte(input, ipIndex++);
            int entry        = SnappyInternalUtils.LookupShort(_opLookupTable, opCode);
            var trailerBytes = (int)((uint)entry >> 11);

            //
            // Key difference here: the trailer is assembled one byte at a time
            // (lowest input index = least significant byte), so no byte beyond the
            // trailer itself is ever read.
            //
            int trailer = 0;

            switch (trailerBytes)
            {
            case 4:
                trailer = (input[ipIndex + 3] & 0xff) << 24;
                goto case 3;

            case 3:
                trailer |= (input[ipIndex + 2] & 0xff) << 16;
                goto case 2;

            case 2:
                trailer |= (input[ipIndex + 1] & 0xff) << 8;
                goto case 1;

            case 1:
                trailer |= (input[ipIndex] & 0xff);
                break;
            }

            // advance the ipIndex past the op codes
            ipIndex += trailerBytes;
            int length = entry & 0xff;

            if ((opCode & 0x3) == Literal)
            {
                // NOTE(review): trailer is a signed int, so literalLength can be negative
                // for a 4-byte trailer with the top bit set - confirm CopyLiteral rejects it.
                int literalLength = length + trailer;
                CopyLiteral(input, ipIndex, output, opIndex, literalLength);
                ipIndex += literalLength;
                opIndex += literalLength;
            }
            else
            {
                // copyOffset/256 is encoded in bits 8..10.  By just fetching
                // those bits, we get copyOffset (since the bit-field starts at
                // bit 8).
                int copyOffset = entry & 0x700;
                copyOffset += trailer;

                // A copy must reach strictly backwards into data that has already been
                // produced. The trailer is a signed int, so a 4-byte trailer with its
                // top bit set yields a negative offset, and a zero trailer can yield a
                // zero offset; both would make the source start at or after opIndex.
                // The subtraction is only evaluated once copyOffset is known to be
                // positive, so it cannot overflow.
                if (copyOffset <= 0 || opIndex - copyOffset < outputOffset)
                {
                    throw new CorruptionException("Invalid copy offset for opcode starting at " + (ipIndex - trailerBytes - 1));
                }

                // inline to force hot-spot to keep inline
                {
                    int spaceLeft = outputLimit - opIndex;
                    int srcIndex  = opIndex - copyOffset;

                    if (length <= 16 && copyOffset >= 8 && spaceLeft >= 16)
                    {
                        // Fast path, used for the majority (70-80%) of dynamic invocations.
                        SnappyInternalUtils.CopyLong(output, srcIndex, output, opIndex);
                        SnappyInternalUtils.CopyLong(output, srcIndex + 8, output, opIndex + 8);
                    }
                    else if (spaceLeft >= length + MaxIncrementCopyOverflow)
                    {
                        IncrementalCopyFastPath(output, srcIndex, opIndex, length);
                    }
                    else
                    {
                        IncrementalCopy(output, srcIndex, output, opIndex, length);
                    }
                }
                opIndex += length;
            }

            // Hand both advanced cursors back to the caller.
            return new[] { ipIndex, opIndex };
        }
        /// <summary>
        ///  Decodes tags from <paramref name="input"/>, starting at
        ///  <paramref name="inputOffset"/> and continuing until the input index reaches
        ///  <c>inputOffset + inputSize</c>, writing the decompressed bytes into
        ///  <paramref name="output"/> from <paramref name="outputOffset"/> onwards.
        ///  The main loop only runs while more than five input bytes remain; the last
        ///  tags are handed one at a time to <c>DecompressTagSlow</c>, which assembles
        ///  the trailer byte-by-byte.
        /// </summary>
        /// <param name="input">Buffer holding the compressed data.</param>
        /// <param name="inputOffset">Index in <paramref name="input"/> of the first tag.</param>
        /// <param name="inputSize">Number of input bytes to decode, counted from <paramref name="inputOffset"/>.</param>
        /// <param name="output">Buffer receiving the decompressed bytes; its length is used as the output limit.</param>
        /// <param name="outputOffset">Index in <paramref name="output"/> at which writing starts; copy tags may not read before it.</param>
        /// <returns>The number of bytes written, i.e. the final output index minus <paramref name="outputOffset"/>.</returns>
        /// <exception cref="CorruptionException">
        ///  A copy tag has an offset that is not positive or reaches back before <paramref name="outputOffset"/>.
        /// </exception>
        private static int DecompressAllTags(
            byte[] input,
            int inputOffset,
            int inputSize,
            byte[] output,
            int outputOffset)
        {
            int outputLimit = output.Length;

            int ipLimit = inputOffset + inputSize;
            int opIndex = outputOffset;
            int ipIndex = inputOffset;

            while (ipIndex < ipLimit - 5)
            {
                int opCode       = SnappyInternalUtils.LoadByte(input, ipIndex++);
                int entry        = SnappyInternalUtils.LookupShort(_opLookupTable, opCode);
                var trailerBytes = (int)((uint)entry >> 11);
                int trailer      = ReadTrailer(input, ipIndex, trailerBytes);

                // advance the ipIndex past the op codes
                ipIndex += trailerBytes;
                int length = entry & 0xff;

                if ((opCode & 0x3) == Literal)
                {
                    int literalLength = length + trailer;
                    CopyLiteral(input, ipIndex, output, opIndex, literalLength);
                    ipIndex += literalLength;
                    opIndex += literalLength;
                }
                else
                {
                    // copyOffset/256 is encoded in bits 8..10.  By just fetching
                    // those bits, we get copyOffset (since the bit-field starts at
                    // bit 8).
                    int copyOffset = entry & 0x700;
                    copyOffset += trailer;

                    // A copy must reach strictly backwards into data that has already
                    // been produced. copyOffset is a signed int derived from the input,
                    // so it can be zero or negative; either would make the source start
                    // at or after opIndex. The subtraction is only evaluated once
                    // copyOffset is known to be positive, so it cannot overflow.
                    if (copyOffset <= 0 || opIndex - copyOffset < outputOffset)
                    {
                        throw new CorruptionException("Invalid copy offset for opcode starting at " + (ipIndex - trailerBytes - 1));
                    }

                    // inline to force hot-spot to keep inline
                    //
                    // Equivalent to IncrementalCopy (below) except that it can write up to ten extra
                    // bytes after the end of the copy, and that it is faster.
                    //
                    // The main part of this loop is a simple copy of eight bytes at a time until
                    // we've copied (at least) the requested amount of bytes.  However, if op and
                    // src are less than eight bytes apart (indicating a repeating pattern of
                    // length < 8), we first need to expand the pattern in order to get the correct
                    // results. For instance, if the buffer looks like this, with the eight-byte
                    // <src> and <op> patterns marked as intervals:
                    //
                    //    abxxxxxxxxxxxx
                    //    [------]           src
                    //      [------]         op
                    //
                    // a single eight-byte copy from <src> to <op> will repeat the pattern once,
                    // after which we can move <op> two bytes without moving <src>:
                    //
                    //    ababxxxxxxxxxx
                    //    [------]           src
                    //        [------]       op
                    //
                    // and repeat the exercise until the two no longer overlap.
                    //
                    // This allows us to do very well in the special case of one single byte
                    // repeated many times, without taking a big hit for more general cases.
                    //
                    // The worst case of extra writing past the end of the match occurs when
                    // op - src == 1 and len == 1; the last copy will read from byte positions
                    // [0..7] and write to [4..11], whereas it was only supposed to write to
                    // position 1. Thus, ten excess bytes.
                    {
                        int spaceLeft = outputLimit - opIndex;
                        int srcIndex  = opIndex - copyOffset;

                        if (length <= 16 && copyOffset >= 8 && spaceLeft >= 16)
                        {
                            // Fast path, used for the majority (70-80%) of dynamic invocations.
                            SnappyInternalUtils.CopyLong(output, srcIndex, output, opIndex);
                            SnappyInternalUtils.CopyLong(output, srcIndex + 8, output, opIndex + 8);
                        }
                        else if (spaceLeft >= length + MaxIncrementCopyOverflow)
                        {
                            IncrementalCopyFastPath(output, srcIndex, opIndex, length);
                        }
                        else
                        {
                            IncrementalCopy(output, srcIndex, output, opIndex, length);
                        }
                    }
                    opIndex += length;
                }
            }

            // Fewer than six input bytes may remain here: decode the last tags one at a
            // time through the slow path, which returns { ipIndex, opIndex }.
            while (ipIndex < ipLimit)
            {
                int[] result = DecompressTagSlow(input, ipIndex, output, outputLimit, outputOffset, opIndex);
                ipIndex = result[0];
                opIndex = result[1];
            }

            return opIndex - outputOffset;
        }