Пример #1
0
 /// <summary>
 /// Writes <paramref name="l"/> to <paramref name="out"/> as a run of <c>0xFF</c> bytes
 /// (one for every full 255 contained in the value) followed by a single byte
 /// holding the remainder.
 /// </summary>
 /// <param name="l">The length value to encode.</param>
 /// <param name="out">The output that receives the encoded bytes.</param>
 private static void EncodeLen(int l, DataOutput @out)
 {
     int remaining = l;
     for (; remaining >= 0xFF; remaining -= 0xFF)
     {
         // Each 0xFF byte tells the reader that more length bytes follow.
         @out.WriteByte(0xFF);
     }

     // Terminating byte: whatever is left after the 0xFF run.
     @out.WriteByte((byte)remaining);
 }
Пример #2
0
        /// <summary>
        /// Longest match length that still fits in the token written by
        /// <c>EncodeSequence</c> without extra length bytes: the token stores
        /// <c>matchLen - 4</c> capped at <c>0x0F</c>, and additional bytes are emitted
        /// once the stored value reaches <c>0x0F</c>.
        /// NOTE(review): the literal <c>4</c> presumably mirrors <c>MIN_MATCH</c> - confirm.
        /// </summary>
        internal const int OPTIMAL_ML = 0x0F + 4 - 1; // match length that doesn't require an additional byte

        /// <summary>
        /// Compresses <c>bytes[off:off+len]</c> and writes the result to <paramref name="out"/>
        /// using at most 16KB of memory. The <paramref name="ht"/> instance can safely be
        /// reused between calls but shouldn't be shared across threads.
        /// </summary>
        /// <param name="bytes">Buffer holding the data to compress.</param>
        /// <param name="off">Index of the first byte to compress.</param>
        /// <param name="len">Number of bytes to compress.</param>
        /// <param name="out">Destination of the compressed stream.</param>
        /// <param name="ht">Hash table used to look up earlier occurrences; it is reset here before use.</param>
        public static void Compress(byte[] bytes, int off, int len, DataOutput @out, HashTable ht)
        {
            int origin = off;
            int inputEnd = off + len;

            // Pending literals start at the first byte; the match search starts one byte later.
            int literalStart = off;
            off++;

            if (len > LAST_LITERALS + MIN_MATCH)
            {
                int limit = inputEnd - LAST_LITERALS;
                int matchLimit = limit - MIN_MATCH;
                ht.Reset(len);
                int hashLog = ht.hashLog;
                PackedInt32s.Mutable table = ht.hashTable;

                while (off <= limit)
                {
                    // Scan forward until the 4 bytes at 'off' match an earlier, close-enough position.
                    int candidate;
                    for (; ; ++off)
                    {
                        if (off >= matchLimit)
                        {
                            // Too close to the end to start a match: emit the rest as literals.
                            goto flushTail;
                        }

                        int word = ReadInt32(bytes, off);
                        int slot = Hash(word, hashLog);
                        int relative = off - origin;

                        // Read the previous occupant of the slot before overwriting it with the current position.
                        candidate = origin + (int)table.Get(slot);
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(PackedInt32s.BitsRequired(relative) <= table.BitsPerValue);
                        }
                        table.Set(slot, relative);

                        bool withinWindow = off - candidate < MAX_DISTANCE;
                        if (withinWindow && ReadInt32(bytes, candidate) == word)
                        {
                            break;
                        }
                    }

                    // The first MIN_MATCH bytes are known to be equal; measure how far the match extends.
                    int matchLen = MIN_MATCH + CommonBytes(bytes, candidate + MIN_MATCH, off + MIN_MATCH, limit);

                    EncodeSequence(bytes, literalStart, candidate, off, matchLen, @out);
                    off += matchLen;
                    literalStart = off;
                }
            flushTail: ;
            }

            // Whatever has not been covered by a sequence is written as trailing literals.
            int tailLen = inputEnd - literalStart;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(tailLen >= LAST_LITERALS || tailLen == len);
            }
            EncodeLastLiterals(bytes, literalStart, tailLen, @out);
        }
Пример #3
0
 /// <summary>
 /// Compress bytes into <paramref name="out"/>. It is the responsibility of the
 /// compressor to add all necessary information so that a <see cref="Decompressor"/>
 /// will know when to stop decompressing bytes from the stream.
 /// </summary>
 /// <param name="bytes">Buffer holding the data to compress.</param>
 /// <param name="off">Offset into <paramref name="bytes"/> (presumably the first byte to compress - confirm against implementations).</param>
 /// <param name="len">Length argument (presumably the number of bytes to compress - confirm against implementations).</param>
 /// <param name="out">Output that receives the compressed data.</param>
 public abstract void Compress(byte[] bytes, int off, int len, DataOutput @out);
Пример #4
0
        /// <summary>
        /// Writes one sequence to <paramref name="out"/>: a token byte, the pending
        /// literals <c>bytes[anchor:matchOff]</c>, the distance back to the match as two
        /// bytes (low byte first), and, for long matches, the extra match-length bytes.
        /// </summary>
        /// <param name="bytes">Buffer being compressed.</param>
        /// <param name="anchor">Index of the first literal byte not yet written.</param>
        /// <param name="matchRef">Index of the earlier occurrence that the match refers back to.</param>
        /// <param name="matchOff">Index at which the match starts; literals end just before it.</param>
        /// <param name="matchLen">Length of the match; asserted to be at least 4.</param>
        /// <param name="out">Output that receives the encoded sequence.</param>
        private static void EncodeSequence(byte[] bytes, int anchor, int matchRef, int matchOff, int matchLen, DataOutput @out)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(matchLen >= 4);
            }

            // Token: high nibble = literal length, low nibble = match length - 4, each capped at 0x0F.
            int literalLen = matchOff - anchor;
            int literalNibble = literalLen < 0x0F ? literalLen : 0x0F;
            int storedMatchLen = matchLen - 4;
            int matchNibble = storedMatchLen < 0x0F ? storedMatchLen : 0x0F;
            EncodeLiterals(bytes, (literalNibble << 4) | matchNibble, anchor, literalLen, @out);

            // Distance from the match start back to its reference, low byte first.
            int distance = matchOff - matchRef;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(distance > 0 && distance < 1 << 16);
            }
            @out.WriteByte((byte)distance);
            @out.WriteByte((byte)distance.TripleShift(8));

            // A saturated match nibble is followed by the remaining length.
            int extendedThreshold = MIN_MATCH + 0x0F;
            if (matchLen >= extendedThreshold)
            {
                EncodeLen(matchLen - extendedThreshold, @out);
            }
        }
Пример #5
0
        /// <summary>
        /// Writes the low byte of <paramref name="token"/>, then (when
        /// <paramref name="literalLen"/> is at least <c>0x0F</c>) the part of the literal
        /// length that exceeds <c>0x0F</c>, and finally the literal bytes themselves.
        /// </summary>
        /// <param name="bytes">Buffer holding the literals.</param>
        /// <param name="token">Token value; only its low 8 bits are written.</param>
        /// <param name="anchor">Offset passed to <c>WriteBytes</c> as the start of the literals.</param>
        /// <param name="literalLen">Number of literal bytes to write.</param>
        /// <param name="out">Output that receives the encoded data.</param>
        private static void EncodeLiterals(byte[] bytes, int token, int anchor, int literalLen, DataOutput @out)
        {
            const int TokenFieldMax = 0x0F;

            @out.WriteByte((byte)token);

            // Lengths that do not fit below the threshold spill into extra length bytes.
            bool needsExtraLength = literalLen >= TokenFieldMax;
            if (needsExtraLength)
            {
                EncodeLen(literalLen - TokenFieldMax, @out);
            }

            // The raw literal bytes follow the length information.
            @out.WriteBytes(bytes, anchor, literalLen);
        }
Пример #6
0
 /// <summary>
 /// Writes a run of literals that is not followed by a match: the token carries
 /// only the literal length (capped at <c>0x0F</c>) in its high nibble, with the
 /// low nibble left at zero.
 /// </summary>
 /// <param name="bytes">Buffer holding the literals.</param>
 /// <param name="anchor">Index of the first literal byte.</param>
 /// <param name="literalLen">Number of literal bytes to write.</param>
 /// <param name="out">Output that receives the encoded data.</param>
 private static void EncodeLastLiterals(byte[] bytes, int anchor, int literalLen, DataOutput @out)
     => EncodeLiterals(bytes, (literalLen < 0x0F ? literalLen : 0x0F) << 4, anchor, literalLen, @out);
Пример #7
0
        /// <summary>
        /// Compress <c>bytes[off:off+len]</c> into <paramref name="out"/>. Compared to
        /// <see cref="LZ4.Compress(byte[], int, int, DataOutput, HashTable)"/>, this method
        /// is slower and uses more memory (~ 256KB per thread) but should provide
        /// better compression ratios (especially on large inputs) because it chooses
        /// the best match among up to 256 candidates and then performs trade-offs to
        /// fix overlapping matches. <paramref name="ht"/> shouldn't be shared across threads
        /// but can safely be reused.
        /// </summary>
        /// <param name="src">Buffer holding the data to compress.</param>
        /// <param name="srcOff">Index of the first byte to compress.</param>
        /// <param name="srcLen">Number of bytes to compress.</param>
        /// <param name="out">Output that receives the compressed stream.</param>
        /// <param name="ht">Match finder; it is reset with <paramref name="srcOff"/> before use.</param>
        public static void CompressHC(byte[] src, int srcOff, int srcLen, DataOutput @out, HCHashTable ht)
        {
            int srcEnd = srcOff + srcLen;
            int matchLimit = srcEnd - LAST_LITERALS;
            int mfLimit = matchLimit - MIN_MATCH;

            // anchor marks the first byte not yet written; the search starts one byte after it.
            int sOff = srcOff;
            int anchor = sOff++;

            ht.Reset(srcOff);
            // Working slots: match1 is the next sequence to encode, match2/match3 are later
            // candidates that may overlap it, and match0 keeps a copy of an earlier match1.
            Match match0 = new Match();
            Match match1 = new Match();
            Match match2 = new Match();
            Match match3 = new Match();

            while (sOff <= mfLimit)
            {
                // When the finder returns false for sOff, advance by one byte and try again.
                if (!ht.InsertAndFindBestMatch(src, sOff, matchLimit, match1))
                {
                    ++sOff;
                    continue;
                }

                // saved, in case we would skip too much
                CopyTo(match1, match0);

                // "search2" loop: look for a second match that starts inside match1.
                while (true)
                {
                    if (Debugging.AssertsEnabled) Debugging.Assert(match1.start >= anchor);
                    if (match1.End() >= mfLimit || !ht.InsertAndFindWiderMatch(src, match1.End() - 2, match1.start + 1, matchLimit, match1.len, match2))
                    {
                        // no better match
                        EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                        anchor = sOff = match1.End();
                        goto mainContinue;
                    }

                    if (match0.start < match1.start)
                    {
                        if (match2.start < match1.start + match0.len) // empirical
                        {
                            // restore the saved match
                            CopyTo(match0, match1);
                        }
                    }
                    if (Debugging.AssertsEnabled) Debugging.Assert(match2.start > match1.start);

                    if (match2.start - match1.start < 3) // First Match too small : removed
                    {
                        CopyTo(match2, match1);
                        goto search2Continue;
                    }

                    // "search3" loop: match1 and match2 are both candidates; look for a third match.
                    while (true)
                    {
                        if (match2.start - match1.start < OPTIMAL_ML)
                        {
                            // Work out how long match1 could be (capped at OPTIMAL_ML and leaving
                            // MIN_MATCH bytes for match2) and move match2 forward past any overlap.
                            int newMatchLen = match1.len;
                            if (newMatchLen > OPTIMAL_ML)
                            {
                                newMatchLen = OPTIMAL_ML;
                            }
                            if (match1.start + newMatchLen > match2.End() - MIN_MATCH)
                            {
                                newMatchLen = match2.start - match1.start + match2.len - MIN_MATCH;
                            }
                            int correction = newMatchLen - (match2.start - match1.start);
                            if (correction > 0)
                            {
                                match2.Fix(correction);
                            }
                        }

                        if (match2.start + match2.len >= mfLimit || !ht.InsertAndFindWiderMatch(src, match2.End() - 3, match2.start, matchLimit, match2.len, match3))
                        {
                            // no better match -> 2 sequences to encode
                            if (match2.start < match1.End())
                            {
                                // truncate match1 so that it ends where match2 starts
                                match1.len = match2.start - match1.start;
                            }
                            // encode seq 1
                            EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                            anchor = /*sOff =*/ match1.End(); // LUCENENET: IDE0059: Remove unnecessary value assignment
                            // encode seq 2
                            EncodeSequence(src, anchor, match2.@ref, match2.start, match2.len, @out);
                            anchor = sOff = match2.End();
                            goto mainContinue;
                        }

                        if (match3.start < match1.End() + 3) // Not enough space for match 2 : remove it
                        {
                            if (match3.start >= match1.End()) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
                            {
                                if (match2.start < match1.End())
                                {
                                    int correction = match1.End() - match2.start;
                                    match2.Fix(correction);
                                    if (match2.len < MIN_MATCH)
                                    {
                                        CopyTo(match3, match2);
                                    }
                                }

                                EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                                anchor = /*sOff =*/ match1.End(); // LUCENENET: IDE0059: Remove unnecessary value assignment

                                // match3 becomes the new match1; match2 is kept in match0 as the saved match
                                CopyTo(match3, match1);
                                CopyTo(match2, match0);

                                goto search2Continue;
                            }

                            // match3 replaces match2; retry the search3 loop with match1 unchanged
                            CopyTo(match3, match2);
                            goto search3Continue;
                        }

                        // OK, now we have 3 ascending matches; let's write at least the first one
                        if (match2.start < match1.End())
                        {
                            if (match2.start - match1.start < 0x0F)
                            {
                                if (match1.len > OPTIMAL_ML)
                                {
                                    match1.len = OPTIMAL_ML;
                                }
                                if (match1.End() > match2.End() - MIN_MATCH)
                                {
                                    match1.len = match2.End() - match1.start - MIN_MATCH;
                                }
                                int correction = match1.End() - match2.start;
                                match2.Fix(correction);
                            }
                            else
                            {
                                // truncate match1 so that it ends where match2 starts
                                match1.len = match2.start - match1.start;
                            }
                        }

                        EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                        anchor = /*sOff =*/ match1.End(); // LUCENENET: IDE0059: Remove unnecessary value assignment

                        // shift the window: match2 -> match1, match3 -> match2
                        CopyTo(match2, match1);
                        CopyTo(match3, match2);

                        goto search3Continue;
                    search3Continue: ;
                    }
                //search3Break: ; // LUCENENET NOTE: Unreachable

                search2Continue: ;
                }
            //search2Break: ; // LUCENENET NOTE: Not referenced

            mainContinue: ;
            }
            //mainBreak: // LUCENENET NOTE: Not referenced

            // everything from anchor up to srcEnd is written as trailing literals
            EncodeLastLiterals(src, anchor, srcEnd - anchor, @out);
        }