Ejemplo n.º 1
0
        /// <summary>
        /// Compress <c>src[srcOff:srcOff+srcLen]</c> into <paramref name="out"/>. Compared to
        /// <see cref="LZ4.Compress(byte[], int, int, DataOutput, HashTable)"/>, this method
        /// is slower and uses more memory (~ 256KB per thread) but should provide
        /// better compression ratios (especially on large inputs) because it chooses
        /// the best match among up to 256 candidates and then performs trade-offs to
        /// fix overlapping matches. <paramref name="ht"/> shouldn't be shared across threads
        /// but can safely be reused.
        /// </summary>
        /// <param name="src">Buffer holding the bytes to compress.</param>
        /// <param name="srcOff">Index in <paramref name="src"/> of the first byte to compress.</param>
        /// <param name="srcLen">Number of bytes to compress, starting at <paramref name="srcOff"/>.</param>
        /// <param name="out">Destination that receives the encoded sequences and the trailing literals.</param>
        /// <param name="ht">Match-finding hash table; it is reset to <paramref name="srcOff"/> before use.</param>
        public static void CompressHC(byte[] src, int srcOff, int srcLen, DataOutput @out, HCHashTable ht)
        {
            // Exclusive end of the input region.
            int srcEnd     = srcOff + srcLen;
            // Limit handed to the match finder: the last LAST_LITERALS bytes are kept out of it.
            int matchLimit = srcEnd - LAST_LITERALS;
            // Last position at which the main loop still starts a match search.
            int mfLimit    = matchLimit - MIN_MATCH;

            int sOff   = srcOff;
            // anchor is the start of the region not yet written to @out; searching starts one byte later.
            int anchor = sOff++;

            ht.Reset(srcOff);
            // match0: saved copy of the first match found; match1..match3: up to three ascending candidates.
            Match match0 = new Match();
            Match match1 = new Match();
            Match match2 = new Match();
            Match match3 = new Match();

            while (sOff <= mfLimit)
            {
                if (!ht.InsertAndFindBestMatch(src, sOff, matchLimit, match1))
                {
                    // Nothing found at this position: advance by one byte and try again.
                    ++sOff;
                    continue;
                }

                // saved, in case we would skip too much
                CopyTo(match1, match0);

                // "search2" loop: look for a second match (match2) that improves on match1.
                while (true)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(match1.start >= anchor);
                    }
                    if (match1.End() >= mfLimit || !ht.InsertAndFindWiderMatch(src, match1.End() - 2, match1.start + 1, matchLimit, match1.len, match2))
                    {
                        // no better match
                        EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                        anchor = sOff = match1.End();
                        goto mainContinue;
                    }

                    // match1 has been replaced by a later match since match0 was saved:
                    // restore the saved one when match2 starts early enough.
                    if (match0.start < match1.start)
                    {
                        if (match2.start < match1.start + match0.len) // empirical
                        {
                            CopyTo(match0, match1);
                        }
                    }
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(match2.start > match1.start);
                    }

                    if (match2.start - match1.start < 3) // First Match too small : removed
                    {
                        // match2 takes over as match1 and the search2 loop restarts.
                        CopyTo(match2, match1);
                        goto search2Continue;
                    }

                    // "search3" loop: with match1 and match2 in hand, look for a third match (match3).
                    while (true)
                    {
                        if (match2.start - match1.start < OPTIMAL_ML)
                        {
                            // Length match1 would keep: capped at OPTIMAL_ML and leaving at least
                            // MIN_MATCH bytes before the end of match2.
                            int newMatchLen = match1.len;
                            if (newMatchLen > OPTIMAL_ML)
                            {
                                newMatchLen = OPTIMAL_ML;
                            }
                            if (match1.start + newMatchLen > match2.End() - MIN_MATCH)
                            {
                                newMatchLen = match2.start - match1.start + match2.len - MIN_MATCH;
                            }
                            // Overlap between the trimmed match1 and match2.
                            int correction = newMatchLen - (match2.start - match1.start);
                            if (correction > 0)
                            {
                                // NOTE(review): Fix presumably moves match2 forward by 'correction' and
                                // shortens it accordingly -- confirm against Match.Fix.
                                match2.Fix(correction);
                            }
                        }

                        if (match2.start + match2.len >= mfLimit || !ht.InsertAndFindWiderMatch(src, match2.End() - 3, match2.start, matchLimit, match2.len, match3))
                        {
                            // no better match -> 2 sequences to encode
                            if (match2.start < match1.End())
                            {
                                // Truncate match1 so that it stops where match2 begins.
                                match1.len = match2.start - match1.start;
                            }
                            // encode seq 1
                            EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                            anchor = sOff = match1.End();
                            // encode seq 2
                            EncodeSequence(src, anchor, match2.@ref, match2.start, match2.len, @out);
                            anchor = sOff = match2.End();
                            goto mainContinue;
                        }

                        if (match3.start < match1.End() + 3)  // Not enough space for match 2 : remove it
                        {
                            if (match3.start >= match1.End()) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
                            {
                                if (match2.start < match1.End())
                                {
                                    int correction = match1.End() - match2.start;
                                    match2.Fix(correction);
                                    if (match2.len < MIN_MATCH)
                                    {
                                        // What is left of match2 is too short to use: replace it with match3.
                                        CopyTo(match3, match2);
                                    }
                                }

                                EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                                anchor = sOff = match1.End();

                                // match3 becomes the new match1; match2 is kept as the saved match0.
                                CopyTo(match3, match1);
                                CopyTo(match2, match0);

                                goto search2Continue;
                            }

                            // match3 still overlaps match1: it replaces match2 and the search3 loop restarts.
                            CopyTo(match3, match2);
                            goto search3Continue;
                        }

                        // OK, now we have 3 ascending matches; let's write at least the first one
                        if (match2.start < match1.End())
                        {
                            if (match2.start - match1.start < 0x0F)
                            {
                                // Trim match1 (capped at OPTIMAL_ML, leaving MIN_MATCH bytes of match2)
                                // and move match2 forward past the trimmed match1.
                                if (match1.len > OPTIMAL_ML)
                                {
                                    match1.len = OPTIMAL_ML;
                                }
                                if (match1.End() > match2.End() - MIN_MATCH)
                                {
                                    match1.len = match2.End() - match1.start - MIN_MATCH;
                                }
                                int correction = match1.End() - match2.start;
                                match2.Fix(correction);
                            }
                            else
                            {
                                // Truncate match1 so that it stops where match2 begins.
                                match1.len = match2.start - match1.start;
                            }
                        }

                        EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                        anchor = sOff = match1.End();

                        // Shift the window: match2 -> match1, match3 -> match2, then look for a new match3.
                        CopyTo(match2, match1);
                        CopyTo(match3, match2);

                        goto search3Continue;
                        search3Continue :;
                    }
                    //search3Break: ; // LUCENENET NOTE: Unreachable

                    search2Continue :;
                }
                //search2Break: ; // LUCENENET NOTE: Not referenced

                mainContinue :;
            }
            //mainBreak: // LUCENENET NOTE: Not referenced

            // Everything from anchor to the end of the input is written by EncodeLastLiterals.
            EncodeLastLiterals(src, anchor, srcEnd - anchor, @out);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Compress <c>src[srcOff:srcOff+srcLen]</c> into <paramref name="out"/>. Compared to
        /// the hash-table based <c>LZ4.Compress</c> method, this method
        /// is slower and uses more memory (~ 256KB per thread) but should provide
        /// better compression ratios (especially on large inputs) because it chooses
        /// the best match among up to 256 candidates and then performs trade-offs to
        /// fix overlapping matches. <paramref name="ht"/> shouldn't be shared across threads
        /// but can safely be reused.
        /// </summary>
        /// <param name="src">Buffer holding the bytes to compress.</param>
        /// <param name="srcOff">Index in <paramref name="src"/> of the first byte to compress.</param>
        /// <param name="srcLen">Number of bytes to compress, starting at <paramref name="srcOff"/>.</param>
        /// <param name="out">Destination that receives the encoded sequences and the trailing literals.</param>
        /// <param name="ht">Match-finding hash table; it is reset to <paramref name="srcOff"/> before use.</param>
        public static void CompressHC(sbyte[] src, int srcOff, int srcLen, DataOutput @out, HCHashTable ht)
        {
            // Exclusive end of the input region.
            int srcEnd = srcOff + srcLen;
            // Limit handed to the match finder: the last LAST_LITERALS bytes are kept out of it.
            int matchLimit = srcEnd - LAST_LITERALS;
            // Last position at which the main loop still starts a match search.
            int mfLimit = matchLimit - MIN_MATCH;

            int sOff = srcOff;
            // anchor is the start of the region not yet written to @out; searching starts one byte later.
            int anchor = sOff++;

            ht.Reset(srcOff);
            // match0: saved copy of the first match found; match1..match3: up to three ascending candidates.
            Match match0 = new Match();
            Match match1 = new Match();
            Match match2 = new Match();
            Match match3 = new Match();

            while (sOff <= mfLimit)
            {
                if (!ht.InsertAndFindBestMatch(src, sOff, matchLimit, match1))
                {
                    // Nothing found at this position: advance by one byte and try again.
                    ++sOff;
                    continue;
                }

                // saved, in case we would skip too much
                CopyTo(match1, match0);

                // "search2" loop: look for a second match (match2) that improves on match1.
                while (true)
                {
                    Debug.Assert(match1.Start >= anchor);
                    if (match1.End() >= mfLimit || !ht.InsertAndFindWiderMatch(src, match1.End() - 2, match1.Start + 1, matchLimit, match1.Len, match2))
                    {
                        // no better match
                        EncodeSequence(src, anchor, match1.@ref, match1.Start, match1.Len, @out);
                        anchor = sOff = match1.End();
                        goto mainContinue;
                    }

                    // match1 has been replaced by a later match since match0 was saved:
                    // restore the saved one when match2 starts early enough.
                    if (match0.Start < match1.Start)
                    {
                        if (match2.Start < match1.Start + match0.Len) // empirical
                        {
                            CopyTo(match0, match1);
                        }
                    }
                    Debug.Assert(match2.Start > match1.Start);

                    if (match2.Start - match1.Start < 3) // First Match too small : removed
                    {
                        // match2 takes over as match1 and the search2 loop restarts.
                        CopyTo(match2, match1);
                        goto search2Continue;
                    }

                    // "search3" loop: with match1 and match2 in hand, look for a third match (match3).
                    while (true)
                    {
                        if (match2.Start - match1.Start < OPTIMAL_ML)
                        {
                            // Length match1 would keep: capped at OPTIMAL_ML and leaving at least
                            // MIN_MATCH bytes before the end of match2.
                            int newMatchLen = match1.Len;
                            if (newMatchLen > OPTIMAL_ML)
                            {
                                newMatchLen = OPTIMAL_ML;
                            }
                            if (match1.Start + newMatchLen > match2.End() - MIN_MATCH)
                            {
                                newMatchLen = match2.Start - match1.Start + match2.Len - MIN_MATCH;
                            }
                            // Overlap between the trimmed match1 and match2.
                            int correction = newMatchLen - (match2.Start - match1.Start);
                            if (correction > 0)
                            {
                                // NOTE(review): Fix presumably moves match2 forward by 'correction' and
                                // shortens it accordingly -- confirm against Match.Fix.
                                match2.Fix(correction);
                            }
                        }

                        if (match2.Start + match2.Len >= mfLimit || !ht.InsertAndFindWiderMatch(src, match2.End() - 3, match2.Start, matchLimit, match2.Len, match3))
                        {
                            // no better match -> 2 sequences to encode
                            if (match2.Start < match1.End())
                            {
                                // Truncate match1 so that it stops where match2 begins.
                                match1.Len = match2.Start - match1.Start;
                            }
                            // encode seq 1
                            EncodeSequence(src, anchor, match1.@ref, match1.Start, match1.Len, @out);
                            anchor = sOff = match1.End();
                            // encode seq 2
                            EncodeSequence(src, anchor, match2.@ref, match2.Start, match2.Len, @out);
                            anchor = sOff = match2.End();
                            goto mainContinue;
                        }

                        if (match3.Start < match1.End() + 3) // Not enough space for match 2 : remove it
                        {
                            if (match3.Start >= match1.End()) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
                            {
                                if (match2.Start < match1.End())
                                {
                                    int correction = match1.End() - match2.Start;
                                    match2.Fix(correction);
                                    if (match2.Len < MIN_MATCH)
                                    {
                                        // What is left of match2 is too short to use: replace it with match3.
                                        CopyTo(match3, match2);
                                    }
                                }

                                EncodeSequence(src, anchor, match1.@ref, match1.Start, match1.Len, @out);
                                anchor = sOff = match1.End();

                                // match3 becomes the new match1; match2 is kept as the saved match0.
                                CopyTo(match3, match1);
                                CopyTo(match2, match0);

                                goto search2Continue;
                            }

                            // match3 still overlaps match1: it replaces match2 and the search3 loop restarts.
                            CopyTo(match3, match2);
                            goto search3Continue;
                        }

                        // OK, now we have 3 ascending matches; let's write at least the first one
                        if (match2.Start < match1.End())
                        {
                            if (match2.Start - match1.Start < 0x0F)
                            {
                                // Trim match1 (capped at OPTIMAL_ML, leaving MIN_MATCH bytes of match2)
                                // and move match2 forward past the trimmed match1.
                                if (match1.Len > OPTIMAL_ML)
                                {
                                    match1.Len = OPTIMAL_ML;
                                }
                                if (match1.End() > match2.End() - MIN_MATCH)
                                {
                                    match1.Len = match2.End() - match1.Start - MIN_MATCH;
                                }
                                int correction = match1.End() - match2.Start;
                                match2.Fix(correction);
                            }
                            else
                            {
                                // Truncate match1 so that it stops where match2 begins.
                                match1.Len = match2.Start - match1.Start;
                            }
                        }

                        EncodeSequence(src, anchor, match1.@ref, match1.Start, match1.Len, @out);
                        anchor = sOff = match1.End();

                        // Shift the window: match2 -> match1, match3 -> match2, then look for a new match3.
                        CopyTo(match2, match1);
                        CopyTo(match3, match2);

                        // Falls through to the end of the loop body, i.e. the next search3 iteration.
                    search3Continue: ;
                    }

                search2Continue: ;
                }

            mainContinue: ;
            }

            // Everything from anchor to the end of the input is written by EncodeLastLiterals.
            EncodeLastLiterals(src, anchor, srcEnd - anchor, @out);
        }