/// <summary>
/// Compress <c>bytes[off:off+len]</c> into <paramref name="out"/>. Compared to
/// <see cref="LZ4.Compress(byte[], int, int, DataOutput, HashTable)"/>, this method
/// is slower and uses more memory (~ 256KB per thread) but should provide
/// better compression ratios (especially on large inputs) because it chooses
/// the best match among up to 256 candidates and then performs trade-offs to
/// fix overlapping matches. <paramref name="ht"/> shouldn't be shared across threads
/// but can safely be reused.
/// </summary>
/// <param name="src">Buffer holding the bytes to compress.</param>
/// <param name="srcOff">Offset of the first byte to compress.</param>
/// <param name="srcLen">Number of bytes to compress.</param>
/// <param name="out">Destination for the compressed data.</param>
/// <param name="ht">Scratch state; reset at the start of every call, so it may be reused across calls (but not across threads).</param>
public static void CompressHC(byte[] src, int srcOff, int srcLen, DataOutput @out, HCHashTable ht)
{
    int srcEnd = srcOff + srcLen;
    // Matches may never extend past matchLimit: the final LAST_LITERALS bytes are always emitted as literals.
    int matchLimit = srcEnd - LAST_LITERALS;
    // Past mfLimit there is no room left for a MIN_MATCH-sized match before matchLimit.
    int mfLimit = matchLimit - MIN_MATCH;
    int sOff = srcOff;
    // anchor marks the first byte not yet written to the output (start of the pending literal run).
    int anchor = sOff++;

    ht.Reset(srcOff);
    // match0 is a saved copy of match1; match1..match3 hold up to three overlapping candidate matches.
    Match match0 = new Match();
    Match match1 = new Match();
    Match match2 = new Match();
    Match match3 = new Match();

    // NOTE: the gotos below emulate Java's labeled continue: each xxxContinue label sits at the
    // bottom of its loop, so "goto xxxContinue" jumps straight to that loop's next iteration.
    while (sOff <= mfLimit)
    {
        // No match at this position: leave the byte in the literal run and advance.
        if (!ht.InsertAndFindBestMatch(src, sOff, matchLimit, match1))
        {
            ++sOff;
            continue;
        }

        // saved, in case we would skip too much
        CopyTo(match1, match0);

        while (true)
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(match1.start >= anchor);
            }
            // Look for a wider match overlapping match1; if there is none, match1 is final.
            if (match1.End() >= mfLimit || !ht.InsertAndFindWiderMatch(src, match1.End() - 2, match1.start + 1, matchLimit, match1.len, match2))
            {
                // no better match
                EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                anchor = sOff = match1.End();
                goto mainContinue;
            }

            if (match0.start < match1.start)
            {
                if (match2.start < match1.start + match0.len) // empirical
                {
                    CopyTo(match0, match1);
                }
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(match2.start > match1.start);
            }

            if (match2.start - match1.start < 3) // First Match too small : removed
            {
                CopyTo(match2, match1);
                goto search2Continue;
            }

            while (true)
            {
                // match1 and match2 start close together: trim match1 (and fix match2's start)
                // so both remain encodable without overlapping illegally.
                if (match2.start - match1.start < OPTIMAL_ML)
                {
                    int newMatchLen = match1.len;
                    if (newMatchLen > OPTIMAL_ML)
                    {
                        newMatchLen = OPTIMAL_ML;
                    }
                    if (match1.start + newMatchLen > match2.End() - MIN_MATCH)
                    {
                        newMatchLen = match2.start - match1.start + match2.len - MIN_MATCH;
                    }
                    int correction = newMatchLen - (match2.start - match1.start);
                    if (correction > 0)
                    {
                        match2.Fix(correction);
                    }
                }

                // Try to widen match2 in turn; if that fails, both sequences are final.
                if (match2.start + match2.len >= mfLimit || !ht.InsertAndFindWiderMatch(src, match2.End() - 3, match2.start, matchLimit, match2.len, match3))
                {
                    // no better match -> 2 sequences to encode
                    if (match2.start < match1.End())
                    {
                        match1.len = match2.start - match1.start;
                    }
                    // encode seq 1
                    EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                    anchor = sOff = match1.End();
                    // encode seq 2
                    EncodeSequence(src, anchor, match2.@ref, match2.start, match2.len, @out);
                    anchor = sOff = match2.End();
                    goto mainContinue;
                }

                if (match3.start < match1.End() + 3) // Not enough space for match 2 : remove it
                {
                    if (match3.start >= match1.End()) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
                    {
                        if (match2.start < match1.End())
                        {
                            int correction = match1.End() - match2.start;
                            match2.Fix(correction);
                            if (match2.len < MIN_MATCH)
                            {
                                CopyTo(match3, match2);
                            }
                        }

                        EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                        anchor = sOff = match1.End();

                        // Seq3 becomes the new Seq1; the old Seq2 is kept as the saved match.
                        CopyTo(match3, match1);
                        CopyTo(match2, match0);

                        goto search2Continue;
                    }
                    CopyTo(match3, match2);
                    goto search3Continue;
                }

                // OK, now we have 3 ascending matches; let's write at least the first one
                if (match2.start < match1.End())
                {
                    if (match2.start - match1.start < 0x0F)
                    {
                        if (match1.len > OPTIMAL_ML)
                        {
                            match1.len = OPTIMAL_ML;
                        }
                        if (match1.End() > match2.End() - MIN_MATCH)
                        {
                            match1.len = match2.End() - match1.start - MIN_MATCH;
                        }
                        int correction = match1.End() - match2.start;
                        match2.Fix(correction);
                    }
                    else
                    {
                        match1.len = match2.start - match1.start;
                    }
                }

                EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                anchor = sOff = match1.End();

                // Shift the remaining matches down one slot and keep searching.
                CopyTo(match2, match1);
                CopyTo(match3, match2);

                goto search3Continue;
            search3Continue: ;
            }
            //search3Break: ; // LUCENENET NOTE: Unreachable
        search2Continue: ;
        }
        //search2Break: ; // LUCENENET NOTE: Not referenced
    mainContinue: ;
    }
    //mainBreak: // LUCENENET NOTE: Not referenced

    // Flush the trailing literal run (everything from anchor to the end of the input).
    EncodeLastLiterals(src, anchor, srcEnd - anchor, @out);
}
/// <summary>
/// Compress <c>bytes[off:off+len]</c> into <paramref name="out"/>. Compared to
/// <see cref="LZ4.Compress(byte[], int, int, DataOutput, HashTable)"/>, this method
/// is slower and uses more memory (~ 256KB per thread) but should provide
/// better compression ratios (especially on large inputs) because it chooses
/// the best match among up to 256 candidates and then performs trade-offs to
/// fix overlapping matches. <paramref name="ht"/> shouldn't be shared across threads
/// but can safely be reused.
/// </summary>
/// <param name="src">Buffer holding the bytes to compress.</param>
/// <param name="srcOff">Offset of the first byte to compress.</param>
/// <param name="srcLen">Number of bytes to compress.</param>
/// <param name="out">Destination for the compressed data.</param>
/// <param name="ht">Scratch state; reset at the start of every call, so it may be reused across calls (but not across threads).</param>
public static void CompressHC(sbyte[] src, int srcOff, int srcLen, DataOutput @out, HCHashTable ht)
{
    int srcEnd = srcOff + srcLen;
    // Matches may never extend past matchLimit: the final LAST_LITERALS bytes are always emitted as literals.
    int matchLimit = srcEnd - LAST_LITERALS;
    // Past mfLimit there is no room left for a MIN_MATCH-sized match before matchLimit.
    int mfLimit = matchLimit - MIN_MATCH;
    int sOff = srcOff;
    // anchor marks the first byte not yet written to the output (start of the pending literal run).
    int anchor = sOff++;

    ht.Reset(srcOff);
    // match0 is a saved copy of match1; match1..match3 hold up to three overlapping candidate matches.
    Match match0 = new Match();
    Match match1 = new Match();
    Match match2 = new Match();
    Match match3 = new Match();

    // NOTE: the gotos below emulate Java's labeled continue: each xxxContinue label sits at the
    // bottom of its loop, so "goto xxxContinue" jumps straight to that loop's next iteration.
    while (sOff <= mfLimit)
    {
        // No match at this position: leave the byte in the literal run and advance.
        if (!ht.InsertAndFindBestMatch(src, sOff, matchLimit, match1))
        {
            ++sOff;
            continue;
        }

        // saved, in case we would skip too much
        CopyTo(match1, match0);

        while (true)
        {
            Debug.Assert(match1.Start >= anchor);
            // Look for a wider match overlapping match1; if there is none, match1 is final.
            if (match1.End() >= mfLimit || !ht.InsertAndFindWiderMatch(src, match1.End() - 2, match1.Start + 1, matchLimit, match1.Len, match2))
            {
                // no better match
                EncodeSequence(src, anchor, match1.@ref, match1.Start, match1.Len, @out);
                anchor = sOff = match1.End();
                goto mainContinue;
            }

            if (match0.Start < match1.Start)
            {
                if (match2.Start < match1.Start + match0.Len) // empirical
                {
                    CopyTo(match0, match1);
                }
            }
            Debug.Assert(match2.Start > match1.Start);

            if (match2.Start - match1.Start < 3) // First Match too small : removed
            {
                CopyTo(match2, match1);
                goto search2Continue;
            }

            while (true)
            {
                // match1 and match2 start close together: trim match1 (and fix match2's start)
                // so both remain encodable without overlapping illegally.
                if (match2.Start - match1.Start < OPTIMAL_ML)
                {
                    int newMatchLen = match1.Len;
                    if (newMatchLen > OPTIMAL_ML)
                    {
                        newMatchLen = OPTIMAL_ML;
                    }
                    if (match1.Start + newMatchLen > match2.End() - MIN_MATCH)
                    {
                        newMatchLen = match2.Start - match1.Start + match2.Len - MIN_MATCH;
                    }
                    int correction = newMatchLen - (match2.Start - match1.Start);
                    if (correction > 0)
                    {
                        match2.Fix(correction);
                    }
                }

                // Try to widen match2 in turn; if that fails, both sequences are final.
                if (match2.Start + match2.Len >= mfLimit || !ht.InsertAndFindWiderMatch(src, match2.End() - 3, match2.Start, matchLimit, match2.Len, match3))
                {
                    // no better match -> 2 sequences to encode
                    if (match2.Start < match1.End())
                    {
                        match1.Len = match2.Start - match1.Start;
                    }
                    // encode seq 1
                    EncodeSequence(src, anchor, match1.@ref, match1.Start, match1.Len, @out);
                    anchor = sOff = match1.End();
                    // encode seq 2
                    EncodeSequence(src, anchor, match2.@ref, match2.Start, match2.Len, @out);
                    anchor = sOff = match2.End();
                    goto mainContinue;
                }

                if (match3.Start < match1.End() + 3) // Not enough space for match 2 : remove it
                {
                    if (match3.Start >= match1.End()) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
                    {
                        if (match2.Start < match1.End())
                        {
                            int correction = match1.End() - match2.Start;
                            match2.Fix(correction);
                            if (match2.Len < MIN_MATCH)
                            {
                                CopyTo(match3, match2);
                            }
                        }

                        EncodeSequence(src, anchor, match1.@ref, match1.Start, match1.Len, @out);
                        anchor = sOff = match1.End();

                        // Seq3 becomes the new Seq1; the old Seq2 is kept as the saved match.
                        CopyTo(match3, match1);
                        CopyTo(match2, match0);

                        goto search2Continue;
                    }
                    CopyTo(match3, match2);
                    goto search3Continue;
                }

                // OK, now we have 3 ascending matches; let's write at least the first one
                if (match2.Start < match1.End())
                {
                    if (match2.Start - match1.Start < 0x0F)
                    {
                        if (match1.Len > OPTIMAL_ML)
                        {
                            match1.Len = OPTIMAL_ML;
                        }
                        if (match1.End() > match2.End() - MIN_MATCH)
                        {
                            match1.Len = match2.End() - match1.Start - MIN_MATCH;
                        }
                        int correction = match1.End() - match2.Start;
                        match2.Fix(correction);
                    }
                    else
                    {
                        match1.Len = match2.Start - match1.Start;
                    }
                }

                EncodeSequence(src, anchor, match1.@ref, match1.Start, match1.Len, @out);
                anchor = sOff = match1.End();

                // Shift the remaining matches down one slot and keep searching.
                CopyTo(match2, match1);
                CopyTo(match3, match2);

                goto search3Continue;
            search3Continue: ;
            }
            //search3Break: ; // LUCENENET NOTE: Unreachable (the loop above only exits via goto)
        search2Continue: ;
        }
        //search2Break: ; // LUCENENET NOTE: Not referenced
    mainContinue: ;
    }
    //mainBreak: // LUCENENET NOTE: Not referenced

    // Flush the trailing literal run (everything from anchor to the end of the input).
    EncodeLastLiterals(src, anchor, srcEnd - anchor, @out);
}