/// <summary>
/// Writes the LZ4 "extended length" encoding of <paramref name="l"/> to
/// <paramref name="out"/>: a run of <c>0xFF</c> bytes, one per full 255
/// contained in the value, terminated by the remainder byte.
/// </summary>
private static void EncodeLen(int l, DataOutput @out)
{
    int remaining = l;
    // Each 0xFF byte accounts for 255 units of length; the decoder keeps
    // summing bytes until it sees one below 0xFF.
    for (; remaining >= 0xFF; remaining -= 0xFF)
    {
        @out.WriteByte(/*(byte)*/0xFF); // LUCENENET: Removed unnecessary cast
    }
    @out.WriteByte((byte)remaining);
}
internal const int OPTIMAL_ML = 0x0F + 4 - 1; // match length that doesn't require an additional byte

/// <summary>
/// Compress <c>bytes[off:off+len]</c> into <paramref name="out"/> using
/// at most 16KB of memory. <paramref name="ht"/> shouldn't be shared across threads
/// but can safely be reused.
/// </summary>
public static void Compress(byte[] bytes, int off, int len, DataOutput @out, HashTable ht)
{
    int @base = off;       // origin used to store hash-table entries as small relative offsets
    int end = off + len;

    int anchor = off++;    // start of the pending (not yet emitted) literal run

    // Inputs too short to ever form a match are emitted below as one literal run.
    if (len > LAST_LITERALS + MIN_MATCH)
    {
        int limit = end - LAST_LITERALS;     // matches may not extend into the trailing literals
        int matchLimit = limit - MIN_MATCH;  // last position where a MIN_MATCH-byte match can start
        ht.Reset(len);
        int hashLog = ht.hashLog;
        PackedInt32s.Mutable hashTable = ht.hashTable;

        while (off <= limit)
        {
            // find a match: hash the next 4 bytes and probe the single-entry-per-bucket table
            int @ref;
            while (true)
            {
                if (off >= matchLimit)
                {
                    goto mainBreak;
                }
                int v = ReadInt32(bytes, off);
                int h = Hash(v, hashLog);
                @ref = @base + (int)hashTable.Get(h);
                if (Debugging.AssertsEnabled) Debugging.Assert(PackedInt32s.BitsRequired(off - @base) <= hashTable.BitsPerValue);
                hashTable.Set(h, off - @base);
                // Candidate must be within the format's back-reference window and
                // actually share the 4 hashed bytes (hash collisions are possible).
                if (off - @ref < MAX_DISTANCE && ReadInt32(bytes, @ref) == v)
                {
                    break;
                }
                ++off;
            }

            // compute match length: MIN_MATCH guaranteed bytes plus however many more agree
            int matchLen = MIN_MATCH + CommonBytes(bytes, @ref + MIN_MATCH, off + MIN_MATCH, limit);

            EncodeSequence(bytes, anchor, @ref, off, matchLen, @out);
            off += matchLen;
            anchor = off;
            //mainContinue: ; // LUCENENET NOTE: Not Referenced
        }
        mainBreak: ;
    }

    // last literals: the LZ4 format requires the stream to end with a bare literal run
    int literalLen = end - anchor;
    if (Debugging.AssertsEnabled) Debugging.Assert(literalLen >= LAST_LITERALS || literalLen == len);
    EncodeLastLiterals(bytes, anchor, end - anchor, @out);
}
/// <summary>
/// Compress bytes into <paramref name="out"/>. It is the responsibility of the
/// compressor to add all necessary information so that a <see cref="Decompressor"/>
/// will know when to stop decompressing bytes from the stream.
/// </summary>
public abstract void Compress(byte[] bytes, int off, int len, DataOutput @out);
/// <summary>
/// Emits one complete LZ4 sequence: the token byte, the literal run
/// <c>bytes[anchor..matchOff)</c>, the 16-bit little-endian match offset,
/// and — when the token's match nibble saturates — the extended match length.
/// </summary>
private static void EncodeSequence(byte[] bytes, int anchor, int matchRef, int matchOff, int matchLen, DataOutput @out)
{
    if (Debugging.AssertsEnabled) Debugging.Assert(matchLen >= 4);

    // Token: high nibble = literal length (capped at 15), low nibble = matchLen - 4 (capped at 15).
    int literalLen = matchOff - anchor;
    int token = (Math.Min(literalLen, 0x0F) << 4) | Math.Min(matchLen - 4, 0x0F);
    EncodeLiterals(bytes, token, anchor, literalLen, @out);

    // Match offset (distance back to matchRef), little-endian 16 bits.
    int matchDec = matchOff - matchRef;
    if (Debugging.AssertsEnabled) Debugging.Assert(matchDec > 0 && matchDec < 1 << 16);
    @out.WriteByte((byte)(matchDec & 0xFF));
    @out.WriteByte((byte)((uint)matchDec >> 8));

    // Extended match length is needed only when the 4-bit nibble saturated.
    if (matchLen >= MIN_MATCH + 0x0F)
    {
        EncodeLen(matchLen - 0x0F - MIN_MATCH, @out);
    }
}
/// <summary>
/// Writes the token byte, the optional extended literal length, and then the
/// raw literal bytes <c>bytes[anchor..anchor+literalLen)</c>.
/// </summary>
private static void EncodeLiterals(byte[] bytes, int token, int anchor, int literalLen, DataOutput @out)
{
    @out.WriteByte((byte)token);

    // Anything beyond the token's 4-bit literal field goes out as 0xFF runs.
    int extra = literalLen - 0x0F;
    if (extra >= 0)
    {
        EncodeLen(extra, @out);
    }

    // Literals are stored uncompressed.
    @out.WriteBytes(bytes, anchor, literalLen);
}
/// <summary>
/// Emits the mandatory trailing literal run: a token whose match nibble is
/// zero, followed by the remaining literals.
/// </summary>
private static void EncodeLastLiterals(byte[] bytes, int anchor, int literalLen, DataOutput @out)
{
    // Only the high (literal-length) nibble is populated; there is no match part.
    int lenNibble = literalLen < 0x0F ? literalLen : 0x0F;
    EncodeLiterals(bytes, lenNibble << 4, anchor, literalLen, @out);
}
/// <summary>
/// Compress <c>bytes[off:off+len]</c> into <paramref name="out"/>. Compared to
/// <see cref="LZ4.Compress(byte[], int, int, DataOutput, HashTable)"/>, this method
/// is slower and uses more memory (~ 256KB per thread) but should provide
/// better compression ratios (especially on large inputs) because it chooses
/// the best match among up to 256 candidates and then performs trade-offs to
/// fix overlapping matches. <paramref name="ht"/> shouldn't be shared across threads
/// but can safely be reused.
/// </summary>
public static void CompressHC(byte[] src, int srcOff, int srcLen, DataOutput @out, HCHashTable ht)
{
    int srcEnd = srcOff + srcLen;
    int matchLimit = srcEnd - LAST_LITERALS;  // matches may not extend into the trailing literals
    int mfLimit = matchLimit - MIN_MATCH;     // last position where a new match may start

    int sOff = srcOff;
    int anchor = sOff++;                      // start of the pending literal run

    ht.Reset(srcOff);
    // Up to four candidate matches are juggled while resolving overlaps:
    // match1 = current best, match2/match3 = wider candidates found further on,
    // match0 = saved copy of match1 in case a replacement skips too much input.
    Match match0 = new Match();
    Match match1 = new Match();
    Match match2 = new Match();
    Match match3 = new Match();

    while (sOff <= mfLimit)
    {
        if (!ht.InsertAndFindBestMatch(src, sOff, matchLimit, match1))
        {
            ++sOff;
            continue;
        }

        // saved, in case we would skip too much
        CopyTo(match1, match0);

        while (true)
        {
            if (Debugging.AssertsEnabled) Debugging.Assert(match1.start >= anchor);
            // Look for a wider match overlapping match1; if none, match1 is final.
            if (match1.End() >= mfLimit || !ht.InsertAndFindWiderMatch(src, match1.End() - 2, match1.start + 1, matchLimit, match1.len, match2))
            {
                // no better match
                EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                anchor = sOff = match1.End();
                goto mainContinue;
            }

            if (match0.start < match1.start)
            {
                if (match2.start < match1.start + match0.len) // empirical
                {
                    CopyTo(match0, match1);
                }
            }
            if (Debugging.AssertsEnabled) Debugging.Assert(match2.start > match1.start);

            if (match2.start - match1.start < 3) // First Match too small : removed
            {
                CopyTo(match2, match1);
                goto search2Continue;
            }

            while (true)
            {
                // If match1 and match2 nearly touch, shorten/shift match2 so that
                // match1 keeps an encodable (<= OPTIMAL_ML) length without leaving a gap.
                if (match2.start - match1.start < OPTIMAL_ML)
                {
                    int newMatchLen = match1.len;
                    if (newMatchLen > OPTIMAL_ML)
                    {
                        newMatchLen = OPTIMAL_ML;
                    }
                    if (match1.start + newMatchLen > match2.End() - MIN_MATCH)
                    {
                        newMatchLen = match2.start - match1.start + match2.len - MIN_MATCH;
                    }
                    int correction = newMatchLen - (match2.start - match1.start);
                    if (correction > 0)
                    {
                        match2.Fix(correction);
                    }
                }

                // Try to better match2 with a third candidate; if none, emit match1 + match2.
                if (match2.start + match2.len >= mfLimit || !ht.InsertAndFindWiderMatch(src, match2.End() - 3, match2.start, matchLimit, match2.len, match3))
                {
                    // no better match -> 2 sequences to encode
                    if (match2.start < match1.End())
                    {
                        match1.len = match2.start - match1.start; // truncate match1 at match2's start
                    }
                    // encode seq 1
                    EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                    anchor = /*sOff =*/ match1.End(); // LUCENENET: IDE0059: Remove unnecessary value assignment
                    // encode seq 2
                    EncodeSequence(src, anchor, match2.@ref, match2.start, match2.len, @out);
                    anchor = sOff = match2.End();
                    goto mainContinue;
                }

                if (match3.start < match1.End() + 3) // Not enough space for match 2 : remove it
                {
                    if (match3.start >= match1.End()) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
                    {
                        if (match2.start < match1.End())
                        {
                            int correction = match1.End() - match2.start;
                            match2.Fix(correction);
                            if (match2.len < MIN_MATCH)
                            {
                                CopyTo(match3, match2);
                            }
                        }

                        EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                        anchor = /*sOff =*/ match1.End(); // LUCENENET: IDE0059: Remove unnecessary value assignment

                        CopyTo(match3, match1);
                        CopyTo(match2, match0);

                        goto search2Continue;
                    }

                    CopyTo(match3, match2);
                    goto search3Continue;
                }

                // OK, now we have 3 ascending matches; let's write at least the first one
                if (match2.start < match1.End())
                {
                    if (match2.start - match1.start < 0x0F)
                    {
                        if (match1.len > OPTIMAL_ML)
                        {
                            match1.len = OPTIMAL_ML;
                        }
                        if (match1.End() > match2.End() - MIN_MATCH)
                        {
                            match1.len = match2.End() - match1.start - MIN_MATCH;
                        }
                        int correction = match1.End() - match2.start;
                        match2.Fix(correction);
                    }
                    else
                    {
                        match1.len = match2.start - match1.start;
                    }
                }
                EncodeSequence(src, anchor, match1.@ref, match1.start, match1.len, @out);
                anchor = /*sOff =*/ match1.End(); // LUCENENET: IDE0059: Remove unnecessary value assignment

                // Shift the window: match2 becomes the current match, match3 its challenger.
                CopyTo(match2, match1);
                CopyTo(match3, match2);

                goto search3Continue;
                search3Continue: ;
            }
            //search3Break: ; // LUCENENET NOTE: Unreachable

            search2Continue: ;
        }
        //search2Break: ; // LUCENENET NOTE: Not referenced

        mainContinue: ;
    }
    //mainBreak: // LUCENENET NOTE: Not referenced

    // The stream must end with a bare literal run covering everything after the last match.
    EncodeLastLiterals(src, anchor, srcEnd - anchor, @out);
}