/// <summary>
/// Finds the best match for the data at <paramref name="ip"/>. A back reference into the input is
/// searched first; the dictionary is consulted only when a dictionary is present, the back-reference
/// search found nothing, and at least one in every 128 earlier dictionary lookups produced a match
/// (<c>dictionaryMatches >= dictionaryLookups >> 7</c>).
/// </summary>
/// <param name="ip">Position in <c>input</c> where the match has to start.</param>
/// <param name="maxLength">Upper bound on the match length, forwarded to both searches.</param>
/// <param name="maxDistance">Upper bound on the back-reference distance.</param>
/// <param name="dictionaryStart">Offset added to a dictionary entry's packed value to form its distance.</param>
/// <param name="lastDistance">Most recently used distance, forwarded to the back-reference search.</param>
/// <param name="bestLenIn">Length of the match the caller already has, forwarded to the back-reference search.</param>
/// <returns>
/// The best-scoring usable dictionary entry when the dictionary was consulted and yielded one;
/// otherwise whatever the back-reference builder produces.
/// </returns>
public sealed override HasherSearchResult FindLongestMatch(int ip, int maxLength, int maxDistance, int dictionaryStart, int lastDistance, int bestLenIn)
{
    var backReference = FindLongestBackReference(ip, maxLength, maxDistance, lastDistance, bestLenIn);

    bool consultDictionary = dictionary != null
                          && !backReference.FoundAnything
                          && dictionaryMatches >= (dictionaryLookups >> 7);

    if (!consultDictionary)
    {
        return backReference.Build();
    }

    DictionaryIndexEntry? winner = null;
    int winnerScore = int.MinValue;
    var remainingInput = new ArraySegment<byte>(input, ip, input.Length - ip);

    foreach (var candidate in dictionary.Index.Find(remainingInput, minLength: 4, maxLength))
    {
        var copyLength = candidate.CopyLength;

        // Skip entries whose word is not available in the limited dictionary.
        var wordIndex = dictionary.Format.UnpackWordIndex(copyLength, candidate.Packed);
        bool wordAllowed = LimitedDictionary.CanUseWord(copyLength, wordIndex, shallow: true);

        if (wordAllowed)
        {
            // Skip entries whose transform is not available either.
            var transform = dictionary.Transforms[dictionary.Format.UnpackTransformIndex(copyLength, candidate.Packed)];

            if (LimitedDictionary.CanUseTransform(transform))
            {
                int distance = dictionaryStart + candidate.Packed;
                int score = HasherSearchResult.BackwardReferenceScore(candidate.OutputLength, distance);

                // TODO check distance to make sure it doesn't go beyond what can be represented?
                if (score > winnerScore)
                {
                    winner = candidate;
                    winnerScore = score;
                }
            }
        }
    }

    // Every dictionary search counts as a lookup, successful or not.
    ++dictionaryLookups;

    if (winner == null)
    {
        return backReference.Build();
    }

    ++dictionaryMatches;
    return new HasherSearchResult(winner.Value, winnerScore);
}
/// <summary>
/// Scans the bytes from <paramref name="position"/> up to <c>position + num_bytes</c> and writes one
/// command per accepted match through <c>InitCommand</c>.
/// At every position <c>FindLongestMatch</c> is called. When it succeeds, the search is repeated one
/// byte further; if that later match scores at least <c>cost_diff_lazy</c> (175) higher, the current
/// byte is emitted as a literal and the later match is taken instead. This deferral can happen at most
/// four times in a row. When no match is found, the byte is added to the pending insert length and,
/// once no match has been seen for long enough, positions are advanced in steps of 2 or 4 while still
/// being passed to <c>Store</c>.
/// </summary>
/// <param name="dictionary_hash">Forwarded unchanged to <c>FindLongestMatch</c>.</param>
/// <param name="num_bytes">Number of bytes to process, starting at <paramref name="position"/>.</param>
/// <param name="position">First position to process; also bounds the maximum distance of a match.</param>
/// <param name="ringbuffer">Forwarded to <c>FindLongestMatch</c>, <c>Store</c> and <c>StoreRange</c>.</param>
/// <param name="ringbuffer_mask">Forwarded together with <paramref name="ringbuffer"/>.</param>
/// <param name="params_">Supplies <c>lgwin</c> (maximum backward limit) and <c>quality</c> (seed length of the lazy search); also passed to <c>LiteralSpreeLengthForSparseSearch</c>.</param>
/// <param name="hasher">Forwarded to the hasher helpers.</param>
/// <param name="dist_cache">Entries 0 to 3 are shifted by one and entry 0 is replaced whenever a command uses a new distance.</param>
/// <param name="last_insert_len">On entry, the insert length carried in; on exit, the insert length still pending.</param>
/// <param name="commands">Destination for the commands; written sequentially.</param>
/// <param name="num_commands">Incremented by the number of commands written.</param>
/// <param name="num_literals">Incremented by the insert length of every command written.</param>
public override unsafe void CreateBackwardReferences(
    ushort *dictionary_hash, size_t num_bytes, size_t position,
    byte *ringbuffer, size_t ringbuffer_mask,
    BrotliEncoderParams *params_, HasherHandle hasher, int *dist_cache,
    size_t *last_insert_len, Command *commands, size_t *num_commands,
    size_t *num_literals)
{
    /* Set maximum distance, see section 9.1. of the spec. */
    size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params_->lgwin);

    /* Remember where output started so the number of written commands can be
     * computed at the end. */
    Command *orig_commands = commands;
    size_t insert_length = *last_insert_len;
    size_t pos_end = position + num_bytes;
    /* Upper bound handed to StoreRange; collapses to the start position when
     * fewer than StoreLookahead() bytes are available. */
    size_t store_end = num_bytes >= StoreLookahead() ? position + num_bytes - StoreLookahead() + 1 : position;

    /* For speed up heuristics for random data. */
    size_t random_heuristics_window_size = LiteralSpreeLengthForSparseSearch(params_);
    size_t apply_random_heuristics = position + random_heuristics_window_size;

    /* Minimum score to accept a backward reference. */
    score_t kMinScore = BROTLI_SCORE_BASE + 100;

    PrepareDistanceCache(hasher, dist_cache);

    while (position + HashTypeLength() < pos_end)
    {
        size_t max_length = pos_end - position;
        size_t max_distance = Math.Min(position, max_backward_limit);
        /* Every search starts from an empty result whose score is the
         * acceptance threshold. */
        HasherSearchResult sr = new HasherSearchResult();
        sr.len = 0;
        sr.len_x_code = 0;
        sr.distance = 0;
        sr.score = kMinScore;
        if (FindLongestMatch(hasher, dictionary_hash, ringbuffer, ringbuffer_mask, dist_cache, position, max_length, max_distance, &sr))
        {
            /* Found a match. Let's look for something even better ahead. */
            int delayed_backward_references_in_row = 0;
            --max_length;
            for (;; --max_length)
            {
                /* A match starting one byte later must score at least this
                 * much higher to replace the current one. */
                score_t cost_diff_lazy = 175;
                bool is_match_found;
                HasherSearchResult sr2;
                /* Below the extensive-search quality the next search is seeded
                 * with sr.len - 1 (capped at max_length); otherwise with 0.
                 * NOTE(review): presumably the seed makes FindLongestMatch
                 * ignore shorter candidates - confirm against its implementation. */
                sr2.len = params_->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ? Math.Min(sr.len - 1, max_length) : 0;
                sr2.len_x_code = 0;
                sr2.distance = 0;
                sr2.score = kMinScore;
                max_distance = Math.Min(position + 1, max_backward_limit);
                is_match_found = FindLongestMatch(hasher, dictionary_hash, ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length, max_distance, &sr2);
                if (is_match_found && sr2.score >= sr.score + cost_diff_lazy)
                {
                    /* Ok, let's just write one byte for now and start a match
                     * from the next byte. */
                    ++position;
                    ++insert_length;
                    sr = sr2;
                    /* Keep deferring only while fewer than four deferrals have
                     * happened in a row and enough input remains to hash. */
                    if (++delayed_backward_references_in_row < 4 && position + HashTypeLength() < pos_end)
                    {
                        continue;
                    }
                }
                break;
            }
            /* A match was taken, so push back the point at which the sparse
             * search heuristics start to apply. */
            apply_random_heuristics = position + 2 * sr.len + random_heuristics_window_size;
            max_distance = Math.Min(position, max_backward_limit);
            {
                /* The first 16 codes are special short-codes,
                 * and the minimum offset is 1. */
                size_t distance_code = ComputeDistanceCode(sr.distance, max_distance, dist_cache);
                if (sr.distance <= max_distance && distance_code > 0)
                {
                    /* Push the new distance to the front of the distance cache,
                     * dropping the oldest entry. */
                    dist_cache[3] = dist_cache[2];
                    dist_cache[2] = dist_cache[1];
                    dist_cache[1] = dist_cache[0];
                    dist_cache[0] = (int)sr.distance;
                    PrepareDistanceCache(hasher, dist_cache);
                }
                /* NOTE(review): sr.len ^ sr.len_x_code presumably yields the
                 * length code expected by InitCommand - confirm against it. */
                InitCommand(commands++, insert_length, sr.len, sr.len ^ sr.len_x_code, distance_code);
            }
            /* The literals preceding this copy are now accounted for; the
             * pending insert length restarts from zero. */
            *num_literals += insert_length;
            insert_length = 0;
            /* Put the hash keys into the table, if there are enough bytes left.
             * Depending on the hasher implementation, it can push all positions
             * in the given range or only a subset of them. */
            StoreRange(hasher, ringbuffer, ringbuffer_mask, position + 2, Math.Min(position + sr.len, store_end));
            position += sr.len;
        }
        else
        {
            ++insert_length;
            ++position;
            /* If we have not seen matches for a long time, we can skip some
             * match lookups. Unsuccessful match lookups are very expensive
             * and this kind of heuristic speeds up compression quite
             * a lot. */
            if (position > apply_random_heuristics)
            {
                /* Going through incompressible data, jump. */
                if (position > apply_random_heuristics + 4 * random_heuristics_window_size)
                {
                    /* It is quite a long time since we saw a copy, so we assume
                     * that this data is not compressible, and store hashes less
                     * often. Hashes of incompressible data are less likely to
                     * turn out to be useful in the future, too, so we store
                     * fewer of them so as not to flood the hash table and push
                     * out entries for well compressible data. */
                    size_t kMargin = Math.Max(StoreLookahead() - 1, 4);
                    size_t pos_jump = Math.Min(position + 16, pos_end - kMargin);
                    for (; position < pos_jump; position += 4)
                    {
                        Store(hasher, ringbuffer, ringbuffer_mask, position);
                        insert_length += 4;
                    }
                }
                else
                {
                    /* Milder variant of the jump above: every second position
                     * is stored, over at most 8 bytes. */
                    size_t kMargin = Math.Max(StoreLookahead() - 1, 2);
                    size_t pos_jump = Math.Min(position + 8, pos_end - kMargin);
                    for (; position < pos_jump; position += 2)
                    {
                        Store(hasher, ringbuffer, ringbuffer_mask, position);
                        insert_length += 2;
                    }
                }
            }
        }
    }
    /* Bytes after the last processed position are added to the pending insert
     * length, which is handed back to the caller through last_insert_len. */
    insert_length += pos_end - position;
    *last_insert_len = insert_length;
    *num_commands += (size_t)(commands - orig_commands);
}
/// <summary>
/// Looks for the best back reference to the data at <paramref name="ip"/>. The last used distance is
/// tried first, then every slot derived from the hash of the current position. A candidate replaces
/// the current best only if it is at least 4 bytes long and scores strictly higher.
/// Before returning, the slot selected by <c>(ip &amp; sweepMask) >> 3</c> is overwritten with
/// <paramref name="ip"/>.
/// </summary>
/// <param name="ip">Position in <c>input</c> where the match has to start.</param>
/// <param name="maxLength">Upper bound passed to <c>Match.DetermineLength</c>.</param>
/// <param name="maxDistance">Table candidates further back than this are skipped.</param>
/// <param name="lastDistance">Most recently used distance; scored with the last-distance scoring function.</param>
/// <param name="bestLenIn">Initial length of the builder; the byte at this offset serves as a quick rejection test.</param>
/// <returns>The builder holding the best match found, or its initial state if nothing better was found.</returns>
protected override HasherSearchResult.BackReferenceBuilder FindLongestBackReference(int ip, int maxLength, int maxDistance, int lastDistance, int bestLenIn)
{
    // A candidate can only beat the current best if it matches at this offset.
    byte probe = input[ip + bestLenIn];
    uint hash = HashBytes(input, ip);

    var result = new HasherSearchResult.BackReferenceBuilder { Len = bestLenIn };
    int topScore = result.Score;
    int topLen = result.Len;

    // Step 1: the most recently used distance.
    int candidate = ip - lastDistance;

    if (candidate >= 0 && candidate < ip && probe == input[candidate + topLen])
    {
        int cachedLen = Match.DetermineLength(input, candidate, ip, maxLength);

        if (cachedLen >= 4)
        {
            int cachedScore = HasherSearchResult.BackwardReferenceScoreUsingLastDistance(cachedLen);

            if (cachedScore > topScore)
            {
                result.Len = cachedLen;
                result.Distance = lastDistance;
                result.Score = cachedScore;

                topLen = cachedLen;
                topScore = cachedScore;
                probe = input[ip + cachedLen];
            }
        }
    }

    // Step 2: every slot of the sweep, spaced 8 apart and wrapped by the bucket mask.
    var slots = new uint[sweep];

    for (int n = 0; n < sweep; n++)
    {
        slots[n] = (uint)((hash + (n << 3)) & bucketMask);
    }

    // The slot that will receive the current position is picked from the position itself.
    uint slotToOverwrite = slots[(ip & sweepMask) >> 3];

    foreach (uint slot in slots)
    {
        candidate = table[slot];
        int distance = ip - candidate;

        if (probe != input[candidate + topLen] || distance == 0 || distance > maxDistance)
        {
            continue;
        }

        int len = Match.DetermineLength(input, candidate, ip, maxLength);

        if (len < 4)
        {
            continue;
        }

        int score = HasherSearchResult.BackwardReferenceScore(len, distance);

        if (score <= topScore)
        {
            continue;
        }

        topLen = len;
        topScore = score;

        result.Len = len;
        result.Score = score;
        result.Distance = distance;

        probe = input[ip + len];
    }

    table[slotToOverwrite] = ip;
    return(result);
}
/// <summary>
/// Looks for a back reference to the data at <paramref name="ip"/> using a single hash table slot.
/// The last used distance is tried first; if it yields a match of at least 4 bytes that scores higher
/// than the initial builder score, that match is returned immediately. Otherwise the position stored
/// in the slot is tried. In both cases the slot is overwritten with <paramref name="ip"/>.
/// </summary>
/// <param name="ip">Position in <c>input</c> where the match has to start.</param>
/// <param name="maxLength">Upper bound passed to <c>Match.DetermineLength</c>.</param>
/// <param name="maxDistance">A table candidate further back than this is rejected.</param>
/// <param name="lastDistance">Most recently used distance; scored with the last-distance scoring function.</param>
/// <param name="bestLenIn">Initial length of the builder; the byte at this offset serves as a quick rejection test.</param>
/// <returns>The builder holding the match that was found, or its initial state if none qualified.</returns>
protected override HasherSearchResult.BackReferenceBuilder FindLongestBackReference(int ip, int maxLength, int maxDistance, int lastDistance, int bestLenIn)
{
    // A candidate can only beat the current best if it matches at this offset.
    byte probe = input[ip + bestLenIn];
    uint slot = HashBytes(input, ip);

    var result = new HasherSearchResult.BackReferenceBuilder { Len = bestLenIn };
    int scoreToBeat = result.Score;
    int lenToBeat = result.Len;

    // Step 1: the most recently used distance.
    int candidate = ip - lastDistance;

    if (candidate >= 0 && candidate < ip && probe == input[candidate + lenToBeat])
    {
        int cachedLen = Match.DetermineLength(input, candidate, ip, maxLength);

        if (cachedLen >= 4)
        {
            int cachedScore = HasherSearchResult.BackwardReferenceScoreUsingLastDistance(cachedLen);

            if (cachedScore > scoreToBeat)
            {
                result.Len = cachedLen;
                result.Distance = lastDistance;
                result.Score = cachedScore;

                table[slot] = ip;
                return(result);
            }
        }
    }

    // Step 2: the position previously stored in the slot. The slot is updated
    // before the candidate is examined.
    candidate = table[slot];
    table[slot] = ip;

    int distance = ip - candidate;

    bool worthMeasuring = probe == input[candidate + bestLenIn]
                       && distance != 0
                       && distance <= maxDistance;

    if (worthMeasuring)
    {
        int len = Match.DetermineLength(input, candidate, ip, maxLength);

        if (len >= 4)
        {
            int score = HasherSearchResult.BackwardReferenceScore(len, distance);

            if (score > scoreToBeat)
            {
                result.Len = len;
                result.Score = score;
                result.Distance = distance;
            }
        }
    }

    return(result);
}