/// <summary>
/// Finds the best match starting at <paramref name="ip"/>. Back references are tried first;
/// the dictionary is consulted only when no back reference was found.
/// </summary>
/// <param name="ip">Position in the input at which the match must start.</param>
/// <param name="maxLength">Upper bound on match length, forwarded to both searches.</param>
/// <param name="maxDistance">Upper bound on back-reference distance, forwarded to the back-reference search.</param>
/// <param name="dictionaryStart">Offset added to a dictionary entry's packed value to form its distance.</param>
/// <param name="lastDistance">Most recently used distance, forwarded to the back-reference search.</param>
/// <param name="bestLenIn">Initial best length, forwarded to the back-reference search.</param>
/// <returns>The built back-reference result, or a result wrapping the best scoring dictionary entry.</returns>
public sealed override HasherSearchResult FindLongestMatch(int ip, int maxLength, int maxDistance, int dictionaryStart, int lastDistance, int bestLenIn)
        {
            var backReference = FindLongestBackReference(ip, maxLength, maxDistance, lastDistance, bestLenIn);

            // Skip the dictionary when there is none, when a back reference already exists, or when
            // fewer than 1 in 128 of the previous dictionary lookups produced a match.
            if (dictionary == null || backReference.FoundAnything || dictionaryMatches < (dictionaryLookups >> 7))
            {
                return backReference.Build();
            }

            DictionaryIndexEntry? winner = null;
            int winnerScore = int.MinValue;

            var index = dictionary.Index;
            var remainingInput = new ArraySegment<byte>(input, ip, input.Length - ip);

            foreach (var candidate in index.Find(remainingInput, minLength: 4, maxLength))
            {
                var wordLength = candidate.CopyLength;

                if (!LimitedDictionary.CanUseWord(wordLength, dictionary.Format.UnpackWordIndex(wordLength, candidate.Packed), shallow: true))
                {
                    continue;
                }

                var transformIndex = dictionary.Format.UnpackTransformIndex(wordLength, candidate.Packed);

                if (!LimitedDictionary.CanUseTransform(dictionary.Transforms[transformIndex]))
                {
                    continue;
                }

                int candidateScore = HasherSearchResult.BackwardReferenceScore(candidate.OutputLength, dictionaryStart + candidate.Packed);

                // TODO check distance to make sure it doesn't go beyond what can be represented?
                if (candidateScore > winnerScore)
                {
                    winner = candidate;
                    winnerScore = candidateScore;
                }
            }

            // Every dictionary search counts as a lookup, whether or not it produced a usable entry.
            ++dictionaryLookups;

            if (!winner.HasValue)
            {
                return backReference.Build();
            }

            ++dictionaryMatches;
            return new HasherSearchResult(winner.Value, winnerScore);
        }
Exemple #2
0
            /// <summary>
            /// Walks the range [position, position + num_bytes) and emits one command per accepted match.
            /// At each position it calls FindLongestMatch; on success it lazily checks whether starting one
            /// byte later scores sufficiently better (at most 4 consecutive deferrals), writes a command via
            /// InitCommand, updates the distance cache and stores hashes for the covered range. On failure the
            /// byte is counted as a pending literal, and after a long run without matches positions are
            /// skipped in steps of 2 or 4 to reduce lookups.
            /// </summary>
            /// <param name="dictionary_hash">Passed through unchanged to FindLongestMatch.</param>
            /// <param name="num_bytes">Number of bytes to process starting at <paramref name="position"/>.</param>
            /// <param name="position">Start position of the range to process.</param>
            /// <param name="ringbuffer">Input buffer handed to FindLongestMatch, Store and StoreRange.</param>
            /// <param name="ringbuffer_mask">Mask handed to the same helpers together with <paramref name="ringbuffer"/>.</param>
            /// <param name="params_">Encoder parameters; this method reads lgwin and quality.</param>
            /// <param name="hasher">Hasher handle forwarded to all hasher helpers.</param>
            /// <param name="dist_cache">Distance cache; entries 0..3 are rotated and written here.</param>
            /// <param name="last_insert_len">In: pending literal count carried in. Out: pending literal count after the range.</param>
            /// <param name="commands">Destination for emitted commands; written sequentially (capacity is presumably guaranteed by the caller — confirm).</param>
            /// <param name="num_commands">Incremented by the number of commands written.</param>
            /// <param name="num_literals">Incremented by the insert length of each emitted command.</param>
            public override unsafe void CreateBackwardReferences(
                ushort *dictionary_hash,
                size_t num_bytes, size_t position,
                byte *ringbuffer, size_t ringbuffer_mask,
                BrotliEncoderParams *params_, HasherHandle hasher, int *dist_cache,
                size_t *last_insert_len, Command *commands, size_t *num_commands,
                size_t *num_literals)
            {
                /* Set maximum distance, see section 9.1. of the spec. */
                size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params_->lgwin);

                Command *orig_commands = commands;
                size_t   insert_length = *last_insert_len;
                size_t   pos_end       = position + num_bytes;
                /* Upper bound used for StoreRange below; falls back to the start position when the
                 * range is shorter than StoreLookahead(). */
                size_t   store_end     = num_bytes >= StoreLookahead()
                    ? position + num_bytes - StoreLookahead() + 1
                    : position;

                /* For speed up heuristics for random data. */
                size_t random_heuristics_window_size =
                    LiteralSpreeLengthForSparseSearch(params_);
                size_t apply_random_heuristics = position + random_heuristics_window_size;

                /* Minimum score to accept a backward reference. */
                score_t kMinScore = BROTLI_SCORE_BASE + 100;

                PrepareDistanceCache(hasher, dist_cache);

                while (position + HashTypeLength() < pos_end)
                {
                    size_t             max_length   = pos_end - position;
                    size_t             max_distance = Math.Min(position, max_backward_limit);
                    HasherSearchResult sr           = new HasherSearchResult();
                    /* Seed the search result with an empty match at the minimum acceptable score. */
                    sr.len        = 0;
                    sr.len_x_code = 0;
                    sr.distance   = 0;
                    sr.score      = kMinScore;
                    if (FindLongestMatch(hasher, dictionary_hash,
                                         ringbuffer, ringbuffer_mask, dist_cache,
                                         position, max_length, max_distance, &sr))
                    {
                        /* Found a match. Let's look for something even better ahead. */
                        int delayed_backward_references_in_row = 0;
                        --max_length;
                        for (;; --max_length)
                        {
                            /* A match starting one byte later must beat the current score by at least
                             * this margin to be preferred. */
                            score_t            cost_diff_lazy = 175;
                            bool               is_match_found;
                            HasherSearchResult sr2;
                            /* Below the extensive-search quality threshold the look-ahead search is
                             * seeded with a starting length derived from the current match. */
                            sr2.len = params_->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH
                                ? Math.Min(sr.len - 1, max_length)
                                : 0;

                            sr2.len_x_code = 0;
                            sr2.distance   = 0;
                            sr2.score      = kMinScore;
                            max_distance   = Math.Min(position + 1, max_backward_limit);
                            is_match_found = FindLongestMatch(hasher,
                                                              dictionary_hash, ringbuffer, ringbuffer_mask, dist_cache,
                                                              position + 1, max_length, max_distance, &sr2);
                            if (is_match_found && sr2.score >= sr.score + cost_diff_lazy)
                            {
                                /* Ok, let's just write one byte for now and start a match from the
                                 * next byte. */
                                ++position;
                                ++insert_length;
                                sr = sr2;
                                /* Keep deferring only for up to 4 consecutive bytes and while enough
                                 * input remains to hash. */
                                if (++delayed_backward_references_in_row < 4 &&
                                    position + HashTypeLength() < pos_end)
                                {
                                    continue;
                                }
                            }
                            break;
                        }
                        /* A match was accepted, so push the sparse-search threshold further ahead. */
                        apply_random_heuristics =
                            position + 2 * sr.len + random_heuristics_window_size;
                        max_distance = Math.Min(position, max_backward_limit);
                        {
                            /* The first 16 codes are special short-codes,
                             * and the minimum offset is 1. */
                            size_t distance_code =
                                ComputeDistanceCode(sr.distance, max_distance, dist_cache);
                            /* Rotate the distance cache only for distances within max_distance whose
                             * distance code is non-zero. */
                            if (sr.distance <= max_distance && distance_code > 0)
                            {
                                dist_cache[3] = dist_cache[2];
                                dist_cache[2] = dist_cache[1];
                                dist_cache[1] = dist_cache[0];
                                dist_cache[0] = (int)sr.distance;
                                PrepareDistanceCache(hasher, dist_cache);
                            }
                            InitCommand(commands++, insert_length, sr.len, sr.len ^ sr.len_x_code,
                                        distance_code);
                        }
                        /* The literals preceding the match now belong to the emitted command. */
                        *num_literals += insert_length;
                        insert_length = 0;

                        /* Put the hash keys into the table, if there are enough bytes left.
                         * Depending on the hasher implementation, it can push all positions
                         * in the given range or only a subset of them. */
                        StoreRange(hasher, ringbuffer, ringbuffer_mask, position + 2,
                                   Math.Min(position + sr.len, store_end));
                        position += sr.len;
                    }
                    else
                    {
                        /* No match here: the current byte becomes a pending literal. */
                        ++insert_length;
                        ++position;

                        /* If we have not seen matches for a long time, we can skip some
                         * match lookups. Unsuccessful match lookups are very very expensive
                         * and this kind of a heuristic speeds up compression quite
                         * a lot. */
                        if (position > apply_random_heuristics)
                        {
                            /* Going through uncompressible data, jump. */
                            if (position >
                                apply_random_heuristics + 4 * random_heuristics_window_size)
                            {
                                /* It is quite a long time since we saw a copy, so we assume
                                 * that this data is not compressible, and store hashes less
                                 * often. Hashes of non compressible data are less likely to
                                 * turn out to be useful in the future, too, so we store less of
                                 * them to not to flood out the hash table of good compressible
                                 * data. */
                                size_t kMargin =
                                    Math.Max(StoreLookahead() - 1, 4);
                                size_t pos_jump =
                                    Math.Min(position + 16, pos_end - kMargin);
                                /* Store every 4th position and count the skipped bytes as literals. */
                                for (; position < pos_jump; position += 4)
                                {
                                    Store(hasher, ringbuffer, ringbuffer_mask, position);
                                    insert_length += 4;
                                }
                            }
                            else
                            {
                                size_t kMargin =
                                    Math.Max(StoreLookahead() - 1, 2);
                                size_t pos_jump =
                                    Math.Min(position + 8, pos_end - kMargin);
                                /* Store every 2nd position and count the skipped bytes as literals. */
                                for (; position < pos_jump; position += 2)
                                {
                                    Store(hasher, ringbuffer, ringbuffer_mask, position);
                                    insert_length += 2;
                                }
                            }
                        }
                    }
                }
                /* Bytes left after the loop become pending literals handed back to the caller. */
                insert_length += pos_end - position;
                *last_insert_len = insert_length;
                *num_commands   += (size_t)(commands - orig_commands);
            }
            /// <summary>
            /// Looks for the best back reference at <paramref name="ip"/>. The last used distance is tried
            /// first, then <c>sweep</c> hash table slots derived from the hash of the bytes at
            /// <paramref name="ip"/>. Finally <paramref name="ip"/> is recorded in one of those slots,
            /// chosen from the position itself.
            /// </summary>
            /// <param name="ip">Position in the input at which the match must start.</param>
            /// <param name="maxLength">Upper bound passed to the match length computation.</param>
            /// <param name="maxDistance">Largest distance accepted for hash table candidates.</param>
            /// <param name="lastDistance">Most recently used distance, tried before the hash table.</param>
            /// <param name="bestLenIn">Initial best length; a candidate must agree on the byte at this offset to be examined.</param>
            /// <returns>A builder holding the best candidate found, or only the initial length when none qualified.</returns>
            protected override HasherSearchResult.BackReferenceBuilder FindLongestBackReference(int ip, int maxLength, int maxDistance, int lastDistance, int bestLenIn)
            {
                byte probeByte = input[ip + bestLenIn];
                uint hash = HashBytes(input, ip);

                var builder = new HasherSearchResult.BackReferenceBuilder { Len = bestLenIn };

                int topScore = builder.Score;
                int topLen = builder.Len;

                // Try the previously used distance first; it is scored with its own function.
                int source = ip - lastDistance;

                if (source >= 0 && source < ip && probeByte == input[source + topLen])
                {
                    int cachedLen = Match.DetermineLength(input, source, ip, maxLength);

                    if (cachedLen >= 4)
                    {
                        int cachedScore = HasherSearchResult.BackwardReferenceScoreUsingLastDistance(cachedLen);

                        if (cachedScore > topScore)
                        {
                            builder.Len = cachedLen;
                            builder.Distance = lastDistance;
                            builder.Score = cachedScore;

                            topLen = cachedLen;
                            topScore = cachedScore;
                            probeByte = input[ip + cachedLen];
                        }
                    }
                }

                // Derive the slots to probe: the base hash plus multiples of 8, wrapped by the bucket mask.
                var slots = new uint[sweep];

                for (int n = 0; n < slots.Length; n++)
                {
                    slots[n] = (uint)((hash + (n << 3)) & bucketMask);
                }

                // The slot that will receive the current position depends on the position itself.
                uint storeSlot = slots[(ip & sweepMask) >> 3];

                foreach (uint slot in slots)
                {
                    int candidate = table[slot];
                    int backward = ip - candidate;

                    // Cheap rejection: the byte just past the current best length must agree, and the
                    // distance must be non-zero and within the allowed window.
                    if (probeByte != input[candidate + topLen] || backward == 0 || backward > maxDistance)
                    {
                        continue;
                    }

                    int hashedLen = Match.DetermineLength(input, candidate, ip, maxLength);

                    if (hashedLen < 4)
                    {
                        continue;
                    }

                    int hashedScore = HasherSearchResult.BackwardReferenceScore(hashedLen, backward);

                    if (hashedScore > topScore)
                    {
                        topLen = hashedLen;
                        builder.Len = hashedLen;
                        topScore = hashedScore;
                        builder.Score = hashedScore;
                        builder.Distance = backward;

                        probeByte = input[ip + hashedLen];
                    }
                }

                table[storeSlot] = ip;
                return builder;
            }
            /// <summary>
            /// Looks for a back reference at <paramref name="ip"/> using a single hash table slot. The last
            /// used distance is tried first and wins immediately if it improves on the initial score.
            /// Otherwise the position stored in the slot for the bytes at <paramref name="ip"/> is examined.
            /// The slot is overwritten with <paramref name="ip"/> on either path.
            /// </summary>
            /// <param name="ip">Position in the input at which the match must start.</param>
            /// <param name="maxLength">Upper bound passed to the match length computation.</param>
            /// <param name="maxDistance">Largest distance accepted for the hash table candidate.</param>
            /// <param name="lastDistance">Most recently used distance, tried before the hash table.</param>
            /// <param name="bestLenIn">Initial best length; a candidate must agree on the byte at this offset to be examined.</param>
            /// <returns>A builder holding the accepted candidate, or only the initial length when none qualified.</returns>
            protected override HasherSearchResult.BackReferenceBuilder FindLongestBackReference(int ip, int maxLength, int maxDistance, int lastDistance, int bestLenIn)
            {
                byte probeByte = input[ip + bestLenIn];
                uint hash = HashBytes(input, ip);

                var builder = new HasherSearchResult.BackReferenceBuilder { Len = bestLenIn };

                int seedScore = builder.Score;
                int seedLen = builder.Len;

                // Try the previously used distance first; a win here ends the search.
                int source = ip - lastDistance;

                if (source >= 0 && source < ip && probeByte == input[source + seedLen])
                {
                    int cachedLen = Match.DetermineLength(input, source, ip, maxLength);

                    if (cachedLen >= 4)
                    {
                        int cachedScore = HasherSearchResult.BackwardReferenceScoreUsingLastDistance(cachedLen);

                        if (cachedScore > seedScore)
                        {
                            builder.Len = cachedLen;
                            builder.Distance = lastDistance;
                            builder.Score = cachedScore;

                            table[hash] = ip;
                            return builder;
                        }
                    }
                }

                // Read the previous occupant of the slot before replacing it with the current position.
                int candidate = table[hash];
                table[hash] = ip;

                int backward = ip - candidate;

                // Cheap rejection: the byte at the initial best length must agree, and the distance must
                // be non-zero and within the allowed window.
                if (probeByte != input[candidate + bestLenIn] || backward == 0 || backward > maxDistance)
                {
                    return builder;
                }

                int hashedLen = Match.DetermineLength(input, candidate, ip, maxLength);

                if (hashedLen < 4)
                {
                    return builder;
                }

                int hashedScore = HasherSearchResult.BackwardReferenceScore(hashedLen, backward);

                if (hashedScore > seedScore)
                {
                    builder.Len = hashedLen;
                    builder.Score = hashedScore;
                    builder.Distance = backward;
                }

                return builder;
            }