Ejemplo n.º 1
0
        /// <summary>
        /// Creates the pattern metadata consumed by the search routines.
        /// </summary>
        /// <param name="aString">A string from which to create the pattern.</param>
        /// <returns>
        /// A <c>Pattern</c> built from the (lower-cased, unless the search is case sensitive) text, its length,
        /// its match mask and its alphabet, or <c>null</c> when <paramref name="aString"/> is <c>null</c> or empty.
        /// </returns>
        public Pattern?CreatePattern(string aString)
        {
            // Treat a missing string like an empty one instead of failing with a NullReferenceException.
            if (aString == null)
            {
                return(null);
            }

            var pattern = _isCaseSensitive ? aString : aString.ToLower();
            var len     = pattern.Length;

            // There is nothing to search for.
            if (len == 0)
            {
                return(null);
            }

            // The mask has only bit (len - 1) set.
            // NOTE(review): C# masks the shift count of an int to its low five bits, so for patterns longer
            // than 32 characters this bit wraps around. Confirm whether callers limit the pattern length.
            var mask = 1 << (len - 1);

            return(new Pattern(pattern, len, mask, FuseUtilities.CalculatePatternAlphabet(pattern)));
        }
Ejemplo n.º 2
0
        /// <summary>Searches for a pattern in a given string.</summary>
        /// <param name="pattern">The pattern to search for. This is created by calling <c>CreatePattern</c>.</param>
        /// <param name="aString">The string in which to search for the pattern. It is lower-cased first unless the search is case sensitive.</param>
        /// <returns>
        /// A <c>SearchResult</c> holding a score and the ranges of the matched characters. When the text equals the
        /// pattern text the score is <c>0</c> and the single range covers the whole text. Otherwise the score is
        /// the one computed for the last match candidate the scan encountered (it stays <c>1.0</c> when the scan
        /// finds none), and the ranges are derived from the match mask by <c>FuseUtilities.FindRanges</c>.
        /// </returns>
        private SearchResult SearchInternal(Pattern pattern, string aString)
        {
            var text = aString;

            if (!_isCaseSensitive)
            {
                text = text.ToLower();
            }

            var textLength = text.Length;

            // Exact match
            if (pattern.Text == text)
            {
                return(new SearchResult(0, new[] { new ClosedRange(0, textLength - 1) }));
            }

            var location  = _location;
            var distance  = _distance;
            var threshold = _threshold;

            // A mask of the matches. We'll use to determine all the ranges of the matches
            var matchMaskArr = new int[textLength];

            // String.IndexOf throws when the start index lies outside [0, text.Length], so the configured
            // location is clamped for the substring scans only; scoring below keeps using the raw value.
            var searchStart = Math.Min(Math.Max(location, 0), textLength);

            // Get all exact matches, here for speed up. Scanning starts at the first occurrence at or after
            // the expected location, or from the beginning of the text when there is none. The comparison is
            // ordinal so that a reported match always spans exactly the characters of the pattern text, in
            // line with the `==` check above and the per-character alphabet lookup below.
            var index = text.IndexOf(pattern.Text, searchStart, StringComparison.Ordinal);

            if (index == -1)
            {
                index = text.IndexOf(pattern.Text, StringComparison.Ordinal);
            }

            while (index != -1)
            {
                var exactScore = FuseUtilities.CalculateScore(pattern.Len, 0, index, location, distance);

                threshold = Math.Min(threshold, exactScore);

                // Mark every character covered by this occurrence.
                for (var offset = 0; offset < pattern.Len; offset++)
                {
                    matchMaskArr[index + offset] = 1;
                }

                // Continue after the end of this occurrence.
                index = text.IndexOf(pattern.Text, index + pattern.Len, StringComparison.Ordinal);
            }

            var bestLocation = 0;

            var score      = 1.0;
            var binMax     = pattern.Len + textLength;
            var lastBitArr = new int[0];

            // Magic begins now
            for (int i = 0; i < pattern.Len; i++)
            {
                // Scan for the best match; each iteration allows for one more error.
                // Run a binary search to determine how far from the match location we can stray at this error level.
                var binMin = 0;
                var binMid = binMax;

                while (binMin < binMid)
                {
                    if (FuseUtilities.CalculateScore(pattern.Len, i, location, location + binMid, distance) <= threshold)
                    {
                        binMin = binMid;
                    }
                    else
                    {
                        binMax = binMid;
                    }
                    binMid = ((binMax - binMin) / 2) + binMin;
                }

                // Use the result from this iteration as the maximum for the next.
                binMax = binMid;
                var start  = Math.Max(1, location - binMid + 1);
                var finish = Math.Min(location + binMid, textLength) + pattern.Len;

                // Initialize the bit array
                var bitArr = new int[finish + 2];
                bitArr[finish + 1] = (1 << i) - 1;

                if (start > finish)
                {
                    continue;
                }

                for (int j = finish; j >= start; j--)
                {
                    // `j` is one-based, so this is never negative because `start` is at least 1.
                    var currentLocation = j - 1;

                    // The pattern alphabet is sparse: characters missing from it, and positions past the
                    // end of the text, contribute no bits.
                    var charMatch = 0;

                    if (currentLocation < textLength)
                    {
                        var currentChar = text[currentLocation];

                        if (pattern.Alphabet.ContainsKey(currentChar))
                        {
                            charMatch = pattern.Alphabet[currentChar];
                        }
                    }

                    // A match is found
                    if (charMatch != 0)
                    {
                        matchMaskArr[currentLocation] = 1;
                    }

                    // First pass: exact match
                    bitArr[j] = ((bitArr[j + 1] << 1) | 1) & charMatch;

                    // Subsequent passes: fuzzy match
                    if (i > 0)
                    {
                        bitArr[j] |= (((lastBitArr[j + 1] | lastBitArr[j]) << 1) | 1) | lastBitArr[j + 1];
                    }

                    if ((bitArr[j] & pattern.Mask) != 0)
                    {
                        score = FuseUtilities.CalculateScore(pattern.Len, i, location, currentLocation, distance);

                        // This match will almost certainly be better than any existing match. But check anyway.
                        if (score <= threshold)
                        {
                            // Indeed it is
                            threshold    = score;
                            bestLocation = currentLocation;

                            if (bestLocation > location)
                            {
                                // When passing `bestLocation`, don't exceed our current distance from the expected `location`.
                                start = Math.Max(1, 2 * location - bestLocation);
                            }
                            else
                            {
                                // Already passed `location`. No point in continuing.
                                break;
                            }
                        }
                    }
                }

                // No hope for a better match at greater error levels
                if (FuseUtilities.CalculateScore(pattern.Len, i + 1, location, location, distance) > threshold)
                {
                    break;
                }

                lastBitArr = bitArr;
            }

            return(new SearchResult(score, FuseUtilities.FindRanges(matchMaskArr)));
        }