/// <summary>
/// Creates the pattern metadata consumed by the search routines.
/// </summary>
/// <param name="aString">A string from which to create the pattern.</param>
/// <returns>
/// A pattern holding the (lower-cased, unless the search is case sensitive) text, its length,
/// a bit mask with bit <c>length - 1</c> set, and the pattern alphabet computed by
/// <c>FuseUtilities.CalculatePatternAlphabet</c>; or <c>null</c> when
/// <paramref name="aString"/> is <c>null</c> or empty.
/// </returns>
public Pattern? CreatePattern(string aString)
{
    // A null input is treated like an empty one: there is nothing to search for, so report
    // "no pattern" instead of failing with a NullReferenceException further down.
    if (string.IsNullOrEmpty(aString))
    {
        return null;
    }

    // Lower-casing maps characters one-to-one, so the length (non-zero here) is unchanged.
    var pattern = _isCaseSensitive ? aString : aString.ToLower();
    var len = pattern.Length;

    // The mask flags the bit that corresponds to the last pattern character.
    // NOTE(review): C# only uses the low five bits of an int shift count, so for patterns longer
    // than 32 characters this mask wraps around - confirm that callers limit the pattern length.
    var mask = 1 << (len - 1);

    return new Pattern(pattern, len, mask, FuseUtilities.CalculatePatternAlphabet(pattern));
}
/// <summary>Searches for a pattern in a given string.</summary>
/// <param name="pattern">The pattern to search for. This is created by calling <c>CreatePattern</c>.</param>
/// <param name="aString">The string in which to search for the pattern.</param>
/// <returns>
/// A result carrying a score and the ranges of the matched characters. The score is <c>0</c> when the
/// (optionally lower-cased) text equals the pattern text, starts at <c>1</c> otherwise and is replaced by
/// the score of the most recently evaluated candidate alignment. The ranges are produced by
/// <c>FuseUtilities.FindRanges</c> from a per-character mask of the text.
/// </returns>
private SearchResult SearchInternal(Pattern pattern, string aString)
{
    // Case folding mirrors what is applied to the pattern text when the search is case insensitive.
    var text = _isCaseSensitive ? aString : aString.ToLower();
    var textLength = text.Length;

    // Exact match
    if (pattern.Text == text)
    {
        return new SearchResult(0, new[] { new ClosedRange(0, textLength - 1) });
    }

    var location = _location;
    var distance = _distance;
    var threshold = _threshold;

    // The expected location is a configuration value and may lie outside this particular text;
    // clamp it so IndexOf does not throw ArgumentOutOfRangeException.
    var searchStart = Math.Min(Math.Max(location, 0), textLength);

    // Ordinal comparison keeps substring search consistent with the ordinal `==` above and with the
    // per-character alphabet lookups below, and guarantees that a hit spans exactly pattern.Len chars.
    var firstHit = text.IndexOf(pattern.Text, searchStart, StringComparison.Ordinal);
    var bestLocation = firstHit != -1 ? firstHit : 0;

    // A mask of the matches. We'll use it to determine all the ranges of the matches.
    var matchMaskArr = new int[textLength];

    // Get all exact matches, here for speed up
    var index = text.IndexOf(pattern.Text, bestLocation, StringComparison.Ordinal);
    while (index != -1)
    {
        var matchStart = index;
        var exactScore = FuseUtilities.CalculateScore(pattern.Len, 0, matchStart, location, distance);
        threshold = Math.Min(threshold, exactScore);

        // Continue looking after the end of this occurrence.
        bestLocation = matchStart + pattern.Len;
        index = text.IndexOf(pattern.Text, bestLocation, StringComparison.Ordinal);

        for (var offset = 0; offset < pattern.Len; offset++)
        {
            matchMaskArr[matchStart + offset] = 1;
        }
    }

    // Reset the best location
    bestLocation = 0;

    var score = 1.0;
    var binMax = pattern.Len + textLength;
    var lastBitArr = Array.Empty<int>();

    // Magic begins now
    for (int i = 0; i < pattern.Len; i++)
    {
        // Scan for the best match; each iteration allows for one more error.
        // Run a binary search to determine how far from the match location we can stray at this error level.
        var binMin = 0;
        var binMid = binMax;
        while (binMin < binMid)
        {
            if (FuseUtilities.CalculateScore(pattern.Len, i, location, location + binMid, distance) <= threshold)
            {
                binMin = binMid;
            }
            else
            {
                binMax = binMid;
            }

            binMid = ((binMax - binMin) / 2) + binMin;
        }

        // Use the result from this iteration as the maximum for the next.
        binMax = binMid;

        var start = Math.Max(1, location - binMid + 1);
        var finish = Math.Min(location + binMid, textLength) + pattern.Len;

        // Nothing to scan at this error level. Checked before allocating so that an empty window
        // costs no allocation and can never request a negative array length.
        if (start > finish)
        {
            continue;
        }

        // Initialize the bit array
        var bitArr = new int[finish + 2];
        bitArr[finish + 1] = (1 << i) - 1;

        for (int j = finish; j >= start; j--)
        {
            var currentLocation = j - 1;

            // The scan window extends past the end of the text, and the pattern alphabet is sparse:
            // positions outside the text and characters that are not in the pattern contribute 0.
            var charMatch = 0;
            if (currentLocation < textLength
                && pattern.Alphabet.TryGetValue(text[currentLocation], out var alphabetMask))
            {
                charMatch = alphabetMask;
            }

            // A match is found
            if (charMatch != 0)
            {
                matchMaskArr[currentLocation] = 1;
            }

            // First pass: exact match
            bitArr[j] = ((bitArr[j + 1] << 1) | 1) & charMatch;

            // Subsequent passes: fuzzy match
            if (i > 0)
            {
                bitArr[j] |= (((lastBitArr[j + 1] | lastBitArr[j]) << 1) | 1) | lastBitArr[j + 1];
            }

            if ((bitArr[j] & pattern.Mask) != 0)
            {
                score = FuseUtilities.CalculateScore(pattern.Len, i, location, currentLocation, distance);

                // This match will almost certainly be better than any existing match. But check anyway.
                if (score <= threshold)
                {
                    // Indeed it is
                    threshold = score;
                    bestLocation = currentLocation;

                    if (bestLocation > location)
                    {
                        // When passing `bestLocation`, don't exceed our current distance from the expected `location`.
                        start = Math.Max(1, 2 * location - bestLocation);
                    }
                    else
                    {
                        // Already passed `location`. No point in continuing.
                        break;
                    }
                }
            }
        }

        // No hope for a better match at greater error levels
        if (FuseUtilities.CalculateScore(pattern.Len, i + 1, location, location, distance) > threshold)
        {
            break;
        }

        lastBitArr = bitArr;
    }

    return new SearchResult(score, FuseUtilities.FindRanges(matchMaskArr));
}