/// <summary>
/// Collects every word stored in the tree whose Levenshtein distance to
/// <paramref name="word"/> does not exceed <paramref name="tolerance"/>.
/// </summary>
/// <param name="word">The word to look up. A null or empty word produces no results.</param>
/// <param name="tolerance">The largest distance at which a stored word is still reported.</param>
/// <returns>The matching words in the order the nodes were visited; empty when there is nothing to search.</returns>
public List<string> Search(string word, int tolerance = 2)
{
    var matches = new List<string>();

    if (string.IsNullOrEmpty(word) || _root == null)
    {
        return matches;
    }

    // First-in, first-out work list: nodes are visited level by level starting at the root.
    var pending = new Queue<Node>();
    pending.Enqueue(_root);

    while (pending.Count != 0)
    {
        Node node = pending.Dequeue();
        int distance = FuzzyStringMatching.LevenshteinDistance(node.Word, word);

        if (distance <= tolerance)
        {
            matches.Add(node.Word);
        }

        // Only children whose key lies within `tolerance` of this node's distance are followed.
        int lowest = distance - tolerance;
        int highest = distance + tolerance;

        foreach (int edge in node.Keys)
        {
            if (edge < lowest || edge > highest)
            {
                continue;
            }

            pending.Enqueue(node[edge]);
        }
    }

    return matches;
}
/// <summary>
/// Gets the words that partially match the provided word according to the Jaro-Winkler similarity.
/// The provided word must be 3 characters or longer.
/// </summary>
/// <param name="word">Find words that are close matches to this word. Exact matches are omitted.</param>
/// <param name="minSimilarity">
/// The minimum similarity value for a word match to be accepted, where 1 represents matching same word.
/// Must be strictly greater than 0 and strictly less than 1.
/// </param>
/// <returns>
/// A list of similar words. The list is empty when <paramref name="word"/> is null or shorter than
/// 3 characters, or when <paramref name="minSimilarity"/> is outside the open interval (0, 1).
/// </returns>
public List<string> GetFuzzyMatchedWordsAlternate(string word, float minSimilarity = 0.7f)
{
    List<string> words = new List<string>();

    // A null word is treated like any other unusable input instead of throwing on word.Length.
    if (word == null || word.Length < 3 || minSimilarity <= 0f || minSimilarity >= 1f)
    {
        return words;
    }

    foreach (var kv in _words)
    {
        string targetWord = kv.Key;

        // Exact matches are omitted by contract; skipping them here avoids
        // re-scanning the result list afterwards to remove them.
        if (targetWord == word)
        {
            continue;
        }

        if (FuzzyStringMatching.JaroWinklerSimilarity(word, targetWord) >= minSimilarity)
        {
            words.Add(targetWord);
        }
    }

    return words;
}
/// <summary>
/// Inserts a word into the tree. The first word added becomes the root; every later word is
/// attached as a child keyed by its Levenshtein distance to the node it ends up under.
/// A word that is already present (distance 0 to a visited node) is not added again.
/// </summary>
/// <param name="word">The word to insert. A null word is ignored.</param>
public void AddWord(string word)
{
    // A null word cannot be measured against other words; storing it would corrupt the tree.
    if (word == null)
    {
        return;
    }

    if (_root == null)
    {
        _root = new Node(word);
        return;
    }

    Node curr = _root;
    int dist = FuzzyStringMatching.LevenshteinDistance(curr.Word, word);

    while (true)
    {
        // Distance 0 means the word is already stored at this node. This must be checked
        // before looking for a child: a node has no child at key 0 the first time its own
        // word is re-added, and the word would otherwise be inserted as a duplicate.
        if (dist == 0)
        {
            return;
        }

        if (!curr.ContainsKey(dist))
        {
            break;
        }

        // A child already occupies this distance slot: descend and measure against it.
        curr = curr[dist];
        dist = FuzzyStringMatching.LevenshteinDistance(curr.Word, word);
    }

    curr.AddChild(dist, word);
}