/// <summary>
/// Returns suggested spellings whose edit distance to <paramref name="value"/> is at most <paramref name="delete_distance"/>.
/// Candidates are collected by looking up every delete-permutation of <paramref name="value"/> in the dictionary;
/// each distinct candidate is then scored with <c>FuzzyStringMatch.LevenshteinDistance</c>
/// (described by the original author as a true Damerau-Levenshtein distance).
/// Average: O(1) * number_of_permutation
/// </summary>
/// <param name="value">The text to find suggestions for.</param>
/// <param name="delete_distance">
/// Maximum accepted distance. Default: -1. Any negative value means "use <c>this.DeleteDistance</c>",
/// which is also the largest value accepted.
/// </param>
/// <param name="verbosity">
/// <c>Verbosity.Top</c>: if an exact match (distance 0) is found, only that match is returned, immediately.
/// <c>Verbosity.Closest</c>: only the results sharing the smallest distance found are returned.
/// Otherwise every result within <paramref name="delete_distance"/> is returned.
/// </param>
/// <returns>The accepted results, ordered by <c>List.Sort()</c> (i.e. by the default ordering of <c>Result</c>).</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="delete_distance"/> is greater than <c>this.DeleteDistance</c>.
/// </exception>
public List<Result> Lookup(string value, int delete_distance = -1, Verbosity verbosity = Verbosity.Top) {
    if (delete_distance < 0) {
        delete_distance = this.DeleteDistance;
    } else if (delete_distance > this.DeleteDistance) {
        // the (string, string) overload is (paramName, message) - in that order
        throw new ArgumentOutOfRangeException(
            nameof(delete_distance),
            $"{nameof(delete_distance)} ({delete_distance}) cannot be > {nameof(this.DeleteDistance)} ({this.DeleteDistance}).");
    }

    var permutation_hash = new HashSet<string>();
    var visited_results = new HashSet<string>();
    var results = new List<Result>();
    int best_distance = int.MaxValue; // applies only when verbosity==Closest

    CreateDeletePermutations(value, delete_distance, permutation_hash);

    foreach (var permutation in permutation_hash) {
        if (!m_dict.TryGetValue(permutation, out var node)) {
            continue;
        }

        foreach (var originalString in node) {
            // the same original string can be reached through several permutations; score it only once
            if (!visited_results.Add(originalString)) {
                continue;
            }

            // last argument is delete_distance + 1 - presumably a cut-off so the computation can stop
            // once the distance is known to exceed the limit (TODO confirm against LevenshteinDistance)
            int cost = FuzzyStringMatch.LevenshteinDistance(value, originalString, 1, 1, 1, 1, delete_distance + 1);
            if (cost > delete_distance) {
                continue;
            }

            // an exact match ends the search when only the top result is wanted
            if (cost == 0 && verbosity == Verbosity.Top) {
                results.Clear();
                results.Add(new Result(originalString, cost));
                return results;
            }

            // keep only results within the best distance found so far
            if (verbosity == Verbosity.Closest) {
                if (cost > best_distance) {
                    continue;
                }
                if (cost < best_distance) {
                    results.Clear();
                    best_distance = cost;
                }
            }

            results.Add(new Result(originalString, cost));
        }
    }

    results.Sort();
    return results;
}
/// <summary>
/// Ad-hoc driver used for manual experiments. It runs a spelling lookup, fills an
/// <c>AdaptiveRadixTree</c> (and a reference <c>Dictionary</c>) with the keys from
/// <c>AdaptiveRadixTreeTest.GenerateTestKeys(100000)</c>, writes the tree metrics to the debug output,
/// removes every key again, prints how long the removal loop took and then waits for a line on stdin.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args) {
    // --- spelling suggestor experiment ---
    var suggestor = new SpellingSuggestor(3);
    suggestor.AddRange("manly,abcde,hamburger,apple".Split(','));
    var suggestions = suggestor.Lookup("applr").ToList();

    // --- single-entry tree experiment ---
    var scratchTree = new AdaptiveRadixTree<string, string>();
    scratchTree.Add("dfgdfg", "213123");

    // --- distance between two identical plates ---
    var identicalDistance = FuzzyStringMatch.CombinedWordEditDistance("H7N0K3", "H7N0K3");

    // --- bulk insert: each key is stored with its insertion index ---
    var tree = new AdaptiveRadixTree<string, int>();
    var reference = new Dictionary<string, int>();
    var keys = AdaptiveRadixTreeTest.GenerateTestKeys(100000).ToArray();
    for (int ordinal = 0; ordinal < keys.Length; ordinal++) {
        var key = keys[ordinal];
        tree.Add(in key, ordinal);
        reference.Add(key, ordinal);
    }

    System.Diagnostics.Debug.Write(tree.CalculateMetrics());
    //System.Diagnostics.Debug.Write(tree.DebugDump(true));

    // only the loop below is timed
    var startedAt = DateTime.UtcNow;
    int readResults = 0;

    // todo: read/write not using stream
    int index = 0;
    foreach (var key in keys) {
        // alternative read experiments (disabled):
        //readResults += tree.PartialMatch(key.Substring(0, key.Length - 1) + ".", '.').Count();
        //readResults += tree.PartialMatch("." + key.Substring(1), '.').Count();
        //readResults += tree.RegExpMatch("[A-B-D]" + key.Substring(1)).Count();
        //readResults += tree.StartsWithKeys(key.Substring(0, key.Length - 1)).Count();
        //readResults += tree.RegExpNearNeighbors("[A-B-D]" + key.Substring(1), 0).Count();

        // open items:
        //   remove()
        //   fix path filter enumerator to call the filter method in-order
        //   try with just one entry
        //var neighbors = tree.RegExpNearNeighbors("BBBI", 2).ToList();

        // the no-op statements below only exist as breakpoint anchors
        if (index == 99999) {
            "".ToLower();
        }

        // original note: removal broke at i=163 with the off-branch code
        var removed = tree.Remove(key);
        if (!removed) {
            "".ToLower();
        }

        //if(index > 145 && tree.DebugDump(true).Contains("EXCEPTION"))
        //    "".ToString();

        index++;
    }

    // stable-hash collision experiment (disabled):
    //var dict4 = new Dictionary<int, string>(100000);
    //int coll = 0;
    //for(int i = 0; i < 100000; i++) {
    //    //FuzzyStringMatch.GetStableHashCode(keys[i % 100000]);
    //    //keys[i % 100000].GetHashCode();
    //    if((i % 100000) == 0)
    //        dict4.Clear();
    //    try {
    //        dict4.Add(FuzzyStringMatch.GetStableHashCode(keys[i % 100000]), keys[i % 100000]);
    //    } catch {
    //        coll++;
    //    }
    //}

    var elapsed = DateTime.UtcNow - startedAt;
    System.Console.WriteLine(elapsed.ToString());
    //System.Console.WriteLine(readResults);
    System.Console.ReadLine();
}
/// <summary>
/// Scores <paramref name="normalized_match"/> against <paramref name="normalized_input"/> by delegating to
/// <c>FuzzyStringMatch.CombinedWordEditDistance</c>.
/// </summary>
/// <param name="normalized_input">The searched plate; per its name, expected to be normalized already.</param>
/// <param name="normalized_match">The candidate plate; per its name, expected to be normalized already.</param>
/// <returns>The value produced by <c>FuzzyStringMatch.CombinedWordEditDistance</c>.</returns>
// NOTE(review): the method is named "similarity" but returns the result of an edit-distance helper -
// confirm which direction callers expect. Previous variant, kept for reference:
//   FuzzyStringMatch.CombinedWordSimilarity(normalized_input, normalized_match)
private static double CalculateLicensePlateSimilarity(string normalized_input, string normalized_match) =>
    FuzzyStringMatch.CombinedWordEditDistance(normalized_input, normalized_match);
/// <summary>
/// Returns the hash code of <paramref name="obj"/> as computed by <c>FuzzyStringMatch.GetStableHashCode</c>.
/// </summary>
/// <param name="obj">The string to hash.</param>
/// <returns>The value returned by <c>FuzzyStringMatch.GetStableHashCode</c> for <paramref name="obj"/>.</returns>
public int GetHashCode(string obj) => FuzzyStringMatch.GetStableHashCode(obj);
/// <summary>
/// Normalizes a license plate for comparison: trims surrounding whitespace, passes the text through
/// <c>FuzzyStringMatch.RemoveDiacritics</c> and converts the result to invariant upper case.
/// </summary>
/// <param name="value">The raw license plate text.</param>
/// <returns>The trimmed, diacritic-free, upper-cased plate.</returns>
private static string NormalizeLicensePlate(string value) {
    // only leading/trailing whitespace is dropped; inner spaces are kept on purpose for now:
    //value = value.Replace(" ", string.Empty);
    var trimmed = value.Trim();
    var withoutDiacritics = FuzzyStringMatch.RemoveDiacritics(trimmed);

    // note: UPPER case is relevant because license plates are printed in upper case,
    // and visual similarity is taken into account
    return withoutDiacritics.ToUpperInvariant();
}