コード例 #1
0
        /// <summary>
        ///     Average: O(1) * number_of_permutation (one dictionary probe per delete-permutation of the input).
        ///     Returns suggested spellings that match <paramref name="value"/> within a given true
        ///     Damerau-Levenshtein distance.
        /// </summary>
        /// <param name="value">The (possibly misspelled) text to find suggestions for.</param>
        /// <param name="delete_distance">
        ///     Maximum accepted edit distance. Default: -1. Any negative value means
        ///     "use <c>this.DeleteDistance</c>", i.e. the widest search this instance allows.
        /// </param>
        /// <param name="verbosity">
        ///     <c>Verbosity.Top</c>: an exact match (cost 0) short-circuits the search and is returned alone.
        ///     <c>Verbosity.Closest</c>: only the results sharing the smallest cost found are returned.
        ///     Any other value: every result within <paramref name="delete_distance"/> is returned.
        /// </param>
        /// <returns>The matching results, sorted with <c>List&lt;Result&gt;.Sort()</c> (except on the exact-match early return, which holds one item).</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        ///     <paramref name="delete_distance"/> is greater than <c>this.DeleteDistance</c>.
        /// </exception>
        public List <Result> Lookup(string value, int delete_distance = -1, Verbosity verbosity = Verbosity.Top)
        {
            if (delete_distance < 0)
            {
                delete_distance = this.DeleteDistance;
            }
            else if (delete_distance > this.DeleteDistance)
            {
                // ArgumentOutOfRangeException(string, string) takes (paramName, message) - in that order.
                throw new ArgumentOutOfRangeException(nameof(delete_distance), $"{nameof(delete_distance)} ({delete_distance}) cannot be > {nameof(this.DeleteDistance)} ({this.DeleteDistance}).");
            }

            var permutation_hash = new HashSet <string>();
            var visited_results  = new HashSet <string>(); // candidates already scored, so each one is evaluated once
            var results          = new List <Result>();
            int best_distance    = int.MaxValue; // applies only when verbosity==Closest

            CreateDeletePermutations(value, delete_distance, permutation_hash);
            foreach (var permutation in permutation_hash)
            {
                if (!m_dict.TryGetValue(permutation, out var node))
                {
                    continue;
                }

                foreach (var originalString in node)
                {
                    // the same original string can be reached through several permutations
                    if (!visited_results.Add(originalString))
                    {
                        continue;
                    }

                    // NOTE(review): the four 1s look like unit edit costs and the last argument like an
                    // early-exit cutoff just above the accepted distance - confirm against LevenshteinDistance().
                    int cost = FuzzyStringMatch.LevenshteinDistance(value, originalString, 1, 1, 1, 1, delete_distance + 1);
                    if (cost > delete_distance)
                    {
                        continue;
                    }

                    // an exact match makes every other candidate irrelevant in Top mode
                    if (cost == 0 && verbosity == Verbosity.Top)
                    {
                        results.Clear();
                        results.Add(new Result(originalString, cost));
                        return(results);
                    }

                    // keep only results within the best distance found so far
                    if (verbosity == Verbosity.Closest)
                    {
                        if (cost > best_distance)
                        {
                            continue;
                        }

                        if (cost < best_distance)
                        {
                            // a strictly better candidate invalidates everything collected so far
                            results.Clear();
                            best_distance = cost;
                        }
                    }

                    results.Add(new Result(originalString, cost));
                }
            }

            results.Sort();

            return(results);
        }
コード例 #2
0
        /// <summary>
        ///     Ad-hoc smoke test / timing harness. It exercises <c>SpellingSuggestor</c>,
        ///     <c>FuzzyStringMatch</c> and <c>AdaptiveRadixTree</c>, then times the removal of every
        ///     generated key from the tree and prints the elapsed time to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // --- quick smoke checks; the values are only meant to be inspected under a debugger ---
            var suggestor = new SpellingSuggestor(3);

            suggestor.AddRange("manly,abcde,hamburger,apple".Split(','));
            var suggestions = suggestor.Lookup("applr").ToList();

            var smallTree = new AdaptiveRadixTree <string, string>();

            smallTree.Add("dfgdfg", "213123");

            var identicalPlateDistance = FuzzyStringMatch.CombinedWordEditDistance("H7N0K3", "H7N0K3");

            // --- fill the tree (and a reference dictionary) with the generated keys ---
            var art  = new AdaptiveRadixTree <string, int>();
            var dict = new Dictionary <string, int>();
            int aa   = 0;

            var items = AdaptiveRadixTreeTest.GenerateTestKeys(100000).ToArray();


            foreach (var item in items)
            {
                art.Add(in item, aa);
                dict.Add(item, aa);
                aa++;
            }
            System.Diagnostics.Debug.Write(art.CalculateMetrics());
            //// System.Diagnostics.Debug.Write(art.DebugDump(true));
            //
            // Stopwatch is monotonic and high-resolution; subtracting DateTime.UtcNow values is neither.
            var stopwatch   = System.Diagnostics.Stopwatch.StartNew();
            int readResults = 0;

            // todo: read/write not using stream
            for (int i = 0; i < items.Length; i++)
            {
                var item = items[i];
                //readResults += art.PartialMatch(item.Substring(0, item.Length - 1) + ".", '.').Count();
                //readResults += art.PartialMatch("." + item.Substring(1), '.').Count();
                //readResults += art.RegExpMatch("[A-B-D]" + item.Substring(1)).Count();
                //readResults += art.StartsWithKeys(item.Substring(0, item.Length - 1)).Count();
                //readResults += art.RegExpNearNeighbors("[A-B-D]" + item.Substring(1), 0).Count();

                // remove()
                // fix path filter enumerator to call the filter method in-order

                // try with just one entry

                //var sdfsdf = art.RegExpNearNeighbors("BBBI", 2).ToList();


                if (i == 99999)
                {
                    // no-op; appears to serve as a breakpoint anchor for the last key
                    "".ToLower();
                }
                if (!art.Remove(item)) // historical note: this failed at i=163 with the off-branch code
                {
                    // no-op; appears to serve as a breakpoint anchor for a failed removal
                    "".ToLower();
                }
                //if(i > 145 && art.DebugDump(true).Contains("EXCEPTION"))
                //    "".ToString();
            }

            //var dict4 = new Dictionary<int, string>(100000);
            //int coll = 0;
            //for(int i = 0; i < 100000; i++) {
            //    //FuzzyStringMatch.GetStableHashCode(items[i % 100000]);
            //    //items[i % 100000].GetHashCode();
            //                    if((i %100000) == 0)
            //        dict4.Clear();
            //    try {
            //        dict4.Add(FuzzyStringMatch.GetStableHashCode(items[i % 100000]), items[i % 100000]);
            //    } catch {
            //        coll++;
            //        }
            //}

            stopwatch.Stop();

            System.Console.WriteLine(stopwatch.Elapsed.ToString());
            //System.Console.WriteLine(readResults);
            System.Console.ReadLine();
        }
コード例 #3
0
 /// <summary>
 ///     Scores how closely two already-normalized license plates match by delegating to
 ///     <c>FuzzyStringMatch.CombinedWordEditDistance</c>.
 /// </summary>
 /// <param name="normalized_input">The normalized plate being searched for.</param>
 /// <param name="normalized_match">The normalized candidate plate.</param>
 /// <returns>The value produced by <c>CombinedWordEditDistance</c>, as a double.</returns>
 private static double CalculateLicensePlateSimilarity(string normalized_input, string normalized_match)
 {
     // Disabled alternative scoring: FuzzyStringMatch.CombinedWordSimilarity(normalized_input, normalized_match)
     double score = FuzzyStringMatch.CombinedWordEditDistance(normalized_input, normalized_match);

     return score;
 }
コード例 #4
0
 /// <summary>
 ///     Computes the hash code of <paramref name="obj"/> via <c>FuzzyStringMatch.GetStableHashCode</c>.
 /// </summary>
 /// <param name="obj">The string to hash.</param>
 /// <returns>The hash code returned by <c>GetStableHashCode</c>.</returns>
 public int GetHashCode(string obj) => FuzzyStringMatch.GetStableHashCode(obj);
コード例 #5
0
 /// <summary>
 ///     Brings a license plate into canonical form: surrounding whitespace trimmed, diacritics
 ///     removed through <c>FuzzyStringMatch.RemoveDiacritics</c>, then upper-cased (invariant culture).
 /// </summary>
 /// <param name="value">The raw license plate text.</param>
 /// <returns>The normalized license plate.</returns>
 private static string NormalizeLicensePlate(string value)
 {
     // Inner spaces are kept; stripping them is disabled:
     //   value = value.Replace(" ", string.Empty);
     var trimmed           = value.Trim();
     var withoutDiacritics = FuzzyStringMatch.RemoveDiacritics(trimmed);

     // note: UPPER case is relevant because license plates are printed in upper case,
     // and visual similarity is taken into account
     return withoutDiacritics.ToUpperInvariant();
 }