/// <summary>
/// Builds a list of at most <paramref name="n"/> suggestion strings for
/// <paramref name="word"/>, ordered by ascending distance as reported by
/// <c>Levenshtein.GetDistanceTwoRows</c>.
/// </summary>
/// <param name="patterns">Candidate patterns; only those whose <c>IsCloseEnough</c> check passes are used.</param>
/// <param name="word">The word to find suggestions for.</param>
/// <param name="n">Maximum number of suggestions taken per pattern and in the final result.</param>
/// <param name="affixes">Affix table passed through to <c>IsCloseEnough</c> and <c>GetVariants</c>.</param>
/// <returns>The closest variants, or an empty list when no pattern is close enough.</returns>
public static List<string> GetTop(IReadOnlyList<Pattern> patterns, string word, int n, IReadOnlyDictionary<string, Affix> affixes)
{
    // NOTE(review): the bag is only filled from this thread, so a plain list would
    // probably do; it is kept because swapping the collection could change the
    // enumeration order and with it the order of equal-distance suggestions.
    var closePatterns = new ConcurrentBag<Pattern>();
    foreach (var candidate in patterns)
    {
        if (candidate.IsCloseEnough(word, affixes))
        {
            closePatterns.Add(candidate);
        }
    }

    if (closePatterns.IsEmpty)
    {
        return new List<string>();
    }

    // For every close pattern keep only its n best distinct variants, then merge.
    var bestOfEachPattern = closePatterns.SelectMany(pattern =>
        pattern.GetVariants(affixes)
               .Distinct()
               .Select(text => new { Variant = text, Distance = Levenshtein.GetDistanceTwoRows(word, text) })
               .OrderBy(scored => scored.Distance)
               .Take(n));

    // TODO: Interleave the suggestions from all of the patterns
    // when they are at the same distance from the word
    var closestOverall = bestOfEachPattern
        .OrderBy(scored => scored.Distance)
        .Take(n);

    return closestOverall
        .Select(scored => scored.Variant)
        .ToList();
}
/// <summary>
/// Decides whether some expansion of this pattern can stay within the configured
/// distance thresholds of <paramref name="word"/>. The pattern's parts are appended
/// to <paramref name="builder"/> one by one, starting at index <paramref name="i"/>,
/// and the text built so far is checked with <c>Levenshtein.GetDistanceTwoRows</c>
/// after every step so that hopeless candidates are rejected early.
/// </summary>
/// <param name="word">The word the pattern is compared against.</param>
/// <param name="affixes">Lookup from an affix part to its <c>Affix</c>; its <c>Values</c> are the variants tried.</param>
/// <param name="builder">
/// Text built from the parts before <paramref name="i"/>; a new builder is created when null.
/// The builder is modified by this call (recursive calls restore it afterwards).
/// </param>
/// <param name="i">Index of the first part to process; used by the recursive calls.</param>
/// <returns>True when all remaining parts could be processed without exceeding the thresholds.</returns>
public bool IsCloseEnough(string word, IReadOnlyDictionary<string, Affix> affixes, StringBuilder builder = null, int i = 0)
{
    builder = builder ?? new StringBuilder(word.Length);
    var matches = new List<int>(10);

    for (; i < Parts.Count; i++)
    {
        var p = Parts[i];
        var isLastPart = i == Parts.Count - 1;

        if (IsPartAnAffixFlags[i])
        {
            // Try every variant of the affix and remember the indices of those
            // that keep the text within the (looser) ThresholdPlusOne limit.
            matches.Clear();
            var variants = affixes[p].Values;

            for (int j = 0; j < variants.Length; j++)
            {
                var variant = variants[j];
                builder.Append(variant);

                // The last part is compared against the whole word; earlier parts
                // are limited to the length built so far, capped at the word length.
                // NOTE(review): the exact meaning of the third argument is defined by
                // Levenshtein.GetDistanceTwoRows, which is not visible here - confirm.
                var tLength = isLastPart ? word.Length : Math.Min(builder.Length, word.Length);
                if (Levenshtein.GetDistanceTwoRows(builder, word, tLength) <= Constants.ThresholdPlusOne)
                {
                    matches.Add(j);
                }

                // Undo the trial append so the next variant starts from the same text.
                builder.Remove(builder.Length - variant.Length, variant.Length);
            }

            if (matches.Count == 0)
            {
                return false;
            }

            if (isLastPart)
            {
                // Nothing follows, so an acceptable variant is enough.
                continue;
            }

            if (matches.Count == 1)
            {
                // Fix: keep the only acceptable variant in the builder before moving on.
                // Previously it was dropped, so the following parts were compared against
                // the word with the affix text missing, unlike the multi-match path below,
                // which appends the variant before checking the remaining parts.
                builder.Append(variants[matches[0]]);
                continue;
            }

            // Several variants are acceptable: try each one with the remaining parts.
            foreach (var match in matches)
            {
                var originalLength = builder.Length;
                builder.Append(variants[match]);
                var restIsClose = IsCloseEnough(word, affixes, builder, i + 1);

                // The recursive call may have appended later parts; restore the builder.
                builder.Remove(originalLength, builder.Length - originalLength);
                if (restIsClose)
                {
                    return true;
                }
            }

            return false;
        }
        else
        {
            builder.Append(p as string);

            // Too little text built so far to make a decision; keep appending.
            if (builder.Length < Constants.ThresholdPlusOne)
            {
                continue;
            }

            var tLength = isLastPart ? word.Length : Math.Min(builder.Length, word.Length);
            if (Levenshtein.GetDistanceTwoRows(builder, word, tLength) > Constants.Threshold)
            {
                return false;
            }
        }
    }

    return true;
}