예제 #1
0
        /// <summary>
        /// Collects up to <paramref name="n"/> suggestions for <paramref name="word"/> from the
        /// patterns that pass <c>IsCloseEnough</c>, ordered by ascending distance to the word as
        /// reported by <c>Levenshtein.GetDistanceTwoRows</c>.
        /// </summary>
        /// <remarks>
        /// Variants are de-duplicated per pattern only, so the same variant may appear more than
        /// once if several patterns produce it. Equally distant suggestions keep the order of
        /// their patterns in <paramref name="patterns"/> because <c>OrderBy</c> is a stable sort.
        /// </remarks>
        /// <param name="patterns">Candidate patterns; each one is filtered with <c>IsCloseEnough</c>.</param>
        /// <param name="word">The word to find suggestions for.</param>
        /// <param name="n">Maximum number of suggestions; applied per pattern and to the final result.</param>
        /// <param name="affixes">Affix table handed to <c>IsCloseEnough</c> and <c>GetVariants</c>.</param>
        /// <returns>
        /// At most <paramref name="n"/> variants, closest first; an empty list when no pattern
        /// is close enough.
        /// </returns>
        public static List <string> GetTop(IReadOnlyList <Pattern> patterns, string word, int n, IReadOnlyDictionary <string, Affix> affixes)
        {
            // The filter loop is sequential, so a plain list is sufficient. It also keeps the
            // patterns in input order, which a ConcurrentBag does not guarantee.
            var closePatterns = new List <Pattern>();

            for (int i = 0; i < patterns.Count; i++)
            {
                var pattern = patterns[i];

                if (pattern.IsCloseEnough(word, affixes))
                {
                    closePatterns.Add(pattern);
                }
            }

            if (closePatterns.Count == 0)
            {
                return(new List <string>());
            }

            // TODO: Interleave the suggestions from all of the patterns
            // when they are at the same distance from the word

            // Keep only the n closest variants of each pattern before merging, so the final
            // sort never has to handle more than n candidates per pattern.
            return(closePatterns
                   .SelectMany(p => p.GetVariants(affixes)
                               .Distinct()
                               .Select(v => new
                               {
                                   Variant  = v,
                                   Distance = Levenshtein.GetDistanceTwoRows(word, v)
                               })
                               .OrderBy(c => c.Distance)
                               .Take(n))
                   .OrderBy(c => c.Distance)
                   .Take(n)
                   .Select(c => c.Variant)
                   .ToList());
        }
예제 #2
0
        /// <summary>
        /// Decides whether <paramref name="word"/> can plausibly be produced by this pattern.
        /// The candidate text is assembled part by part in <paramref name="builder"/> and
        /// compared against the word with <c>Levenshtein.GetDistanceTwoRows</c>; the check fails
        /// as soon as the text built so far is too far from the word.
        /// </summary>
        /// <remarks>
        /// Affix parts are accepted at a distance of up to <c>Constants.ThresholdPlusOne</c>,
        /// literal parts at up to <c>Constants.Threshold</c>. When several variants of a
        /// non-final affix match, each one is tried in turn by recursing on the remaining parts.
        /// </remarks>
        /// <param name="word">The word being checked against the pattern.</param>
        /// <param name="affixes">Lookup from an affix part to its <c>Affix</c>, whose <c>Values</c> are the variants tried.</param>
        /// <param name="builder">
        /// Text assembled from the parts before index <paramref name="i"/>; a new builder is
        /// created when null. It is modified by this call and is not restored on return.
        /// </param>
        /// <param name="i">Index of the first part to process; used by the recursive calls.</param>
        /// <returns>True when every remaining part could be matched closely enough; otherwise false.</returns>
        public bool IsCloseEnough(string word, IReadOnlyDictionary <string, Affix> affixes, StringBuilder builder = null, int i = 0)
        {
            builder = builder ?? new StringBuilder(word.Length);
            var matches = new List <int>(10);

            for (; i < Parts.Count; i++)
            {
                var p      = Parts[i];
                var isLast = i == Parts.Count - 1;

                if (IsPartAnAffixFlags[i])
                {
                    matches.Clear();

                    var variants = affixes[p].Values;

                    // Try every variant on top of the current text and remember which ones
                    // keep the text close to the word.
                    for (int j = 0; j < variants.Length; j++)
                    {
                        var variant = variants[j];

                        builder.Append(variant);

                        // The final part is compared against the whole word; earlier parts
                        // pass a length capped at the size of the text built so far.
                        var tLength = isLast ? word.Length : Math.Min(builder.Length, word.Length);

                        if (Levenshtein.GetDistanceTwoRows(builder, word, tLength) <= Constants.ThresholdPlusOne)
                        {
                            matches.Add(j);
                        }

                        builder.Remove(builder.Length - variant.Length, variant.Length);
                    }

                    if (matches.Count == 0)
                    {
                        return(false);
                    }

                    if (isLast)
                    {
                        // Nothing follows, so any matching variant is enough.
                        continue;
                    }

                    if (matches.Count == 1)
                    {
                        // Keep the only matching variant in the text so that the following
                        // parts are compared at the right position in the word.
                        builder.Append(variants[matches[0]]);
                        continue;
                    }

                    // Several variants match: try each one with the remaining parts.
                    foreach (var match in matches)
                    {
                        var originalLength = builder.Length;
                        builder.Append(variants[match]);

                        var restIsClose = IsCloseEnough(word, affixes, builder, i + 1);

                        // Drop this variant and anything the recursive call appended.
                        builder.Remove(originalLength, builder.Length - originalLength);

                        if (restIsClose)
                        {
                            return(true);
                        }
                    }

                    return(false);
                }
                else
                {
                    builder.Append(p as string);

                    // The distance check is skipped while the text is shorter than
                    // Constants.ThresholdPlusOne.
                    if (builder.Length < Constants.ThresholdPlusOne)
                    {
                        continue;
                    }

                    var tLength = isLast ? word.Length : Math.Min(builder.Length, word.Length);

                    if (Levenshtein.GetDistanceTwoRows(builder, word, tLength) > Constants.Threshold)
                    {
                        return(false);
                    }
                }
            }

            return(true);
        }