Ejemplo n.º 1
0
        /// <summary>
        /// Scores every candidate against <paramref name="input"/> and returns up to
        /// <paramref name="maxResults"/> of the best matches, most similar first.
        /// </summary>
        /// <param name="input">The word the candidates are compared against.</param>
        /// <param name="maxResults">The maximum number of results returned.</param>
        /// <param name="possibleResults">The candidate words to score.</param>
        /// <returns>The top matches ordered by descending similarity.</returns>
        private IEnumerable<IWordSimilarityNode> OrderPossibilities(string input, int maxResults, ICollection<string> possibleResults)
        {
            //Cap the queue at the number of results we actually need.
            //When possibleResults.Count <= maxResults this equals the full count,
            //so no separate "small collection" branch is needed: the queue simply
            //holds everything and the final ordering sorts it all.
            var max = Math.Min(possibleResults.Count, maxResults);

            //With a large number of words to evaluate, we only care about the top matches.
            //Instead of sorting a large number of elements at the end, we use a queue and restrict its running size to max.
            //This queue will do that faster than other container types.
            //SortedList.RemoveAt is O(n)
            //SortedDictionary/SortedSet.ElementAt is O(n)
            var likelyWordsQueue = new WordQueue(max);

            foreach (var word in possibleResults)
            {
                var jw = StringSimilarityProvider.Similarity(input, word);
                likelyWordsQueue.Enqueue(word, jw);
            }

            return likelyWordsQueue.OrderByDescending(x => x.Similarity);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Finds possible corrections or extensions for a given word.
        /// </summary>
        /// <param name="input">The word to spell-check.</param>
        /// <param name="maxResults">The maximum number of results returned.</param>
        /// <returns>Up to <see cref="maxResults">maxResults</see> words along with their similarity score. Not sorted.</returns>
        /// <summary>
        /// Finds possible corrections or extensions for a given word.
        /// </summary>
        /// <param name="input">The word to spell-check.</param>
        /// <param name="maxResults">The maximum number of results returned.</param>
        /// <returns>Up to <paramref name="maxResults"/> words along with their similarity score. Not sorted.</returns>
        public IEnumerable<IWordSimilarityNode> PrefixLookup(string input, int maxResults)
        {
            var iLen = input.Length;
            //A set (not a list) in both branches so that re-adding the exact input
            //below can never produce a duplicate entry in the results.
            var possibleResults = new HashSet<string>();

            if (iLen < 3)
            {
                //Too short to generate deletes; only direct dictionary hits qualify.
                List<int> di;
                if (!Dictionary.TryGetValue(input, out di))
                {
                    return new WordSimilarityNode[0];
                }

                foreach (var word in di.Select(index => Wordlist[index]).Where(word => word.Length >= iLen))
                {
                    possibleResults.Add(word);
                }
            }
            else
            {
                var deletes = new HashSet<string> { input };
                //Arbitrary decision of the allowable number of deletes.
                //Could have this start smaller and increase it if a threshold is not met.
                //But then we risk the problem of having a prefix with a lot of very close but uncommon words.
                //A small deletion count could miss a word with slightly more distance, but that is much more common.
                //I don't even know if those exist though.
                Edits(input, (iLen - 1) / 3, deletes);
                foreach (var delete in deletes)
                {
                    List<int> di;
                    if (!Dictionary.TryGetValue(delete, out di)) { continue; }
                    foreach (var word in di.Select(index => Wordlist[index]).Where(word => word.Length >= iLen))
                    {
                        possibleResults.Add(word);
                    }
                }
            }

            //The input itself is always a candidate when it is a known word.
            List<int> se;
            if (Dictionary.TryGetValue(input, out se) && se.Any(x => Wordlist[x] == input))
            {
                possibleResults.Add(input);
            }

            //If we only have a small amount, we can score them all without a bounded queue.
            if (possibleResults.Count <= maxResults)
            {
                return possibleResults.Select(word => new WordSimilarityNode { Word = word, Similarity = _stringSimilarityProvider.Similarity(input, word) });
            }

            var max = Math.Min(possibleResults.Count, maxResults);

            //With a large number of words to evaluate, but we only care about the top matches.
            //Instead of sorting a large number of elements at the end, we use a queue and restrict its running size to max.
            //This queue will do that faster than other container types.
            //SortedList.RemoveAt is O(n)
            //SortedDictionary/SortedSet.ElementAt is O(n)
            var likelyWordsQueue = new WordQueue(max);
            foreach (var word in possibleResults)
            {
                var jw = _stringSimilarityProvider.Similarity(input, word);
                likelyWordsQueue.Enqueue(word, jw);
            }

            return likelyWordsQueue;
        }