/// <summary>
/// Finds candidate words whose distance to <paramref name="word"/> does not exceed
/// <paramref name="editDistance"/>.
/// </summary>
/// <param name="word">The word to look up; its trigrams select the candidates.</param>
/// <param name="editDistance">Inclusive upper bound for <c>LevensteinInfo.GetDistance()</c>.</param>
/// <returns>
/// One <c>LevensteinInfo</c> (built from the candidate and <paramref name="word"/>) per word
/// returned by <c>trigramIndex.GetWordListUnion</c> that passes the distance check.
/// </returns>
private IEnumerable<LevensteinInfo> FindClosestWords(string word, int editDistance)
{
    // Candidates come from the index lookup on the word's trigrams.
    var trigrams = TrigramIndex.GetTrigramsFrom(word);
    var candidates = trigramIndex.GetWordListUnion(trigrams);

    // Pair every candidate with the looked-up word, then keep only the close ones.
    var measured = candidates.Select(candidate => new LevensteinInfo(candidate, word));
    return measured.Where(entry => entry.GetDistance() <= editDistance);
}
/// <summary>
/// Builds a <c>TrigramIndex</c> from an empty word list and writes its <c>Trigrams</c>
/// entries to the console, one per line as "key&lt;TAB&gt;count", ordered by
/// <c>Value.Count</c> from highest to lowest.
/// </summary>
public void TestIndexCreation()
{
    var emptyIndex = new TrigramIndex(new List<string>());

    // One output line per entry: key, a tab, then the size of the entry's value.
    var lines = from entry in emptyIndex.Trigrams
                orderby entry.Value.Count descending
                select entry.Key + "\t" + entry.Value.Count;

    Console.WriteLine(String.Join("\n", lines));
}
/// <summary>
/// Initializes the instance: indexes <paramref name="correctWords"/>, counts the occurrences
/// of each distinct word in <paramref name="someWords"/>, and builds the fuzzy dictionary
/// from the words the index does not contain.
/// </summary>
/// <param name="someWords">Words to analyse; duplicates are counted into <c>WordFrequencies</c>.</param>
/// <param name="correctWords">Words passed to the <c>TrigramIndex</c> constructor.</param>
public Mispellings(IEnumerable<string> someWords, IEnumerable<string> correctWords)
{
    trigramIndex = new TrigramIndex(correctWords);

    // Distinct word -> number of times it appears in someWords.
    WordFrequencies = someWords
        .GroupBy(w => w)
        .ToDictionary(occurrences => occurrences.Key, occurrences => occurrences.Count());

    // Only words for which the index reports ContainsWord == false are processed further.
    var unknownWords = WordFrequencies.Keys.Where(candidate => !trigramIndex.ContainsWord(candidate));

    // Materialize the infos once before handing them to GetFuzzyDictionary.
    fuzzyDictionary = GetFuzzyDictionary(RetrieveLevensteinInfos(unknownWords).ToList());
}