/// <summary>
/// Picks a single replacement for <paramref name="OriginalWord"/>.
/// </summary>
/// <param name="OriginalWord">The word to look up.</param>
/// <returns>
/// <paramref name="OriginalWord"/> itself when the index reader reports a non-zero
/// document frequency for it in the "word" field; otherwise the highest-scoring
/// suggestion from the spell checker, or the default value (null) when the spell
/// checker yields no suggestions.
/// </returns>
public string GetBestMatchWord(string OriginalWord)
{
    EnsureIndexed();

    // A word that is already present in the index needs no correction.
    if (indexReader.DocFreq(new Term("word", OriginalWord)) > 0)
    {
        return OriginalWord;
    }

    var candidates = _luceneChecker.SuggestSimilar(OriginalWord, 10, null, "word", true);

    var jaroWinkler = new JaroWinklerDistance();
    var levenshtein = new LevenshteinDistance();
    var nGram = new NGramDistance();

    // Score = average of (document frequency / 100) and the three string metrics;
    // candidates are ranked from highest to lowest score.
    var ranked =
        from candidate in candidates
        let frequency = indexReader.DocFreq(new Term("word", candidate))
        let jaroScore = jaroWinkler.GetDistance(OriginalWord, candidate)
        let levenScore = levenshtein.GetDistance(OriginalWord, candidate)
        let nGramScore = nGram.GetDistance(OriginalWord, candidate)
        orderby ((frequency / 100f) + jaroScore + levenScore + nGramScore) / 4f descending
        select candidate;

    return ranked.FirstOrDefault();
}
/// <summary>
/// Builds a ranked list of alternative spellings for <paramref name="OriginalWord"/>.
/// </summary>
/// <param name="OriginalWord">The word to find alternatives for.</param>
/// <param name="NumberToReturn">The number of suggestions requested from the spell checker.</param>
/// <returns>
/// An <see cref="AlternateWordList"/> carrying the original word, its document
/// frequency in the "word" field, and the suggestions ordered by descending
/// blended score. Each suggestion records its individual metrics, the blended
/// score, and a 1-based sort position.
/// </returns>
public AlternateWordList GetAlternateWordList(string OriginalWord, int NumberToReturn)
{
    var result = new AlternateWordList();
    result.OriginalWord = OriginalWord;

    EnsureIndexed();
    result.OriginalWordFrequency = indexReader.DocFreq(new Term("word", OriginalWord));

    var candidates = _luceneChecker.SuggestSimilar(OriginalWord, NumberToReturn, null, "word", true);

    var jaroWinkler = new JaroWinklerDistance();
    var levenshtein = new LevenshteinDistance();
    var nGram = new NGramDistance();

    result.Words = candidates
        // Gather the raw metrics for every suggestion.
        .Select(candidate => new
        {
            Text = candidate,
            Frequency = indexReader.DocFreq(new Term("word", candidate)),
            Jaro = jaroWinkler.GetDistance(OriginalWord, candidate),
            Leven = levenshtein.GetDistance(OriginalWord, candidate),
            Gram = nGram.GetDistance(OriginalWord, candidate)
        })
        // Blend them once: average of (frequency / 100) and the three string metrics.
        .Select(m => new
        {
            Metrics = m,
            Score = ((m.Frequency / 100f) + m.Jaro + m.Leven + m.Gram) / 4f
        })
        .OrderByDescending(scored => scored.Score)
        // The position in the sorted sequence becomes the 1-based sort order.
        .Select((scored, position) => new AlternateWord
        {
            Word = scored.Metrics.Text,
            Frequency = scored.Metrics.Frequency,
            JaroWinkler = scored.Metrics.Jaro,
            Levenshtein = scored.Metrics.Leven,
            NGram = scored.Metrics.Gram,
            BestMatchScore = scored.Score,
            BestMatchSortOrder = position + 1
        })
        .ToList();

    return result;
}
/// <summary>
/// Writes the Jaro-Winkler and Levenshtein <c>GetDistance</c> results for two
/// fixed sample strings to the response, each followed by a "&lt;BR&gt;" tag.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
protected void Page_Load(object sender, EventArgs e)
{
    const string shortSample = "Holmes";
    const string longSample = "Holmes2136";
    const string lineBreak = "<BR>";

    // NOTE(review): GetDistance is invoked on the type here, whereas
    // LevenshteinDistance below is instantiated first — confirm that the
    // JaroWinklerDistance in scope really exposes a static GetDistance.
    var jaroWinklerResult = JaroWinklerDistance.GetDistance(shortSample, longSample);
    Response.Write(jaroWinklerResult);
    Response.Write(lineBreak);

    var levenshteinResult = new LevenshteinDistance().GetDistance(shortSample, longSample);
    Response.Write(levenshteinResult);
    Response.Write(lineBreak);
}
/// <summary>
/// Returns spelling suggestions for <paramref name="value"/>, best match first.
/// </summary>
/// <param name="value">The word to find suggestions for.</param>
/// <param name="numberOfItems">The number of suggestions requested from the spell checker.</param>
/// <returns>
/// A list that starts with <paramref name="value"/> itself when the index reader
/// reports a non-zero document frequency for it, followed by the spell checker's
/// suggestions ordered by Jaro-Winkler result, then n-gram result, then the
/// average of (document frequency / 100) and the Levenshtein result — all descending.
/// </returns>
public List<string> GetTopSuggestions(string value, int numberOfItems)
{
    EnsureIndexed();

    var results = new List<string>();

    // If the word itself exists in the index, it is listed first.
    if (_indexReader.DocFreq(new Term(SpellCheckerConstants.SpellCheckerKey, value)) > 0)
    {
        results.Add(value);
    }

    var candidates = _checker.SuggestSimilar(value, numberOfItems, null, SpellCheckerConstants.SpellCheckerKey, true);

    var jaroWinkler = new JaroWinklerDistance();
    var levenshtein = new LevenshteinDistance();
    var nGram = new NGramDistance();

    var ranked =
        from candidate in candidates
        let frequency = _indexReader.DocFreq(new Term(SpellCheckerConstants.SpellCheckerKey, candidate))
        let jaroScore = jaroWinkler.GetDistance(value, candidate)
        let levenScore = levenshtein.GetDistance(value, candidate)
        let nGramScore = nGram.GetDistance(value, candidate)
        orderby jaroScore descending,
                nGramScore descending,
                (frequency / 100f + levenScore) / 2f descending
        select candidate;

    results.AddRange(ranked);
    return results;
}