/// <summary>
/// Chooses a single replacement for a word by consulting the spell-check index.
/// </summary>
/// <param name="OriginalWord">The word to look up.</param>
/// <returns>
/// <paramref name="OriginalWord"/> unchanged when the index reports a document
/// frequency greater than zero for it; otherwise the suggestion with the highest
/// combined score, or <c>null</c> when the spell checker yields no suggestions.
/// </returns>
public string GetBestMatchWord(string OriginalWord)
        {
            EnsureIndexed();

            // A word that already occurs in the "word" field is returned as-is.
            if (indexReader.DocFreq(new Term("word", OriginalWord)) > 0)
            {
                return OriginalWord;
            }

            // 10 is passed as the suggestion-count argument of SuggestSimilar.
            var candidates  = _luceneChecker.SuggestSimilar(OriginalWord, 10, null, "word", true);
            var jaroWinkler = new JaroWinklerDistance();
            var levenshtein = new LevenshteinDistance();
            var nGram       = new NGramDistance();

            // Score = mean of (document frequency / 100) and the three distance values.
            // Candidates are ranked highest score first; ties keep the order in which
            // the spell checker returned them (LINQ ordering is stable).
            // NOTE(review): this presumes larger GetDistance values mean "more similar" - confirm.
            return candidates
                   .Select(candidate => new
                   {
                       Word  = candidate,
                       Score = ((indexReader.DocFreq(new Term("word", candidate)) / 100f) +
                                jaroWinkler.GetDistance(OriginalWord, candidate) +
                                levenshtein.GetDistance(OriginalWord, candidate) +
                                nGram.GetDistance(OriginalWord, candidate))
                               / 4f
                   })
                   .OrderByDescending(scored => scored.Score)
                   .Select(scored => scored.Word)
                   .FirstOrDefault();
        }
        /// <summary>
        /// Builds a ranked list of alternative words for <paramref name="OriginalWord"/>,
        /// including the individual metrics used for the ranking.
        /// </summary>
        /// <param name="OriginalWord">The word to find alternatives for.</param>
        /// <param name="NumberToReturn">Value passed to the spell checker as the suggestion count.</param>
        /// <returns>
        /// An <c>AlternateWordList</c> carrying the original word, its document frequency in
        /// the index, and one <c>AlternateWord</c> per suggestion, ordered by descending
        /// best-match score with a 1-based <c>BestMatchSortOrder</c>.
        /// </returns>
        public AlternateWordList GetAlternateWordList(string OriginalWord, int NumberToReturn)
        {
            var result = new AlternateWordList { OriginalWord = OriginalWord };

            EnsureIndexed();
            result.OriginalWordFrequency = indexReader.DocFreq(new Term("word", OriginalWord));

            var candidates  = _luceneChecker.SuggestSimilar(OriginalWord, NumberToReturn, null, "word", true);
            var jaroWinkler = new JaroWinklerDistance();
            var levenshtein = new LevenshteinDistance();
            var nGram       = new NGramDistance();

            // Gather the raw metrics per candidate and derive the combined score once:
            // the mean of (frequency / 100) and the three distance values.
            var ranked = candidates
                         .Select(candidate =>
                         {
                             var frequency  = indexReader.DocFreq(new Term("word", candidate));
                             var jaroScore  = jaroWinkler.GetDistance(OriginalWord, candidate);
                             var levenScore = levenshtein.GetDistance(OriginalWord, candidate);
                             var nGramScore = nGram.GetDistance(OriginalWord, candidate);

                             return new
                             {
                                 Word      = candidate,
                                 Frequency = frequency,
                                 Jaro      = jaroScore,
                                 Leven     = levenScore,
                                 NGram     = nGramScore,
                                 Score     = ((frequency / 100f) + jaroScore + levenScore + nGramScore) / 4f
                             };
                         })
                         .OrderByDescending(entry => entry.Score);

            // Position in the ranked sequence becomes the 1-based sort order.
            result.Words = ranked
                           .Select((entry, position) => new AlternateWord
                           {
                               Word               = entry.Word,
                               Frequency          = entry.Frequency,
                               JaroWinkler        = entry.Jaro,
                               Levenshtein        = entry.Leven,
                               NGram              = entry.NGram,
                               BestMatchScore     = entry.Score,
                               BestMatchSortOrder = position + 1
                           })
                           .ToList();

            return result;
        }
Example #3
0
    /// <summary>
    /// Demo page handler: writes the Jaro-Winkler and Levenshtein results for two
    /// fixed sample strings to the response, each followed by a line break.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        const string baseName     = "Holmes";
        const string extendedName = "Holmes2136";
        const string lineBreak    = "<BR>";

        // NOTE(review): GetDistance is invoked statically on JaroWinklerDistance here,
        // while LevenshteinDistance below is instantiated first - confirm the
        // JaroWinklerDistance type in scope really exposes a static GetDistance.
        var jaroWinklerResult = JaroWinklerDistance.GetDistance(baseName, extendedName);
        Response.Write(jaroWinklerResult);
        Response.Write(lineBreak);

        var levenshteinResult = new LevenshteinDistance().GetDistance(baseName, extendedName);
        Response.Write(levenshteinResult);
        Response.Write(lineBreak);
    }
        /// <summary>
        /// Returns spelling suggestions for <paramref name="value"/>, best match first.
        /// </summary>
        /// <param name="value">The word to find suggestions for.</param>
        /// <param name="numberOfItems">Value passed to the spell checker as the suggestion count.</param>
        /// <returns>
        /// A list that starts with <paramref name="value"/> itself when the index reports a
        /// document frequency greater than zero for it, followed by the spell checker's
        /// suggestions ordered by descending Jaro-Winkler value, then descending n-gram
        /// value, then descending mean of (frequency / 100) and the Levenshtein value.
        /// </returns>
        public List<string> GetTopSuggestions(string value, int numberOfItems)
        {
            EnsureIndexed();

            var results = new List<string>();

            // If the word itself exists in the index, it goes first.
            if (_indexReader.DocFreq(new Term(SpellCheckerConstants.SpellCheckerKey, value)) > 0)
            {
                results.Add(value);
            }

            var candidates  = _checker.SuggestSimilar(value, numberOfItems, null, SpellCheckerConstants.SpellCheckerKey, true);
            var jaroWinkler = new JaroWinklerDistance();
            var levenshtein = new LevenshteinDistance();
            var nGram       = new NGramDistance();

            var ranked =
                from candidate in candidates
                let frequency  = _indexReader.DocFreq(new Term(SpellCheckerConstants.SpellCheckerKey, candidate))
                let jaroScore  = jaroWinkler.GetDistance(value, candidate)
                let levenScore = levenshtein.GetDistance(value, candidate)
                let nGramScore = nGram.GetDistance(value, candidate)
                orderby jaroScore descending,
                        nGramScore descending,
                        (frequency / 100f + levenScore) / 2f descending
                select candidate;

            results.AddRange(ranked);

            return results;
        }