/// <summary>
/// Stems every word of a space-separated n-gram. A word is replaced by its
/// stem only when that stem is a key of <paramref name="dictionary"/>;
/// otherwise the original word is kept unchanged.
/// </summary>
/// <param name="ngram">The n-gram to process; it is split into words on the space character.</param>
/// <param name="dictionary">The dictionary of known words. Only its keys are consulted.</param>
/// <returns>
/// The resulting words joined with single spaces, with leading and trailing
/// whitespace trimmed.
/// </returns>
public static String StemNgram(String ngram, IDictionary<String, String> dictionary)
{
    string[] tokens = ngram.Split(' ');
    var resolved = new string[tokens.Length];

    for (int i = 0; i < tokens.Length; i++)
    {
        string original = tokens[i];
        string candidate = new EnglishWordStemmer(original).Stem;

        // Fall back to the original word when the stem is not a dictionary key.
        resolved[i] = dictionary.ContainsKey(candidate) ? candidate : original;
    }

    // Join with single spaces, then trim the ends of the assembled string.
    return string.Join(" ", resolved).Trim();
}
/// <summary>
/// Checks <c>EnglishWordStemmer.Stem</c> against a handful of fixed inputs:
/// the empty string, two inflected words ("exceedingly", "running") and a
/// word that is expected to come back unchanged ("slim").
/// </summary>
public void BasicStemmerTest()
{
    // Assert.AreEqual takes (expected, actual); keeping that order makes
    // failure messages report the expected and actual values correctly.
    var stemmer = new EnglishWordStemmer("");
    Assert.AreEqual("", stemmer.Stem);

    stemmer = new EnglishWordStemmer("exceedingly");
    Assert.AreEqual("exceed", stemmer.Stem);

    stemmer = new EnglishWordStemmer("running");
    Assert.AreEqual("run", stemmer.Stem);

    // TODO: add dictionary check to results, return orig if not valid
    //stemmer = new EnglishWordStemmer("happier");
    //Assert.AreEqual("happy", stemmer.Stem);

    stemmer = new EnglishWordStemmer("slim");
    Assert.AreEqual("slim", stemmer.Stem);
}