/// <summary>
        ///     Stems each space-separated word of an ngram string. A word is replaced by
        ///     its stem only when that stem is a key in the supplied dictionary;
        ///     otherwise the original word is kept unchanged.
        /// </summary>
        /// <param name="ngram">The ngram; it is split into words on the space character.</param>
        /// <param name="dictionary">The dictionary whose keys are treated as the known words.</param>
        /// <returns>
        ///     The resulting words joined by single spaces, with leading and trailing
        ///     whitespace trimmed.
        /// </returns>
        public static String StemNgram(String ngram, IDictionary <String, String> dictionary)
        {
            string[] words = ngram.Split(' ');

            for (int i = 0; i < words.Length; i++)
            {
                string original = words[i];
                string stem     = new EnglishWordStemmer(original).Stem;

                // keep the stem only when the dictionary knows it as a word
                words[i] = dictionary.ContainsKey(stem) ? stem : original;
            }

            // single-space separation; Trim drops any whitespace left at either end
            return string.Join(" ", words).Trim();
        }
Example #2
0
        /// <summary>
        ///     Checks the stem produced by EnglishWordStemmer for an empty string
        ///     and a handful of English words.
        /// </summary>
        public void BasicStemmerTest()
        {
            // NOTE(review): assumes an NUnit/MSTest-style Assert.AreEqual(expected, actual);
            // the expected value goes first so failure messages label the values correctly.
            var stemmer = new EnglishWordStemmer("");
            Assert.AreEqual("", stemmer.Stem);

            stemmer = new EnglishWordStemmer("exceedingly");
            Assert.AreEqual("exceed", stemmer.Stem);

            stemmer = new EnglishWordStemmer("running");
            Assert.AreEqual("run", stemmer.Stem);

            // TODO: add dictionary check to results, return orig if not valid
            //stemmer = new EnglishWordStemmer("happier");
            //Assert.AreEqual("happy", stemmer.Stem);

            // a word that is already its own stem must come back unchanged
            stemmer = new EnglishWordStemmer("slim");
            Assert.AreEqual("slim", stemmer.Stem);
        }