예제 #1
0
        /// <summary>
        /// Merges the lines of the file at <c>NewWordsListPath</c> into the file at <c>WordListPath</c>.
        /// The new words are first checked with <c>Nuve.AreAllWordsValid</c>; if that check fails,
        /// a warning is printed and the word list file is left untouched.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the merged, sorted and de-duplicated list has been written back;
        /// <c>false</c> when the new words were rejected by the validity check.
        /// </returns>
        private static bool AddNewWordsToWordList()
        {
            string[] candidates = File.ReadAllLines(NewWordsListPath, Encoding.UTF8);

            // Guard clause: refuse to merge anything when the validity check fails.
            if (Nuve.AreAllWordsValid(candidates) == false)
            {
                Console.WriteLine(@"Warning: Not all words in the new word list are correct");
                return false;
            }

            // Existing entries first, then the new ones, materialised so the list can be sorted in place.
            var merged = File.ReadAllLines(WordListPath, Encoding.UTF8)
                             .Concat(candidates)
                             .ToList();
            merged.Sort();

            // Distinct() keeps the first occurrence of each entry, so the sorted order is preserved.
            var uniqueSorted = merged.Distinct();
            File.WriteAllLines(WordListPath, uniqueSorted, Encoding.UTF8);

            return true;
        }
예제 #2
0
        /// <summary>
        /// Tries to stem every word in the given sequence and returns a map containing word-stem pairs.
        /// If a word cannot be stemmed, it is ignored and does not exist in the map.
        /// A one-line summary of how many words could not be stemmed is written to the console.
        /// </summary>
        /// <param name="words">Words to be stemmed. The sequence is enumerated exactly once.</param>
        /// <returns>A map which contains a word-stem pair for every distinct stemmable word in the given parameter.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="words"/> is null.</exception>
        public static IDictionary <string, string> Stem(IEnumerable <string> words)
        {
            if (words == null)
            {
                throw new ArgumentNullException("words");
            }

            var map       = new Dictionary <string, string>();
            int total     = 0; // counted in the loop so that `words` is not enumerated a second time
            int unstemmed = 0;

            foreach (var word in words)
            {
                total++;

                // A repeated word that is already in the map was stemmed successfully earlier,
                // so it must not be reported as a failure.
                if (map.ContainsKey(word))
                {
                    continue;
                }

                string stem;
                if (Nuve.TryStem(word, out stem))
                {
                    map.Add(word, stem);
                }
                else
                {
                    unstemmed++;
                }
            }

            Console.WriteLine(unstemmed + " of " + total + " words can not be stemmed");
            return(map);
        }