예제 #1
0
        /// <summary>
        /// Builds the word list of every news item from its raw text and stores
        /// it on the item (<c>info.words</c>). Each item is first passed to
        /// <c>Filter</c>, then its <c>rawData</c> is split on single spaces; every
        /// token is lower-cased and trimmed, checked with
        /// <c>WordQuilifier.WordIsOK</c>, and accepted tokens are stemmed with
        /// <c>Stemmer.DoPorterStemming</c> before being collected.
        /// </summary>
        /// <param name="news">Items to process; each one is modified in place.</param>
        /// <returns>The same list instance that was passed in.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="news"/> is null.
        /// </exception>
        public static List<News> ComputeWords(List<News> news)
        {
            if (news == null)
            {
                throw new System.ArgumentNullException("news");
            }

            foreach (News info in news)
            {
                // Filter runs before rawData is read; presumably it cleans the
                // raw text in place - confirm against Filter's implementation.
                Filter(info);

                var words = new List<string>();

                // An item without raw text simply ends up with an empty word list
                // instead of aborting the whole batch.
                if (info.rawData != null)
                {
                    foreach (string token in info.rawData.Split(' '))
                    {
                        // Normalization. The invariant culture keeps the result
                        // independent of the machine's locale (e.g. Turkish 'I').
                        string word = token.ToLowerInvariant().Trim();

                        // Consecutive spaces produce empty tokens; they carry no word.
                        if (word.Length == 0)
                        {
                            continue;
                        }

                        if (WordQuilifier.WordIsOK(word))
                        {
                            // Stemming is applied only to words that passed the check.
                            words.Add(Stemmer.DoPorterStemming(word));
                        }
                    }
                }

                info.words = words;
            }

            return news;
        }
예제 #2
0
        /// <summary>
        /// Splits <paramref name="pureText"/> on single spaces and returns the
        /// stemmed form of every token that passes the word check. Each token is
        /// lower-cased and trimmed, stemmed with <c>Stemmer.DoPorterStemming</c>,
        /// and kept only if <c>WordQuilifier.WordIsOK</c> accepts the stemmed result.
        /// </summary>
        /// <param name="pureText">Text to tokenize.</param>
        /// <returns>
        /// A new list with the accepted stems in their order of appearance;
        /// empty when no token qualifies.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="pureText"/> is null.
        /// </exception>
        public List<string> getWordList(string pureText)
        {
            if (pureText == null)
            {
                throw new System.ArgumentNullException("pureText");
            }

            List<string> words = new List<string>();

            foreach (string token in pureText.Split(' '))
            {
                // Normalization. The invariant culture keeps the result
                // independent of the machine's locale (e.g. Turkish 'I').
                string word = token.ToLowerInvariant().Trim();

                // Consecutive spaces produce empty tokens; skip them so the
                // stemmer is never handed an empty string.
                if (word.Length == 0)
                {
                    continue;
                }

                // Stemming happens before the check, so the check sees the stem.
                word = Stemmer.DoPorterStemming(word);

                if (WordQuilifier.WordIsOK(word))
                {
                    words.Add(word);
                }
            }

            return words;
        }