Example #1
0
        // Token delimiters: space, tab, CR, LF and the punctuation { } ( ) : ; . ,
        // No capture group is used, so Regex.Split does not echo the delimiters back
        // into its result. Built once instead of on every Partition call.
        private static readonly Regex s_PartitionDelimiters = new Regex("[ \\t\\r\\n{}():;.,]");

        /// <summary>
        /// Breaks <paramref name="input"/> into lower-cased word tokens, discarding
        /// delimiters, blank tokens and any token that <paramref name="stopWords"/>
        /// reports as a stop word.
        /// </summary>
        /// <param name="input">Text to tokenize. Must not be null.</param>
        /// <param name="stopWords">Handler whose IsStopword method decides which tokens are dropped.</param>
        /// <param name="useWordStemmer">
        /// When true, every surviving token is replaced by the result of
        /// PorterStemmer.stemTerm before the list is returned.
        /// </param>
        /// <returns>The accepted tokens, in the order they occur in the input.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public List <string> Partition(string input, StopWordsHandler stopWords, bool useWordStemmer = false)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // Invariant lower-casing keeps the tokens independent of the current thread culture.
            string[] pieces = s_PartitionDelimiters.Split(input.ToLowerInvariant());

            List<string> words = new List<string>();

            foreach (string piece in pieces)
            {
                // Adjacent delimiters yield empty pieces; skip those and whitespace-only pieces.
                if (piece.Trim().Length == 0)
                {
                    continue;
                }

                if (!stopWords.IsStopword(piece))
                {
                    words.Add(piece);
                }
            }

            if (useWordStemmer)
            {
                // Run the word list through an implementation of Martin Porter's stemming
                // algorithm, replacing each word with its "root" form in place.
                PorterStemmer stemmer = new PorterStemmer();
                for (int i = 0; i < words.Count; i++)
                {
                    words[i] = stemmer.stemTerm(words[i]);
                }
            }

            return words;
        }
 /// <summary>
 /// Creates a category named <paramref name="cat"/> that starts with an empty,
 /// sorted phrase table and keeps a reference to the supplied stop-word handler.
 /// </summary>
 /// <param name="cat">Value stored as the category name.</param>
 /// <param name="excluded">Stop-word handler stored for later use by this category.</param>
 public BayesCategory(string cat, StopWordsHandler excluded)
 {
     this.m_Name     = cat;
     this.m_Excluded = excluded;
     this.m_Phrases  = new SortedDictionary <string, BayesPhraseCount>();
 }
 /// <summary>
 /// Creates a classifier with no categories and a freshly constructed
 /// <see cref="StopWordsHandler"/>.
 /// </summary>
 public BayesClassifier()
 {
     this.m_ExcludedWords = new StopWordsHandler();
     this.m_Categories    = new Dictionary <string, IBayesCategory>();
 }
 /// <summary>
 /// Creates the category by forwarding both arguments, unchanged, to the base constructor.
 /// </summary>
 /// <param name="Cat">Passed to the base constructor as its first argument.</param>
 /// <param name="Excluded">Passed to the base constructor as its second argument.</param>
 public BayesEnumerableCategory(string Cat, StopWordsHandler Excluded)
     : base(Cat, Excluded)
 {
     // No additional state to initialise here.
 }