/// <summary>
/// Breaks <paramref name="input"/> into lower-cased word tokens, discarding delimiters,
/// blank fragments and stop words, and optionally stemming every remaining word.
/// </summary>
/// <param name="input">Text to tokenize. A null or empty value yields an empty list.</param>
/// <param name="stopWords">Handler whose <c>IsStopword</c> decides which tokens are dropped.</param>
/// <param name="useWordStemmer">
/// When true, each kept word is replaced by the result of <c>PorterStemmer.stemTerm</c>.
/// </param>
/// <returns>The surviving words, in the order they appear in the input.</returns>
public List<string> Partition(string input, StopWordsHandler stopWords, bool useWordStemmer = false)
{
    List<string> words = new List<string>();

    // Nothing to tokenize; previously a null input caused a NullReferenceException.
    if (string.IsNullOrEmpty(input))
    {
        return words;
    }

    // The capturing group makes Split hand back the delimiters as tokens too,
    // which is why they have to be filtered out again in the loop below.
    Regex delimiters = new Regex("([ \\t{}():;., \n])");

    // Invariant lower-casing keeps tokenization independent of the current culture
    // (e.g. Turkish would otherwise turn "I" into a dotless "ı").
    string[] tokens = delimiters.Split(input.ToLowerInvariant());

    foreach (string token in tokens)
    {
        // Skip blank fragments and the delimiter tokens produced by the capture group.
        if (token.Trim().Length == 0 || delimiters.IsMatch(token))
        {
            continue;
        }

        if (!stopWords.IsStopword(token))
        {
            words.Add(token);
        }
    }

    if (useWordStemmer)
    {
        // Process the word list with an implementation of Martin Porter's word stemmer algorithm.
        // This will reduce the words contained in the list to their "root" forms.
        PorterStemmer stemmer = new PorterStemmer();
        for (int i = 0; i < words.Count; i++)
        {
            words[i] = stemmer.stemTerm(words[i]);
        }
    }

    return words;
}
/// <summary>
/// Creates a category with the given name, the given stop-word handler
/// and an empty, sorted phrase-count table.
/// </summary>
/// <param name="cat">Value stored as the category name.</param>
/// <param name="excluded">Stop-word handler stored for this category.</param>
public BayesCategory(string cat, StopWordsHandler excluded)
{
    // Caller-supplied state first, then the fresh phrase table.
    m_Name = cat;
    m_Excluded = excluded;
    m_Phrases = new SortedDictionary<string, BayesPhraseCount>();
}
/// <summary>
/// Creates a classifier with no categories and a newly constructed stop-word handler.
/// </summary>
public BayesClassifier()
{
    m_ExcludedWords = new StopWordsHandler();
    m_Categories = new Dictionary<string, IBayesCategory>();
}
/// <summary>
/// Creates an enumerable category by passing both arguments straight to the base constructor.
/// </summary>
/// <param name="Cat">Category name forwarded to the base class.</param>
/// <param name="Excluded">Stop-word handler forwarded to the base class.</param>
public BayesEnumerableCategory(string Cat, StopWordsHandler Excluded)
    : base(Cat, Excluded)
{
    // No additional state to initialise here.
}