/// <summary>
/// Creates a new <see cref="BayesClassifier"/> and teaches it the tokenised
/// value of every entry in <c>this.Dictionary</c>, using the entry key as the
/// first argument to <c>TeachPhrases</c>. Clears <c>_dictionaryDirty</c> afterwards.
/// </summary>
/// <param name="useWordStemmer">Passed through unchanged to <c>TeachPhrases</c> for every entry.</param>
/// <returns>The newly created classifier after all entries have been taught.</returns>
private BayesClassifier InitializeResolver(bool useWordStemmer)
{
    BayesClassifier classifier = new BayesClassifier();

    foreach (KeyValuePair<string, string> kvp in this.Dictionary)
    {
        // A fresh Tokeniser and StopWordsHandler are created per entry, as before.
        // NOTE(review): the third Partition argument is hard-coded to false here while
        // other callers pass _useWordStemmer — presumably stemming is left to
        // TeachPhrases; confirm against Tokeniser.Partition.
        string[] documentInfo = new Tokeniser()
            .Partition(kvp.Value, new StopWordsHandler(), false)
            .ToArray();

        // kvp.Key is already a string, so no ToString() conversion is needed.
        classifier.TeachPhrases(kvp.Key, documentInfo, useWordStemmer);
    }

    // The classifier now reflects the current dictionary contents.
    _dictionaryDirty = false;
    return classifier;
}
/// <summary>
/// Collects the distinct terms produced by tokenising every value in
/// <paramref name="docs"/>, in order of first occurrence.
/// </summary>
/// <param name="docs">Documents to tokenise; only the values are read, the keys are ignored.</param>
/// <returns>
/// A list containing each term returned by <c>Tokeniser.Partition</c> exactly once,
/// ordered by where it was first seen.
/// </returns>
private List<string> GenerateTerms(Dictionary<string, string> docs)
{
    List<string> uniques = new List<string>();

    // Tracks membership in O(1); the list alone would make each lookup a linear
    // scan and the whole pass quadratic in the number of terms.
    HashSet<string> seen = new HashSet<string>();

    foreach (KeyValuePair<string, string> kvp in docs)
    {
        Tokeniser tokenizer = new Tokeniser();
        List<string> words = tokenizer.Partition(kvp.Value, _stopWords, _useWordStemmer);

        foreach (string word in words)
        {
            // HashSet.Add returns false when the word is already present, so the
            // list only ever receives the first occurrence of each term.
            if (seen.Add(word))
            {
                uniques.Add(word);
            }
        }
    }

    return uniques;
}
/// <summary>
/// Lower-cases <paramref name="input"/>, tokenises it, and maps every distinct
/// token to the value returned by <c>CountWords</c> for that token.
/// </summary>
/// <param name="input">Text to analyse.</param>
/// <returns>
/// A <see cref="Hashtable"/> keyed by distinct token, whose values are the results of
/// <c>CountWords(token, tokens)</c> over the sorted token list.
/// </returns>
private IDictionary GetWordFrequency(string input)
{
    string lowered = input.ToLower();
    List<string> tokens = new Tokeniser().Partition(lowered, _stopWords, _useWordStemmer);

    // The token list is sorted before it is handed to GetDistinctWords and CountWords.
    tokens.Sort();

    List<string> distinctTokens = GetDistinctWords(tokens);
    IDictionary frequencies = new Hashtable();

    foreach (string token in distinctTokens)
    {
        frequencies[token] = CountWords(token, tokens);
    }

    return frequencies;
}