/// <summary>
/// Creates a fresh <see cref="BayesClassifier"/> and teaches it every entry of
/// <c>this.Dictionary</c>, then resets the <c>_dictionaryDirty</c> flag.
/// </summary>
/// <param name="useWordStemmer">Forwarded unchanged to <c>TeachPhrases</c> for every entry.</param>
/// <returns>The newly created classifier after all entries have been taught.</returns>
private BayesClassifier InitializeResolver(bool useWordStemmer)
        {
            BayesClassifier classifier = new BayesClassifier();

            foreach (KeyValuePair <string, string> kvp in this.Dictionary)
            {
                // The third Partition argument is fixed to false here; useWordStemmer
                // is only handed to TeachPhrases.
                string[] documentInfo = new Tokeniser().Partition(
                    kvp.Value, new StopWordsHandler(), false).ToArray();

                // The entry key is already a string, so it is passed as-is.
                classifier.TeachPhrases(kvp.Key, documentInfo, useWordStemmer);
            }

            // The classifier now reflects the current dictionary contents.
            _dictionaryDirty = false;

            return classifier;
        }
Пример #2
0
        private List <string> GenerateTerms(Dictionary <string, string> docs)
        {
            List <string> uniques = new List <string>();

            foreach (KeyValuePair <string, string> kvp in docs)
            {
                Tokeniser     tokenizer = new Tokeniser();
                List <string> words     = tokenizer.Partition(kvp.Value, _stopWords, _useWordStemmer);

                foreach (string word in words)
                {
                    if (!uniques.Contains(word))
                    {
                        uniques.Add(word);
                    }
                }
            }
            return(uniques);
        }
Пример #3
0
        private IDictionary GetWordFrequency(string input)
        {
            string convertedInput = input.ToLower();

            Tokeniser     tokenizer = new Tokeniser();
            List <string> words     = tokenizer.Partition(convertedInput, _stopWords, _useWordStemmer);

            words.Sort();

            List <string> distinctWords = GetDistinctWords(words);

            IDictionary result = new Hashtable();

            foreach (string word in distinctWords)
            {
                object tmp;
                tmp          = CountWords(word, words);
                result[word] = tmp;
            }

            return(result);
        }