예제 #1
0
        /// <summary>
        /// Trains this category from a single word or phrase.
        /// </summary>
        /// <remarks>
        /// When an exclusion filter is set and it reports the raw phrase as excluded,
        /// nothing is recorded. Otherwise the raw phrase is converted to its lookup key
        /// by <see cref="DePhrase(string)"/>; if no counter is stored under that key yet,
        /// one is created from the raw phrase and added. The counter's <c>Count</c> and
        /// <c>m_TotalWords</c> are then each incremented by one.
        /// </remarks>
        /// <param name="rawPhrase">The word or phrase exactly as it was encountered.</param>
        /// <seealso cref="DePhrase(string)"/>
        public void TeachPhrase(string rawPhrase)
        {
            // The exclusion filter is optional; it is consulted with the raw
            // (not yet de-phrased) text.
            bool isExcluded = (m_Excluded != null) && m_Excluded.IsExcluded(rawPhrase);

            if (!isExcluded)
            {
                string      key = DePhrase(rawPhrase);
                PhraseCount counter;
                bool        alreadyKnown = m_Phrases.TryGetValue(key, out counter);

                if (!alreadyKnown)
                {
                    // First sighting of this key: register a counter that
                    // remembers the raw spelling.
                    counter = new PhraseCount(rawPhrase);
                    m_Phrases.Add(key, counter);
                }

                counter.Count++;
                m_TotalWords++;
            }
        }
예제 #2
0
        /// <summary>
        /// Classifies a text by scoring it against every registered category.
        /// </summary>
        /// <remarks>
        /// The text is first loaded into a temporary <c>EnumerableCategory</c> (using the
        /// excluded-words filter). For each phrase entry of that temporary category and
        /// each registered category, <c>log(count / TotalWords)</c> is added to the
        /// category's score, where <c>count</c> is the category's count for the phrase;
        /// when the count is zero, 0.01 is used in its place so the logarithm stays finite.
        /// Finally <c>log(TotalWords / total words in all categories)</c> is added per
        /// category. A category whose <c>TotalWords</c> is not positive is given a score of
        /// <see cref="double.NegativeInfinity"/> (the divisions by zero would otherwise
        /// produce <c>NaN</c>).
        /// Every intermediate score is written to <see cref="System.Diagnostics.Trace"/>.
        /// </remarks>
        /// <param name="tr">Reader supplying the text to classify.</param>
        /// <returns>
        /// Classification values keyed by category name; the higher the value, the better
        /// the match.
        /// </returns>
        public Dictionary <string, double> Classify(System.IO.StreamReader tr)
        {
            Dictionary <string, double> score = new Dictionary <string, double>();

            // Every category starts at a log-score of zero.
            foreach (KeyValuePair <string, ICategory> cat in m_Categories)
            {
                score.Add(cat.Value.Name, 0.0);
            }

            // Load the text into a throw-away category so its phrases can be enumerated.
            EnumerableCategory words_in_file = new EnumerableCategory("", m_ExcludedWords);

            words_in_file.TeachCategory(tr);

            foreach (KeyValuePair <string, PhraseCount> kvp1 in words_in_file)
            {
                PhraseCount pc_in_file = kvp1.Value;
                foreach (KeyValuePair <string, ICategory> kvp in m_Categories)
                {
                    ICategory cat   = kvp.Value;
                    int       count = cat.GetPhraseCount(pc_in_file.RawPhrase);
                    if (0 < count)
                    {
                        score[cat.Name] += System.Math.Log((double)count / (double)cat.TotalWords);
                    }
                    else
                    {
                        // Phrase unknown to this category: substitute a small
                        // pseudo-count instead of taking log(0).
                        score[cat.Name] += System.Math.Log(0.01 / (double)cat.TotalWords);
                    }
                    System.Diagnostics.Trace.WriteLine(pc_in_file.RawPhrase.ToString() + "(" +
                                                       cat.Name + ")" + score[cat.Name]);
                }
            }

            // Loop-invariant: the grand total does not change while the
            // per-category terms below are added, so compute it once.
            double totalWordsInCategories = (double)this.CountTotalWordsInCategories();

            foreach (KeyValuePair <string, ICategory> kvp in m_Categories)
            {
                ICategory cat = kvp.Value;

                if (cat.TotalWords <= 0)
                {
                    // With no words in the category the per-phrase terms are
                    // log(x / 0) = +Infinity and this term is log(0) = -Infinity;
                    // their sum is NaN. Report -Infinity instead, which is also
                    // what such a category gets when the text has no phrases.
                    score[cat.Name] = double.NegativeInfinity;
                    continue;
                }

                score[cat.Name] += System.Math.Log((double)cat.TotalWords / totalWordsInCategories);
            }
            return score;
        }