/// <summary>
        /// Computes a log score for <paramref name="text"/> under every category in
        /// <c>m_CategoryCollection</c>, stores each score in <paramref name="result"/>
        /// keyed by category name, and then normalizes the result.
        /// </summary>
        /// <param name="problem">Supplies the training-set vocabulary used to look up tokens.</param>
        /// <param name="text">Example whose token keys are scored; each key contributes once.</param>
        /// <param name="result">Receives one entry per category and is normalized before returning.</param>
        public override void Predict(TextClassificationProblem problem, TextExample text, ref ClassificationResult result)
        {
            Vocabulary vocabulary = problem.TrainingSetVocabulary;

            // Index of the current category in m_prob_vj and in the second
            // dimension of m_prob_wk_vj_log; advanced in step with the foreach.
            int categoryIndex = 0;

            foreach (Category category in m_CategoryCollection.Collection)
            {
                // Every category starts from the log of its m_prob_vj entry.
                double logScore = 0.0;
                logScore += Math.Log(m_prob_vj[categoryIndex]);

                foreach (string token in text.Tokens.Keys)
                {
                    // Tokens that are not in the training vocabulary add nothing.
                    if (!vocabulary.WordBag.ContainsKey(token))
                    {
                        continue;
                    }

                    // Row of this token in the per-word log-probability table.
                    int wordIndex = vocabulary.WordPositionMap[token];
                    logScore += m_prob_wk_vj_log[wordIndex, categoryIndex];
                }

                result.CategoryName2LogVMap.Add(category.Name, logScore);
                categoryIndex++;
            }

            result.Normalize();
        }
        /// <summary>
        /// Scores <paramref name="text"/> against each category of
        /// <c>m_CategoryCollection</c> in log space, records the score per category
        /// name in <paramref name="result"/>, and finally calls
        /// <c>result.Normalize()</c>.
        /// </summary>
        /// <param name="problem">Provides <c>TrainingSetVocabulary</c> for token lookups.</param>
        /// <param name="text">Example to classify; only its token keys are read.</param>
        /// <param name="result">Filled with one log score per category, then normalized.</param>
        public override void Predict(TextClassificationProblem problem, TextExample text, ref ClassificationResult result)
        {
            Vocabulary trainingVocabulary = problem.TrainingSetVocabulary;
            int categoryPos = 0; // position of the category in the probability tables

            foreach (Category current in m_CategoryCollection.Collection)
            {
                // Begin with the log of the category's own probability entry.
                double score = 0.0;
                score += Math.Log(m_prob_vj[categoryPos]);

                // Accumulate the table entry of every token known to the vocabulary.
                foreach (string token in text.Tokens.Keys)
                {
                    bool known = trainingVocabulary.WordBag.ContainsKey(token);
                    if (known)
                    {
                        int vocabularyPos = trainingVocabulary.WordPositionMap[token];
                        score += m_prob_wk_vj_log[vocabularyPos, categoryPos];
                    }
                }

                result.CategoryName2LogVMap.Add(current.Name, score);
                categoryPos++; // keep the table index aligned with the enumeration
            }

            result.Normalize();
        }
 /// <summary>
 /// Adds every token of <paramref name="example"/> to the word/count collection
 /// and increases the running occurrence total by each token's count.
 /// </summary>
 /// <param name="example">Example whose token-to-count entries are merged in.</param>
 public void AddExample(TextExample example)
 {
     foreach (string token in example.Tokens.Keys)
     {
         // Read the count once and reuse it for both updates.
         var occurrences = example.Tokens[token];

         Utility.AddToken(m_WordCountPairCollection, token, occurrences);
         m_WordBagOccurence += occurrences;
     }
 }
 /// <summary>
 /// Merges the tokens of <paramref name="example"/> into
 /// <c>m_WordCountPairCollection</c> and adds their counts to
 /// <c>m_WordBagOccurence</c>.
 /// </summary>
 /// <param name="example">Example providing the tokens and their counts.</param>
 public void AddExample(TextExample example)
 {
     foreach (string key in example.Tokens.Keys)
     {
         var tokenCount = example.Tokens[key];

         // Register the token with its count, then grow the overall total.
         Utility.AddToken(m_WordCountPairCollection, key, tokenCount);
         m_WordBagOccurence += tokenCount;
     }
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds a normalized sparse feature vector for <paramref name="example"/>
        /// and stores it in <c>example.X</c>. Each token found in
        /// <paramref name="voc"/> contributes the weight
        /// <c>tf * log(exampleCount / documentFrequency)</c>.
        /// </summary>
        /// <param name="example">Example whose tokens are weighted; its <c>X</c> is overwritten.</param>
        /// <param name="voc">Vocabulary giving word positions and per-word example occurrence counts.</param>
        /// <param name="exampleCount">Total number of examples, used as the numerator of the idf ratio.</param>
        private static void BuildExample(TextExample example, Vocabulary voc, int exampleCount)
        {
            int dimension = voc.Count;
            SparseVector vector = new SparseVector(dimension);

            foreach (string word in example.Tokens.Keys)
            {
                int pos = voc.GetWordPosition(word);
                if (pos == Constants.KEY_NOT_FOUND)
                {
                    // Words outside the vocabulary have no dimension in the vector.
                    continue;
                }

                // phi_i(x) = tf_i * log(idf_i) / k
                // tf_i:    number of occurrences of term i in document x
                // idf_i:   ratio between the total number of documents and the
                //              number of documents containing the term
                // k:       normalisation constant ensuring that ||phi|| = 1
                //              (applied by vector.Normalize() below)
                //
                // The cast forces floating-point division. With two integer
                // operands the ratio would be truncated before the logarithm
                // (e.g. 10 / 4 -> 2), and any word occurring in more than half
                // of the examples would get log(1) = 0.
                double idf = (double)exampleCount / voc.WordExampleOccurMap[word];
                double phi = example.Tokens[word] * Math.Log(idf);
                vector.Components.Add(pos, phi);
            }

            vector.Normalize();
            example.X = vector;
        }
 /// <summary>
 /// Forwards <paramref name="example"/> to the entry of <c>m_CategoryMap</c>
 /// stored under the example's label.
 /// </summary>
 /// <param name="example">Example to add; its <c>Label</c> selects the target entry.</param>
 public void AddExample(TextExample example)
 {
     // Look up the entry for this label first, then hand the example over.
     var target = m_CategoryMap[example.Label];
     target.AddExample(example);
 }
 /// <summary>
 /// Adds <paramref name="example"/> to the <c>m_CategoryMap</c> entry that
 /// matches <c>example.Label</c>.
 /// </summary>
 /// <param name="example">Example to route by its label.</param>
 public void AddExample(TextExample example)
 {
     var label = example.Label;

     // Delegate to the entry registered for this label.
     m_CategoryMap[label].AddExample(example);
 }