Exemplos de código com TextClassificationProblem, NPatternRecognizer.Algorithm.NaiveBayes em C# (CSharp)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: CostSensitiveNaiveBayes.cs Projeto: foamliu/NPatternRecognizer

        /// <summary>
        /// Scores <paramref name="text"/> against every category in
        /// m_CategoryCollection and stores one log-score per category name in
        /// <paramref name="result"/>, then normalizes the result.
        /// </summary>
        /// <param name="problem">Supplies the training-set vocabulary used to look up tokens.</param>
        /// <param name="text">The example whose token keys are scored.</param>
        /// <param name="result">Receives one CategoryName2LogVMap entry per category; Normalize() is called on it at the end.</param>
        public override void Predict(TextClassificationProblem problem, TextExample text, ref ClassificationResult result)
        {
            Vocabulary vocabulary = problem.TrainingSetVocabulary;

            // Column index into m_prob_vj / m_prob_wk_vj_log; advances with the category enumeration.
            int categoryIndex = 0;

            foreach (Category category in m_CategoryCollection.Collection)
            {
                // Start from the log of the category prior.
                double logScore = Math.Log(m_prob_vj[categoryIndex]);

                foreach (string token in text.Tokens.Keys)
                {
                    // Tokens absent from the training vocabulary contribute nothing.
                    if (!vocabulary.WordBag.ContainsKey(token))
                    {
                        continue;
                    }

                    // Row of this token in the log-likelihood table.
                    int wordIndex = vocabulary.WordPositionMap[token];
                    logScore += m_prob_wk_vj_log[wordIndex, categoryIndex];
                }

                result.CategoryName2LogVMap.Add(category.Name, logScore);
                categoryIndex++;
            }

            result.Normalize();
        }

Exemplo n.º 2

0

Exibir arquivo

Arquivo: CostSensitiveNaiveBayes.cs Projeto: blat001/Achievement-Sherpa

        /// <summary>
        /// Computes, for each category, the log prior plus the summed
        /// log-likelihoods of the tokens of <paramref name="text"/> that occur in
        /// the training vocabulary, and records it under the category name.
        /// </summary>
        /// <param name="problem">Source of the training-set vocabulary.</param>
        /// <param name="text">Example to classify; only its token keys are read.</param>
        /// <param name="result">Filled with one log-score per category and then normalized.</param>
        public override void Predict(TextClassificationProblem problem, TextExample text, ref ClassificationResult result)
        {
            Vocabulary trainingVocabulary = problem.TrainingSetVocabulary;
            int col = 0;    // category position, in enumeration order

            foreach (Category current in m_CategoryCollection.Collection)
            {
                double score = Math.Log(m_prob_vj[col]);

                foreach (string word in text.Tokens.Keys)
                {
                    bool known = trainingVocabulary.WordBag.ContainsKey(word);
                    if (known)
                    {
                        // Map the word to its row in the precomputed log table.
                        int row = trainingVocabulary.WordPositionMap[word];
                        score += m_prob_wk_vj_log[row, col];
                    }
                }

                result.CategoryName2LogVMap.Add(current.Name, score);
                col++;
            }

            result.Normalize();
        }

Exemplo n.º 3

0

Exibir arquivo

        /// <summary>
        /// Runs Predict on every example of the problem's validation set and
        /// reports the share of examples whose result.Vnb equals the example's
        /// label name.
        /// </summary>
        /// <param name="problem">Supplies the validation set and is passed through to Predict.</param>
        /// <returns>Matches divided by the number of validation examples; the value is also logged.</returns>
        public double CrossValidate(TextClassificationProblem problem)
        {
            ExampleSet validationSet = problem.ValidationSet;

            int total = validationSet.Examples.Count;
            int hits = 0;

            foreach (TextExample example in validationSet.Examples)
            {
                ClassificationResult result = new ClassificationResult();
                Predict(problem, example, ref result);

                hits += (result.Vnb == example.Label.Name) ? 1 : 0;
            }

            // Floating-point division: an empty validation set yields NaN rather than throwing.
            double accuracy = 1.0 * hits / total;

            Logger.Info(string.Format("Correct ratio: {0}", accuracy));

            return accuracy;
        }

Exemplo n.º 4

0

Exibir arquivo

Arquivo: CostSensitiveNaiveBayes.cs Projeto: blat001/Achievement-Sherpa

        /// <summary>
        /// Evaluates the classifier on the validation set using
        /// MinCostClassName to pick the predicted class, and reports the
        /// correct, false-positive and false-negative ratios.
        /// </summary>
        /// <param name="problem">Supplies the validation set and is passed through to Predict.</param>
        /// <param name="correctRatio">Share of examples whose predicted class equals the label name.</param>
        /// <param name="falsePositive">Share of examples predicted as className[0] but labelled className[1].</param>
        /// <param name="falseNegative">Share of examples predicted as className[1] but labelled className[0].</param>
        public void CrossValidate(TextClassificationProblem problem, out double correctRatio, out double falsePositive, out double falseNegative)
        {
            ExampleSet validationSet = problem.ValidationSet;
            int total = validationSet.Examples.Count;

            int correctCount = 0;
            int fpCount = 0;
            int fnCount = 0;

            foreach (TextExample example in validationSet.Examples)
            {
                ClassificationResult result = new ClassificationResult();
                Predict(problem, example, ref result);
                string predicted = MinCostClassName(result);

                if (predicted == example.Label.Name)
                {
                    correctCount++;
                    continue;
                }

                if (predicted == className[0] && example.Label.Name == className[1])
                {
                    fpCount++;
                    continue;
                }

                if (predicted == className[1] && example.Label.Name == className[0])
                {
                    fnCount++;
                }
            }

            // All three ratios share the same denominator: the validation set size.
            correctRatio = 1.0 * correctCount / total;
            falsePositive = 1.0 * fpCount / total;
            falseNegative = 1.0 * fnCount / total;

            Logger.Info("Correct ratio: {0}", correctRatio);
            Logger.Info("False Positive: {0}", falsePositive);
            Logger.Info("False Negative: {0}", falseNegative);
        }

Exemplo n.º 5

0

Exibir arquivo

Arquivo: CostSensitiveNaiveBayes.cs Projeto: foamliu/NPatternRecognizer

        /// <summary>
        /// Classifies each validation example (Predict followed by
        /// MinCostClassName) and outputs the ratios of correct predictions,
        /// false positives and false negatives over the whole validation set.
        /// </summary>
        /// <param name="problem">Provides the validation set; also forwarded to Predict.</param>
        /// <param name="correctRatio">Predictions equal to the label name, divided by the example count.</param>
        /// <param name="falsePositive">Predictions of className[0] for examples labelled className[1], divided by the example count.</param>
        /// <param name="falseNegative">Predictions of className[1] for examples labelled className[0], divided by the example count.</param>
        public void CrossValidate(TextClassificationProblem problem, out double correctRatio, out double falsePositive, out double falseNegative)
        {
            ExampleSet samples = problem.ValidationSet;

            int sampleCount = samples.Examples.Count;
            int right = 0;
            int wrongAsFirst = 0;   // predicted className[0], labelled className[1]
            int wrongAsSecond = 0;  // predicted className[1], labelled className[0]

            foreach (TextExample sample in samples.Examples)
            {
                ClassificationResult outcome = new ClassificationResult();
                Predict(problem, sample, ref outcome);
                string chosen = MinCostClassName(outcome);

                if (chosen == sample.Label.Name)
                {
                    right++;
                }
                else
                {
                    if (chosen == className[0] && sample.Label.Name == className[1])
                    {
                        wrongAsFirst++;
                    }
                    else if (chosen == className[1] && sample.Label.Name == className[0])
                    {
                        wrongAsSecond++;
                    }
                }
            }

            correctRatio = 1.0 * right / sampleCount;
            falsePositive = 1.0 * wrongAsFirst / sampleCount;
            falseNegative = 1.0 * wrongAsSecond / sampleCount;

            Logger.Info("Correct ratio: {0}", correctRatio);
            Logger.Info("False Positive: {0}", falsePositive);
            Logger.Info("False Negative: {0}", falseNegative);
        }

Exemplo n.º 6

0

Exibir arquivo

Arquivo: NaiveBayes.cs Projeto: blat001/Achievement-Sherpa

        /// <summary>
        /// Measures accuracy on the validation set: each example is passed to
        /// Predict and counted as correct when result.Vnb equals its label name.
        /// </summary>
        /// <param name="problem">Provides the validation set; also forwarded to Predict.</param>
        /// <returns>Correct count divided by the validation example count (logged before returning).</returns>
        public double CrossValidate(TextClassificationProblem problem)
        {
            ExampleSet samples = problem.ValidationSet;
            int sampleCount = samples.Examples.Count;

            int matched = 0;
            foreach (TextExample sample in samples.Examples)
            {
                ClassificationResult outcome = new ClassificationResult();
                Predict(problem, sample, ref outcome);

                if (outcome.Vnb != example_label(sample))
                {
                    continue;
                }

                matched++;
            }

            double ratio = 1.0 * matched / sampleCount;
            Logger.Info(string.Format("Correct ratio: {0}", ratio));

            return ratio;
        }

Exemplo n.º 7

0

Exibir arquivo

Arquivo: NaiveBayes.cs Projeto: blat001/Achievement-Sherpa

        /// <summary>
        /// Calculate P(vj) and log P(wk|vj) from the training set and store
        /// them in m_prob_vj and m_prob_wk_vj_log.
        ///
        /// In Total:
        /// Variables   Num
        /// P(vj)       c
        /// P(wk|vj)    n*c
        ///
        /// </summary>
        /// <param name="problem">Supplies the training set, its vocabulary and the category collection.</param>
        private void CalculateProbabilities(TextClassificationProblem problem)
        {
            Vocabulary voc = problem.TrainingSetVocabulary;
            ExampleSet t_Set = problem.TrainingSet;   // training set
            int numCategory = problem.CategoryCount;
            CategoryCollection categoryCollection = problem.CategoryCollection;

            // Step1: Allocate the tables.
            //
            int numVocabulary = voc.Count;
            m_prob_vj = new double[numCategory];
            m_prob_wk_vj_log = new double[numVocabulary, numCategory];

            // Step2: P(vj)
            //
            // Every category receives the same prior, 1 / numCategory.
            for (int i = 0; i < numCategory; i++)
            {
                m_prob_vj[i] = 1.0 / numCategory;
            }

            // Step3: P(wk|vj)
            //
            // Accumulate the per-category word counts from the training examples.
            NaiveBayesCategoryCollection collection = new NaiveBayesCategoryCollection(categoryCollection);
            foreach (Example example in t_Set.Examples)
            {
                collection.AddExample((TextExample)example);
            }

            //  P(wk|vj) = (nc+1)/(n+|Vocabulary|)
            //
            //  nc: the occurrence of wk in the n positions.
            //  n:  word position numbers for category vj.
            //
            // k: index in vocabulary;
            // j: index in categories
            //
            // NOTE(review): k is the enumeration position within voc.WordBag.Keys and
            // j the enumeration position within collection.CategorySet; any code that
            // indexes m_prob_wk_vj_log must use the same orderings — confirm.
            int j = 0;

            foreach (NaiveBayesCategory c in collection.CategorySet)
            {
                // n and the smoothing denominator do not depend on the word,
                // so read them once per category instead of once per word.
                int n = c.WordBagOccurence;
                double denominator = n + numVocabulary;

                int k = 0;

                foreach (string word in voc.WordBag.Keys)
                {
                    // Single lookup; a word never seen in this category counts as 0.
                    int nc;
                    if (!c.WordBag.TryGetValue(word, out nc))
                    {
                        nc = 0;
                    }

                    m_prob_wk_vj_log[k, j] = Math.Log((nc + 1.0) / denominator);

                    k++;    // next word
                }
                j++;    // next category
            }
        }

Exemplo n.º 8

0

Exibir arquivo

Arquivo: NaiveBayes.cs Projeto: blat001/Achievement-Sherpa

 /// <summary>
 /// Trains the classifier on <paramref name="problem"/> by delegating to
 /// CalculateProbabilities.
 /// </summary>
 /// <param name="problem">The classification problem forwarded to CalculateProbabilities.</param>
 public void Train(TextClassificationProblem problem)
 {
     CalculateProbabilities(problem);
 }

Exemplo n.º 9

0

Exibir arquivo

 /// <summary>
 /// Entry point for training: forwards <paramref name="problem"/> to
 /// CalculateProbabilities and does nothing else.
 /// </summary>
 /// <param name="problem">The classification problem to train on.</param>
 public void Train(TextClassificationProblem problem)
 {
     CalculateProbabilities(problem);
 }

Exemplo n.º 10

0

Exibir arquivo

        /// <summary>
        /// Calculate P(vj) and log P(wk|vj) from the training set, filling
        /// m_prob_vj and m_prob_wk_vj_log.
        ///
        /// In Total:
        /// Variables   Num
        /// P(vj)       c
        /// P(wk|vj)    n*c
        ///
        /// </summary>
        /// <param name="problem">Provides the training set, its vocabulary and the categories.</param>
        private void CalculateProbabilities(TextClassificationProblem problem)
        {
            ExampleSet         t_Set; // training set
            int                numCategory;
            CategoryCollection categoryCollection;
            Vocabulary         voc;

            voc                = problem.TrainingSetVocabulary;
            t_Set              = problem.TrainingSet;
            numCategory        = problem.CategoryCount;
            categoryCollection = problem.CategoryCollection;

            // Step1: Allocate the probability tables.
            //
            int numVocabulary = voc.Count;

            m_prob_vj        = new double[numCategory];
            m_prob_wk_vj_log = new double[numVocabulary, numCategory];

            // Step2: P(vj)
            //
            // The prior is uniform: 1 / numCategory for every category.
            for (int i = 0; i < numCategory; i++)
            {
                m_prob_vj[i] = 1.0 / numCategory;
            }

            // Step3: P(wk|vj)
            //
            // Feed every training example into the per-category word counts.
            NaiveBayesCategoryCollection collection = new NaiveBayesCategoryCollection(categoryCollection);

            foreach (Example example in t_Set.Examples)
            {
                collection.AddExample((TextExample)example);
            }

            //  P(wk|vj) = (nc+1)/(n+|Vocabulary|)
            //
            //  nc: the occurrence of wk in the n positions.
            //  n:  word position numbers for category vj.
            //
            // k: index in vocabulary;
            // j: index in categories
            //
            // NOTE(review): rows follow the enumeration order of voc.WordBag.Keys and
            // columns the enumeration order of collection.CategorySet; readers of
            // m_prob_wk_vj_log are assumed to use matching indices — confirm.
            int j = 0;

            foreach (NaiveBayesCategory c in collection.CategorySet)
            {
                // Loop-invariant for this category: read the position count once
                // and precompute the smoothing denominator.
                int    n           = c.WordBagOccurence;
                double denominator = n + numVocabulary;

                int k = 0;

                foreach (string word in voc.WordBag.Keys)
                {
                    // One dictionary lookup instead of ContainsKey + indexer;
                    // words missing from this category count as zero.
                    int nc;
                    if (!c.WordBag.TryGetValue(word, out nc))
                    {
                        nc = 0;
                    }

                    m_prob_wk_vj_log[k, j] = Math.Log((nc + 1.0) / denominator);

                    k++; // next word
                }
                j++;     // next category
            }
        }

Exemplos de NPatternRecognizer.Algorithm.NaiveBayes TextClassificationProblem em C# (CSharp)