/// <summary>
/// Scores <paramref name="text"/> against every category in <c>m_CategoryCollection</c>
/// and records one log-score per category name in <paramref name="result"/>.
/// </summary>
/// <remarks>
/// The score of category j is log(m_prob_vj[j]) plus m_prob_wk_vj_log[k, j] for every key of
/// <c>text.Tokens</c> that is present in the training vocabulary; each key contributes once
/// and keys missing from the vocabulary are skipped.
/// NOTE(review): the category index assumes <c>m_CategoryCollection.Collection</c> enumerates
/// in the same order that was used when the probability tables were filled - confirm.
/// </remarks>
/// <param name="problem">Supplies the training-set vocabulary used to map tokens to table rows.</param>
/// <param name="text">Example whose token keys are scored.</param>
/// <param name="result">Receives the per-category log-scores and is normalized before returning.</param>
public override void Predict(TextClassificationProblem problem, TextExample text, ref ClassificationResult result)
{
    Vocabulary vocabulary = problem.TrainingSetVocabulary;
    int categoryIndex = 0;

    foreach (Category category in m_CategoryCollection.Collection)
    {
        // Start from the prior of this category.
        double logScore = Math.Log(m_prob_vj[categoryIndex]);

        foreach (string token in text.Tokens.Keys)
        {
            if (!vocabulary.WordBag.ContainsKey(token))
            {
                continue; // token never seen in training
            }

            // Row of this token in the log-likelihood table.
            int wordIndex = vocabulary.WordPositionMap[token];
            logScore += m_prob_wk_vj_log[wordIndex, categoryIndex];
        }

        result.CategoryName2LogVMap.Add(category.Name, logScore);
        categoryIndex++;
    }

    result.Normalize();
}
/// <summary>
/// Scores <paramref name="text"/> against every category in <c>m_CategoryCollection</c>
/// and records one log-score per category name in <paramref name="result"/>.
/// </summary>
/// <remarks>
/// The score of category j is log(m_prob_vj[j]) plus m_prob_wk_vj_log[k, j] for every key of
/// <c>text.Tokens</c> that is present in the training vocabulary; each key contributes once
/// and keys missing from the vocabulary are skipped.
/// NOTE(review): the category index assumes <c>m_CategoryCollection.Collection</c> enumerates
/// in the same order that was used when the probability tables were filled - confirm.
/// </remarks>
/// <param name="problem">Supplies the training-set vocabulary used to map tokens to table rows.</param>
/// <param name="text">Example whose token keys are scored.</param>
/// <param name="result">Receives the per-category log-scores and is normalized before returning.</param>
public override void Predict(TextClassificationProblem problem, TextExample text, ref ClassificationResult result)
{
    Vocabulary vocabulary = problem.TrainingSetVocabulary;
    int categoryIndex = 0;

    foreach (Category category in m_CategoryCollection.Collection)
    {
        // Start from the prior of this category.
        double logScore = Math.Log(m_prob_vj[categoryIndex]);

        foreach (string token in text.Tokens.Keys)
        {
            if (!vocabulary.WordBag.ContainsKey(token))
            {
                continue; // token never seen in training
            }

            // Row of this token in the log-likelihood table.
            int wordIndex = vocabulary.WordPositionMap[token];
            logScore += m_prob_wk_vj_log[wordIndex, categoryIndex];
        }

        result.CategoryName2LogVMap.Add(category.Name, logScore);
        categoryIndex++;
    }

    result.Normalize();
}
/// <summary>
/// Measures classification accuracy on the validation set of <paramref name="problem"/>.
/// </summary>
/// <param name="problem">Supplies the validation set and is passed through to <c>Predict</c>.</param>
/// <returns>
/// The fraction of validation examples for which <c>result.Vnb</c> equals the example's label
/// name, or 0.0 when the validation set contains no examples.
/// </returns>
public double CrossValidate(TextClassificationProblem problem)
{
    ExampleSet validationSet = problem.ValidationSet;
    int numExample = validationSet.Examples.Count;
    int numCorrect = 0;

    foreach (TextExample example in validationSet.Examples)
    {
        ClassificationResult result = new ClassificationResult();
        this.Predict(problem, example, ref result);
        if (result.Vnb == example.Label.Name)
        {
            numCorrect++;
        }
    }

    // With no examples the ratio would be 0/0 == NaN; report 0.0 so callers
    // always receive a usable number.
    double correctRatio = numExample > 0 ? 1.0 * numCorrect / numExample : 0.0;
    Logger.Info(string.Format("Correct ratio: {0}", correctRatio));
    return correctRatio;
}
/// <summary>
/// Evaluates the classifier on the validation set of <paramref name="problem"/> using the
/// class chosen by <c>MinCostClassName</c>, and reports accuracy plus the two error kinds.
/// </summary>
/// <remarks>
/// All three outputs are fractions of the total number of validation examples.
/// When the validation set is empty, all three are 0.0 rather than NaN.
/// </remarks>
/// <param name="problem">Supplies the validation set and is passed through to <c>Predict</c>.</param>
/// <param name="correctRatio">Fraction of examples whose chosen class equals the label name.</param>
/// <param name="falsePositive">Fraction of examples classified as className[0] but labelled className[1].</param>
/// <param name="falseNegative">Fraction of examples classified as className[1] but labelled className[0].</param>
public void CrossValidate(TextClassificationProblem problem, out double correctRatio, out double falsePositive, out double falseNegative)
{
    ExampleSet validationSet = problem.ValidationSet;
    int numExample = validationSet.Examples.Count;
    int numCorrect = 0;
    int numFP = 0, numFN = 0;

    foreach (TextExample example in validationSet.Examples)
    {
        ClassificationResult result = new ClassificationResult();
        this.Predict(problem, example, ref result);
        string predicted = this.MinCostClassName(result);

        if (predicted == example.Label.Name)
        {
            numCorrect++;
        }
        else if (predicted == className[0] && example.Label.Name == className[1])
        {
            numFP++;
        }
        else if (predicted == className[1] && example.Label.Name == className[0])
        {
            numFN++;
        }
    }

    if (numExample > 0)
    {
        correctRatio = 1.0 * numCorrect / numExample;
        falsePositive = 1.0 * numFP / numExample;
        falseNegative = 1.0 * numFN / numExample;
    }
    else
    {
        // An empty validation set would make every ratio 0/0 == NaN.
        correctRatio = 0.0;
        falsePositive = 0.0;
        falseNegative = 0.0;
    }

    Logger.Info("Correct ratio: {0}", correctRatio);
    Logger.Info("False Positive: {0}", falsePositive);
    Logger.Info("False Negative: {0}", falseNegative);
}
/// <summary>
/// Evaluates the classifier on the validation set of <paramref name="problem"/> using the
/// class chosen by <c>MinCostClassName</c>, and reports accuracy plus the two error kinds.
/// </summary>
/// <remarks>
/// All three outputs are fractions of the total number of validation examples.
/// When the validation set is empty, all three are 0.0 rather than NaN.
/// </remarks>
/// <param name="problem">Supplies the validation set and is passed through to <c>Predict</c>.</param>
/// <param name="correctRatio">Fraction of examples whose chosen class equals the label name.</param>
/// <param name="falsePositive">Fraction of examples classified as className[0] but labelled className[1].</param>
/// <param name="falseNegative">Fraction of examples classified as className[1] but labelled className[0].</param>
public void CrossValidate(TextClassificationProblem problem, out double correctRatio, out double falsePositive, out double falseNegative)
{
    ExampleSet validationSet = problem.ValidationSet;
    int numExample = validationSet.Examples.Count;
    int numCorrect = 0;
    int numFP = 0, numFN = 0;

    foreach (TextExample example in validationSet.Examples)
    {
        ClassificationResult result = new ClassificationResult();
        this.Predict(problem, example, ref result);
        string predicted = this.MinCostClassName(result);

        if (predicted == example.Label.Name)
        {
            numCorrect++;
        }
        else if (predicted == className[0] && example.Label.Name == className[1])
        {
            numFP++;
        }
        else if (predicted == className[1] && example.Label.Name == className[0])
        {
            numFN++;
        }
    }

    if (numExample > 0)
    {
        correctRatio = 1.0 * numCorrect / numExample;
        falsePositive = 1.0 * numFP / numExample;
        falseNegative = 1.0 * numFN / numExample;
    }
    else
    {
        // An empty validation set would make every ratio 0/0 == NaN.
        correctRatio = 0.0;
        falsePositive = 0.0;
        falseNegative = 0.0;
    }

    Logger.Info("Correct ratio: {0}", correctRatio);
    Logger.Info("False Positive: {0}", falsePositive);
    Logger.Info("False Negative: {0}", falseNegative);
}
/// <summary>
/// Measures classification accuracy on the validation set of <paramref name="problem"/>.
/// </summary>
/// <param name="problem">Supplies the validation set and is passed through to <c>Predict</c>.</param>
/// <returns>
/// The fraction of validation examples for which <c>result.Vnb</c> equals the example's label
/// name, or 0.0 when the validation set contains no examples.
/// </returns>
public double CrossValidate(TextClassificationProblem problem)
{
    ExampleSet validationSet = problem.ValidationSet;
    int numExample = validationSet.Examples.Count;
    int numCorrect = 0;

    foreach (TextExample example in validationSet.Examples)
    {
        ClassificationResult result = new ClassificationResult();
        this.Predict(problem, example, ref result);
        if (result.Vnb == example.Label.Name)
        {
            numCorrect++;
        }
    }

    // With no examples the ratio would be 0/0 == NaN; report 0.0 so callers
    // always receive a usable number.
    double correctRatio = numExample > 0 ? 1.0 * numCorrect / numExample : 0.0;
    Logger.Info(string.Format("Correct ratio: {0}", correctRatio));
    return correctRatio;
}
/// <summary>
/// Fills the model tables: the priors P(vj) in <c>m_prob_vj</c> and the
/// log-likelihoods log P(wk|vj) in <c>m_prob_wk_vj_log</c>.
///
/// Table sizes (c = number of categories, n = vocabulary size):
///     P(vj)       c
///     P(wk|vj)    n*c
/// </summary>
/// <param name="problem">Supplies the training vocabulary, training set and category collection.</param>
private void CalculateProbabilities(TextClassificationProblem problem)
{
    Vocabulary vocabulary = problem.TrainingSetVocabulary;
    ExampleSet trainingSet = problem.TrainingSet;
    int categoryCount = problem.CategoryCount;
    CategoryCollection categories = problem.CategoryCollection;

    // Step 1: allocate the tables.
    int vocabularySize = vocabulary.Count;
    m_prob_vj = new double[categoryCount];
    m_prob_wk_vj_log = new double[vocabularySize, categoryCount];

    // Step 2: P(vj) - every category receives the same prior.
    for (int index = 0; index < categoryCount; index++)
    {
        m_prob_vj[index] = 1.0 / categoryCount;
    }

    // Step 3: gather per-category word counts from the training examples.
    NaiveBayesCategoryCollection perCategory = new NaiveBayesCategoryCollection(categories);
    foreach (Example example in trainingSet.Examples)
    {
        perCategory.AddExample((TextExample)example);
    }

    // P(wk|vj) = (nc + 1) / (n + |Vocabulary|)
    //   nc: occurrences of word wk within category vj
    //   n : number of word positions in category vj
    // The table is indexed [word, category]; rows follow the enumeration
    // order of the vocabulary's word bag keys.
    int categoryIndex = 0;
    foreach (NaiveBayesCategory category in perCategory.CategorySet)
    {
        int wordIndex = 0;
        foreach (string word in vocabulary.WordBag.Keys)
        {
            int occurrences = category.WordBag.ContainsKey(word) ? category.WordBag[word] : 0;
            int positions = category.WordBagOccurence;

            m_prob_wk_vj_log[wordIndex, categoryIndex] =
                Math.Log((occurrences + 1.0) / (positions + vocabularySize));
            wordIndex++;
        }

        categoryIndex++;
    }
}
/// <summary>
/// Trains the classifier on <paramref name="problem"/> by delegating to
/// <c>CalculateProbabilities</c>.
/// </summary>
/// <param name="problem">The classification problem forwarded unchanged.</param>
public void Train(TextClassificationProblem problem)
{
    CalculateProbabilities(problem);
}
/// <summary>
/// Trains the classifier on <paramref name="problem"/> by delegating to
/// <c>CalculateProbabilities</c>.
/// </summary>
/// <param name="problem">The classification problem forwarded unchanged.</param>
public void Train(TextClassificationProblem problem)
{
    CalculateProbabilities(problem);
}
/// <summary>
/// Fills the model tables: the priors P(vj) in <c>m_prob_vj</c> and the
/// log-likelihoods log P(wk|vj) in <c>m_prob_wk_vj_log</c>.
///
/// Table sizes (c = number of categories, n = vocabulary size):
///     P(vj)       c
///     P(wk|vj)    n*c
/// </summary>
/// <param name="problem">Supplies the training vocabulary, training set and category collection.</param>
private void CalculateProbabilities(TextClassificationProblem problem)
{
    Vocabulary vocabulary = problem.TrainingSetVocabulary;
    ExampleSet trainingSet = problem.TrainingSet;
    int categoryCount = problem.CategoryCount;
    CategoryCollection categories = problem.CategoryCollection;

    // Step 1: allocate the tables.
    int vocabularySize = vocabulary.Count;
    m_prob_vj = new double[categoryCount];
    m_prob_wk_vj_log = new double[vocabularySize, categoryCount];

    // Step 2: P(vj) - every category receives the same prior.
    for (int index = 0; index < categoryCount; index++)
    {
        m_prob_vj[index] = 1.0 / categoryCount;
    }

    // Step 3: gather per-category word counts from the training examples.
    NaiveBayesCategoryCollection perCategory = new NaiveBayesCategoryCollection(categories);
    foreach (Example example in trainingSet.Examples)
    {
        perCategory.AddExample((TextExample)example);
    }

    // P(wk|vj) = (nc + 1) / (n + |Vocabulary|)
    //   nc: occurrences of word wk within category vj
    //   n : number of word positions in category vj
    // The table is indexed [word, category]; rows follow the enumeration
    // order of the vocabulary's word bag keys.
    int categoryIndex = 0;
    foreach (NaiveBayesCategory category in perCategory.CategorySet)
    {
        int wordIndex = 0;
        foreach (string word in vocabulary.WordBag.Keys)
        {
            int occurrences = category.WordBag.ContainsKey(word) ? category.WordBag[word] : 0;
            int positions = category.WordBagOccurence;

            m_prob_wk_vj_log[wordIndex, categoryIndex] =
                Math.Log((occurrences + 1.0) / (positions + vocabularySize));
            wordIndex++;
        }

        categoryIndex++;
    }
}