/// <summary>
/// Scores <paramref name="text"/> against every category in
/// <c>m_CategoryCollection.Collection</c> and stores one log-score per category name in
/// <c>result.CategoryName2LogVMap</c>, then calls <c>result.Normalize()</c>.
/// </summary>
/// <param name="problem">Supplies the training-set vocabulary used to look tokens up.</param>
/// <param name="text">The example whose token keys are scored.</param>
/// <param name="result">Receives one map entry per category; normalized before returning.</param>
/// <remarks>
/// The score of a category is <c>Math.Log(m_prob_vj[j])</c> plus the sum of
/// <c>m_prob_wk_vj_log[k, j]</c> over the token keys of the text that are present in the
/// vocabulary's <c>WordBag</c>. Tokens missing from <c>WordBag</c> are skipped.
/// NOTE(review): the loop walks <c>text.Tokens.Keys</c>, so each key contributes once; if the
/// values of <c>Tokens</c> are occurrence counts they are not used here - confirm this is intended.
/// </remarks>
public override void Predict(TextClassificationProblem problem, TextExample text, ref ClassificationResult result)
{
    Vocabulary vocabulary = problem.TrainingSetVocabulary;

    // Position of the current category; used as the index into m_prob_vj and as the
    // second index into m_prob_wk_vj_log.
    int categoryIndex = 0;

    foreach (Category category in m_CategoryCollection.Collection)
    {
        // Start from the log of this category's entry in m_prob_vj.
        double logScore = Math.Log(m_prob_vj[categoryIndex]);

        foreach (string token in text.Tokens.Keys)
        {
            // Tokens that are not in the training vocabulary add nothing to the score.
            if (!vocabulary.WordBag.ContainsKey(token))
            {
                continue;
            }

            // Map the token to its vocabulary position, then add the tabulated
            // log-probability for (word, category).
            int wordIndex = vocabulary.WordPositionMap[token];
            logScore += m_prob_wk_vj_log[wordIndex, categoryIndex];
        }

        result.CategoryName2LogVMap.Add(category.Name, logScore);
        categoryIndex++;
    }

    result.Normalize();
}
/// <summary>
/// Measures accuracy on the validation set of <c>m_problem</c>: the fraction of examples for
/// which <c>PredictText(example)</c> equals <c>example.Label.Id</c>.
/// </summary>
/// <returns>
/// Number of correct predictions divided by the number of validation examples. When the
/// validation set is empty the division is 0.0 / 0 and the result is <c>double.NaN</c>.
/// </returns>
/// <remarks>The ratio is also written to the log via <c>Logger.Info</c>.</remarks>
public double CrossValidate()
{
    ExampleSet validationSet = this.m_problem.ValidationSet;
    int numExample = validationSet.Examples.Count;
    int numCorrect = 0;

    foreach (Example example in validationSet.Examples)
    {
        // The single-argument PredictText returns the predicted label id directly, so no
        // ClassificationResult needs to be allocated per example.
        if (this.PredictText(example) == example.Label.Id)
        {
            numCorrect++;
        }
    }

    // Multiply by 1.0 first so the division is carried out in floating point.
    double correctRatio = 1.0 * numCorrect / numExample;
    Logger.Info(string.Format("Correct ratio: {0}", correctRatio));
    return correctRatio;
}
/// <summary>
/// Evaluates the classifier on <c>problem.ValidationSet</c> using the minimum-cost decision
/// from <c>MinCostClassName</c>, and reports accuracy together with false-positive and
/// false-negative rates.
/// </summary>
/// <param name="problem">Problem providing the validation set and passed on to <c>Predict</c>.</param>
/// <param name="correctRatio">Fraction of examples whose chosen class name equals the label name.</param>
/// <param name="falsePositive">
/// Fraction of examples classified as <c>className[0]</c> whose label is <c>className[1]</c>.
/// </param>
/// <param name="falseNegative">
/// Fraction of examples classified as <c>className[1]</c> whose label is <c>className[0]</c>.
/// </param>
/// <remarks>
/// All three fractions are taken over the total number of validation examples, and all three
/// are written to the log. A misclassified example that matches neither pattern above is
/// counted in none of the totals. With an empty validation set the divisions are 0.0 / 0,
/// which yields <c>double.NaN</c>.
/// </remarks>
public void CrossValidate(TextClassificationProblem problem, out double correctRatio, out double falsePositive, out double falseNegative)
{
    ExampleSet validationSet = problem.ValidationSet;
    int total = validationSet.Examples.Count;

    int hits = 0;
    int falsePositives = 0;
    int falseNegatives = 0;

    foreach (TextExample example in validationSet.Examples)
    {
        ClassificationResult result = new ClassificationResult();
        this.Predict(problem, example, ref result);
        string predicted = this.MinCostClassName(result);

        if (predicted == example.Label.Name)
        {
            hits++;
            continue;
        }

        if (predicted == className[0] && example.Label.Name == className[1])
        {
            falsePositives++;
            continue;
        }

        if (predicted == className[1] && example.Label.Name == className[0])
        {
            falseNegatives++;
        }
    }

    // Cast the numerator so each division is carried out in floating point.
    correctRatio = (double)hits / total;
    falsePositive = (double)falsePositives / total;
    falseNegative = (double)falseNegatives / total;

    Logger.Info("Correct ratio: {0}", correctRatio);
    Logger.Info("False Positive: {0}", falsePositive);
    Logger.Info("False Negative: {0}", falseNegative);
}
/// <summary>
/// Measures accuracy on <c>problem.ValidationSet</c>: runs <c>Predict</c> on every example and
/// counts those for which <c>result.Vnb</c> equals the example's label name.
/// </summary>
/// <param name="problem">Problem providing the validation set and passed on to <c>Predict</c>.</param>
/// <returns>
/// Number of matches divided by the number of validation examples; <c>double.NaN</c> when the
/// validation set is empty (0.0 / 0).
/// </returns>
/// <remarks>
/// The ratio is also written to the log. NOTE(review): <c>Vnb</c> is presumably the name of
/// the best-scoring category set by <c>Normalize</c> - confirm against ClassificationResult.
/// </remarks>
public double CrossValidate(TextClassificationProblem problem)
{
    ExampleSet validationSet = problem.ValidationSet;
    int total = validationSet.Examples.Count;
    int hits = 0;

    foreach (TextExample example in validationSet.Examples)
    {
        // A fresh result per example: Predict fills it with this example's scores.
        ClassificationResult result = new ClassificationResult();
        this.Predict(problem, example, ref result);

        bool isCorrect = result.Vnb == example.Label.Name;
        if (isCorrect)
        {
            hits++;
        }
    }

    // Cast the numerator so the division is carried out in floating point.
    double correctRatio = (double)hits / total;
    Logger.Info(string.Format("Correct ratio: {0}", correctRatio));
    return correctRatio;
}
/// <summary>
/// Picks, between <c>className[0]</c> and <c>className[1]</c>, the class whose weighted cost is
/// smaller.
/// </summary>
/// <param name="result">Supplies the per-class values through <c>CategoryName2LogVMap</c>.</param>
/// <returns>
/// <c>className[0]</c> when its cost is less than or equal to that of <c>className[1]</c>
/// (ties go to index 0); otherwise <c>className[1]</c>.
/// </returns>
/// <remarks>
/// The cost of choosing class <c>i</c> is the sum over <c>j</c> of
/// <c>costMat[i, j] * result.CategoryName2LogVMap[className[j]]</c>. Only two classes are
/// handled.
/// </remarks>
private string MinCostClassName(ClassificationResult result)
{
    const int classCount = 2;
    double[] weightedCost = new double[classCount];

    for (int chosen = 0; chosen < classCount; chosen++)
    {
        weightedCost[chosen] = 0;

        for (int actual = 0; actual < classCount; actual++)
        {
            weightedCost[chosen] += costMat[chosen, actual] * result.CategoryName2LogVMap[className[actual]];
        }
    }

    // Lower cost wins; equality resolves to the first class.
    return weightedCost[0] <= weightedCost[1] ? className[0] : className[1];
}
/// <summary>
/// Classifies <paramref name="text"/> by the sign of <c>Calculate_F(t_Set, text.X)</c> and
/// stores the outcome in <c>result.ResultCategoryId</c>.
/// </summary>
/// <param name="t_Set">Example set handed to <c>Calculate_F</c>.</param>
/// <param name="text">Example whose <c>X</c> member is evaluated.</param>
/// <param name="result">
/// Its <c>ResultCategoryId</c> is set to +1 when the computed value is greater than or equal
/// to zero, and to -1 otherwise (which includes a NaN value).
/// </param>
public void PredictText(ExampleSet t_Set, Example text, ref ClassificationResult result)
{
    double decisionValue = Calculate_F(t_Set, text.X);

    // Zero counts as the positive side.
    bool isPositive = decisionValue >= 0;
    if (!isPositive)
    {
        result.ResultCategoryId = -1;
        return;
    }

    result.ResultCategoryId = +1;
}