/// <summary>
/// Scores <paramref name="text"/> against every trained category using the
/// naive Bayes tables: for category vj the score is
/// log P(vj) + sum over known tokens of log P(wk | vj).
/// One (category name, log score) entry is added to the result per category,
/// then the result is normalized.
/// </summary>
/// <param name="problem">Supplies the training-set vocabulary used to map tokens
/// to positions in the probability table.</param>
/// <param name="text">The example to classify; tokens absent from the training
/// vocabulary contribute nothing to the score.</param>
/// <param name="result">Receives the per-category log scores.</param>
public override void Predict(TextClassificationProblem problem, TextExample text, ref ClassificationResult result)
{
    int j = 0; // index of the current category in m_prob_vj / m_prob_wk_vj_log
    int k;     // position of a token in the vocabulary (column index)
    Vocabulary voc = problem.TrainingSetVocabulary;

    foreach (Category c in m_CategoryCollection.Collection)
    {
        // Start from the log prior of category j.
        double logv = Math.Log(m_prob_vj[j]);

        // Accumulate the log likelihood of every token the model knows.
        foreach (string token in text.Tokens.Keys)
        {
            // Single lookup on the position map instead of the original
            // ContainsKey(WordBag) + WordPositionMap indexer pair: one probe
            // instead of two, and no KeyNotFoundException if the two maps
            // ever get out of sync. Unknown tokens are simply skipped.
            if (voc.WordPositionMap.TryGetValue(token, out k))
            {
                logv += m_prob_wk_vj_log[k, j];
            }
        }

        result.CategoryName2LogVMap.Add(c.Name, logv);
        j++; // next category
    }

    result.Normalize();
}
/// <summary>
/// Merges the token counts of <paramref name="example"/> into this category's
/// word-count table and updates the running total of token occurrences.
/// </summary>
/// <param name="example">The training example whose token counts are folded in.</param>
public void AddExample(TextExample example)
{
    // Enumerate key/value pairs directly: the original iterated Tokens.Keys and
    // then indexed example.Tokens[word] twice per token, costing two extra
    // dictionary lookups per iteration.
    foreach (var pair in example.Tokens)
    {
        Utility.AddToken(m_WordCountPairCollection, pair.Key, pair.Value);
        m_WordBagOccurence += pair.Value;
    }
}
/// <summary>
/// Builds the normalized sparse TF-IDF feature vector for
/// <paramref name="example"/> and stores it in <c>example.X</c>.
/// Tokens not present in the vocabulary are ignored.
/// </summary>
/// <param name="example">Example whose token counts are converted to features.</param>
/// <param name="voc">Vocabulary mapping each word to a vector position and to the
/// number of examples it occurs in.</param>
/// <param name="exampleCount">Total number of training examples (documents).</param>
private static void BuildExample(TextExample example, Vocabulary voc, int exampleCount)
{
    int dimension = voc.Count;
    SparseVector vector = new SparseVector(dimension);

    foreach (string word in example.Tokens.Keys)
    {
        int pos = voc.GetWordPosition(word);
        if (pos == Constants.KEY_NOT_FOUND)
        {
            continue;
        }

        // phi_i(x) = tf_i * log(idf_i) / k
        //   tf_i : number of occurrences of term i in document x
        //   idf_i: ratio between the total number of documents and the number
        //          of documents containing the term
        //   k    : normalization constant ensuring ||phi|| = 1 (applied below)
        //
        // BUG FIX: compute the idf ratio in floating point. If both operands
        // are integers, exampleCount / occurrences truncates (e.g. 10 / 3 -> 3)
        // before Math.Log sees it, distorting every weight. The cast is a
        // no-op if the map already stores doubles.
        double idfRatio = (double)exampleCount / voc.WordExampleOccurMap[word];
        double phi = example.Tokens[word] * Math.Log(idfRatio);
        vector.Components.Add(pos, phi);
    }

    vector.Normalize();
    example.X = vector;
}
/// <summary>
/// Adds a labeled training example by delegating to the per-category
/// accumulator selected by the example's label.
/// </summary>
/// <param name="example">The labeled example to add.</param>
public void AddExample(TextExample example)
{
    // Look up the category bucket for this label, then hand the example off.
    var categoryData = m_CategoryMap[example.Label];
    categoryData.AddExample(example);
}