/// <summary>
/// Scores a morphological analysis against a tag using the counts held in
/// <c>morphTag</c> and <c>tagCounter</c>. This method should never return 0!
/// </summary>
/// <param name="tagId">Tag index used to query <c>tagCounter</c> and <c>morphTag</c>.</param>
/// <param name="morphId">Morphological-analysis index used to query <c>morphTag</c>.</param>
/// <returns>
/// <c>(c(morph, tag) / c(morph)) * p(morph) / p(tag)</c> when the analysis has been
/// counted more than 100 times and co-occurs with the tag; otherwise a fixed
/// add-one style fallback.
/// </returns>
private double ProbMorphTag(int tagId, int morphId)
{
    double morphCount = morphTag.TotalCount(morphId);
    double morphTagCount = morphTag.GetCount(morphId, tagId);

    // Relative frequency of this morphological analysis.
    double morphPrior = morphCount / morphTag.TotalCount();

    // Relative frequency of the tag.
    double tagCount = tagCounter.GetCount(tagId);
    double tagPrior = tagCount / tagCounter.TotalCount();

    bool wellAttested = morphCount > 100.0 && morphTagCount > 0.0;
    if (!wellAttested)
    {
        // Unseen (or rarely seen) morphological analysis.
        // Hack: unseen morph tags are extremely rare, so a flat add-one
        // estimate is used instead of a backed-off one.
        return 1.0 / (morphTag.TotalCount() + tagIndex.Size() + 1.0);
    }

    // A smoothed variant backed off to morphTagUnseen was sketched for the
    // low-count case but is disabled; only the direct ratio is used here.
    double tagGivenMorph = morphTagCount / morphCount;
    return tagGivenMorph * morphPrior / tagPrior;
}
/// <summary>
/// Scores a lemma against a tag using the counts held in <c>lemmaTag</c>,
/// <c>lemmaTagUnseen</c> and <c>tagCounter</c>. This method should never return 0!!
/// </summary>
/// <param name="word">Surface form; not read by the current implementation.</param>
/// <param name="loc">Position argument; not read by the current implementation.</param>
/// <param name="tagId">Tag index used to query the counters.</param>
/// <param name="lemmaId">Lemma index used to query <c>lemmaTag</c>.</param>
/// <returns>
/// For a lemma with a positive count: <c>p(tag | lemma) * p(lemma) / p(tag)</c>, where
/// <c>p(tag | lemma)</c> is the raw ratio for well-attested pairs and a
/// <c>smooth[1]</c>-weighted estimate otherwise. For a lemma with no count: the
/// unseen-lemma tag count divided by the total tag count.
/// </returns>
private double ProbLemmaTag(string word, int loc, int tagId, int lemmaId)
{
    double lemmaCount = lemmaTag.TotalCount(lemmaId);
    double lemmaTagCount = lemmaTag.GetCount(lemmaId, tagId);

    // Relative frequency of the lemma.
    double lemmaPrior = lemmaCount / lemmaTag.TotalCount();

    // Relative frequency of the tag.
    double tagCount = tagCounter.GetCount(tagId);
    double tagPrior = tagCount / tagCounter.TotalCount();

    bool lemmaSeen = lemmaCount > 0.0;
    if (!lemmaSeen)
    {
        // Unseen lemma. Hack: fall back to how often the tag was recorded in
        // lemmaTagUnseen, normalised by the overall tag total.
        // (A variant that scored the surface form with the unknown-word model
        // was tried here and is disabled.)
        // NOTE(review): this is 0 whenever lemmaTagUnseen has no count for
        // tagId, which contradicts the "never return 0" contract - confirm.
        double unseenCountForTag = lemmaTagUnseen.GetCount(tagId);
        return unseenCountForTag / tagCounter.TotalCount();
    }

    // Seen lemma: estimate p(tag | lemma).
    double tagGivenLemma;
    if (lemmaCount > 100.0 && lemmaTagCount > 0.0)
    {
        tagGivenLemma = lemmaTagCount / lemmaCount;
    }
    else
    {
        double unseenTagCount = lemmaTagUnseen.GetCount(tagId);
        // TODO(spenceg): p_T_U is 0??
        double unseenTagPrior = unseenTagCount / lemmaTagUnseen.TotalCount();
        tagGivenLemma = (lemmaTagCount + smooth[1] * unseenTagPrior) / (lemmaCount + smooth[1]);
    }

    return tagGivenLemma * lemmaPrior / tagPrior;
}
/// <summary>
/// Scores a word against a tag using the counts held in <c>wordTag</c>,
/// <c>wordTagUnseen</c> and <c>tagCounter</c>, deferring to the unknown-word
/// model for words that have no count.
/// </summary>
/// <param name="word">Surface form, forwarded to the unknown-word model.</param>
/// <param name="loc">Position argument, forwarded to the unknown-word model.</param>
/// <param name="wordId">Word index used to query <c>wordTag</c>.</param>
/// <param name="tagId">Tag index used to query the counters.</param>
/// <returns>
/// For a word with a positive count: <c>p(tag | word) * p(word) / p(tag)</c>, where
/// <c>p(tag | word)</c> is the raw ratio for well-attested pairs and a
/// <c>smooth[1]</c>-weighted estimate otherwise. For a word with no count:
/// <c>Math.Exp</c> of the unknown-word model's score.
/// </returns>
private double ProbWordTag(string word, int loc, int wordId, int tagId)
{
    double wordCount = wordTag.TotalCount(wordId);
    double wordTagCount = wordTag.GetCount(wordId, tagId);

    // Relative frequency of the word.
    double wordPrior = wordCount / wordTag.TotalCount();

    // Relative frequency of the tag.
    double tagCount = tagCounter.GetCount(tagId);
    double tagPrior = tagCount / tagCounter.TotalCount();

    bool wordSeen = wordCount > 0.0;
    if (!wordSeen)
    {
        // Unseen word. Score based on the word signature (of the surface form).
        // The model's score is exponentiated, so it is presumably a log
        // probability - TODO confirm against the unknown-word model.
        var taggedWord = new IntTaggedWord(wordId, tagId);
        double tagTotalForModel = tagCounter.GetCount(tagId);
        return Math.Exp(GetUnknownWordModel().Score(taggedWord, loc, tagTotalForModel, tagCounter.TotalCount(), smooth[0], word));
    }

    // Seen word: estimate p(tag | word).
    double tagGivenWord;
    if (wordCount > 100.0 && wordTagCount > 0.0)
    {
        tagGivenWord = wordTagCount / wordCount;
    }
    else
    {
        double unseenTagCount = wordTagUnseen.GetCount(tagId);
        // TODO p_T_U is 0?
        double unseenTagPrior = unseenTagCount / wordTagUnseen.TotalCount();
        tagGivenWord = (wordTagCount + smooth[1] * unseenTagPrior) / (wordCount + smooth[1]);
    }

    return tagGivenWord * wordPrior / tagPrior;
}