コード例 #1
0
        /// <summary>
        /// Scores a morphological analysis against a tag (the result is named p(M|T) in the
        /// original code). This method should never return 0!
        /// </summary>
        /// <param name="tagId">Tag id used to query <c>tagCounter</c> and <c>morphTag</c>.</param>
        /// <param name="morphId">Morphological-analysis id used to query <c>morphTag</c>.</param>
        /// <returns>
        /// <c>p(T|M) * p(M) / p(T)</c> computed from raw counts when the analysis has a total
        /// count above 100 and has been counted with this tag at least once; otherwise the
        /// constant <c>1 / (morphTag.TotalCount() + tagIndex.Size() + 1)</c>.
        /// </returns>
        private double ProbMorphTag(int tagId, int morphId)
        {
            // All counter look-ups are performed up front, in a fixed order, regardless of
            // which branch is taken below.
            double morphCount    = morphTag.TotalCount(morphId);
            double morphTagCount = morphTag.GetCount(morphId, tagId);
            double morphPrior    = morphCount / morphTag.TotalCount();
            double tagCount      = tagCounter.GetCount(tagId);
            double tagPrior      = tagCount / tagCounter.TotalCount();

            bool hasReliableCounts = morphCount > 100.0 && morphTagCount > 0.0;
            if (!hasReliableCounts)
            {
                // Rare or unseen morphological analysis. Acknowledged hack: such analyses are
                // extremely rare, so a flat add-one style estimate is used. (A smoothed
                // estimate backed by an "unseen" counter existed here once but is disabled.)
                return 1.0 / (morphTag.TotalCount() + tagIndex.Size() + 1.0);
            }

            // Relative-frequency estimate of p(T|M), inverted with Bayes' rule.
            double tagGivenMorph = morphTagCount / morphCount;
            return tagGivenMorph * morphPrior / tagPrior;
        }
コード例 #2
0
        /// <summary>
        /// Scores a lemma against a tag (the result is named p(L|T) in the original code).
        /// This method should never return 0!!
        /// </summary>
        /// <remarks>
        /// NOTE(review): for a lemma with a zero total count the result is
        /// <c>lemmaTagUnseen.GetCount(tagId) / tagCounter.TotalCount()</c>, which is 0 whenever
        /// the unseen counter holds nothing for the tag - confirm this cannot happen, since it
        /// would contradict the "never return 0" contract.
        /// </remarks>
        /// <param name="word">Surface form. Not read by the current implementation.</param>
        /// <param name="loc">Position of the word. Not read by the current implementation.</param>
        /// <param name="tagId">Tag id used to query <c>tagCounter</c>, <c>lemmaTag</c> and <c>lemmaTagUnseen</c>.</param>
        /// <param name="lemmaId">Lemma id used to query <c>lemmaTag</c>.</param>
        /// <returns>
        /// For a lemma with a positive total count: <c>p(T|L) * p(L) / p(T)</c>, where p(T|L) is
        /// the raw relative frequency if the lemma count exceeds 100 and the pair was counted,
        /// and is otherwise smoothed with <c>smooth[1]</c> towards the unseen-tag distribution.
        /// For any other lemma: the unseen count of the tag divided by the total tag count.
        /// </returns>
        private double ProbLemmaTag(string word, int loc, int tagId, int lemmaId)
        {
            // All of these look-ups happen before branching, in this order.
            double lemmaCount    = lemmaTag.TotalCount(lemmaId);
            double lemmaTagCount = lemmaTag.GetCount(lemmaId, tagId);
            double lemmaPrior    = lemmaCount / lemmaTag.TotalCount();
            double tagCount      = tagCounter.GetCount(tagId);
            double tagPrior      = tagCount / tagCounter.TotalCount();

            if (!(lemmaCount > 0.0))
            {
                // Unseen lemma. Acknowledged hack: fall back to how often the tag was recorded
                // in the unseen counter, relative to all tag occurrences. (Scoring through the
                // unknown-word model using the surface form was tried here and is disabled.)
                double unseenCountForTag = lemmaTagUnseen.GetCount(tagId);
                return unseenCountForTag / tagCounter.TotalCount();
            }

            // Seen lemma: estimate p(T|L).
            double tagGivenLemma;
            if (lemmaCount > 100.0 && lemmaTagCount > 0.0)
            {
                tagGivenLemma = lemmaTagCount / lemmaCount;
            }
            else
            {
                // Too little evidence: interpolate with the unseen-tag distribution.
                // TODO(spenceg): p_T_U is 0??
                double unseenTagCount = lemmaTagUnseen.GetCount(tagId);
                double unseenTagProb  = unseenTagCount / lemmaTagUnseen.TotalCount();
                tagGivenLemma = (lemmaTagCount + smooth[1] * unseenTagProb) / (lemmaCount + smooth[1]);
            }

            // Bayes' rule: p(L|T) = p(T|L) * p(L) / p(T).
            return tagGivenLemma * lemmaPrior / tagPrior;
        }
コード例 #3
0
        /// <summary>
        /// Scores a word against a tag (the result is named p(W|T) in the original code).
        /// </summary>
        /// <param name="word">Surface form; passed to the unknown-word model for unseen words.</param>
        /// <param name="loc">Position of the word; passed to the unknown-word model for unseen words.</param>
        /// <param name="wordId">Word id used to query <c>wordTag</c>.</param>
        /// <param name="tagId">Tag id used to query <c>tagCounter</c>, <c>wordTag</c> and <c>wordTagUnseen</c>.</param>
        /// <returns>
        /// For a word with a positive total count: <c>p(T|W) * p(W) / p(T)</c>, where p(T|W) is
        /// the raw relative frequency if the word count exceeds 100 and the pair was counted,
        /// and is otherwise smoothed with <c>smooth[1]</c> towards the unseen-tag distribution.
        /// For any other word: <c>Math.Exp</c> of the unknown-word model's score.
        /// </returns>
        private double ProbWordTag(string word, int loc, int wordId, int tagId)
        {
            // All of these look-ups happen before branching, in this order.
            double wordCount    = wordTag.TotalCount(wordId);
            double wordTagCount = wordTag.GetCount(wordId, tagId);
            double wordPrior    = wordCount / wordTag.TotalCount();
            double tagCount     = tagCounter.GetCount(tagId);
            double tagPrior     = tagCount / tagCounter.TotalCount();

            if (!(wordCount > 0.0))
            {
                // Unseen word: score based on the word signature (of the surface form).
                // The score is exponentiated before being returned.
                IntTaggedWord taggedWord     = new IntTaggedWord(wordId, tagId);
                double        tagOccurrences = tagCounter.GetCount(tagId);
                return Math.Exp(GetUnknownWordModel().Score(taggedWord, loc, tagOccurrences, tagCounter.TotalCount(), smooth[0], word));
            }

            // Seen word: estimate p(T|W).
            double tagGivenWord;
            if (wordCount > 100.0 && wordTagCount > 0.0)
            {
                tagGivenWord = wordTagCount / wordCount;
            }
            else
            {
                // Too little evidence: interpolate with the unseen-tag distribution.
                // TODO p_T_U is 0?
                double unseenTagCount = wordTagUnseen.GetCount(tagId);
                double unseenTagProb  = unseenTagCount / wordTagUnseen.TotalCount();
                tagGivenWord = (wordTagCount + smooth[1] * unseenTagProb) / (wordCount + smooth[1]);
            }

            // Bayes' rule: p(W|T) = p(T|W) * p(W) / p(T).
            return tagGivenWord * wordPrior / tagPrior;
        }