Example #1
0
        //Calculate the log-probability and perplexity of the given text under
        //the n-gram language model.
        //strText: space-separated, already word-segmented sentence.
        //order:   n-gram order; the context window is at most (order - 1) words.
        //Returns an LMResult with accumulated logProb, OOV count, and perplexity.
        public LMResult GetSentProb(string strText, int order)
        {
            LMResult LMRst       = new LMResult();
            int      calcWordNum = 0;

            //Append the end-of-sentence token so the final word is also scored
            strText = strText + " EOS";

            string[] items   = strText.Split();
            int      wordNum = items.Length;

            for (int i = 0; i < wordNum; i++)
            {
                //Prefix of the sentence up to and including word i
                string words = String.Join(" ", items, 0, i + 1);
                //Clamp the context start so at most (order - 1) preceding words are used
                int    start = (i > order - 1) ? (i - order + 1) : 0;
                double prob  = 0.0;

                //lm_ngram_prob returns 0 on success and fills prob;
                //a non-zero return is treated as an out-of-vocabulary word
                if (lm_ngram_prob(words, start, i, ref prob) == 0)
                {
                    LMRst.logProb += prob;
                }
                else
                {
                    LMRst.oovs++;
                }
                calcWordNum++;
            }

            //OOV words contribute no probability, so exclude them from normalization
            int denom = calcWordNum - LMRst.oovs;

            //Guard the division: if every word was OOV (denom == 0) the original
            //code produced a NaN/Infinity perplexity. Report 0.0 in that case.
            LMRst.perplexity = (denom > 0) ? log2prob(-LMRst.logProb / denom) : 0.0;
            return(LMRst);
        }
Example #2
0
        //Find synonym candidates for the substring strQuery[begin, begin+len)
        //and score the original query plus every synonym-substituted variant
        //with both the n-gram and the RNN language models.
        //Returns the results sorted by CompareLMResult; the original term is
        //always included (with llr = -1.0) even when no synonyms exist.
        public List <SynResult> GetSynonym(string strQuery, int begin, int len)
        {
            List <SynResult> synRstList = new List <SynResult>();

            //Candidate term to be replaced by its synonyms
            string strTerm = strQuery.Substring(begin, len);

            //Score the original, unmodified query first
            synRstList.Add(ScoreSynCandidate(strTerm, strQuery, -1.0));

            //NOTE(review): ContainsKey + indexer is a double lookup; TryGetValue
            //would be preferable, but the value type of synPair is not visible here.
            if (synPair.ContainsKey(strTerm) == false)
            {
                return(synRstList);
            }

            //Left and right context around the term being replaced
            string strLCtx = strQuery.Substring(0, begin);
            string strRCtx = strQuery.Substring(begin + len);

            foreach (SynContextSet ctx in synPair[strTerm])
            {
                //Replace the term with its synonym and re-score the full sentence
                string strText = strLCtx + ctx.strTerm + strRCtx;
                synRstList.Add(ScoreSynCandidate(ctx.strTerm, strText, ctx.llr));
            }

            synRstList.Sort(CompareLMResult);

            return(synRstList);
        }

        //Word-break strText, score it with both language models, and return a
        //filled SynResult for the candidate term. Factored out of GetSynonym,
        //where this logic was duplicated for the original query and each synonym.
        private SynResult ScoreSynCandidate(string strTerm, string strText, double llr)
        {
            //Word breaking
            string strWBQuery = WordSegment(strText);

            //Call RNN language model
            RnnLMResult lmResult_rnn = lmDecoder_rnn.GetSentProb(strWBQuery);

            //Call n-gram language model
            LMDecoder.LMResult lmResult = lmDecoder.GetSentProb(strWBQuery, lmOrder);

            SynResult synRst = new SynResult();

            synRst.strTerm     = strTerm;
            //Truncate perplexity to an integer score (the "/ 1.0" in the
            //original code was a no-op and has been removed)
            synRst.lmScore     = (int)lmResult.perplexity;
            synRst.lmScore_rnn = (int)lmResult_rnn.perplexity;
            synRst.llr         = llr;
            return(synRst);
        }