Exemplo n.º 1
0
        /// <summary>
        /// Loads the language model stored under the given base file name.
        /// </summary>
        /// <remarks>
        /// Two files are read: <c>strFileName + ".prob"</c>, a flat binary sequence of
        /// records each made of two consecutive <see cref="float"/> values (probability,
        /// then back-off weight), and <c>strFileName + ".da"</c>, which is handed to
        /// <c>daSearch.Load</c>. Record <c>i</c> of the ".prob" file is stored at
        /// <c>lm_prob[i]</c>. Trailing bytes that do not form a complete record are ignored.
        /// </remarks>
        /// <param name="strFileName">Base path of the model files, without extension.</param>
        public void LoadLM(string strFileName)
        {
            // Size in bytes of one (prob, bow) record on disk.
            const int RecordSize = 2 * sizeof(float);

            // The using statements guarantee the file handle is released even when
            // reading or storing a record fails part-way through.
            using (FileStream probStream = File.OpenRead(strFileName + ".prob"))
            using (BinaryReader br = new BinaryReader(probStream))
            {
                // Replace the table only after the file has been opened successfully.
                lm_prob = new VarBigArrayNoCMP<NGram>(1024000);

                // Only whole records are read, so end-of-file is detected from the
                // stream length instead of by catching EndOfStreamException.
                long recordCount = probStream.Length / RecordSize;
                for (long index = 0; index < recordCount; index++)
                {
                    NGram ngram = new NGram();
                    ngram.prob     = br.ReadSingle();
                    ngram.bow      = br.ReadSingle();
                    lm_prob[index] = ngram;
                }
            }

            daSearch.Load(strFileName + ".da");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Looks up the back-off probability of the n-gram described by
        /// <paramref name="strText"/> over the positions <paramref name="start"/>..<paramref name="end"/>.
        /// </summary>
        /// <param name="strText">Whitespace-separated words of the n-gram.</param>
        /// <param name="start">First position passed to <c>GenerateNGram</c> (tried first).</param>
        /// <param name="end">Last position passed to <c>GenerateNGram</c> (inclusive).</param>
        /// <param name="probability">
        /// Receives the resulting value when the method returns 0; left untouched when it returns 1.
        /// </param>
        /// <returns>0 when a probability was found, 1 when no candidate is in the model (OOV).</returns>
        private int lm_ngram_prob(string strText, int start, int end, ref double probability)
        {
            // Try each candidate position from start up to end and stop at the first
            // one whose n-gram exists in the model.
            // NOTE(review): presumably a larger position yields a shorter n-gram - confirm against GenerateNGram.
            int hitPos    = start;
            int hitOffset = -1;
            while (hitPos <= end)
            {
                hitOffset = daSearch.SearchByPerfectMatch(GenerateNGram(strText, hitPos));
                if (hitOffset >= 0)
                {
                    break;
                }
                hitPos++;
            }

            // Nothing matched at any position: out of vocabulary.
            if (hitOffset < 0)
            {
                return 1;
            }

            double hitProb = lm_prob[hitOffset].prob;

            // The very first candidate matched: the exact n-gram is in the model.
            if (hitPos == start)
            {
                probability = hitProb;
                return 0;
            }

            // Drop the last word to obtain the context whose back-off weights apply.
            string[] tokens  = strText.Split();
            string   context = String.Join(" ", tokens, 0, tokens.Length - 1);

            // Accumulate back-off weights walking back from the position just before
            // the match towards start; stop as soon as a context is missing.
            double backoffTotal = 0;
            int    pos          = hitPos - 1;
            while (pos >= start)
            {
                int ctxOffset = daSearch.SearchByPerfectMatch(GenerateNGram(context, pos));
                if (ctxOffset < 0)
                {
                    break;
                }

                backoffTotal += lm_prob[ctxOffset].bow;
                pos--;
            }

            probability = hitProb + backoffTotal;
            return 0;
        }