//Load language model from specific file.
//Reads (prob, bow) float pairs from "<strFileName>.prob" into lm_prob,
//then loads the double-array trie index from "<strFileName>.da".
public void LoadLM(string strFileName)
{
    // Open the binary stream directly; the original wrapped a StreamReader
    // whose text decoding was never used. `using` guarantees the handles are
    // released even if a read fails.
    using (FileStream fs = File.OpenRead(strFileName + ".prob"))
    using (BinaryReader br = new BinaryReader(fs))
    {
        lm_prob = new VarBigArrayNoCMP<NGram>(1024000);
        long index = 0;
        long length = fs.Length;
        // Each record is two 32-bit floats (prob, back-off weight). Loop on
        // the stream position instead of using EndOfStreamException as
        // control flow.
        while (fs.Position < length)
        {
            NGram ngram = new NGram();
            ngram.prob = br.ReadSingle();
            ngram.bow = br.ReadSingle();
            lm_prob[index] = ngram;
            index++;
        }
    }
    daSearch.Load(strFileName + ".da");
}
// Compute the (log-domain) conditional probability of the last word of
// strText using Katz-style back-off: find the longest n-gram present in
// the LM, then add the back-off weights of the shorter histories.
// Returns 1 for an OOV (no n-gram of any order matched), 0 otherwise,
// with the result written through `probability`.
private int lm_ngram_prob(string strText, int start, int end, ref double probability)
{
    NGram matched = new NGram();
    bool found = false;

    // Scan from the longest requested order (start) toward the shortest
    // (end) for the first n-gram that exists in the model.
    int order = start;
    while (order <= end)
    {
        int offset = daSearch.SearchByPerfectMatch(GenerateNGram(strText, order));
        if (offset >= 0)
        {
            matched = lm_prob[offset];
            found = true;
            break;
        }
        order++;
    }

    if (!found)
    {
        return(1);// OOV
    }
    if (order == start)
    {
        probability = matched.prob;
        return(0); // exact ngram in LM
    }

    double logProb = matched.prob;
    double bowSum = 0;

    // Back-off path: drop the predicted (last) word, then accumulate the
    // back-off weights of each history n-gram from just above the matched
    // order down to the originally requested order.
    string[] tokens = strText.Split();
    string history = String.Join(" ", tokens, 0, tokens.Length - 1);
    for (int k = order - 1; k >= start; k--)
    {
        int offset = daSearch.SearchByPerfectMatch(GenerateNGram(history, k));
        if (offset < 0)
        {
            break;
        }
        bowSum += lm_prob[offset].bow;
    }

    probability = logProb + bowSum;
    return(0);
}