// Resolves the log probability for the given word sequence.
//
// Resolution order, as implemented below:
//   1. If no n-grams of this size are loaded, or the first word has no unigram,
//      retry with the sequence returned by getNewest().
//   2. A sequence of fewer than two words is answered from the unigram data.
//   3. An exact n-gram hit is answered from the probability table for this size.
//   4. A two-word miss is the first word's log backoff plus the second word's
//      log probability.
//   5. Otherwise the probability of getNewest() is used, with the backoff of the
//      getOldest() n-gram added when that n-gram exists.
//
// Returns the result boxed as a Float.
private Float getNGramProbability(WordSequence wordSequence)
{
    int order = wordSequence.size();
    Word leadWord = wordSequence.getWord(0);

    // The unigram check is only made when n-grams of this size exist.
    bool orderIsEmpty = this.loader.getNumberNGrams(order) == 0;
    if (orderIsEmpty || !this.hasUnigram(leadWord))
    {
        return this.getNGramProbability(wordSequence.getNewest());
    }

    if (order < 2)
    {
        return Float.valueOf(this.getUnigramProbability(wordSequence));
    }

    NGramProbability exactMatch = this.findNGram(wordSequence);
    if (exactMatch != null)
    {
        return Float.valueOf(this.ngramProbTable[order - 1][exactMatch.getProbabilityID()]);
    }

    if (order == 2)
    {
        // Bigram miss: combine the two unigram entries directly.
        UnigramProbability first = this.getUnigram(leadWord);
        UnigramProbability second = this.getUnigram(wordSequence.getWord(1));
        return Float.valueOf(first.getLogBackoff() + second.getLogProbability());
    }

    NGramProbability history = this.findNGram(wordSequence.getOldest());
    if (history == null)
    {
        // No entry for the getOldest() sequence: no backoff weight to add.
        return Float.valueOf(this.getProbability(wordSequence.getNewest()));
    }
    return Float.valueOf(this.ngramBackoffTable[order - 1][history.getBackoffID()] + this.getProbability(wordSequence.getNewest()));
}
// Rewrites the log probability of the first numberNGrams[0] unigram entries.
//
// For every entry except the one at startWordID, the stored log probability is
// shifted by log(unigramWeight) and then combined (via addAsLinear) with
// log(1 / numberNGrams[0]) + log(1 - unigramWeight).
// When applyLanguageWeightAndWip is set, the result is additionally scaled by
// languageWeight and offset by log(wip), and the entry's log backoff is scaled
// by languageWeight.
private void applyUnigramWeight()
{
    float logWeight = this.logMath.linearToLog((double)this.unigramWeight);
    float logComplement = this.logMath.linearToLog((double)(1f - this.unigramWeight));
    float logUniform = this.logMath.linearToLog((double)(1f / (float)this.numberNGrams[0]));
    float logWip = this.logMath.linearToLog(this.wip);
    float uniformShare = logUniform + logComplement;

    int index = 0;
    while (index < this.numberNGrams[0])
    {
        UnigramProbability entry = this.unigrams[index];
        float logProb = entry.getLogProbability();

        bool isStartWord = index == this.startWordID;
        if (!isStartWord)
        {
            logProb += logWeight;
            logProb = this.logMath.addAsLinear(logProb, uniformShare);
        }

        if (this.applyLanguageWeightAndWip)
        {
            logProb = logProb * this.languageWeight + logWip;
            entry.setLogBackoff(entry.getLogBackoff() * this.languageWeight);
        }

        entry.setLogProbability(logProb);
        index++;
    }
}
// Returns the word ID stored on the unigram entry for the given word.
//
// Throws IllegalArgumentException ("No word ID: <word>") when getUnigram
// returns no entry for the word.
public int getWordID(Word word)
{
    UnigramProbability entry = this.getUnigram(word);
    if (entry != null)
    {
        return entry.getWordID();
    }

    StringBuilder message = new StringBuilder();
    message.append("No word ID: ").append(word);
    throw new IllegalArgumentException(message.toString());
}
// Returns the log probability of the first word of the given sequence, taken
// from its unigram entry.
//
// Throws Error ("Unigram not in LM: <word>") when getUnigram returns no entry
// for that word.
private float getUnigramProbability(WordSequence wordSequence)
{
    Word firstWord = wordSequence.getWord(0);
    UnigramProbability entry = this.getUnigram(firstWord);
    if (entry != null)
    {
        return entry.getLogProbability();
    }

    StringBuilder message = new StringBuilder();
    message.append("Unigram not in LM: ").append(firstWord);
    throw new Error(message.toString());
}
// Reads `num` unigram records from the stream and returns them as an array.
//
// Each record is read as: int word ID, float probability, float backoff,
// int (passed through as the fourth constructor argument). The two floats are
// converted with log10ToLog before being stored.
//
// A word ID below 1 is replaced by the record's index. For every record except
// the last one, and only when assertions are enabled, the word ID must equal
// the record's index or an AssertionError is thrown.
//
// `flag` is forwarded unchanged to readInt / readFloat.
private UnigramProbability[] readUnigrams(DataInputStream dataInputStream, int num, bool flag)
{
    UnigramProbability[] result = new UnigramProbability[num];
    int lastIndex = num - 1;

    for (int index = 0; index < num; index++)
    {
        int wordId = this.readInt(dataInputStream, flag);
        if (wordId < 1)
        {
            wordId = index;
        }

        // The final record is exempt from the ID == index check.
        bool checkOrdering = index != lastIndex && !BinaryLoader.assertionsDisabled;
        if (checkOrdering && wordId != index)
        {
            throw new AssertionError();
        }

        float rawProbability = this.readFloat(dataInputStream, flag);
        float rawBackoff = this.readFloat(dataInputStream, flag);
        int trailingField = this.readInt(dataInputStream, flag);

        float logProbability = this.logMath.log10ToLog(rawProbability);
        float logBackoff = this.logMath.log10ToLog(rawBackoff);
        result[index] = new UnigramProbability(wordId, logProbability, logBackoff, trailingField);
    }

    return result;
}
// Computes the per-unigram smear terms (unigramSmearTerm) and the per-bigram
// smear terms (stored through putSmearTerm), printing progress to the console.
//
// Pass 0 accumulates two sums over all unigrams, with p = logToLinear(logp):
//   s0 = sum(p * logp)          r0 = sum(p * logp * logp)
// Pass 1 walks every bigram buffer and fills ugNumerator / ugDenominator /
// ugAvgLogProb and unigramSmearTerm for the buffer's index.
// Pass 2 walks every bigram again, loads the matching trigram buffer, and
// stores one smear term per (buffer index, word ID) pair.
//
// The console calls were restored as java.lang.System.@out.println; the source
// had an email-obfuscation placeholder where the call target belongs.
private void buildSmearInfo()
{
    double s0 = (double)0f;
    double r0 = (double)0f;
    this.bigramSmearMap = new HashMap();

    int unigramCount = this.unigrams.Length;
    double[] ugNumerator = new double[unigramCount];
    double[] ugDenominator = new double[unigramCount];
    double[] ugAvgLogProb = new double[unigramCount];
    this.unigramSmearTerm = new float[unigramCount];

    // Pass 0: global sums over the unigram distribution.
    UnigramProbability[] allUnigrams = this.unigrams;
    for (int u = 0; u < allUnigrams.Length; u++)
    {
        float logP = allUnigrams[u].getLogProbability();
        double p = this.logMath.logToLinear(logP);
        s0 += p * (double)logP;
        r0 += p * (double)logP * (double)logP;
    }
    java.lang.System.@out.println(new StringBuilder().append("R0 S0 ").append(r0).append(' ').append(s0).toString());

    // Pass 1: per-unigram smear terms.
    for (int j = 0; j < this.loadedBigramBuffers.Length; j++)
    {
        NGramBuffer bigramBuffer = this.getBigramBuffer(j);
        if (bigramBuffer == null)
        {
            this.unigramSmearTerm[j] = 0f;
        }
        else
        {
            ugNumerator[j] = (double)0f;
            ugDenominator[j] = (double)0f;
            ugAvgLogProb[j] = (double)0f;

            float logUgBackoff = this.unigrams[j].getLogBackoff();
            double ugBackoff = this.logMath.logToLinear(logUgBackoff);

            for (int k = 0; k < bigramBuffer.getNumberNGrams(); k++)
            {
                int wordID = bigramBuffer.getWordID(k);
                NGramProbability bigramEntry = bigramBuffer.getNGramProbability(k);
                float logUgProb = this.unigrams[wordID].getLogProbability();
                float logBgProb = this.ngramProbTable[1][bigramEntry.getProbabilityID()];
                double ugProb = this.logMath.logToLinear(logUgProb);
                double bgProb = this.logMath.logToLinear(logBgProb);
                double backoffBgProb = ugBackoff * ugProb;
                double logBackoffBgProb = (double)this.logMath.linearToLog(backoffBgProb);

                ugNumerator[j] += (bgProb * (double)logBgProb - backoffBgProb * logBackoffBgProb) * (double)logUgProb;
                // NOTE(review): this term is weighted by logUgProb once, while the
                // corresponding trigram term in pass 2 is weighted by logUgProb squared.
                // Left as found; confirm whether the asymmetry is intended.
                ugDenominator[j] += (bgProb - backoffBgProb) * (double)logUgProb;
            }

            ugNumerator[j] += ugBackoff * ((double)logUgBackoff * s0 + r0);
            // ugAvgLogProb must be taken before r0 is folded into the denominator.
            ugAvgLogProb[j] = ugDenominator[j] + ugBackoff * s0;
            ugDenominator[j] += ugBackoff * r0;
            this.unigramSmearTerm[j] = (float)(ugNumerator[j] / ugDenominator[j]);
        }
    }

    // Pass 2: per-bigram smear terms.
    for (int j = 0; j < this.loadedBigramBuffers.Length; j++)
    {
        java.lang.System.@out.println(new StringBuilder().append("Processed ").append(j).append(" of ").append(this.loadedBigramBuffers.Length).toString());
        NGramBuffer bigramBuffer = this.getBigramBuffer(j);
        if (bigramBuffer != null)
        {
            for (int b = 0; b < bigramBuffer.getNumberNGrams(); b++)
            {
                NGramProbability bigramEntry = bigramBuffer.getNGramProbability(b);
                float logBgBackoff = this.ngramBackoffTable[2][bigramEntry.getBackoffID()];
                double bgBackoff = this.logMath.logToLinear(logBgBackoff);
                int secondWordID = bigramBuffer.getWordID(b);
                NGramBuffer trigramBuffer = this.loadTrigramBuffer(j, secondWordID);

                float smearTerm;
                if (trigramBuffer == null)
                {
                    // No trigrams follow this bigram: reuse the unigram smear term.
                    smearTerm = this.unigramSmearTerm[secondWordID];
                }
                else
                {
                    double bgNumerator = (double)0f;
                    double bgDenominator = (double)0f;
                    for (int t = 0; t < trigramBuffer.getNumberNGrams(); t++)
                    {
                        int thirdWordID = trigramBuffer.getWordID(t);
                        float logTgProb = this.ngramProbTable[2][trigramBuffer.getProbabilityID(t)];
                        double tgProb = this.logMath.logToLinear(logTgProb);
                        float logBgProb = this.getBigramProb(secondWordID, thirdWordID);
                        double bgProb = this.logMath.logToLinear(logBgProb);
                        float logUgProb = this.unigrams[thirdWordID].getLogProbability();
                        double backoffTgProb = bgBackoff * bgProb;
                        double logBackoffTgProb = (double)this.logMath.linearToLog(backoffTgProb);

                        bgNumerator += (tgProb * (double)logTgProb - backoffTgProb * logBackoffTgProb) * (double)logUgProb;
                        bgDenominator += (tgProb - backoffTgProb) * (double)logUgProb * (double)logUgProb;
                    }
                    bgNumerator += bgBackoff * ((double)logBgBackoff * ugAvgLogProb[secondWordID] - ugNumerator[secondWordID]);
                    bgDenominator += bgBackoff * ugDenominator[secondWordID];
                    smearTerm = (float)(bgNumerator / bgDenominator);
                    this.smearTermCount++;
                }
                this.putSmearTerm(j, secondWordID, smearTerm);
            }
        }
    }
    java.lang.System.@out.println(new StringBuilder().append("Smear count is ").append(this.smearTermCount).toString());
}
// Reads the model layout from the given stream into this loader's fields.
//
// Steps, in order:
//   1. Wrap the input in a buffered DataInputStream and read the header.
//   2. Read numberNGrams[0] + 1 unigram records, then skip the n-gram section.
//   3. For each index i in [1, maxNGram) with numberNGrams[i] > 0, read the
//      probability table; for i > 1 also read the backoff table (read first)
//      and the segment table.
//   4. Read the word-string size (must be > 0) and the words themselves.
//   5. Set the start word's log probability and the end word's log backoff to
//      -99f when those IDs are set (> -1).
//   6. Apply the unigram weight and, if enabled, language weight and WIP.
//
// The stream is closed on every exit path, including when parsing throws.
//
// Throws Error ("Bad word string size: <n>") when the word-string size read
// from the stream is not positive.
protected internal virtual void loadModelLayout(InputStream inputStream)
{
    DataInputStream dataInputStream = new DataInputStream(new BufferedInputStream(inputStream));
    try
    {
        this.readHeader(dataInputStream);
        this.unigrams = this.readUnigrams(dataInputStream, this.numberNGrams[0] + 1, this.bigEndian);
        this.skipNGrams(dataInputStream);

        for (int order = 1; order < this.maxNGram; order++)
        {
            if (this.numberNGrams[order] <= 0)
            {
                continue;
            }
            if (order == 1)
            {
                this.NGramProbTable[order] = this.readFloatTable(dataInputStream, this.bigEndian);
            }
            else
            {
                // Read order matters: backoff table, then probability table, then segment table.
                this.NGramBackoffTable[order] = this.readFloatTable(dataInputStream, this.bigEndian);
                this.NGramProbTable[order] = this.readFloatTable(dataInputStream, this.bigEndian);

                // segmentSize is 1 << k and therefore never -1, so plain integer
                // division is safe here.
                int segmentSize = 1 << this.logNGramSegmentSize;
                int segmentTableSize = (this.numberNGrams[order - 1] + 1) / segmentSize + 1;
                this.NGramSegmentTable[order] = this.readIntTable(dataInputStream, this.bigEndian, segmentTableSize);
            }
        }

        int wordsStringLength = this.readInt(dataInputStream, this.bigEndian);
        if (wordsStringLength <= 0)
        {
            string text = new StringBuilder().append("Bad word string size: ").append(wordsStringLength).toString();
            throw new Error(text);
        }
        this.words = this.readWords(dataInputStream, wordsStringLength, this.numberNGrams[0]);

        if (this.startWordID > -1)
        {
            this.unigrams[this.startWordID].setLogProbability(-99f);
        }
        if (this.endWordID > -1)
        {
            this.unigrams[this.endWordID].setLogBackoff(-99f);
        }

        this.applyUnigramWeight();

        if (this.applyLanguageWeightAndWip)
        {
            // NOTE(review): this loop visits indices 0..maxNGram inclusive, but the
            // read loop above only fills indices 1..maxNGram-1. Whether index 0 and
            // index maxNGram are valid/non-null depends on code not visible here
            // (table allocation, applyLanguageWeight/applyWip) - confirm.
            for (int j = 0; j <= this.maxNGram; j++)
            {
                this.applyLanguageWeight(this.NGramProbTable[j], this.languageWeight);
                this.applyWip(this.NGramProbTable[j], this.wip);
                if (j > 1)
                {
                    this.applyLanguageWeight(this.NGramBackoffTable[j], this.languageWeight);
                }
            }
        }
    }
    finally
    {
        // Previously the stream was closed only on the success path.
        dataInputStream.close();
    }
}