/// <summary>
/// Returns the entry of <c>ngramProbTable[1]</c> selected by the probability ID
/// that the bigram buffer of <paramref name="num"/> stores for follower
/// <paramref name="num2"/>.
/// </summary>
/// <param name="num">ID of the first word; used to fetch its bigram buffer.</param>
/// <param name="num2">ID of the second word; looked up inside that buffer.</param>
/// <returns>The table value for the (num, num2) bigram.</returns>
/// <remarks>
/// No null checks are performed: a missing buffer or a missing bigram entry
/// results in a null dereference, exactly as before.
/// </remarks>
private float getBigramProb(int num, int num2)
{
    NGramProbability entry = this.getBigramBuffer(num).findNGram(num2);
    float[] bigramTable = this.ngramProbTable[1];
    return bigramTable[entry.getProbabilityID()];
}
/// <summary>
/// Writes the smear information to a binary file.
/// </summary>
/// <remarks>
/// Layout, in write order:
///   1. magic int (-1060454374), the same value readSmearInfo checks for;
///   2. number of unigrams;
///   3. one float per unigram (unigramSmearTerm);
///   4. per unigram: the number of bigrams (0 when there is no bigram buffer),
///      followed by a (word ID int, smear term float) pair for each bigram.
/// The stream is now closed in a finally block so it is released even when a
/// write or a lookup throws part-way through.
/// </remarks>
/// <param name="text">Path of the file to create.</param>
private void writeSmearInfo(string text)
{
    const int smearMagic = -1060454374;
    DataOutputStream dataOutputStream = new DataOutputStream(new FileOutputStream(text));
    try
    {
        int unigramCount = this.unigrams.Length;
        dataOutputStream.writeInt(smearMagic);
        // The console call was mangled in this file; restored as the IKVM form of System.out.println.
        java.lang.System.@out.println(new StringBuilder().append("writing ").append(unigramCount).toString());
        dataOutputStream.writeInt(unigramCount);
        for (int i = 0; i < unigramCount; i++)
        {
            dataOutputStream.writeFloat(this.unigramSmearTerm[i]);
        }
        for (int i = 0; i < unigramCount; i++)
        {
            java.lang.System.@out.println(new StringBuilder().append("Writing ").append(i).append(" of ").append(unigramCount).toString());
            NGramBuffer bigramBuffer = this.getBigramBuffer(i);
            if (bigramBuffer == null)
            {
                // No followers for this unigram: record an empty bigram section.
                dataOutputStream.writeInt(0);
                continue;
            }
            int bigramCount = bigramBuffer.getNumberNGrams();
            dataOutputStream.writeInt(bigramCount);
            for (int j = 0; j < bigramCount; j++)
            {
                int wordID = bigramBuffer.getWordID(j);
                Float smearTerm = this.getSmearTerm(i, wordID);
                dataOutputStream.writeInt(wordID);
                dataOutputStream.writeFloat(smearTerm.floatValue());
            }
        }
    }
    finally
    {
        dataOutputStream.close();
    }
}
/// <summary>
/// Resets the n-gram buffer caches and logs cache statistics.
/// </summary>
/// <remarks>
/// Steps, in order:
///   1. every cached bigram buffer that was not marked used is dropped from
///      its slot; every buffer that was marked used has its flag reset;
///   2. the bigram buffer array is replaced by a fresh one sized to the unigrams;
///   3. the maps at indices 1 .. maxDepth-1 of loadedNGramBuffers are replaced;
///   4. cache size / hits / misses are logged;
///   5. the probability cache is recreated when clearCacheAfterUtterance is set.
/// </remarks>
private void clearCache()
{
    for (int slot = 0; slot < this.loadedBigramBuffers.Length; slot++)
    {
        NGramBuffer cached = this.loadedBigramBuffers[slot];
        if (cached == null)
        {
            continue;
        }
        if (cached.getUsed())
        {
            cached.setUsed(false);
        }
        else
        {
            this.loadedBigramBuffers[slot] = null;
        }
    }

    this.loadedBigramBuffers = new NGramBuffer[this.unigrams.Length];

    // Index 0 is left untouched; only the maps for orders 2..maxDepth are renewed.
    for (int mapIndex = 1; mapIndex < this.loader.getMaxDepth(); mapIndex++)
    {
        this.loadedNGramBuffers[mapIndex] = new HashMap();
    }

    StringBuilder stats = new StringBuilder();
    stats.append("LM Cache Size: ").append(this.ngramProbCache.size());
    stats.append(" Hits: ").append(this.ngramHits);
    stats.append(" Misses: ").append(this.ngramMisses);
    this.logger.info(stats.toString());

    if (!this.clearCacheAfterUtterance)
    {
        return;
    }
    this.ngramProbCache = new LRUCache(this.ngramCacheSize);
}
/// <summary>
/// Reads smear information from a binary file produced by writeSmearInfo and
/// populates unigramSmearTerm and the bigram smear map.
/// </summary>
/// <remarks>
/// Expected layout: magic int (-1060454374), unigram count, one float per
/// unigram, then per unigram a bigram count followed by a
/// (word ID int, smear term float) pair for each bigram.
///
/// Changes compared with the previous version:
///   * the stream is closed in a finally block, so it is released on every
///     exit path and not only on the three hand-written error paths;
///   * a unigram without a bigram buffer is treated as having 0 bigrams
///     (writeSmearInfo writes 0 for it) instead of dereferencing null;
///   * the word ID that writeSmearInfo stores in front of every smear term is
///     now consumed and verified; previously it was skipped, which shifted
///     all following reads.
/// </remarks>
/// <param name="text">Path of the smear file to read.</param>
/// <exception cref="IOException">
/// When the magic number, the unigram count, a bigram count or a stored word
/// ID does not match the loaded model.
/// </exception>
private void readSmearInfo(string text)
{
    const int smearMagic = -1060454374;
    DataInputStream dataInputStream = new DataInputStream(new FileInputStream(text));
    try
    {
        if (dataInputStream.readInt() != smearMagic)
        {
            throw new IOException(new StringBuilder().append("Bad smear format for ").append(text).toString());
        }
        if (dataInputStream.readInt() != this.unigrams.Length)
        {
            throw new IOException(new StringBuilder().append("Bad unigram length in ").append(text).toString());
        }

        this.bigramSmearMap = new HashMap();
        this.unigramSmearTerm = new float[this.unigrams.Length];
        // The console call was mangled in this file; restored as the IKVM form of System.out.println.
        java.lang.System.@out.println(new StringBuilder().append("Reading ").append(this.unigrams.Length).toString());
        for (int i = 0; i < this.unigrams.Length; i++)
        {
            this.unigramSmearTerm[i] = dataInputStream.readFloat();
        }

        for (int i = 0; i < this.unigrams.Length; i++)
        {
            java.lang.System.@out.println(new StringBuilder().append("Processed ").append(i).append(" of ").append(this.loadedBigramBuffers.Length).toString());
            int storedCount = dataInputStream.readInt();
            NGramBuffer bigramBuffer = this.getBigramBuffer(i);
            // A missing buffer corresponds to the 0 the writer emits for it.
            int expectedCount = (bigramBuffer == null) ? 0 : bigramBuffer.getNumberNGrams();
            if (expectedCount != storedCount)
            {
                throw new IOException(new StringBuilder().append("Bad ngrams for unigram ").append(i).append(" Found ").append(storedCount).append(" expected ").append(expectedCount).toString());
            }
            for (int j = 0; j < storedCount; j++)
            {
                int wordID = bigramBuffer.getWordID(j);
                // The writer stores the word ID before each smear term.
                int storedWordID = dataInputStream.readInt();
                if (storedWordID != wordID)
                {
                    throw new IOException(new StringBuilder().append("Bad word ID for unigram ").append(i).append(" Found ").append(storedWordID).append(" expected ").append(wordID).toString());
                }
                this.putSmearTerm(i, wordID, dataInputStream.readFloat());
            }
        }
    }
    finally
    {
        dataInputStream.close();
    }
}
/// <summary>
/// Returns the n-gram buffer for the given word sequence, consulting the
/// per-size cache first and loading the buffer on a miss.
/// </summary>
/// <remarks>
/// The cache is only consulted when the sequence has more than one word. A
/// freshly loaded buffer is stored in the map at index size-1 regardless of
/// the sequence size.
/// </remarks>
/// <param name="wordSequence">Sequence whose buffer is wanted.</param>
/// <returns>The buffer, or null when loadNGramBuffer yields none.</returns>
private NGramBuffer getNGramBuffer(WordSequence wordSequence)
{
    int length = wordSequence.size();

    if (length > 1)
    {
        NGramBuffer cached = (NGramBuffer)this.loadedNGramBuffers[length - 1].get(wordSequence);
        if (cached != null)
        {
            return cached;
        }
    }

    NGramBuffer loaded = this.loadNGramBuffer(wordSequence);
    if (loaded == null)
    {
        return null;
    }

    this.loadedNGramBuffers[length - 1].put(wordSequence, loaded);
    return loaded;
}
/// <summary>
/// Finds the n-gram entry for the given word sequence.
/// </summary>
/// <remarks>
/// The buffer is looked up under the sequence returned by getOldest(), in the
/// map at index size-1; on a miss it is fetched through getNGramBuffer and,
/// when found, stored in that same map. The entry is then searched in the
/// buffer by the ID of the word at position size-1.
/// </remarks>
/// <param name="wordSequence">The full n-gram to look up.</param>
/// <returns>The matching entry, or null when no buffer is available or the
/// buffer's own lookup returns null.</returns>
private NGramProbability findNGram(WordSequence wordSequence)
{
    int length = wordSequence.size();
    WordSequence history = wordSequence.getOldest();

    NGramBuffer buffer = (NGramBuffer)this.loadedNGramBuffers[length - 1].get(history);
    if (buffer == null)
    {
        buffer = this.getNGramBuffer(history);
        if (buffer == null)
        {
            return null;
        }
        this.loadedNGramBuffers[length - 1].put(history, buffer);
    }

    int lastWordID = this.getWordID(wordSequence.getWord(length - 1));
    return buffer.findNGram(lastWordID);
}
// Computes the smear terms: fills unigramSmearTerm (one float per unigram) and
// stores one term per bigram through putSmearTerm, after resetting
// bigramSmearMap to a new HashMap.
//
// Pass 1 (over unigrams): with p = logMath.logToLinear(logp),
//   num  accumulates p * logp        (printed second, under the label "S0")
//   num2 accumulates p * logp * logp (printed first, under the label "R0")
//
// Pass 2 (over loadedBigramBuffers indices j):
//   - no bigram buffer  -> unigramSmearTerm[j] = 0
//   - otherwise array[j] (numerator) and array2[j] (denominator) are summed
//     over the buffer's entries, using the unigram backoff (num5 linear,
//     logBackoff log), the follower's unigram value and the bigram value from
//     ngramProbTable[1]; array3[j] keeps array2[j] + num5 * num taken BEFORE
//     num5 * num2 is added to array2[j]; the term is array[j] / array2[j].
//   - num11 / array5..array9 / array6 are decompiler temporaries that only
//     alias array / array2 at index j for the "+=" updates.
//
// Pass 3 (over every bigram (j, wordID2)):
//   - backoff value comes from ngramBackoffTable[2];
//   - no trigram buffer -> the term is unigramSmearTerm[wordID2];
//   - otherwise a numerator/denominator pair is summed over the trigram
//     entries (ngramProbTable[2], getBigramProb, unigram value), corrected
//     with array/array2/array3 at wordID2, and smearTermCount is incremented.
//
// NOTE(review): the pass-2 denominator multiplies by the follower's unigram
// value once, the pass-3 denominator multiplies by it twice -- confirm that
// this asymmetry is intended.
// NOTE(review): the "[email protected](" tokens below look like a console
// print call (presumably java.lang.System.@out.println) mangled by an
// e-mail obfuscation filter -- confirm against the original source.
private void buildSmearInfo() { double num = (double)0f; double num2 = (double)0f; this.bigramSmearMap = new HashMap(); double[] array = new double[this.unigrams.Length]; double[] array2 = new double[this.unigrams.Length]; double[] array3 = new double[this.unigrams.Length]; this.unigramSmearTerm = new float[this.unigrams.Length]; UnigramProbability[] array4 = this.unigrams; int num3 = array4.Length; for (int i = 0; i < num3; i++) { UnigramProbability unigramProbability = array4[i]; float logProbability = unigramProbability.getLogProbability(); double num4 = this.logMath.logToLinear(logProbability); num += num4 * (double)logProbability; num2 += num4 * (double)logProbability * (double)logProbability; } [email protected](new StringBuilder().append("R0 S0 ").append(num2).append(' ').append(num).toString()); for (int j = 0; j < this.loadedBigramBuffers.Length; j++) { NGramBuffer bigramBuffer = this.getBigramBuffer(j); if (bigramBuffer == null) { this.unigramSmearTerm[j] = 0f; } else { array[j] = (double)0f; array2[j] = (double)0f; array3[j] = (double)0f; float logBackoff = this.unigrams[j].getLogBackoff(); double num5 = this.logMath.logToLinear(logBackoff); int num11; double[] array6; for (int k = 0; k < bigramBuffer.getNumberNGrams(); k++) { int wordID = bigramBuffer.getWordID(k); NGramProbability ngramProbability = bigramBuffer.getNGramProbability(k); float logProbability2 = this.unigrams[wordID].getLogProbability(); float num6 = this.ngramProbTable[1][ngramProbability.getProbabilityID()]; double num7 = this.logMath.logToLinear(logProbability2); double num8 = this.logMath.logToLinear(num6); double num9 = num5 * num7; double num10 = (double)this.logMath.linearToLog(num9); double[] array5 = array; num11 = j; array6 = array5; array6[num11] += (num8 * (double)num6 - num9 * num10) * (double)logProbability2; double[] array7 = array2; num11 = j; array6 = array7; array6[num11] += (num8 - num9) * (double)logProbability2; } double[] array8 = array; num11 = j; array6 = array8; 
array6[num11] += num5 * ((double)logBackoff * num + num2); array3[j] = array2[j] + num5 * num; double[] array9 = array2; num11 = j; array6 = array9; array6[num11] += num5 * num2; this.unigramSmearTerm[j] = (float)(array[j] / array2[j]); } } for (int j = 0; j < this.loadedBigramBuffers.Length; j++) { [email protected](new StringBuilder().append("Processed ").append(j).append(" of ").append(this.loadedBigramBuffers.Length).toString()); NGramBuffer bigramBuffer = this.getBigramBuffer(j); if (bigramBuffer != null) { for (int i = 0; i < bigramBuffer.getNumberNGrams(); i++) { NGramProbability ngramProbability2 = bigramBuffer.getNGramProbability(i); float num12 = this.ngramBackoffTable[2][ngramProbability2.getBackoffID()]; double num13 = this.logMath.logToLinear(num12); int wordID2 = bigramBuffer.getWordID(i); NGramBuffer ngramBuffer = this.loadTrigramBuffer(j, wordID2); float num14; if (ngramBuffer == null) { num14 = this.unigramSmearTerm[wordID2]; } else { double num7 = (double)0f; double num8 = (double)0f; for (int l = 0; l < ngramBuffer.getNumberNGrams(); l++) { int wordID3 = ngramBuffer.getWordID(l); float num15 = this.ngramProbTable[2][ngramBuffer.getProbabilityID(l)]; double num16 = this.logMath.logToLinear(num15); float bigramProb = this.getBigramProb(wordID2, wordID3); double num17 = this.logMath.logToLinear(bigramProb); float logProbability3 = this.unigrams[wordID3].getLogProbability(); double num18 = num13 * num17; double num19 = (double)this.logMath.linearToLog(num18); num7 += (num16 * (double)num15 - num18 * num19) * (double)logProbability3; num8 += (num16 - num18) * (double)logProbability3 * (double)logProbability3; } num7 += num13 * ((double)num12 * array3[wordID2] - array[wordID2]); num8 += num13 * array2[wordID2]; num14 = (float)(num7 / num8); this.smearTermCount++; } this.putSmearTerm(j, wordID2, num14); } } } [email protected](new StringBuilder().append("Smear count is ").append(this.smearTermCount).toString()); }
/// <summary>
/// Loads from the model file the buffer of n-grams that follow the given word
/// sequence. The order of the loaded n-grams is the sequence size plus one.
/// </summary>
/// <remarks>
/// For order 2 the range comes from the first word's unigram entry
/// (first bigram entry and number of followers + 1). For higher orders the
/// range is derived from the buffer of the getOldest() sequence: the entry for
/// the last word and the entry after it delimit the followers.
///
/// Each entry occupies 2 fields at the maximum depth and 4 fields otherwise;
/// a field is loader.getBytesPerField() bytes wide.
///
/// Changes compared with the previous version:
///   * the order-2 file position is computed in 64-bit arithmetic; it used to
///     be multiplied as int and only then widened, which overflows for large
///     entry indices (the higher-order branch already used long);
///   * a null history buffer now yields null instead of a null dereference.
/// </remarks>
/// <param name="wordSequence">History whose followers should be loaded.</param>
/// <returns>The loaded buffer, or null when there are no followers.</returns>
/// <exception cref="Error">Wraps an IOException raised while reading the buffer.</exception>
private NGramBuffer loadNGramBuffer(WordSequence wordSequence)
{
    int firstWordID = this.getWordID(wordSequence.getWord(0));
    int order = wordSequence.size() + 1;
    int firstEntry = this.unigrams[firstWordID].getFirstBigramEntry();
    int entryCount = this.getNumberBigramFollowers(firstWordID) + 1;
    if (entryCount == 1)
    {
        // The first word has no bigram followers at all.
        return null;
    }

    bool isMaxDepth = this.loader.getMaxDepth() == order;
    int fieldsPerEntry = isMaxDepth ? 2 : 4;
    int bytesPerField = this.loader.getBytesPerField();

    if (order != 2)
    {
        int lastWordID = this.getWordID(wordSequence.getWord(wordSequence.size() - 1));
        NGramBuffer historyBuffer = this.getNGramBuffer(wordSequence.getOldest());
        if (historyBuffer == null)
        {
            return null;
        }
        int historyIndex = historyBuffer.findNGramIndex(lastWordID);
        if (historyIndex == -1)
        {
            return null;
        }
        int historyFirstEntry = historyBuffer.getFirstNGramEntry();
        firstEntry = this.getFirstNGramEntry(historyBuffer.getNGramProbability(historyIndex), historyFirstEntry, order);
        int nextFirstEntry = this.getFirstNGramEntry(historyBuffer.getNGramProbability(historyIndex + 1), historyFirstEntry, order);
        entryCount = nextFirstEntry - firstEntry;
        if (entryCount == 0)
        {
            return null;
        }
        if (!isMaxDepth)
        {
            // Below the maximum depth one extra entry is loaded after the followers.
            entryCount++;
        }
    }

    int size = entryCount * fieldsPerEntry * bytesPerField;
    // Widen before multiplying so the byte offset cannot wrap around in int.
    long position = this.loader.getNGramOffset(order) + (long)firstEntry * (long)fieldsPerEntry * (long)bytesPerField;

    try
    {
        byte[] raw = this.loader.loadBuffer(position, size);
        if (isMaxDepth)
        {
            return new NMaxGramBuffer(raw, entryCount, this.loader.getBigEndian(), this.is32bits(), order, firstEntry);
        }
        return new NGramBuffer(raw, entryCount, this.loader.getBigEndian(), this.is32bits(), order, firstEntry);
    }
    catch (IOException ex)
    {
        throw new Error(new StringBuilder().append("Error loading ").append(order).append("-Grams.").toString(), ex);
    }
}