Пример #1
0
        /// <summary>
        /// Returns the probability-table value stored for the bigram (num, num2).
        /// </summary>
        /// <param name="num">ID of the first word; used to fetch its bigram buffer.</param>
        /// <param name="num2">ID of the second word; searched for inside that buffer.</param>
        /// <returns>The entry of <c>ngramProbTable[1]</c> selected by the bigram's probability ID.</returns>
        /// <remarks>
        /// NOTE(review): neither the buffer nor the lookup result is null-checked here,
        /// although other methods in this file do check <c>getBigramBuffer</c> for null.
        /// Presumably callers only ask for bigrams known to exist; confirm.
        /// </remarks>
        private float getBigramProb(int num, int num2)
        {
            NGramProbability match = this.getBigramBuffer(num).findNGram(num2);

            return this.ngramProbTable[1][match.getProbabilityID()];
        }
Пример #2
0
        /// <summary>
        /// Writes the smear terms to a binary file.
        /// </summary>
        /// <remarks>
        /// Layout, in write order: the format marker (-1060454374, i.e. 0xC0CAC01A),
        /// the unigram count, one float per unigram from <c>unigramSmearTerm</c>, then
        /// for every unigram its bigram count followed by (word ID, smear term) pairs.
        /// A unigram without a bigram buffer is recorded with a count of 0.
        /// </remarks>
        /// <param name="text">Path of the file to create.</param>
        private void writeSmearInfo(string text)
        {
            DataOutputStream output = new DataOutputStream(new FileOutputStream(text));

            // try/finally so the file handle is released even when a write fails.
            try
            {
                output.writeInt(-1060454374);
                java.lang.System.@out.println(new StringBuilder().append("writing ").append(this.unigrams.Length).toString());
                output.writeInt(this.unigrams.Length);

                for (int i = 0; i < this.unigrams.Length; i++)
                {
                    output.writeFloat(this.unigramSmearTerm[i]);
                }

                for (int i = 0; i < this.unigrams.Length; i++)
                {
                    java.lang.System.@out.println(new StringBuilder().append("Writing ").append(i).append(" of ").append(this.unigrams.Length).toString());
                    NGramBuffer bigramBuffer = this.getBigramBuffer(i);
                    if (bigramBuffer == null)
                    {
                        // No bigrams for this unigram: emit an empty section.
                        output.writeInt(0);
                        continue;
                    }

                    output.writeInt(bigramBuffer.getNumberNGrams());
                    for (int j = 0; j < bigramBuffer.getNumberNGrams(); j++)
                    {
                        int   wordID    = bigramBuffer.getWordID(j);
                        Float smearTerm = this.getSmearTerm(i, wordID);
                        output.writeInt(wordID);
                        output.writeFloat(smearTerm.floatValue());
                    }
                }
            }
            finally
            {
                output.close();
            }
        }
Пример #3
0
 /// <summary>
 /// Resets the n-gram buffer caches and logs probability-cache statistics.
 /// </summary>
 /// <remarks>
 /// Buffers flagged as used get their flag cleared; unused ones are dropped from
 /// the bigram array. The bigram array is then replaced by a fresh one anyway, so
 /// only the flag reset can outlive this call. The per-order maps at indices
 /// 1 .. maxDepth-1 are re-created, and the probability cache is re-created only
 /// when <c>clearCacheAfterUtterance</c> is set.
 /// </remarks>
 private void clearCache()
 {
     for (int slot = 0; slot < this.loadedBigramBuffers.Length; slot++)
     {
         NGramBuffer cached = this.loadedBigramBuffers[slot];
         if (cached == null)
         {
             continue;
         }
         if (cached.getUsed())
         {
             cached.setUsed(false);
         }
         else
         {
             this.loadedBigramBuffers[slot] = null;
         }
     }

     this.loadedBigramBuffers = new NGramBuffer[this.unigrams.Length];

     // Index 0 is left alone; orders 2..maxDepth live at indices 1..maxDepth-1.
     for (int index = 1; index < this.loader.getMaxDepth(); index++)
     {
         this.loadedNGramBuffers[index] = new HashMap();
     }

     StringBuilder stats = new StringBuilder();
     stats.append("LM Cache Size: ").append(this.ngramProbCache.size());
     stats.append(" Hits: ").append(this.ngramHits);
     stats.append(" Misses: ").append(this.ngramMisses);
     this.logger.info(stats.toString());

     if (!this.clearCacheAfterUtterance)
     {
         return;
     }
     this.ngramProbCache = new LRUCache(this.ngramCacheSize);
 }
Пример #4
0
        /// <summary>
        /// Loads smear terms from a binary file into <c>unigramSmearTerm</c> and,
        /// through <c>putSmearTerm</c>, the bigram smear storage.
        /// </summary>
        /// <remarks>
        /// The file must start with the format marker -1060454374 (0xC0CAC01A) followed
        /// by a unigram count equal to <c>unigrams.Length</c>. Each per-unigram bigram
        /// count must equal the size of the corresponding bigram buffer; a unigram with
        /// no bigram buffer is expected to have a stored count of 0, which is what
        /// <c>writeSmearInfo</c> writes in that case. Only the float of each stored pair
        /// is read; the word ID comes from the buffer.
        /// NOTE(review): <c>writeSmearInfo</c> writes an int word ID before each float,
        /// but this reader performs a single <c>readFloat</c> per entry. Confirm the
        /// two formats really agree.
        /// </remarks>
        /// <param name="text">Path of the smear file to read.</param>
        /// <exception cref="IOException">
        /// The marker, the unigram count or a bigram count does not match.
        /// </exception>
        private void readSmearInfo(string text)
        {
            DataInputStream input = new DataInputStream(new FileInputStream(text));

            // A single finally replaces the per-branch close() calls and also covers
            // exceptions raised by the reads themselves.
            try
            {
                if (input.readInt() != -1060454374)
                {
                    throw new IOException(new StringBuilder().append("Bad smear format for ").append(text).toString());
                }
                if (input.readInt() != this.unigrams.Length)
                {
                    throw new IOException(new StringBuilder().append("Bad unigram length in ").append(text).toString());
                }

                this.bigramSmearMap   = new HashMap();
                this.unigramSmearTerm = new float[this.unigrams.Length];
                java.lang.System.@out.println(new StringBuilder().append("Reading ").append(this.unigrams.Length).toString());

                for (int i = 0; i < this.unigrams.Length; i++)
                {
                    this.unigramSmearTerm[i] = input.readFloat();
                }

                for (int i = 0; i < this.unigrams.Length; i++)
                {
                    java.lang.System.@out.println(new StringBuilder().append("Processed ").append(i).append(" of ").append(this.loadedBigramBuffers.Length).toString());
                    int         storedCount  = input.readInt();
                    NGramBuffer bigramBuffer = this.getBigramBuffer(i);

                    // A missing buffer means "no bigrams"; the writer stores 0 for it.
                    int expectedCount = (bigramBuffer == null) ? 0 : bigramBuffer.getNumberNGrams();
                    if (expectedCount != storedCount)
                    {
                        throw new IOException(new StringBuilder().append("Bad ngrams for unigram ").append(i).append(" Found ").append(storedCount).append(" expected ").append(expectedCount).toString());
                    }

                    for (int j = 0; j < storedCount; j++)
                    {
                        int wordID = bigramBuffer.getWordID(j);
                        this.putSmearTerm(i, wordID, input.readFloat());
                    }
                }
            }
            finally
            {
                input.close();
            }
        }
Пример #5
0
        /// <summary>
        /// Returns the n-gram buffer for the given word sequence, loading it through
        /// <c>loadNGramBuffer</c> and caching it when it is not already cached.
        /// </summary>
        /// <remarks>
        /// The cache map is chosen by sequence length (<c>loadedNGramBuffers[size - 1]</c>).
        /// Sequences of length 1 are never looked up in the cache, yet a buffer loaded
        /// for them is still stored at index 0.
        /// </remarks>
        /// <param name="wordSequence">Sequence whose follower buffer is wanted.</param>
        /// <returns>The buffer, or null when <c>loadNGramBuffer</c> yields none.</returns>
        private NGramBuffer getNGramBuffer(WordSequence wordSequence)
        {
            int order = wordSequence.size();

            NGramBuffer cached = (order > 1)
                ? (NGramBuffer)this.loadedNGramBuffers[order - 1].get(wordSequence)
                : null;
            if (cached != null)
            {
                return cached;
            }

            NGramBuffer loaded = this.loadNGramBuffer(wordSequence);
            if (loaded != null)
            {
                this.loadedNGramBuffers[order - 1].put(wordSequence, loaded);
            }
            return loaded;
        }
Пример #6
0
        /// <summary>
        /// Finds the probability entry for a word sequence by locating the buffer of
        /// its <c>getOldest()</c> sub-sequence and searching it for the last word's ID.
        /// </summary>
        /// <remarks>
        /// The buffer is looked up in, and on a miss stored into,
        /// <c>loadedNGramBuffers[size - 1]</c> keyed by the sub-sequence. On a miss it
        /// is obtained from <c>getNGramBuffer</c>.
        /// </remarks>
        /// <param name="wordSequence">The full n-gram to look up.</param>
        /// <returns>
        /// The result of the buffer's <c>findNGram</c> call, or null when no buffer is available.
        /// </returns>
        private NGramProbability findNGram(WordSequence wordSequence)
        {
            int          order   = wordSequence.size();
            WordSequence history = wordSequence.getOldest();

            NGramBuffer buffer = (NGramBuffer)this.loadedNGramBuffers[order - 1].get(history);
            if (buffer == null)
            {
                buffer = this.getNGramBuffer(history);
                if (buffer == null)
                {
                    return null;
                }
                this.loadedNGramBuffers[order - 1].put(history, buffer);
            }

            int lastWordId = this.getWordID(wordSequence.getWord(order - 1));
            return buffer.findNGram(lastWordId);
        }
Пример #7
0
        /// <summary>
        /// Recomputes the unigram smear terms (<c>unigramSmearTerm</c>) and the bigram
        /// smear terms (stored through <c>putSmearTerm</c>) from the language model.
        /// </summary>
        /// <remarks>
        /// Runs in three passes:
        /// 1. Over all unigrams, accumulating S0 = sum(p * log p) and
        ///    R0 = sum(p * log p * log p), where p is the linear unigram probability.
        /// 2. Over every unigram's bigram buffer, building a numerator, a denominator
        ///    and an auxiliary sum, then storing numerator / denominator as the unigram
        ///    smear term. Unigrams without a bigram buffer get 0.
        /// 3. Over every bigram, using its trigram buffer to compute a bigram smear
        ///    term, or falling back to the unigram smear term of the bigram's second
        ///    word when there is no trigram buffer.
        /// Progress is printed to standard output. The arithmetic is kept term for term.
        /// NOTE(review): the per-bigram denominator term in pass 2 uses the log
        /// probability once, while the matching term in pass 3 squares it. Confirm
        /// against the intended formula before changing either.
        /// </remarks>
        private void buildSmearInfo()
        {
            double s0 = 0.0;   // sum of p * log p over all unigrams
            double r0 = 0.0;   // sum of p * (log p)^2 over all unigrams

            this.bigramSmearMap = new HashMap();
            double[] unigramNumerator   = new double[this.unigrams.Length];
            double[] unigramDenominator = new double[this.unigrams.Length];
            double[] unigramAux         = new double[this.unigrams.Length];
            this.unigramSmearTerm = new float[this.unigrams.Length];

            // Pass 1: global unigram sums.
            foreach (UnigramProbability unigram in this.unigrams)
            {
                float  logProb = unigram.getLogProbability();
                double prob    = this.logMath.logToLinear(logProb);
                s0 += prob * logProb;
                r0 += prob * logProb * logProb;
            }
            java.lang.System.@out.println(new StringBuilder().append("R0 S0 ").append(r0).append(' ').append(s0).toString());

            // Pass 2: unigram smear terms.
            for (int first = 0; first < this.loadedBigramBuffers.Length; first++)
            {
                NGramBuffer bigramBuffer = this.getBigramBuffer(first);
                if (bigramBuffer == null)
                {
                    this.unigramSmearTerm[first] = 0f;
                    continue;
                }

                unigramNumerator[first]   = 0.0;
                unigramDenominator[first] = 0.0;
                unigramAux[first]         = 0.0;
                float  logUnigramBackoff = this.unigrams[first].getLogBackoff();
                double unigramBackoff    = this.logMath.logToLinear(logUnigramBackoff);

                for (int k = 0; k < bigramBuffer.getNumberNGrams(); k++)
                {
                    int              second            = bigramBuffer.getWordID(k);
                    NGramProbability bigramEntry       = bigramBuffer.getNGramProbability(k);
                    float            logUnigramProb    = this.unigrams[second].getLogProbability();
                    float            logBigramProb     = this.ngramProbTable[1][bigramEntry.getProbabilityID()];
                    double           unigramProb       = this.logMath.logToLinear(logUnigramProb);
                    double           bigramProb        = this.logMath.logToLinear(logBigramProb);
                    double           backedOffProb     = unigramBackoff * unigramProb;
                    double           logBackedOffProb  = (double)this.logMath.linearToLog(backedOffProb);

                    unigramNumerator[first]   += (bigramProb * logBigramProb - backedOffProb * logBackedOffProb) * logUnigramProb;
                    unigramDenominator[first] += (bigramProb - backedOffProb) * logUnigramProb;
                }

                unigramNumerator[first] += unigramBackoff * (logUnigramBackoff * s0 + r0);
                // The auxiliary sum must be taken before the R0 correction below.
                unigramAux[first]          = unigramDenominator[first] + unigramBackoff * s0;
                unigramDenominator[first] += unigramBackoff * r0;
                this.unigramSmearTerm[first] = (float)(unigramNumerator[first] / unigramDenominator[first]);
            }

            // Pass 3: bigram smear terms.
            for (int first = 0; first < this.loadedBigramBuffers.Length; first++)
            {
                java.lang.System.@out.println(new StringBuilder().append("Processed ").append(first).append(" of ").append(this.loadedBigramBuffers.Length).toString());
                NGramBuffer bigramBuffer = this.getBigramBuffer(first);
                if (bigramBuffer == null)
                {
                    continue;
                }

                for (int i = 0; i < bigramBuffer.getNumberNGrams(); i++)
                {
                    NGramProbability bigramEntry      = bigramBuffer.getNGramProbability(i);
                    float            logBigramBackoff = this.ngramBackoffTable[2][bigramEntry.getBackoffID()];
                    double           bigramBackoff    = this.logMath.logToLinear(logBigramBackoff);
                    int              second           = bigramBuffer.getWordID(i);
                    NGramBuffer      trigramBuffer    = this.loadTrigramBuffer(first, second);
                    float            smearTerm;

                    if (trigramBuffer == null)
                    {
                        // No trigrams follow this bigram: reuse the unigram term.
                        smearTerm = this.unigramSmearTerm[second];
                    }
                    else
                    {
                        double numerator   = 0.0;
                        double denominator = 0.0;
                        for (int l = 0; l < trigramBuffer.getNumberNGrams(); l++)
                        {
                            int    third            = trigramBuffer.getWordID(l);
                            float  logTrigramProb   = this.ngramProbTable[2][trigramBuffer.getProbabilityID(l)];
                            double trigramProb      = this.logMath.logToLinear(logTrigramProb);
                            float  logBigramProb    = this.getBigramProb(second, third);
                            double bigramProb       = this.logMath.logToLinear(logBigramProb);
                            float  logUnigramProb   = this.unigrams[third].getLogProbability();
                            double backedOffProb    = bigramBackoff * bigramProb;
                            double logBackedOffProb = (double)this.logMath.linearToLog(backedOffProb);

                            numerator   += (trigramProb * logTrigramProb - backedOffProb * logBackedOffProb) * logUnigramProb;
                            denominator += (trigramProb - backedOffProb) * logUnigramProb * logUnigramProb;
                        }
                        numerator   += bigramBackoff * (logBigramBackoff * unigramAux[second] - unigramNumerator[second]);
                        denominator += bigramBackoff * unigramDenominator[second];
                        smearTerm    = (float)(numerator / denominator);
                        this.smearTermCount++;
                    }
                    this.putSmearTerm(first, second, smearTerm);
                }
            }
            java.lang.System.@out.println(new StringBuilder().append("Smear count is ").append(this.smearTermCount).toString());
        }
Пример #8
0
        /// <summary>
        /// Loads from the model file the buffer holding the n-grams that extend the
        /// given word sequence; the buffer's order is <c>wordSequence.size() + 1</c>.
        /// </summary>
        /// <remarks>
        /// For order 2 the entry range comes straight from the first word's unigram
        /// record. For higher orders it is derived from the buffer of the
        /// <c>getOldest()</c> sub-sequence: the entry for the last word and the entry
        /// after it delimit the range. Each entry occupies 4 fields, or 2 fields when
        /// the order equals the loader's maximum depth; below the maximum depth one
        /// extra entry is loaded.
        /// </remarks>
        /// <param name="wordSequence">History whose followers are to be loaded.</param>
        /// <returns>
        /// The loaded buffer, or null when the first word has no bigram followers, the
        /// parent buffer is unavailable, the last word is not found in it, or the
        /// resulting range is empty.
        /// </returns>
        /// <exception cref="Error">Wraps an <c>IOException</c> raised while reading.</exception>
        private NGramBuffer loadNGramBuffer(WordSequence wordSequence)
        {
            int firstWordId = this.getWordID(wordSequence.getWord(0));
            int order       = wordSequence.size() + 1;
            int firstEntry  = this.unigrams[firstWordId].getFirstBigramEntry();
            int entryCount  = this.getNumberBigramFollowers(firstWordId) + 1;

            // A count of 1 means the first word has no bigram followers at all.
            if (entryCount == 1)
            {
                return null;
            }

            if (order != 2)
            {
                int         lastWordId   = this.getWordID(wordSequence.getWord(wordSequence.size() - 1));
                NGramBuffer parentBuffer = this.getNGramBuffer(wordSequence.getOldest());
                if (parentBuffer == null)
                {
                    // getNGramBuffer can return null; without a parent there is nothing to extend.
                    return null;
                }

                int parentIndex = parentBuffer.findNGramIndex(lastWordId);
                if (parentIndex == -1)
                {
                    return null;
                }

                int parentFirstEntry = parentBuffer.getFirstNGramEntry();
                firstEntry = this.getFirstNGramEntry(parentBuffer.getNGramProbability(parentIndex), parentFirstEntry, order);
                int nextFirstEntry = this.getFirstNGramEntry(parentBuffer.getNGramProbability(parentIndex + 1), parentFirstEntry, order);

                entryCount = nextFirstEntry - firstEntry;
                if (entryCount == 0)
                {
                    return null;
                }
                if (this.loader.getMaxDepth() != order)
                {
                    entryCount++;
                }
            }

            bool isMaxOrder     = this.loader.getMaxDepth() == order;
            int  fieldsPerEntry = isMaxOrder ? 2 : 4;
            int  bytesPerField  = this.loader.getBytesPerField();
            int  size           = entryCount * fieldsPerEntry * bytesPerField;

            // Widen before multiplying: the entry index times the record size can
            // exceed the int range for large models.
            long position = this.loader.getNGramOffset(order) + (long)firstEntry * fieldsPerEntry * bytesPerField;

            try
            {
                byte[] data = this.loader.loadBuffer(position, size);
                if (isMaxOrder)
                {
                    return new NMaxGramBuffer(data, entryCount, this.loader.getBigEndian(), this.is32bits(), order, firstEntry);
                }
                return new NGramBuffer(data, entryCount, this.loader.getBigEndian(), this.is32bits(), order, firstEntry);
            }
            catch (IOException ex)
            {
                throw new Error(new StringBuilder().append("Error loading ").append(order).append("-Grams.").toString(), ex);
            }
        }