Ejemplo n.º 1
0
        // Returns the boxed log probability for wordSequence.
        //
        // Resolution order, as implemented below:
        //   1. If no n-grams of this length are loaded, or the first word has no unigram,
        //      retry with wordSequence.getNewest().
        //   2. Sequences shorter than two words use the unigram probability.
        //   3. An n-gram found by findNGram() supplies its value from ngramProbTable.
        //   4. A two-word sequence that was not found uses the first word's log backoff
        //      plus the second word's log probability.
        //   5. Otherwise the result is getProbability(getNewest()), with the backoff weight
        //      of getOldest() added when that shorter n-gram exists.
        private Float getNGramProbability(WordSequence wordSequence)
        {
            int  length    = wordSequence.size();
            Word firstWord = wordSequence.getWord(0);

            bool lengthLoaded = this.loader.getNumberNGrams(length) != 0;
            if (!(lengthLoaded && this.hasUnigram(firstWord)))
            {
                return this.getNGramProbability(wordSequence.getNewest());
            }

            if (length < 2)
            {
                return Float.valueOf(this.getUnigramProbability(wordSequence));
            }

            NGramProbability exactMatch = this.findNGram(wordSequence);
            if (exactMatch != null)
            {
                return Float.valueOf(this.ngramProbTable[length - 1][exactMatch.getProbabilityID()]);
            }

            if (length == 2)
            {
                UnigramProbability firstUnigram  = this.getUnigram(firstWord);
                UnigramProbability secondUnigram = this.getUnigram(wordSequence.getWord(1));
                return Float.valueOf(firstUnigram.getLogBackoff() + secondUnigram.getLogProbability());
            }

            NGramProbability olderMatch = this.findNGram(wordSequence.getOldest());
            if (olderMatch == null)
            {
                return Float.valueOf(this.getProbability(wordSequence.getNewest()));
            }
            return Float.valueOf(this.ngramBackoffTable[length - 1][olderMatch.getBackoffID()] + this.getProbability(wordSequence.getNewest()));
        }
Ejemplo n.º 2
0
        // Rewrites the log probability of every unigram in [0, numberNGrams[0]).
        //
        // For every entry except the one at startWordID, the stored log probability is
        // shifted by log(unigramWeight) and then combined (via logMath.addAsLinear) with
        // log(1 / numberNGrams[0]) + log(1 - unigramWeight).
        // When applyLanguageWeightAndWip is set, the result is additionally scaled by
        // languageWeight and offset by log(wip), and the entry's log backoff is scaled
        // by languageWeight.
        private void applyUnigramWeight()
        {
            float logWeight      = this.logMath.linearToLog((double)this.unigramWeight);
            float logRestWeight  = this.logMath.linearToLog((double)(1f - this.unigramWeight));
            float logUniform     = this.logMath.linearToLog((double)(1f / (float)this.numberNGrams[0]));
            float logWip         = this.logMath.linearToLog(this.wip);
            float logUniformPart = logUniform + logRestWeight;

            for (int id = 0; id < this.numberNGrams[0]; id++)
            {
                UnigramProbability entry   = this.unigrams[id];
                float              logProb = entry.getLogProbability();

                bool isStartWord = (id == this.startWordID);
                if (!isStartWord)
                {
                    logProb = logProb + logWeight;
                    logProb = this.logMath.addAsLinear(logProb, logUniformPart);
                }

                if (this.applyLanguageWeightAndWip)
                {
                    logProb = logProb * this.languageWeight + logWip;
                    entry.setLogBackoff(entry.getLogBackoff() * this.languageWeight);
                }

                entry.setLogProbability(logProb);
            }
        }
Ejemplo n.º 3
0
        // Returns the word ID stored on the unigram entry for the given word.
        // Throws IllegalArgumentException when getUnigram() has no entry for it.
        public int getWordID(Word word)
        {
            UnigramProbability entry = this.getUnigram(word);

            if (entry != null)
            {
                return entry.getWordID();
            }

            string message = new StringBuilder().append("No word ID: ").append(word).toString();
            throw new IllegalArgumentException(message);
        }
Ejemplo n.º 4
0
        // Returns the log probability of the first word of wordSequence, taken from its
        // unigram entry. Throws Error when getUnigram() has no entry for that word.
        private float getUnigramProbability(WordSequence wordSequence)
        {
            Word               firstWord = wordSequence.getWord(0);
            UnigramProbability entry     = this.getUnigram(firstWord);

            if (entry != null)
            {
                return entry.getLogProbability();
            }

            string message = new StringBuilder().append("Unigram not in LM: ").append(firstWord).toString();
            throw new Error(message);
        }
Ejemplo n.º 5
0
 // Reads num unigram records from dataInputStream; flag is forwarded unchanged to
 // readInt/readFloat.
 //
 // Each record is consumed as: int ID, float, float, int. An ID below 1 is replaced
 // by the record's index. For every record except the last, and only when assertions
 // are enabled, the ID must equal the index or an AssertionError is thrown. The two
 // floats are converted with logMath.log10ToLog before being stored.
 private UnigramProbability[] readUnigrams(DataInputStream dataInputStream, int num, bool flag)
 {
     UnigramProbability[] result = new UnigramProbability[num];

     for (int index = 0; index < num; index++)
     {
         int id = this.readInt(dataInputStream, flag);
         if (id < 1)
         {
             id = index;
         }

         bool isLastRecord = (index == num - 1);
         if (!isLastRecord && !BinaryLoader.assertionsDisabled && id != index)
         {
             throw new AssertionError();
         }

         // All three remaining fields are read before any conversion takes place.
         float rawFirst    = this.readFloat(dataInputStream, flag);
         float rawSecond   = this.readFloat(dataInputStream, flag);
         int   trailingInt = this.readInt(dataInputStream, flag);

         result[index] = new UnigramProbability(
             id,
             this.logMath.log10ToLog(rawFirst),
             this.logMath.log10ToLog(rawSecond),
             trailingInt);
     }

     return result;
 }
Ejemplo n.º 6
0
        // Computes the smear terms: one per unigram (stored in unigramSmearTerm) and one
        // per loaded bigram (stored through putSmearTerm).
        //
        // Pass 1 accumulates two sums over all unigrams, with p the linear probability
        // and logp the stored log probability:
        //     s0 = sum(p * logp)        r0 = sum(p * logp * logp)
        // Pass 2 derives a numerator/denominator pair per first word from its bigram
        // buffer and writes numerator / denominator into unigramSmearTerm.
        // Pass 3 walks every bigram, loads its trigram buffer and computes the bigram's
        // smear term; a bigram without a trigram buffer reuses the unigram smear term
        // of its second word.
        //
        // Progress is written to standard output. The three println calls were garbled
        // in the listing this code came from and are restored here in the usual IKVM form.
        private void buildSmearInfo()
        {
            double s0 = 0.0;
            double r0 = 0.0;

            this.bigramSmearMap = new HashMap();
            double[] numerators    = new double[this.unigrams.Length];
            double[] denominators  = new double[this.unigrams.Length];
            double[] averageTerms  = new double[this.unigrams.Length];
            this.unigramSmearTerm  = new float[this.unigrams.Length];

            // Pass 1: global sums over the unigram distribution.
            foreach (UnigramProbability unigram in this.unigrams)
            {
                float  logProb = unigram.getLogProbability();
                double prob    = this.logMath.logToLinear(logProb);
                s0 += prob * (double)logProb;
                r0 += prob * (double)logProb * (double)logProb;
            }
            java.lang.System.@out.println(new StringBuilder().append("R0 S0 ").append(r0).append(' ').append(s0).toString());

            // Pass 2: per-unigram smear terms.
            for (int first = 0; first < this.loadedBigramBuffers.Length; first++)
            {
                NGramBuffer bigrams = this.getBigramBuffer(first);
                if (bigrams == null)
                {
                    this.unigramSmearTerm[first] = 0f;
                    continue;
                }

                numerators[first]   = 0.0;
                denominators[first] = 0.0;
                averageTerms[first] = 0.0;
                float  logBackoff = this.unigrams[first].getLogBackoff();
                double backoff    = this.logMath.logToLinear(logBackoff);

                for (int k = 0; k < bigrams.getNumberNGrams(); k++)
                {
                    int              secondId         = bigrams.getWordID(k);
                    NGramProbability bigramEntry      = bigrams.getNGramProbability(k);
                    float            logUnigramProb   = this.unigrams[secondId].getLogProbability();
                    float            logBigramProb    = this.ngramProbTable[1][bigramEntry.getProbabilityID()];
                    double           unigramProb      = this.logMath.logToLinear(logUnigramProb);
                    double           bigramProb       = this.logMath.logToLinear(logBigramProb);
                    double           backedOffProb    = backoff * unigramProb;
                    double           logBackedOffProb = (double)this.logMath.linearToLog(backedOffProb);

                    numerators[first] += (bigramProb * (double)logBigramProb - backedOffProb * logBackedOffProb) * (double)logUnigramProb;
                    // NOTE(review): the trigram pass below multiplies its denominator by the
                    // unigram log probability twice; this one multiplies once. Kept as found --
                    // confirm whether the asymmetry is intended.
                    denominators[first] += (bigramProb - backedOffProb) * (double)logUnigramProb;
                }

                numerators[first]  += backoff * ((double)logBackoff * s0 + r0);
                // averageTerms must be taken before r0 is folded into the denominator.
                averageTerms[first] = denominators[first] + backoff * s0;
                denominators[first] += backoff * r0;
                this.unigramSmearTerm[first] = (float)(numerators[first] / denominators[first]);
            }

            // Pass 3: per-bigram smear terms.
            for (int first = 0; first < this.loadedBigramBuffers.Length; first++)
            {
                java.lang.System.@out.println(new StringBuilder().append("Processed ").append(first).append(" of ").append(this.loadedBigramBuffers.Length).toString());
                NGramBuffer bigrams = this.getBigramBuffer(first);
                if (bigrams == null)
                {
                    continue;
                }

                for (int b = 0; b < bigrams.getNumberNGrams(); b++)
                {
                    NGramProbability bigramEntry      = bigrams.getNGramProbability(b);
                    float            logBigramBackoff = this.ngramBackoffTable[2][bigramEntry.getBackoffID()];
                    double           bigramBackoff    = this.logMath.logToLinear(logBigramBackoff);
                    int              secondId         = bigrams.getWordID(b);
                    NGramBuffer      trigrams         = this.loadTrigramBuffer(first, secondId);
                    float            smearTerm;

                    if (trigrams == null)
                    {
                        smearTerm = this.unigramSmearTerm[secondId];
                    }
                    else
                    {
                        double numerator   = 0.0;
                        double denominator = 0.0;

                        for (int t = 0; t < trigrams.getNumberNGrams(); t++)
                        {
                            int    thirdId          = trigrams.getWordID(t);
                            float  logTrigramProb   = this.ngramProbTable[2][trigrams.getProbabilityID(t)];
                            double trigramProb      = this.logMath.logToLinear(logTrigramProb);
                            float  logBigramProb    = this.getBigramProb(secondId, thirdId);
                            double bigramProb       = this.logMath.logToLinear(logBigramProb);
                            float  logUnigramProb   = this.unigrams[thirdId].getLogProbability();
                            double backedOffProb    = bigramBackoff * bigramProb;
                            double logBackedOffProb = (double)this.logMath.linearToLog(backedOffProb);

                            numerator   += (trigramProb * (double)logTrigramProb - backedOffProb * logBackedOffProb) * (double)logUnigramProb;
                            denominator += (trigramProb - backedOffProb) * (double)logUnigramProb * (double)logUnigramProb;
                        }

                        numerator   += bigramBackoff * ((double)logBigramBackoff * averageTerms[secondId] - numerators[secondId]);
                        denominator += bigramBackoff * denominators[secondId];
                        smearTerm    = (float)(numerator / denominator);
                        this.smearTermCount++;
                    }

                    this.putSmearTerm(first, secondId, smearTerm);
                }
            }
            java.lang.System.@out.println(new StringBuilder().append("Smear count is ").append(this.smearTermCount).toString());
        }
Ejemplo n.º 7
0
        // Loads the model layout from inputStream: header, unigrams, the probability /
        // backoff / segment tables of each higher order, and the word strings, then
        // applies the unigram weight and, when applyLanguageWeightAndWip is set, the
        // language weight and word insertion probability.
        //
        // inputStream is wrapped in a buffered DataInputStream. The wrapper is closed when
        // loading finishes, including when a read or parse step throws.
        //
        // Throws Error when the word-string size read from the stream is not positive.
        protected internal virtual void loadModelLayout(InputStream inputStream)
        {
            DataInputStream dataInputStream = new DataInputStream(new BufferedInputStream(inputStream));

            try
            {
                this.readHeader(dataInputStream);
                // One record more than numberNGrams[0] is read for the unigrams.
                this.unigrams = this.readUnigrams(dataInputStream, this.numberNGrams[0] + 1, this.bigEndian);
                this.skipNGrams(dataInputStream);

                for (int order = 1; order < this.maxNGram; order++)
                {
                    if (this.numberNGrams[order] <= 0)
                    {
                        continue;
                    }

                    if (order == 1)
                    {
                        this.NGramProbTable[order] = this.readFloatTable(dataInputStream, this.bigEndian);
                        continue;
                    }

                    this.NGramBackoffTable[order] = this.readFloatTable(dataInputStream, this.bigEndian);
                    this.NGramProbTable[order]    = this.readFloatTable(dataInputStream, this.bigEndian);

                    // segmentSize is a single shifted bit, so it is never 0 or -1 and plain
                    // integer division is safe.
                    int segmentSize  = 1 << this.logNGramSegmentSize;
                    int segmentCount = (this.numberNGrams[order - 1] + 1) / segmentSize + 1;
                    this.NGramSegmentTable[order] = this.readIntTable(dataInputStream, this.bigEndian, segmentCount);
                }

                int wordStringSize = this.readInt(dataInputStream, this.bigEndian);
                if (wordStringSize <= 0)
                {
                    string text = new StringBuilder().append("Bad word string size: ").append(wordStringSize).toString();

                    throw new Error(text);
                }
                this.words = this.readWords(dataInputStream, wordStringSize, this.numberNGrams[0]);

                if (this.startWordID > -1)
                {
                    this.unigrams[this.startWordID].setLogProbability(-99f);
                }
                if (this.endWordID > -1)
                {
                    this.unigrams[this.endWordID].setLogBackoff(-99f);
                }

                this.applyUnigramWeight();
                if (this.applyLanguageWeightAndWip)
                {
                    // The loop above fills tables only for orders in [1, maxNGram), while this
                    // loop covers [0, maxNGram]. Slots that are out of range or were never
                    // filled are skipped instead of being handed to the weighting helpers.
                    for (int order = 0; order <= this.maxNGram; order++)
                    {
                        if (order < this.NGramProbTable.Length && this.NGramProbTable[order] != null)
                        {
                            this.applyLanguageWeight(this.NGramProbTable[order], this.languageWeight);
                            this.applyWip(this.NGramProbTable[order], this.wip);
                        }
                        if (order > 1 && order < this.NGramBackoffTable.Length && this.NGramBackoffTable[order] != null)
                        {
                            this.applyLanguageWeight(this.NGramBackoffTable[order], this.languageWeight);
                        }
                    }
                }
            }
            finally
            {
                dataInputStream.close();
            }
        }