Пример #1
0
        /// <summary>
        /// Writes the smear information to a binary file.
        /// </summary>
        /// <remarks>
        /// Layout, in order: the magic int, the unigram count (int), one float
        /// per unigram smear term, then for every unigram the number of its
        /// bigrams (int, 0 when there is no bigram buffer) followed by one
        /// (wordID int, smear term float) pair per bigram.
        /// </remarks>
        /// <param name="text">Path of the file to create or overwrite.</param>
        private void writeSmearInfo(string text)
        {
            // Same value as 0xC0CAC01A interpreted as a signed 32-bit int.
            const int smearMagic = -1060454374;
            int unigramCount = this.unigrams.Length;

            DataOutputStream dataOutputStream = new DataOutputStream(new FileOutputStream(text));
            try
            {
                dataOutputStream.writeInt(smearMagic);
                // NOTE(review): the print call was mangled in the scraped source;
                // presumed to be java.lang.System.@out.println - confirm.
                java.lang.System.@out.println("writing " + unigramCount);
                dataOutputStream.writeInt(unigramCount);
                for (int i = 0; i < unigramCount; i++)
                {
                    dataOutputStream.writeFloat(this.unigramSmearTerm[i]);
                }
                for (int i = 0; i < unigramCount; i++)
                {
                    java.lang.System.@out.println("Writing " + i + " of " + unigramCount);
                    NGramBuffer bigramBuffer = this.getBigramBuffer(i);
                    if (bigramBuffer == null)
                    {
                        // No bigrams follow this unigram: record an empty list.
                        dataOutputStream.writeInt(0);
                        continue;
                    }

                    int bigramCount = bigramBuffer.getNumberNGrams();
                    dataOutputStream.writeInt(bigramCount);
                    for (int j = 0; j < bigramCount; j++)
                    {
                        int   wordID    = bigramBuffer.getWordID(j);
                        Float smearTerm = this.getSmearTerm(i, wordID);
                        dataOutputStream.writeInt(wordID);
                        dataOutputStream.writeFloat(smearTerm.floatValue());
                    }
                }
            }
            finally
            {
                // Close on every path so a failed write does not leak the file handle.
                dataOutputStream.close();
            }
        }
Пример #2
0
        /// <summary>
        /// Reads smear information from a binary file and populates
        /// <c>unigramSmearTerm</c> and the bigram smear map.
        /// </summary>
        /// <remarks>
        /// Expected layout, in order: the magic int, the unigram count (int),
        /// one float per unigram smear term, then for every unigram the number
        /// of its bigrams (int) followed by one (wordID int, smear term float)
        /// pair per bigram. This mirrors what <c>writeSmearInfo</c> emits.
        /// </remarks>
        /// <param name="text">Path of the file to read.</param>
        /// <exception cref="IOException">
        /// The magic number, the unigram count, or a per-unigram bigram count
        /// does not match the currently loaded model.
        /// </exception>
        private void readSmearInfo(string text)
        {
            // Same value as 0xC0CAC01A interpreted as a signed 32-bit int.
            const int smearMagic = -1060454374;
            int unigramCount = this.unigrams.Length;

            DataInputStream dataInputStream = new DataInputStream(new FileInputStream(text));
            try
            {
                if (dataInputStream.readInt() != smearMagic)
                {
                    throw new IOException("Bad smear format for " + text);
                }
                if (dataInputStream.readInt() != unigramCount)
                {
                    throw new IOException("Bad unigram length in " + text);
                }

                this.bigramSmearMap   = new HashMap();
                this.unigramSmearTerm = new float[unigramCount];
                // NOTE(review): the print call was mangled in the scraped source;
                // presumed to be java.lang.System.@out.println - confirm.
                java.lang.System.@out.println("Reading " + unigramCount);
                for (int i = 0; i < unigramCount; i++)
                {
                    this.unigramSmearTerm[i] = dataInputStream.readFloat();
                }
                for (int i = 0; i < unigramCount; i++)
                {
                    java.lang.System.@out.println("Processed " + i + " of " + this.loadedBigramBuffers.Length);
                    int         storedCount  = dataInputStream.readInt();
                    NGramBuffer bigramBuffer = this.getBigramBuffer(i);

                    // The writer records 0 bigrams when there is no buffer, so a
                    // missing buffer must be treated as an empty one here too.
                    int expectedCount = (bigramBuffer == null) ? 0 : bigramBuffer.getNumberNGrams();
                    if (expectedCount != storedCount)
                    {
                        throw new IOException("Bad ngrams for unigram " + i + " Found " + storedCount + " expected " + expectedCount);
                    }

                    for (int j = 0; j < storedCount; j++)
                    {
                        // Each record is (wordID, smear term); the ID must be
                        // consumed or every following read is misaligned.
                        int wordID = dataInputStream.readInt();
                        this.putSmearTerm(i, wordID, dataInputStream.readFloat());
                    }
                }
            }
            finally
            {
                // Close on every path, including validation failures and short reads.
                dataInputStream.close();
            }
        }
Пример #3
0
        /// <summary>
        /// Computes the smear terms from the loaded n-gram data: fills
        /// <c>unigramSmearTerm</c> for every unigram, then stores one smear
        /// term per (unigram, bigram word) pair through <c>putSmearTerm</c>.
        /// Resets <c>bigramSmearMap</c> and increments <c>smearTermCount</c>
        /// once for every bigram that has a trigram buffer.
        /// </summary>
        private void buildSmearInfo()
        {
            // num  accumulates p * logP   over all unigrams,
            // num2 accumulates p * logP^2 over all unigrams,
            // where p = logMath.logToLinear(logP).
            double num  = (double)0f;
            double num2 = (double)0f;

            this.bigramSmearMap = new HashMap();
            // Per-unigram accumulators: array / array2 are the numerator and
            // denominator of unigramSmearTerm; array3 is a third sum that is
            // only consumed by the trigram pass below.
            double[] array  = new double[this.unigrams.Length];
            double[] array2 = new double[this.unigrams.Length];
            double[] array3 = new double[this.unigrams.Length];
            this.unigramSmearTerm = new float[this.unigrams.Length];
            UnigramProbability[] array4 = this.unigrams;
            int num3 = array4.Length;

            for (int i = 0; i < num3; i++)
            {
                UnigramProbability unigramProbability = array4[i];
                float  logProbability = unigramProbability.getLogProbability();
                double num4           = this.logMath.logToLinear(logProbability);
                num  += num4 * (double)logProbability;
                num2 += num4 * (double)logProbability * (double)logProbability;
            }
            // NOTE(review): "[email protected]" here and below looks like an e-mail
            // obfuscation artifact from the page this code was scraped from;
            // presumably a console println call - confirm against the original.
            // The message prints num2 first, then num.
            [email protected](new StringBuilder().append("R0 S0 ").append(num2).append(' ').append(num).toString());
            // Pass 1: one smear term per unigram, from its bigram successors.
            for (int j = 0; j < this.loadedBigramBuffers.Length; j++)
            {
                NGramBuffer bigramBuffer = this.getBigramBuffer(j);
                if (bigramBuffer == null)
                {
                    // No bigram buffer for this unigram: smear term is 0.
                    this.unigramSmearTerm[j] = 0f;
                }
                else
                {
                    array[j]  = (double)0f;
                    array2[j] = (double)0f;
                    array3[j] = (double)0f;
                    float    logBackoff = this.unigrams[j].getLogBackoff();
                    double   num5       = this.logMath.logToLinear(logBackoff);
                    // num11 / array6 (and array5, array7..array9) are plain aliases
                    // of j and of array / array2; each "array6[num11] += x" below is
                    // equivalent to "array[j] += x" or "array2[j] += x".
                    int      num11;
                    double[] array6;
                    for (int k = 0; k < bigramBuffer.getNumberNGrams(); k++)
                    {
                        int wordID = bigramBuffer.getWordID(k);
                        NGramProbability ngramProbability = bigramBuffer.getNGramProbability(k);
                        float            logProbability2  = this.unigrams[wordID].getLogProbability();
                        float            num6             = this.ngramProbTable[1][ngramProbability.getProbabilityID()];
                        double           num7             = this.logMath.logToLinear(logProbability2);
                        double           num8             = this.logMath.logToLinear(num6);
                        // num9 = linear backoff of unigram j times linear unigram
                        // probability of the successor word; num10 is its log.
                        double           num9             = num5 * num7;
                        double           num10            = (double)this.logMath.linearToLog(num9);
                        double[]         array5           = array;
                        num11          = j;
                        array6         = array5;
                        // array[j] += (...)
                        array6[num11] += (num8 * (double)num6 - num9 * num10) * (double)logProbability2;
                        double[] array7 = array2;
                        num11          = j;
                        array6         = array7;
                        // array2[j] += (...)
                        // NOTE(review): this multiplies by logProbability2 once, while
                        // the trigram-level denominator below squares logProbability3 -
                        // confirm the asymmetry is intended.
                        array6[num11] += (num8 - num9) * (double)logProbability2;
                    }
                    double[] array8 = array;
                    num11          = j;
                    array6         = array8;
                    // array[j] += backoff contribution built from the global sums.
                    array6[num11] += num5 * ((double)logBackoff * num + num2);
                    // array3[j] is taken from array2[j] BEFORE the num5 * num2 term
                    // is added to array2[j] two statements later; order matters.
                    array3[j]      = array2[j] + num5 * num;
                    double[] array9 = array2;
                    num11                    = j;
                    array6                   = array9;
                    // array2[j] += num5 * num2
                    array6[num11]           += num5 * num2;
                    this.unigramSmearTerm[j] = (float)(array[j] / array2[j]);
                }
            }
            // Pass 2: one smear term per (unigram j, bigram word) pair.
            for (int j = 0; j < this.loadedBigramBuffers.Length; j++)
            {
                [email protected](new StringBuilder().append("Processed ").append(j).append(" of ").append(this.loadedBigramBuffers.Length).toString());
                NGramBuffer bigramBuffer = this.getBigramBuffer(j);
                if (bigramBuffer != null)
                {
                    for (int i = 0; i < bigramBuffer.getNumberNGrams(); i++)
                    {
                        NGramProbability ngramProbability2 = bigramBuffer.getNGramProbability(i);
                        float            num12             = this.ngramBackoffTable[2][ngramProbability2.getBackoffID()];
                        double           num13             = this.logMath.logToLinear(num12);
                        int         wordID2     = bigramBuffer.getWordID(i);
                        NGramBuffer ngramBuffer = this.loadTrigramBuffer(j, wordID2);
                        float       num14;
                        if (ngramBuffer == null)
                        {
                            // No trigram buffer: reuse the unigram-level smear term
                            // of the bigram's second word.
                            num14 = this.unigramSmearTerm[wordID2];
                        }
                        else
                        {
                            // num7 / num8 are the numerator / denominator of the
                            // smear term for this bigram.
                            double num7 = (double)0f;
                            double num8 = (double)0f;
                            for (int l = 0; l < ngramBuffer.getNumberNGrams(); l++)
                            {
                                int    wordID3         = ngramBuffer.getWordID(l);
                                float  num15           = this.ngramProbTable[2][ngramBuffer.getProbabilityID(l)];
                                double num16           = this.logMath.logToLinear(num15);
                                float  bigramProb      = this.getBigramProb(wordID2, wordID3);
                                double num17           = this.logMath.logToLinear(bigramProb);
                                float  logProbability3 = this.unigrams[wordID3].getLogProbability();
                                // num18 = linear bigram backoff times linear bigram
                                // probability (wordID2, wordID3); num19 is its log.
                                double num18           = num13 * num17;
                                double num19           = (double)this.logMath.linearToLog(num18);
                                num7 += (num16 * (double)num15 - num18 * num19) * (double)logProbability3;
                                num8 += (num16 - num18) * (double)logProbability3 * (double)logProbability3;
                            }
                            // Backoff contribution from the pass-1 sums of wordID2.
                            num7 += num13 * ((double)num12 * array3[wordID2] - array[wordID2]);
                            num8 += num13 * array2[wordID2];
                            num14 = (float)(num7 / num8);
                            this.smearTermCount++;
                        }
                        this.putSmearTerm(j, wordID2, num14);
                    }
                }
            }
            [email protected](new StringBuilder().append("Smear count is ").append(this.smearTermCount).toString());
        }