Exemple #1
0
        /// <summary>
        /// Rescales the component scores slice by slice. The array is treated as
        /// <paramref name="numStreams"/> consecutive slices of
        /// <c>componentScores.Length / numStreams</c> entries each; inside every slice
        /// the largest score is subtracted from each entry and the difference is
        /// converted with <c>_logMath.LogToLinear</c>.
        /// </summary>
        /// <param name="componentScores">
        /// Scores to rescale. The array is overwritten in place; entries past the last
        /// full slice (when the length is not a multiple of
        /// <paramref name="numStreams"/>) are left untouched.
        /// </param>
        /// <param name="numStreams">Number of equally sized slices to process.</param>
        /// <returns>The very same array instance that was passed in.</returns>
        private float[] ComputePosterios(float[] componentScores, int numStreams)
        {
            int sliceLength = componentScores.Length / numStreams;
            int sliceStart  = 0;

            for (int stream = 0; stream < numStreams; stream++)
            {
                int sliceEnd = sliceStart + sliceLength;

                // Locate the largest score of the current slice.
                float largest = componentScores[sliceStart];
                for (int k = sliceStart + 1; k < sliceEnd; k++)
                {
                    float candidate = componentScores[k];
                    largest = candidate > largest ? candidate : largest;
                }

                // Shift by that maximum before leaving the log scale, so the
                // largest entry of the slice is converted from a difference of zero.
                for (int k = sliceStart; k < sliceEnd; k++)
                {
                    componentScores[k] = (float)_logMath.LogToLinear(componentScores[k] - largest);
                }

                sliceStart = sliceEnd;
            }

            return componentScores;
        }
Exemple #2
0
        /// <summary>
        /// Gets the mixture weights of the component densities, converted to the
        /// linear scale.
        /// </summary>
        /// <returns>
        /// A newly allocated array with one entry per element of
        /// <c>MixtureComponents</c>; entry <c>i</c> is the value stored in
        /// <c>MixtureWeights</c> for this object's id and component <c>i</c>, passed
        /// through <c>LogMath.LogToLinear</c>.
        /// </returns>
        public float[] GetComponentWeights()
        {
            var componentCount = MixtureComponents.Length;
            var linearWeights  = new float[componentCount];

            var component = 0;
            while (component < componentCount)
            {
                var storedWeight = MixtureWeights.Get(_Id, 0, component);
                linearWeights[component] = (float)LogMath.LogToLinear(storedWeight);
                component++;
            }

            return linearWeights;
        }
        /// <summary>
        /// Computes the smear terms of the model: one value per unigram, written to
        /// <c>_unigramSmearTerm</c>, and one value per bigram, handed to
        /// <c>PutSmearTerm</c>. <c>_bigramSmearMap</c> is replaced by a fresh, empty
        /// dictionary before any term is stored.
        /// </summary>
        /// <remarks>
        /// The method runs three passes:
        /// (1) over all unigrams, accumulating the global sums <c>s0</c> and <c>r0</c>;
        /// (2) over every bigram buffer, producing the per-unigram numerator,
        /// denominator and smear term;
        /// (3) over every bigram, producing its smear term from the trigram buffer
        /// that follows it, or falling back to the unigram smear term of the
        /// bigram's second word when no trigram buffer is available.
        /// Progress and summary messages are emitted through <c>LogInfo</c>.
        /// <c>_smearTermCount</c> is only incremented here, never reset.
        /// </remarks>
        private void BuildSmearInfo()
        {
            // s0 = sum over unigrams of p * log(p); r0 = sum of p * log(p)^2.
            double s0 = 0;
            double r0 = 0;

            // Presumably the store PutSmearTerm writes into - confirm against its definition.
            _bigramSmearMap = new Dictionary <long, Float>();

            // Per-unigram accumulators; pass 3 reads them back indexed by the
            // second word of each bigram.
            var ugNumerator   = new double[_unigrams.Length];
            var ugDenominator = new double[_unigrams.Length];
            var ugAvgLogProb  = new double[_unigrams.Length];

            _unigramSmearTerm = new float[_unigrams.Length];

            // Pass 1: global sums over the whole unigram distribution.
            foreach (var unigram in _unigrams)
            {
                var logp = unigram.LogProbability;
                var p    = LogMath.LogToLinear(logp);
                s0 += p * logp;
                r0 += p * logp * logp;
            }

            this.LogInfo("R0 S0 " + r0 + ' ' + s0);

            // Pass 2: one smear term per unigram i, built from the bigrams that start with i.
            for (var i = 0; i < _loadedBigramBuffers.Length; i++)
            {
                var bigram = GetBigramBuffer(i);

                // No bigram buffer for this word: use the constant LogMath.LogOne
                // and leave the accumulators at their initial zero.
                if (bigram == null)
                {
                    _unigramSmearTerm[i] = LogMath.LogOne;
                    continue;
                }

                ugNumerator[i]   = 0.0;
                ugDenominator[i] = 0.0;
                ugAvgLogProb[i]  = 0.0;

                var logugbackoff = _unigrams[i].LogBackoff;
                var ugbackoff    = LogMath.LogToLinear(logugbackoff);

                for (var j = 0; j < bigram.NumberNGrams; j++)
                {
                    var wordID = bigram.GetWordID(j);
                    var bgProb = bigram.GetNGramProbability(j);

                    var logugprob = _unigrams[wordID].LogProbability;
                    var logbgprob = _ngramProbTable[1][bgProb.ProbabilityID];

                    var ugprob = LogMath.LogToLinear(logugprob);
                    var bgprob = LogMath.LogToLinear(logbgprob);

                    // Probability the follower word would get through backoff
                    // alone (backoff weight of i times its unigram probability).
                    var    backoffbgprob    = ugbackoff * ugprob;
                    double logbackoffbgprob = LogMath.LinearToLog(backoffbgprob);

                    // Accumulate the difference between the explicit bigram
                    // contribution and the backed-off one, weighted by logugprob.
                    ugNumerator[i] +=
                        (bgprob * logbgprob
                         - backoffbgprob * logbackoffbgprob) * logugprob;

                    // NOTE(review): logugprob appears once here, whereas the
                    // corresponding trigram accumulation in pass 3 multiplies by
                    // logugprob twice - confirm the asymmetry is intended.
                    ugDenominator[i] += (bgprob - backoffbgprob) * logugprob;
                    // Disabled debug output:
                    // dumpProbs(ugNumerator, ugDenominator, i, j, logugprob,
                    // logbgprob, ugprob, bgprob, backoffbgprob,
                    // logbackoffbgprob);
                }

                // Add the backed-off contribution of the whole unigram
                // distribution, using the global sums from pass 1.
                ugNumerator[i]   += ugbackoff * (logugbackoff * s0 + r0);
                // Must be computed before the r0 term is added to the denominator below.
                ugAvgLogProb[i]   = ugDenominator[i] + ugbackoff * s0;
                ugDenominator[i] += ugbackoff * r0;

                // Disabled debug output:
                // System.out.println("n/d " + ugNumerator[i] + " " +
                // ugDenominator[i]);

                _unigramSmearTerm[i] = (float)(ugNumerator[i] / ugDenominator[i]);
                // Disabled alternative that stored the ratio on the log scale:
                // / unigramSmearTerm[i] =
                // logMath.linearToLog(ugNumerator[i] / ugDenominator[i]);
                // System.out.println("ugs " + unigramSmearTerm[i]);
            }

            // Pass 3: one smear term per bigram (i, k), built from the trigrams that follow it.
            for (var i = 0; i < _loadedBigramBuffers.Length; i++)
            {
                // Logged once per first word, including words without a bigram buffer.
                this.LogInfo("Processed " + i
                             + " of " + _loadedBigramBuffers.Length);
                var bigram = GetBigramBuffer(i);

                if (bigram == null)
                {
                    continue;
                }

                for (var j = 0; j < bigram.NumberNGrams; j++)
                {
                    float smearTerm;
                    var   bgProb       = bigram.GetNGramProbability(j);
                    var   logbgbackoff =
                        _ngramBackoffTable[2][bgProb.BackoffID];
                    var bgbackoff = LogMath.LogToLinear(logbgbackoff);
                    // k is the second word of the bigram (i, k).
                    var k         = bigram.GetWordID(j);
                    var trigram   = LoadTrigramBuffer(i, k);

                    if (trigram == null)
                    {
                        // No trigram buffer for (i, k): reuse the unigram smear term of k.
                        smearTerm = _unigramSmearTerm[k];
                    }
                    else
                    {
                        double bgNumerator   = 0;
                        double bgDenominator = 0;
                        for (var l = 0; l < trigram.NumberNGrams; l++)
                        {
                            // m is the third word of the trigram (i, k, m).
                            var m         = trigram.GetWordID(l);
                            var logtgprob =
                                _ngramProbTable[2][trigram.GetProbabilityID(l)];
                            var    tgprob           = LogMath.LogToLinear(logtgprob);
                            var    logbgprob        = GetBigramProb(k, m);
                            var    bgprob           = LogMath.LogToLinear(logbgprob);
                            var    logugprob        = _unigrams[m].LogProbability;
                            // Probability m would get through backoff alone
                            // (bigram backoff weight times the bigram probability of (k, m)).
                            var    backofftgprob    = bgbackoff * bgprob;
                            double logbackofftgprob =
                                LogMath.LinearToLog(backofftgprob);

                            bgNumerator += (tgprob * logtgprob - backofftgprob
                                            * logbackofftgprob) * logugprob;

                            bgDenominator += (tgprob - backofftgprob) * logugprob
                                             * logugprob;
                        }

                        // Add the backed-off contribution using the accumulators
                        // pass 2 produced for word k.
                        // NOTE(review): ugNumerator[k] is subtracted here, while the
                        // analogous term in pass 2 (r0) is added - confirm the sign.
                        bgNumerator +=
                            bgbackoff * (logbgbackoff * ugAvgLogProb[k] - ugNumerator[k]);
                        bgDenominator += bgbackoff * ugDenominator[k];
                        // bigram.ugsmear = bg_numerator / bg_denominator;
                        smearTerm = (float)(bgNumerator / bgDenominator);
                        // Counts only the terms computed from trigram data, not the fallbacks.
                        _smearTermCount++;
                    }

                    PutSmearTerm(i, k, smearTerm);
                }
            }

            this.LogInfo("Smear count is " + _smearTermCount);
        }