Example #1
        public override float CalculateScore(IData feature)
        {
            _mixtureComponentSet.UpdateTopScores(feature);
            float ascore = 0;

            // Accumulate one log-domain score per feature stream.
            for (var i = 0; i < MixtureWeights.StreamsNum; i++)
            {
                var logTotal = LogMath.LogZero;
                for (var j = 0; j < _mixtureComponentSet.TopGauNum; j++)
                {
                    var topGauScore   = _mixtureComponentSet.GetTopGauScore(i, j);
                    var topGauId      = _mixtureComponentSet.GetTopGauId(i, j);
                    var mixtureWeight = MixtureWeights.Get(_Id, i, topGauId);
                    // topGauScore + mixtureWeight is the log of (linear score * linear weight);
                    // AddAsLinear sums those linear values while staying in log space.
                    logTotal = LogMath.AddAsLinear(logTotal, topGauScore + mixtureWeight);
                }
                ascore += logTotal;
            }
            return ascore;
        }
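
The example above stays entirely in the log domain and relies on LogMath.AddAsLinear to add two log values as if they were linear. A minimal sketch of that identity, using natural logs (the library's LogMath typically works in a different, configurable base, so this is illustrative only):

    using System;

    static class LogAddSketch
    {
        // Given a = log(x) and b = log(y), return log(x + y) without leaving
        // log space: log(x + y) = a + log(1 + exp(b - a)) for a >= b.
        public static float AddAsLinear(float a, float b)
        {
            if (a < b) { var t = a; a = b; b = t; } // work from the larger value for stability
            return a + (float)Math.Log(1.0 + Math.Exp(b - a));
        }

        static void Main()
        {
            var result = AddAsLinear((float)Math.Log(2.0), (float)Math.Log(3.0));
            Console.WriteLine(result);        // ~1.609, i.e. log(5): log(2) and log(3) added "as linear"
            Console.WriteLine(Math.Log(5.0)); // reference value
        }
    }

Note that seeding the accumulator with a log-zero value (a very large negative number, as LogMath.LogZero is) makes the first log-add simply return the other operand, which is why the inner loop above can start from LogZero.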
Example #2
        public override float CalculateScore(IData feature)
        {
            if (feature is DoubleData)
                this.LogInfo("DoubleData conversion required on mixture level!");

            var featureVector = FloatData.ToFloatData(feature).Values;

            var logTotal = LogMath.LogZero;
            for (var i = 0; i < _mixtureComponents.Length; i++)
            {
                // In linear form, this would be:
                //
                // Total += Mixture[i].score * MixtureWeight[i]
                logTotal = LogMath.AddAsLinear(logTotal,
                        _mixtureComponents[i].GetScore(featureVector) + MixtureWeights.Get(_Id, 0, i));
            }

            return logTotal;
        }
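
The comment inside the loop gives the linear form of the computation. A quick numeric check that the log-domain loop produces the same total (natural logs here, purely illustrative; all values are made up):

    using System;
    using System.Linq;

    static class LinearVsLog
    {
        static void Main()
        {
            double[] componentScores = { 0.02, 0.10, 0.05 }; // linear p(feature | component i)
            double[] weights         = { 0.5, 0.3, 0.2 };    // linear mixture weights

            // Linear form, as in the comment: Total += Mixture[i].score * MixtureWeight[i]
            var linearTotal = componentScores.Zip(weights, (s, w) => s * w).Sum();

            // Log form, as in CalculateScore: log-add (log score + log weight) per component.
            var logTotal = double.NegativeInfinity; // the LogZero analogue
            for (var i = 0; i < componentScores.Length; i++)
            {
                var term = Math.Log(componentScores[i]) + Math.Log(weights[i]);
                var hi = Math.Max(logTotal, term);
                logTotal = hi + Math.Log(1.0 + Math.Exp(Math.Min(logTotal, term) - hi));
            }

            Console.WriteLine(Math.Log(linearTotal)); // both lines print the same value
            Console.WriteLine(logTotal);
        }
    }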
Example #3
        /// <summary>
        /// Applies the unigram weight to the set of unigrams.
        /// </summary>
        private void ApplyUnigramWeight()
        {
            var logUnigramWeight    = _logMath.LinearToLog(_unigramWeight);
            var logNotUnigramWeight = _logMath.LinearToLog(1.0f - _unigramWeight);
            var logUniform          = _logMath.LinearToLog(1.0f / (_numberNGrams[0]));

            var logWip = _logMath.LinearToLog(_wip);

            var p2 = logUniform + logNotUnigramWeight;

            for (var i = 0; i < _numberNGrams[0]; i++)
            {
                var unigram = Unigrams[i];

                var p1 = unigram.LogProbability;

                if (i != _startWordID)
                {
                    p1 += logUnigramWeight;
                    p1  = _logMath.AddAsLinear(p1, p2);
                }

                if (_applyLanguageWeightAndWip)
                {
                    p1 = p1 * _languageWeight + logWip;
                    unigram.SetLogBackoff(unigram.LogBackoff * _languageWeight);
                }

                unigram.SetLogProbability(p1);
            }
        }
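
The loop above interpolates each unigram with a uniform distribution: in linear terms, p'(w) = unigramWeight * p(w) + (1 - unigramWeight) / V, carried out in log space via AddAsLinear. A small check of the two routes (the weight and vocabulary size below are illustrative, not from the source):

    using System;

    static class UnigramWeightSketch
    {
        static void Main()
        {
            var unigramWeight  = 0.7;    // illustrative value
            var vocabularySize = 1000;   // V, i.e. _numberNGrams[0]
            var p              = 0.01;   // raw unigram probability

            // Linear route: p'(w) = w * p(w) + (1 - w) / V.
            var smoothed = unigramWeight * p + (1 - unigramWeight) / vocabularySize;
            Console.WriteLine(Math.Log(smoothed));

            // Log route, mirroring p1 = AddAsLinear(p1 + logUnigramWeight, p2):
            var p1 = Math.Log(p) + Math.Log(unigramWeight);
            var p2 = Math.Log(1.0 / vocabularySize) + Math.Log(1 - unigramWeight);
            var hi = Math.Max(p1, p2);
            Console.WriteLine(hi + Math.Log(1 + Math.Exp(Math.Min(p1, p2) - hi))); // same value
        }
    }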
        /// <summary>
        /// Loads the language model from the given location.
        /// </summary>
        /// <param name="location">The URL location of the model.</param>
        /// <param name="unigramWeightValue">The unigram weight.</param>
        /// <param name="dictionaryValue">The dictionary.</param>
        private void Load(URL location, float unigramWeightValue, IDictionary dictionaryValue)
        {
            string line;
            float  logUnigramWeight        = _logMath.LinearToLog(unigramWeightValue);
            float  inverseLogUnigramWeight = _logMath
                                             .LinearToLog(1.0 - unigramWeightValue);

            Open(location);
            // look for beginning of data
            ReadUntil("\\data\\");
            // look for ngram statements
            List <int> ngramList = new List <int>();

            while ((line = ReadLine()) != null)
            {
                if (line.StartsWith("ngram"))
                {
                    StringTokenizer st = new StringTokenizer(line, " \t\n\r\f=");
                    if (st.countTokens() != 3)
                    {
                        Corrupt("corrupt ngram field " + line + ' '
                                + st.countTokens());
                    }
                    st.nextToken();
                    int index = int.Parse(st.nextToken(), CultureInfo.InvariantCulture.NumberFormat);
                    int count = int.Parse(st.nextToken(), CultureInfo.InvariantCulture.NumberFormat);
                    ngramList.Insert(index - 1, count);
                    MaxDepth = Math.Max(index, MaxDepth);
                }
                else if (line.Equals("\\1-grams:"))
                {
                    break;
                }
            }
            int numUnigrams = ngramList[0] - 1;
            // -log(x) = log(1/x)
            float logUniformProbability = -_logMath.LinearToLog(numUnigrams);

            for (int index = 0; index < ngramList.Count; index++)
            {
                int ngram      = index + 1;
                int ngramCount = ngramList[index];
                for (int i = 0; i < ngramCount; i++)
                {
                    StringTokenizer tok        = new StringTokenizer(ReadLine());
                    int             tokenCount = tok.countTokens();
                    if (tokenCount != ngram + 1 && tokenCount != ngram + 2)
                    {
                        Corrupt("Bad format");
                    }
                    float log10Prob    = float.Parse(tok.nextToken(), CultureInfo.InvariantCulture.NumberFormat);
                    float log10Backoff = 0.0f;
                    // construct the WordSequence for this N-Gram
                    List <Word> wordList = new List <Word>(MaxDepth);
                    for (int j = 0; j < ngram; j++)
                    {
                        string word = tok.nextToken();
                        _vocabulary.Add(word);
                        Word wordObject = dictionaryValue.GetWord(word);
                        if (wordObject == null)
                        {
                            wordObject = Word.Unknown;
                        }
                        wordList.Add(wordObject);
                    }
                    WordSequence wordSequence = new WordSequence(wordList);
                    if (tok.hasMoreTokens())
                    {
                        log10Backoff = float.Parse(tok.nextToken(), CultureInfo.InvariantCulture.NumberFormat);
                    }
                    float logProb    = _logMath.Log10ToLog(log10Prob);
                    float logBackoff = _logMath.Log10ToLog(log10Backoff);
                    // Apply unigram weights if this is a unigram probability
                    if (ngram == 1)
                    {
                        float p1 = logProb + logUnigramWeight;
                        float p2 = logUniformProbability + inverseLogUnigramWeight;
                        logProb = _logMath.AddAsLinear(p1, p2);
                    }
                    Put(wordSequence, logProb, logBackoff);
                }
                if (index < ngramList.Count - 1)
                {
                    string next = "\\" + (ngram + 1) + "-grams:";
                    ReadUntil(next);
                }
            }
            ReadUntil("\\end\\");
            Close();
        }
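
Load reads the standard ARPA n-gram format: a \data\ header listing the count of each n-gram order, one \N-grams: section per order with lines of the form `log10-probability word(s) [log10-backoff]`, and a closing \end\ marker. A minimal file this parser could consume (the probabilities are illustrative):

    \data\
    ngram 1=2
    ngram 2=1

    \1-grams:
    -0.3010 <s> -0.2218
    -0.3010 </s>

    \2-grams:
    -0.1761 <s> </s>

    \end\

This matches the token-count check in the loop: an entry of order N has N + 1 tokens, or N + 2 when a backoff weight is present.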
        public override void Allocate()
        {
            _vocabulary.Clear();
            _logProbs.Clear();
            _logBackoffs.Clear();
            HashMap <WordSequence, Integer> unigrams = new HashMap <WordSequence, Integer>();
            HashMap <WordSequence, Integer> bigrams  = new HashMap <WordSequence, Integer>();
            HashMap <WordSequence, Integer> trigrams = new HashMap <WordSequence, Integer>();
            int wordCount = 0;

            foreach (string sentence in _sentences)
            {
                // Regex-style whitespace split; assumed to be a Java-port helper extension,
                // since plain .NET string.Split would treat "\\s+" as a literal separator.
                string[] textWords = sentence.Split("\\s+");
                var      words     = new List <Word>();
                words.Add(_dictionary.GetSentenceStartWord());
                foreach (String wordString in textWords)
                {
                    if (wordString.Length == 0)
                    {
                        continue;
                    }
                    _vocabulary.Add(wordString);
                    Word word = _dictionary.GetWord(wordString);
                    if (word == null)
                    {
                        words.Add(Word.Unknown);
                    }
                    else
                    {
                        words.Add(word);
                    }
                }
                words.Add(_dictionary.GetSentenceEndWord());

                if (words.Count > 0)
                {
                    AddSequence(unigrams, new WordSequence(words[0]));
                    wordCount++;
                }

                if (words.Count > 1)
                {
                    wordCount++;
                    AddSequence(unigrams, new WordSequence(words[1]));
                    AddSequence(bigrams, new WordSequence(words[0], words[1]));
                }

                for (int i = 2; i < words.Count; ++i)
                {
                    wordCount++;
                    AddSequence(unigrams, new WordSequence(words[i]));
                    AddSequence(bigrams, new WordSequence(words[i - 1], words[i]));
                    AddSequence(trigrams, new WordSequence(words[i - 2], words[i - 1], words[i]));
                }
            }

            float discount = .5f;
            float deflate  = 1 - discount;
            var   uniprobs = new HashMap <WordSequence, Float>();

            foreach (var e in unigrams)
            {
                uniprobs.Put(e.Key, e.Value * deflate / wordCount);
            }

            LogMath lmath               = LogMath.GetLogMath();
            float   logUnigramWeight    = lmath.LinearToLog(_unigramWeight);
            float   invLogUnigramWeight = lmath.LinearToLog(1 - _unigramWeight);
            float   logUniformProb      = -lmath.LinearToLog(uniprobs.Count);

            var          sorted1Grams = new SortedSet <WordSequence>(unigrams.Keys);
            var          iter         = new SortedSet <WordSequence>(bigrams.KeySet()).GetEnumerator();
            WordSequence ws           = iter.MoveNext() ? iter.Current : null;

            foreach (WordSequence unigram in sorted1Grams)
            {
                float p = lmath.LinearToLog(uniprobs.Get(unigram));
                p += logUnigramWeight;
                p  = lmath.AddAsLinear(p, logUniformProb + invLogUnigramWeight);
                _logProbs.Put(unigram, p);

                float sum = 0f;
                while (ws != null)
                {
                    int cmp = ws.GetOldest().CompareTo(unigram);
                    if (cmp > 0)
                    {
                        break;
                    }
                    if (cmp == 0)
                    {
                        sum += uniprobs.Get(ws.GetNewest());
                    }
                    ws = iter.MoveNext() ? iter.Current : null;
                }

                _logBackoffs.Put(unigram, lmath.LinearToLog(discount / (1 - sum)));
            }

            var biprobs = new HashMap <WordSequence, Float>();

            foreach (var entry in bigrams)
            {
                int unigramCount = unigrams.Get(entry.Key.GetOldest());
                biprobs.Put(entry.Key, entry.Value * deflate / unigramCount);
            }

            var sorted2Grams = new SortedSet <WordSequence>(bigrams.KeySet());

            iter = new SortedSet <WordSequence>(trigrams.KeySet()).GetEnumerator();
            ws   = iter.MoveNext() ? iter.Current : null;
            foreach (WordSequence biword in sorted2Grams)
            {
                _logProbs.Put(biword, lmath.LinearToLog(biprobs.Get(biword)));

                float sum = 0f;
                while (ws != null)
                {
                    int cmp = ws.GetOldest().CompareTo(biword);
                    if (cmp > 0)
                    {
                        break;
                    }
                    if (cmp == 0)
                    {
                        sum += biprobs.Get(ws.GetNewest());
                    }
                    ws = iter.MoveNext() ? iter.Current : null;
                }
                _logBackoffs.Put(biword, lmath.LinearToLog(discount / (1 - sum)));
            }

            foreach (var e in trigrams)
            {
                float p = e.Value * deflate;
                p /= bigrams.Get(e.Key.GetOldest());
                _logProbs.Put(e.Key, lmath.LinearToLog(p));
            }
        }
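
Allocate estimates a backoff trigram model with a fixed discount of 0.5: every maximum-likelihood estimate is deflated by (1 - discount), and the reserved mass becomes the backoff weight discount / (1 - sum of deflated probabilities of the observed successors). A toy unigram-level sketch of that scheme (all counts made up):

    using System;
    using System.Collections.Generic;
    using System.Linq;

    static class DiscountSketch
    {
        static void Main()
        {
            const float discount = 0.5f, deflate = 1 - discount;

            // Toy unigram counts; wordCount is the corpus size.
            var counts = new Dictionary<string, int> { ["a"] = 6, ["b"] = 3, ["c"] = 1 };
            var wordCount = counts.Values.Sum();

            // Deflated unigram probabilities, as in: e.Value * deflate / wordCount.
            var probs = counts.ToDictionary(e => e.Key, e => e.Value * deflate / (float)wordCount);

            // Backoff weight for a history h: discount / (1 - sum of deflated
            // probabilities of the words observed after h). Suppose only "a"
            // and "b" were seen after h:
            var sum = probs["a"] + probs["b"];
            var backoff = discount / (1 - sum);
            Console.WriteLine(backoff); // mass redistributed to unseen successors
        }
    }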
Example #6
        /// <summary>
        /// Computes the utterance-level posterior for every node in the lattice, i.e. the probability
        /// that the node occurs on any path through the lattice. Uses a forward-backward algorithm
        /// specific to the nature of non-looping left-to-right lattice structures.
        /// <para/>
        /// Node posteriors can be retrieved by calling GetPosterior() on Node objects.
        /// </summary>
        /// <param name="languageModelWeightAdjustment">The weight multiplier applied to the language score, which is already scaled by the language weight.</param>
        /// <param name="useAcousticScoresOnly">If true, use only the acoustic scores to compute the posteriors, ignoring the language weight and scores.</param>
        public void ComputeNodePosteriors(float languageModelWeightAdjustment, bool useAcousticScoresOnly)
        {
            if (InitialNode == null)
            {
                return;
            }
            // Forward pass: propagate path scores from the initial node.
            InitialNode.ForwardScore = LogMath.LogOne;
            InitialNode.ViterbiScore = LogMath.LogOne;
            var sortedNodes = SortNodes();

            Debug.Assert(sortedNodes[0] == InitialNode);
            foreach (var currentNode in sortedNodes)
            {
                foreach (var edge in currentNode.LeavingEdges)
                {
                    var forwardProb = edge.FromNode.ForwardScore;
                    var edgeScore = ComputeEdgeScore(edge, languageModelWeightAdjustment, useAcousticScoresOnly);
                    forwardProb += edgeScore;
                    edge.ToNode.ForwardScore = LogMath.AddAsLinear((float)forwardProb, (float)edge.ToNode.ForwardScore);
                    var vs = edge.FromNode.ViterbiScore + edgeScore;
                    if (edge.ToNode.BestPredecessor == null ||
                        vs > edge.ToNode.ViterbiScore)
                    {
                        edge.ToNode.BestPredecessor = currentNode;
                        edge.ToNode.ViterbiScore    = vs;
                    }
                }
            }

            // Backward pass: propagate path scores from the terminal node.
            TerminalNode.BackwardScore = LogMath.LogOne;
            Debug.Assert(sortedNodes[sortedNodes.Count - 1] == TerminalNode);

            var n = sortedNodes.Count;

            while (n > 0)
            {
                var currentNode  = sortedNodes[--n]; //TODO: Check behavior
                var currentEdges = currentNode.LeavingEdges;
                foreach (var edge in currentEdges)
                {
                    var backwardProb = edge.ToNode.BackwardScore;
                    backwardProb += ComputeEdgeScore(edge, languageModelWeightAdjustment, useAcousticScoresOnly);
                    edge.FromNode.BackwardScore = LogMath.AddAsLinear((float)backwardProb, (float)edge.FromNode.BackwardScore);
                }
            }

            // Posteriors: combine forward and backward scores, normalized by the total lattice score.
            var normalizationFactor = TerminalNode.ForwardScore;

            foreach (var node in Nodes.Values)
            {
                node.Posterior = (node.ForwardScore + node.BackwardScore) - normalizationFactor;
            }
        }
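
The final loop implements the standard forward-backward identity: in the log domain, posterior(n) = forward(n) + backward(n) - forward(terminal). A toy linear-domain analogue on a two-path lattice (scores are made up):

    using System;

    static class PosteriorSketch
    {
        static void Main()
        {
            // Toy lattice: start -> {a, b} -> end, with linear edge scores.
            double sa = 0.6, sb = 0.4; // start->a, start->b
            double ae = 1.0, be = 1.0; // a->end, b->end

            double fwdA = sa, fwdB = sb;
            double fwdEnd = fwdA * ae + fwdB * be; // the normalization factor Z
            double bwdA = ae, bwdB = be;           // backward scores from the end

            Console.WriteLine(fwdA * bwdA / fwdEnd); // posterior of a = 0.6
            Console.WriteLine(fwdB * bwdB / fwdEnd); // posterior of b = 0.4
            // In log space the division becomes the subtraction at the end of
            // ComputeNodePosteriors: forward + backward - normalizationFactor.
        }
    }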