/// <summary>
        /// Walks the current active list and calls <c>CollectSuccessorTokens</c> for every token
        /// whose score is at or above the list's beam threshold and that <c>AllowExpansion</c> accepts.
        /// Enumeration stops at the first null entry. The whole pass is timed with <c>GrowTimer</c>.
        /// </summary>
        protected void GrowBranches()
        {
            GrowTimer.Start();

            var beamFloor = ActiveList.GetBeamThreshold();
            this.LogDebug("RelativeBeamThreshold: {0}", beamFloor.ToString("R"));

            foreach (var candidate in ActiveList)
            {
                // A null entry ends the pass.
                if (candidate == null)
                {
                    break;
                }

                // Skip tokens below the beam; the expansion check is only consulted
                // for tokens that survive the beam test.
                var withinBeam = candidate.Score >= beamFloor;
                if (!withinBeam || !AllowExpansion(candidate))
                {
                    continue;
                }

                CollectSuccessorTokens(candidate);
            }

            GrowTimer.Stop();
        }
示例#2
0
        /// <summary>
        /// Replaces the active list with a fresh one, resets the best-token map and the result list,
        /// refreshes the beam thresholds from the outgoing list, and then calls
        /// <c>CollectSuccessorTokens</c> for every token of that outgoing list.
        /// The work after the capacity computation is timed with <c>GrowTimer</c>.
        /// </summary>
        protected void GrowBranches()
        {
            // Initial map capacity: ten slots per active token, or 1 when that product is zero.
            int initialCapacity = ActiveList.Size * 10;
            initialCapacity = initialCapacity == 0 ? 1 : initialCapacity;

            GrowTimer.Start();

            BestTokenMap = new HashMap<ISearchState, Token>(initialCapacity);

            // Keep a handle on the outgoing list before the property is pointed at a new instance.
            ActiveList previousTokens = ActiveList;
            ResultList = new List<Token>();
            ActiveList = ActiveListFactory.NewInstance();

            // Both thresholds are derived from the outgoing list, not the new empty one.
            _threshold = previousTokens.GetBeamThreshold();
            _wordThreshold = previousTokens.GetBestScore() + _logRelativeWordBeamWidth;

            foreach (Token outgoing in previousTokens)
            {
                CollectSuccessorTokens(outgoing);
            }

            GrowTimer.Stop();
        }
示例#3
0
        /// <summary>
        /// Expands one token. A final token is recorded in the result list. For any other token each
        /// successor arc is scored, and the successor state's best token is created, updated, or
        /// (when building a word lattice) given an alternate predecessor.
        /// </summary>
        /// <param name="token">The token whose successors are collected.</param>
        protected override void CollectSuccessorTokens(Token token)
        {
            // Final tokens are handed to the result list and not expanded any further.
            if (token.IsFinal)
            {
                ResultList.Add(GetResultListPredecessor(token));
                return;
            }

            // A non-emitting token whose state was already visited in this frame indicates a
            // grammar loop, so expansion stops here. The visited check is only meaningful when
            // all tokens are kept, which is why it is gated on KeepAllTokens.
            if (!token.IsEmitting && KeepAllTokens && IsVisited(token))
            {
                return;
            }

            var sourceState       = token.SearchState;
            var successorArcs     = sourceState.GetSuccessors();
            var resultPredecessor = GetResultListPredecessor(token);

            var sourceScore    = token.Score;
            var pruneThreshold = ActiveList.GetBeamThreshold();

            // Lookahead pruning is only attempted when leaving one of these lex-tree state kinds.
            var usesLookahead = sourceState is LexTreeNonEmittingHMMState
                                || sourceState is LexTreeWordState
                                || sourceState is LexTreeEndUnitState;

            foreach (var arc in successorArcs)
            {
                var targetState = arc.State;

                // Lookahead heuristic: drop arcs into HMM states whose penalised score
                // falls below the beam threshold.
                if (usesLookahead && targetState is LexTreeHmmState)
                {
                    var unitId = ((LexTreeHmmState)targetState).HmmState.HMM.BaseUnit.BaseID;

                    // Use the cached penalty if there is one, otherwise compute it now.
                    Float penalty = _penalties.Get(unitId);
                    if (penalty == null)
                    {
                        penalty = UpdateLookaheadPenalty(unitId);
                    }

                    if ((sourceScore + _lookaheadWeight * penalty) < pruneThreshold)
                    {
                        continue;
                    }
                }

                if (_checkStateOrder)
                {
                    CheckStateOrder(sourceState, targetState);
                }

                // Scores are logarithmic, so combining them is an addition.
                var entryScore = sourceScore + arc.GetProbability();
                var incumbent  = GetBestToken(targetState);

                // No token for this state yet: create one and register it.
                if (incumbent == null)
                {
                    var created = new Token(resultPredecessor, targetState, entryScore, arc.InsertionProbability, arc.LanguageProbability, CurrentFrameNumber);
                    TokensCreated.Value++;
                    SetBestToken(created, targetState);
                    ActiveListAdd(created);
                    continue;
                }

                // The new path beats the recorded one: overwrite the existing token in place and,
                // for word states in lattice mode, keep the displaced predecessor as an alternate.
                if (incumbent.Score < entryScore)
                {
                    var displaced = incumbent.Predecessor;
                    incumbent.Update(resultPredecessor, targetState, entryScore, arc.InsertionProbability, arc.LanguageProbability, CurrentFrameNumber);
                    if (BuildWordLattice && targetState is IWordSearchState)
                    {
                        LoserManager.AddAlternatePredecessor(incumbent, displaced);
                    }
                    continue;
                }

                // The new path lost: in lattice mode it is still remembered as an alternate
                // predecessor of the word state, provided there is a predecessor to record.
                if (BuildWordLattice && targetState is IWordSearchState && resultPredecessor != null)
                {
                    LoserManager.AddAlternatePredecessor(incumbent, resultPredecessor);
                }
            }
        }