/// <summary>
/// Collects the next set of emitting tokens reachable from <paramref name="token"/>
/// and accumulates them in the active list (emitting successors) or the result
/// list (final states). Non-emitting successors are expanded recursively within
/// the current frame.
/// </summary>
/// <param name="token">The token to collect successors from.</param>
protected void collectSuccessorTokens(Token token)
{
    SearchState state = token.getSearchState();

    // If this is a final state, add it to the final list.
    if (token.isFinal())
    {
        resultList.Add(token);
    }

    // If this is a non-emitting token and we've already visited the same
    // state during this frame, then we are in a grammar loop, so we don't
    // continue to expand. This check only works properly if we have kept
    // all of the tokens (instead of skipping the non-word tokens).
    // Note that certain linguists will never generate grammar loops
    // (lextree linguist for example). For these cases, it is perfectly
    // fine to disable this check by setting keepAllTokens to false.
    if (!token.isEmitting() && (keepAllTokens && isVisited(token)))
    {
        return;
    }

    // Beam prune: discard tokens that fall below the current frame threshold.
    if (token.getScore() < threshold)
    {
        return;
    }

    float penalty = 0.0f;

    // Word-boundary handling: in addition to the word-threshold prune, check
    // the phrase spotter's result and reward tokens whose word was spotted.
    if (state is WordSearchState)
    {
        Word word = token.getWord();
        // Frame index to seconds, assuming a 100 frames/sec rate —
        // NOTE(review): confirm frame rate against the front end config.
        float phraseTime = (float)currentFrameNumber / 100;

        if (spotterContains(word.getSpelling(), phraseTime))
        {
            penalty = 1.0f; // it's more of a reward
            Console.WriteLine("spotted");
            phraseDetected = true;
            logger.Info("Token prioritized");
        }

        if (token.getScore() < wordThreshold)
        {
            return;
        }
    }

    // Idea is to reward the favoured (spotted) token very well so it
    // survives subsequent pruning.
    if (penalty != 0.0f)
    {
        token.setScore(token.getScore() + 10000.0f);
        setBestToken(token, state);
    }

    SearchStateArc[] arcs = state.getSuccessors();

    // For each successor: calculate the entry score for the token based upon
    // the predecessor token score and the transition probabilities. If the
    // score is better than the best score encountered for the SearchState and
    // frame then create a new token and add it to the lattice and the
    // SearchState. If the token is an emitting token add it to the list,
    // otherwise recursively collect the new token's successors.
    foreach (SearchStateArc arc in arcs)
    {
        SearchState nextState = arc.getState();

        // We're actually multiplying the probabilities, but since these
        // come in log(), multiply gets converted to add.
        float logEntryScore = token.getScore() + arc.getProbability() + penalty;

        if (wantEntryPruning) // false by default
        {
            if (logEntryScore < threshold)
            {
                continue;
            }
            if (nextState is WordSearchState && logEntryScore < wordThreshold)
            {
                continue;
            }
        }

        Token predecessor = getResultListPredecessor(token);
        Token bestToken = getBestToken(nextState);
        bool firstToken = bestToken == null;

        // Viterbi: keep only the best-scoring token entering each state.
        if (firstToken || bestToken.getScore() <= logEntryScore)
        {
            Token newToken = new Token(predecessor, nextState, logEntryScore,
                arc.getInsertionProbability(),
                arc.getLanguageProbability(),
                currentFrameNumber);
            tokensCreated.value++;
            setBestToken(newToken, nextState);

            if (!newToken.isEmitting())
            {
                // If not emitting, check to see if we've already visited
                // this state during this frame. Expand the token only if we
                // haven't visited it already. This prevents the search from
                // getting stuck in a loop of states with no intervening
                // emitting nodes. This can happen with nasty jsgf grammars
                // such as ((foo*)*)*
                if (!isVisited(newToken))
                {
                    collectSuccessorTokens(newToken);
                }
            }
            else
            {
                if (firstToken)
                {
                    activeList.add(newToken);
                }
                else
                {
                    activeList.replace(bestToken, newToken);
                    viterbiPruned.value++;
                }
            }
        }
        else
        {
            viterbiPruned.value++;
        }
    }
}