/**
 * Builds a corrected copy of the active list by rolling every token back
 * past the final grow step.
 *
 * growBranches() is invoked even though no data remains after the last
 * speech frame, so the ordering of the active list may depend on the
 * transition probabilities (and penalty scores) only. To repair the list,
 * each token is walked back along its predecessor chain until a real final
 * state or the last scored emitting state is reached.
 *
 * @return newly created list holding the rolled-back tokens
 */
protected ActiveList undoLastGrowStep()
{
    ActiveList rolledBack = activeList.newInstance();

    foreach (Token grown in JavaToCs.GetTokenCollection(activeList))
    {
        // NOTE(review): a token without a predecessor would be dereferenced
        // as null in the loop below - confirm callers guarantee one.
        Token cursor = grown.getPredecessor();

        while (true)
        {
            Token parent = cursor.getPredecessor();
            if (parent == null)
            {
                // Reached the head of the chain; nothing further to undo.
                break;
            }

            bool skippable =
                // a final token that merely hides a prior non-final one
                (cursor.isFinal() && !parent.isFinal())
                // an emitting token that has not been scored yet
                || (cursor.isEmitting() && cursor.getData() == null)
                // a token that is neither final nor emitting
                || (!cursor.isFinal() && !cursor.isEmitting());

            if (!skippable)
            {
                break;
            }

            cursor = parent;
        }

        rolledBack.add(cursor);
    }

    return rolledBack;
}
/**
 * Resets the frame counter and the scored-token statistic, seeds a fresh
 * active list with a token for the search graph's initial state, and
 * performs the first grow step.
 */
protected internal virtual void localStart()
{
    this.currentFrameNumber = 0;
    this.curTokensScored.value = 0.0;

    ActiveList seeded = this.activeListFactory.newInstance();
    SearchState entryState = this.linguist.getSearchGraph().getInitialState();

    // The seed token is created with -1 as its second constructor argument.
    Token seedToken = new Token(entryState, -1L);
    seeded.add(seedToken);

    this.activeList = seeded;
    this.growBranches();
}
/**
 * Starts a new search: fetches the initial state from the linguist's
 * search graph, wraps it in a token placed on a freshly created active
 * list, and grows the first set of branches from it.
 */
protected void localStart()
{
    currentFrameNumber = 0;
    curTokensScored.value = 0;

    ActiveList seeded = activeListFactory.newInstance();
    SearchState entryState = linguist.getSearchGraph().getInitialState();

    // The seed token carries the frame number that was just reset above.
    Token seedToken = new Token(entryState, currentFrameNumber);
    seeded.add(seedToken);

    activeList = seeded;
    growBranches();
}
/**
 * Adds the token to the active list that findListFor() selects for it.
 *
 * Throws an Error naming the class of the token's search state when no
 * list is found.
 *
 * @param token the token to add
 */
public override void add(Token token)
{
    ActiveList target = this.findListFor(token);
    if (target != null)
    {
        target.add(token);
        return;
    }

    // No list matched: report the class of the offending search state.
    StringBuilder message = new StringBuilder();
    message.append("Cannot find ActiveList for ");
    message.append(Object.instancehelper_getClass(token.getSearchState()));
    throw new Error(message.toString());
}
/**
 * Builds a new active list in which every token of the current list is
 * replaced by an ancestor reached by walking back along its predecessor
 * chain. The walk skips final tokens whose predecessor is not final,
 * emitting tokens that carry no data, and tokens that are neither final
 * nor emitting; it stops at the first token that matches none of these
 * or that has no predecessor.
 *
 * @return the newly created list
 */
protected internal virtual ActiveList undoLastGrowStep()
{
    ActiveList rolledBack = this.activeList.newInstance();

    for (Iterator it = this.activeList.iterator(); it.hasNext();)
    {
        Token grown = (Token)it.next();

        // NOTE(review): a token without a predecessor would be dereferenced
        // as null in the loop below - confirm callers guarantee one.
        Token cursor = grown.getPredecessor();

        while (true)
        {
            Token parent = cursor.getPredecessor();
            if (parent == null)
            {
                break;
            }

            bool skippable =
                (cursor.isFinal() && !parent.isFinal())
                || (cursor.isEmitting() && cursor.getData() == null)
                || (!cursor.isFinal() && !cursor.isEmitting());

            if (!skippable)
            {
                break;
            }

            cursor = parent;
        }

        rolledBack.add(cursor);
    }

    return rolledBack;
}
/**
 * Prepares the fast-match pass before delegating to the base start-up.
 *
 * Resets the fast-match frame counter, clears the Gaussian scores when the
 * loader is a Sphinx3Loader with tied mixtures, seeds a new fast-match
 * active list with the fast-match linguist's initial state, and then runs
 * fast-match recognition for up to (lookaheadWindow - 1) steps or until
 * the fast-match stream ends. Finally calls base.localStart().
 */
protected internal override void localStart()
{
    this.currentFastMatchFrameNumber = 0;

    Sphinx3Loader sphinx3Loader = this.loader as Sphinx3Loader;
    if (sphinx3Loader != null && sphinx3Loader.hasTiedMixtures())
    {
        sphinx3Loader.clearGauScores();
    }

    this.fastmatchActiveList = this.fastmatchActiveListFactory.newInstance();
    SearchState entryState = this.fastmatchLinguist.getSearchGraph().getInitialState();
    this.fastmatchActiveList.add(new Token(entryState, (long)this.currentFastMatchFrameNumber));

    this.createFastMatchBestTokenMap();
    this.growFastmatchBranches();
    this.fastmatchStreamEnd = false;

    // Pre-fill the look-ahead window, stopping early if the stream ends.
    for (int step = 0; step < this.lookaheadWindow - 1 && !this.fastmatchStreamEnd; step++)
    {
        this.fastMatchRecognize();
    }

    base.localStart();
}
/**
 * Collects the next set of emitting tokens from a token and accumulates
 * them in the active or result lists.
 *
 * Final tokens are appended to the result list. Expansion stops early when
 * a non-emitting token was already visited in this frame (only checked
 * when keepAllTokens is set), when the token's score is below the
 * threshold, or when a word token's score is below the word threshold.
 * A word token whose spelling is reported by the phrase spotter is
 * rewarded: its score is raised, it is recorded as the best token for its
 * state, and a bonus is added to the entry score of each successor.
 *
 * @param token
 *            the token to collect successors from
 */
protected void collectSuccessorTokens(Token token)
{
    SearchState state = token.getSearchState();

    // If this is a final state, add it to the final list
    if (token.isFinal())
    {
        resultList.Add(token);
    }

    // If this is a non-emitting token and we've already visited the same
    // state during this frame, then we are in a grammar loop, so we don't
    // continue to expand. This check only works properly if we have kept
    // all of the tokens (instead of skipping the non-word tokens).
    // Note that certain linguists will never generate grammar loops
    // (lextree linguist for example). For these cases, it is perfectly
    // fine to disable this check by setting keepAllTokens to false.
    if (!token.isEmitting() && (keepAllTokens && isVisited(token)))
    {
        return;
    }

    if (token.getScore() < threshold)
    {
        return;
    }

    float penalty = 0.0f;

    // Word states are checked against the phrase spotter's result in
    // addition to the word threshold.
    if (state is WordSearchState)
    {
        // The token's data is intentionally not fetched here: it was never
        // used, and hard-casting it to FloatData could only fail.
        Word word = token.getWord();

        // NOTE(review): presumably converts frames to seconds at 100
        // frames per second - confirm against spotterContains().
        float phraseTime = (float)currentFrameNumber / 100;

        if (spotterContains(word.getSpelling(), phraseTime))
        {
            penalty = 1.0f; // it's more of a reward
            Console.WriteLine("spotted");
            phraseDetected = true;
            logger.Info("Token prioritized");
        }

        if (token.getScore() < wordThreshold)
        {
            return;
        }
    }

    // Idea is to reward the favoured token very well. This must happen
    // before the successor loop, which reads the updated score.
    if (penalty != 0.0f)
    {
        token.setScore(token.getScore() + 10000.0f);
        setBestToken(token, state);
    }

    SearchStateArc[] arcs = state.getSuccessors();

    // For each successor, calculate the entry score for the token based
    // upon the predecessor token score and the transition probabilities.
    // If the score is better than the best score encountered for the
    // SearchState and frame, then create a new token and add it to the
    // lattice and the SearchState. If the token is an emitting token add
    // it to the list, otherwise recursively collect the new token's
    // successors.
    foreach (SearchStateArc arc in arcs)
    {
        SearchState nextState = arc.getState();

        // We're actually multiplying the variables, but since these come
        // in log(), multiply gets converted to add
        float logEntryScore = token.getScore() + arc.getProbability() + penalty;

        if (wantEntryPruning) // false by default
        {
            if (logEntryScore < threshold)
            {
                continue;
            }
            if (nextState is WordSearchState && logEntryScore < wordThreshold)
            {
                continue;
            }
        }

        Token predecessor = getResultListPredecessor(token);
        Token bestToken = getBestToken(nextState);
        bool firstToken = bestToken == null;

        if (firstToken || bestToken.getScore() <= logEntryScore)
        {
            Token newToken = new Token(predecessor, nextState, logEntryScore,
                    arc.getInsertionProbability(),
                    arc.getLanguageProbability(),
                    currentFrameNumber);
            tokensCreated.value++;
            setBestToken(newToken, nextState);

            if (!newToken.isEmitting())
            {
                // If not emitting, check to see if we've already visited
                // this state during this frame. Expand the token only if
                // we haven't visited it already. This prevents the search
                // from getting stuck in a loop of states with no
                // intervening emitting nodes. This can happen with nasty
                // jsgf grammars such as ((foo*)*)*
                if (!isVisited(newToken))
                {
                    collectSuccessorTokens(newToken);
                }
            }
            else
            {
                if (firstToken)
                {
                    activeList.add(newToken);
                }
                else
                {
                    activeList.replace(bestToken, newToken);
                    viterbiPruned.value++;
                }
            }
        }
        else
        {
            viterbiPruned.value++;
        }
    }
}