private int m_dfaInitialState; // initial automata state.

/// <summary>
/// Creates a new instance of the <c>TokenReader</c> class.
/// </summary>
/// <param name="reader">Source to parse.</param>
/// <param name="grammar">Grammar rules.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="reader"/> or <paramref name="grammar"/> is <c>null</c>.
/// </exception>
public TokenReader(CharReader reader, Grammar grammar)
{
    if (reader == null)
    {
        // The reported name must match the parameter in the signature so that
        // ArgumentNullException.ParamName points the caller at the right argument.
        throw new ArgumentNullException("reader");
    }
    if (grammar == null)
    {
        throw new ArgumentNullException("grammar");
    }

    m_charReader = reader;
    m_dfaStateTable = grammar.DfaStateTable;
    m_symbolTable = grammar.SymbolTable;
    m_dfaInitialState = grammar.DfaInitialState;

    // Find special symbols. The whole table is scanned, so if several symbols
    // share a type the last one encountered is the one that is kept.
    foreach (Symbol symbol in m_symbolTable)
    {
        switch (symbol.SymbolType)
        {
            case SymbolType.Error:
                m_errorSymbol = symbol;
                break;

            case SymbolType.End:
                m_endSymbol = symbol;
                break;
        }
    }
}
/// <summary>
/// Removes redundant arcs from the word graph.
/// TODO: This seems to affect the results of an interactive translation session, so don't use it yet.
/// </summary>
/// <remarks>
/// Works through a queue of DFA states, each of which wraps a set of NFA states of this graph.
/// For every queued state the outgoing arcs are grouped by the next word they consume; each group
/// becomes a candidate transition. For each candidate that is emitted, the arcs of its
/// highest-scoring recorded path are copied into the result, chained through newly numbered states.
/// </remarks>
/// <returns>The optimized word graph.</returns>
public WordGraph Optimize()
{
    var outputArcs = new List<WordGraphArc>();
    var knownStates = new DfaStateCollection();
    var outputFinalStates = new HashSet<int>();
    var worklist = new Queue<DfaState>();
    // Index 0 is taken by the seed state below, so newly numbered states start at 1.
    int freshStateIndex = 1;

    // Seed the worklist with DFA state 0, which wraps NFA state 0.
    worklist.Enqueue(new DfaState(0, new[] { new NfaState(0) }));

    while (worklist.Count != 0)
    {
        DfaState current = worklist.Dequeue();

        // Pass 1: group the transitions leaving the current state by the word they consume.
        var transitions = new Dictionary<string, DfaArc>();
        foreach ((int arcIdx, NfaState source) in GetArcIndices(current))
        {
            WordGraphArc graphArc = Arcs[arcIdx];
            int wordPos = source.WordIndex + 1;
            DfaArc transition = transitions.GetOrCreate(graphArc.Words[wordPos]);

            bool consumesLastWord = wordPos == graphArc.Words.Count - 1;
            if (!consumesLastWord)
            {
                // More words remain on this arc: the target is a sub-state inside the arc,
                // and any path recorded for the source state is carried over unchanged.
                transition.NfaStates.Add(new NfaState(source.StateIndex, arcIdx, wordPos));
                transition.IsNextSubState = true;
                if (current.Paths.TryGetValue(source.StateIndex, out Path carried))
                {
                    transition.Paths[source.StateIndex] = carried;
                }
                continue;
            }

            // The arc is fully consumed: the target is the arc's destination state.
            transition.NfaStates.Add(new NfaState(graphArc.NextState));

            // Extend the path that led here, or start a new one at the current DFA state.
            Path extended = current.Paths.TryGetValue(source.StateIndex, out Path prefix)
                ? new Path(prefix.StartState, prefix.Arcs.Concat(arcIdx),
                    LogSpace.Multiply(prefix.Score, graphArc.Score))
                : new Path(current.Index, new[] { arcIdx }, graphArc.Score);

            // Keep only the highest-scoring path per destination state.
            bool isBetter = !transition.Paths.TryGetValue(graphArc.NextState, out Path incumbent)
                || extended.Score > incumbent.Score;
            if (isBetter)
            {
                transition.Paths[graphArc.NextState] = extended;
            }
        }

        // Pass 2: resolve each candidate's target state and emit arcs for its best path.
        foreach (DfaArc transition in transitions.Values)
        {
            bool staysInsideArc = transition.IsNextSubState;

            if (!knownStates.TryGetValue(transition.NfaStates, out DfaState target))
            {
                if (staysInsideArc)
                {
                    // Sub-states reuse the current state's index, inherit the candidate's
                    // paths, and are not registered in the known-state collection.
                    target = new DfaState(current.Index, transition.NfaStates);
                    foreach (KeyValuePair<int, Path> entry in transition.Paths)
                    {
                        target.Paths.Add(entry);
                    }
                }
                else
                {
                    target = new DfaState(freshStateIndex++, transition.NfaStates);
                    knownStates.Add(target);
                }
                worklist.Enqueue(target);
            }

            bool reachesFinal = target.NfaStates
                .Any(s => !s.IsSubState && FinalStates.Contains(s.StateIndex));

            // Nothing to emit without a recorded path, or when the target is a
            // non-final sub-state.
            if (transition.Paths.Count == 0 || (staysInsideArc && !reachesFinal))
            {
                continue;
            }

            Path best = transition.Paths.Values.MaxBy(p => p.Score);
            int lastArcPos = best.Arcs.Count - 1;
            int fromState = best.StartState;
            for (int pos = 0; pos <= lastArcPos; pos++)
            {
                WordGraphArc original = Arcs[best.Arcs[pos]];

                // Only the final arc of a completed transition lands on the target state;
                // every other hop gets a newly numbered intermediate state.
                int toState;
                if (!staysInsideArc && pos == lastArcPos)
                {
                    toState = target.Index;
                }
                else
                {
                    toState = freshStateIndex++;
                }

                outputArcs.Add(new WordGraphArc(fromState, toState, original.Score, original.Words,
                    original.Alignment, original.SourceSegmentRange, original.IsUnknown,
                    original.WordConfidences));
                fromState = toState;
            }

            if (reachesFinal)
            {
                outputFinalStates.Add(fromState);
            }
        }
    }

    return new WordGraph(outputArcs, outputFinalStates, InitialStateScore);
}