Beispiel #1
0
            private int m_dfaInitialState;              // Initial automaton state; the constructor copies it from Grammar.DfaInitialState.

            /// <summary>
            /// Creates a new instance of the <c>TokenReader</c> class.
            /// </summary>
            /// <remarks>
            /// Caches the DFA state table, symbol table and initial DFA state of
            /// <paramref name="grammar"/>, then scans the symbol table once to remember
            /// the symbols whose type is <c>SymbolType.Error</c> and <c>SymbolType.End</c>.
            /// </remarks>
            /// <param name="reader">Source to parse.</param>
            /// <param name="grammar">Grammar rules.</param>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="reader"/> or <paramref name="grammar"/> is <c>null</c>.
            /// </exception>
            public TokenReader(CharReader reader, Grammar grammar)
            {
                if (reader == null)
                {
                    // The name passed here must match the actual parameter name so that
                    // ArgumentNullException.ParamName points the caller at the right argument.
                    throw new ArgumentNullException("reader");
                }
                if (grammar == null)
                {
                    throw new ArgumentNullException("grammar");
                }

                m_charReader      = reader;
                m_dfaStateTable   = grammar.DfaStateTable;
                m_symbolTable     = grammar.SymbolTable;
                m_dfaInitialState = grammar.DfaInitialState;

                // Find special symbols. If the table holds several symbols of the same
                // special type, the last one encountered is the one that is kept.
                foreach (Symbol symbol in m_symbolTable)
                {
                    switch (symbol.SymbolType)
                    {
                    case SymbolType.Error:
                        m_errorSymbol = symbol;
                        break;

                    case SymbolType.End:
                        m_endSymbol = symbol;
                        break;
                    }
                }
            }
Beispiel #2
0
        /// <summary>
        /// Removes redundant arcs from the word graph.
        /// TODO: This seems to affect the results of an interactive translation session, so don't use it yet.
        /// </summary>
        /// <remarks>
        /// Works through a queue of <c>DfaState</c> objects, starting from one that wraps NFA state 0.
        /// For each dequeued state, the outgoing arcs are grouped by their next word; each group becomes
        /// a candidate transition. Only the best-scoring path of a candidate is written to the output.
        /// NOTE(review): this looks like a subset-construction style determinization of the word
        /// graph - confirm against <c>GetArcIndices</c> and <c>DfaStateCollection</c>, which are not visible here.
        /// </remarks>
        /// <returns>The optimized word graph.</returns>
        public WordGraph Optimize()
        {
            var dfaArcs           = new List <WordGraphArc>();
            var dfaStates         = new DfaStateCollection();
            var dfaFinalStates    = new HashSet <int>();
            // Index 0 is taken by the initial state below, so freshly allocated states start at 1.
            int nextDfaStateIndex = 1;
            var unmarkedStates    = new Queue <DfaState>();

            // Seed the work queue with DFA state 0, made up of NFA state 0 only.
            unmarkedStates.Enqueue(new DfaState(0, new[] { new NfaState(0) }));

            while (unmarkedStates.Count > 0)
            {
                DfaState dfaState      = unmarkedStates.Dequeue();
                // Candidate transitions out of dfaState, keyed by the next word to consume.
                var      candidateArcs = new Dictionary <string, DfaArc>();
                foreach ((int arcIndex, NfaState nfaState) in GetArcIndices(dfaState))
                {
                    WordGraphArc arc           = Arcs[arcIndex];
                    int          nextWordIndex = nfaState.WordIndex + 1;
                    // GetOrCreate is a helper defined elsewhere; presumably it returns the existing
                    // DfaArc for this word or inserts a new one - verify against its definition.
                    DfaArc       candidateArc  = candidateArcs.GetOrCreate(arc.Words[nextWordIndex]);
                    if (nextWordIndex == arc.Words.Count - 1)
                    {
                        // This word is the last word of the arc, so the transition lands on arc.NextState.
                        candidateArc.NfaStates.Add(new NfaState(arc.NextState));

                        // Extend the path recorded for the originating NFA state with this arc,
                        // or start a new single-arc path at the current DFA state if none is recorded.
                        Path path;
                        if (dfaState.Paths.TryGetValue(nfaState.StateIndex, out Path prevPath))
                        {
                            path = new Path(prevPath.StartState, prevPath.Arcs.Concat(arcIndex),
                                            LogSpace.Multiply(prevPath.Score, arc.Score));
                        }
                        else
                        {
                            path = new Path(dfaState.Index, new[] { arcIndex }, arc.Score);
                        }

                        // Per destination NFA state, keep only the path with the strictly higher score.
                        if (!candidateArc.Paths.TryGetValue(arc.NextState, out Path otherPath) ||
                            path.Score > otherPath.Score)
                        {
                            candidateArc.Paths[arc.NextState] = path;
                        }
                    }
                    else
                    {
                        // More words remain on this arc: record a position inside the arc
                        // (state index, arc index, word index) and flag the candidate as a sub-state.
                        candidateArc.NfaStates.Add(new NfaState(nfaState.StateIndex, arcIndex, nextWordIndex));
                        candidateArc.IsNextSubState = true;

                        // Carry the already-recorded path forward unchanged, if there is one.
                        if (dfaState.Paths.TryGetValue(nfaState.StateIndex, out Path prevPath))
                        {
                            candidateArc.Paths[nfaState.StateIndex] = prevPath;
                        }
                    }
                }

                foreach (DfaArc candidateArc in candidateArcs.Values)
                {
                    // Look for an already registered DFA state with the same NFA state set; create one if missing.
                    if (!dfaStates.TryGetValue(candidateArc.NfaStates, out DfaState nextDfaState))
                    {
                        // Sub-states reuse the index of the current DFA state; only other states get a fresh index.
                        int stateIndex = candidateArc.IsNextSubState ? dfaState.Index : nextDfaStateIndex++;
                        nextDfaState = new DfaState(stateIndex, candidateArc.NfaStates);
                        if (candidateArc.IsNextSubState)
                        {
                            // Sub-states are not added to dfaStates; they only inherit the candidate's paths.
                            foreach (KeyValuePair <int, Path> kvp in candidateArc.Paths)
                            {
                                nextDfaState.Paths.Add(kvp);
                            }
                        }
                        else
                        {
                            dfaStates.Add(nextDfaState);
                        }
                        unmarkedStates.Enqueue(nextDfaState);
                    }

                    // The target is final when any of its non-sub-state NFA states is in FinalStates.
                    bool isFinal = nextDfaState.NfaStates.Where(s => !s.IsSubState)
                                   .Any(s => FinalStates.Contains(s.StateIndex));
                    if ((isFinal || !candidateArc.IsNextSubState) && candidateArc.Paths.Count > 0)
                    {
                        // Only the highest-scoring path of the candidate is emitted.
                        Path bestPath = candidateArc.Paths.Values.MaxBy(p => p.Score);

                        // Emit the arcs of the best path as a chain beginning at its start state.
                        int curState = bestPath.StartState;
                        for (int i = 0; i < bestPath.Arcs.Count; i++)
                        {
                            WordGraphArc nfaArc    = Arcs[bestPath.Arcs[i]];
                            // The last arc of a non-sub-state candidate ends at the target DFA state;
                            // every other arc ends at a newly allocated intermediate state index.
                            int          nextState = !candidateArc.IsNextSubState && i == bestPath.Arcs.Count - 1
                                                                ? nextDfaState.Index
                                                                : nextDfaStateIndex++;
                            dfaArcs.Add(new WordGraphArc(curState, nextState, nfaArc.Score, nfaArc.Words,
                                                         nfaArc.Alignment, nfaArc.SourceSegmentRange, nfaArc.IsUnknown, nfaArc.WordConfidences));
                            curState = nextState;
                        }
                        if (isFinal)
                        {
                            // curState is now the end of the emitted chain.
                            dfaFinalStates.Add(curState);
                        }
                    }
                }
            }

            // The new graph keeps this graph's InitialStateScore.
            return(new WordGraph(dfaArcs, dfaFinalStates, InitialStateScore));
        }