// 1. build full RegularTree (don't build first, following for positions)
        // 2. build regular alphabet (equivalense classes)
        // 3. build first, following for non-literal part of regular tree
        // 4. build non-literal TdfaData
        public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary <string, Action> literalToAction)
        {
            var equivClasses = noConstRegTree
                               .GetEquivalenceCsets()
                               .Union(new [] { NewLines });
            var alphabet = new EquivalenceClassesAlphabet(equivClasses);

            foreach (var literal in literalToAction.Keys)
            {
                foreach (char ch in literal)
                {
                    alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
                }
            }

            // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
            // algorithms for subset construction and state minimization [ASU86, WaG84].
            var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);

            this.data = initialDfa.Data;

#if false
            using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
            {
                data.DescribeGraph(view);
            }
#endif

            // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
            // for every state s.
            int initialStateCount = data.StateCount;
            foreach (var S in data.EnumerateStates())
            {
                S.Tunnel = NoState;
            }

            // Step 3: Compute the set of ambiguous states of the tunnel automaton.
            this.ambiguous = FindAmbiguousStates();

            // Step 4: For every constant RE execute Step 5 which incrementally extends
            // the tunnel automaton. Continue with Step 6.
            foreach (var pair in literalToAction)
            {
                ExtendAutomatonWithLiteral(pair.Key, pair.Value);
            }

            var newlines = alphabet.Encode(NewLines);

            // Add new line handling
            foreach (var state in data.EnumerateStates())
            {
                var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
                if (i < 0)
                {
                    continue;
                }

                var newlineTransition = state.Outgoing[i];
                var to = data.GetState(newlineTransition.To);

                TdfaState newlineState;
                if (to.IsNewline)
                {
                    continue;
                }
                else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
                {
                    newlineState = to;
                }
                else
                {
                    newlineState = new TdfaState(data);
                    data.AddState(newlineState);
                    newlineState.Tunnel      = newlineTransition.To;
                    newlineState.IsAccepting = to.IsAccepting;
                    newlineState.Actions.AddRange(to.Actions);

                    data.DeleteTransition(state.Index, newlines);
                    data.AddTransition(state.Index, newlines, newlineState.Index);
                }

                newlineState.IsNewline = true;
            }
        }
        private void ExtendAutomatonWithLiteral(string literal, int scanAction)
        {
            var state = data.Start;

            var  symbols = EnumerateLiteralSymbols(data.Alphabet, literal).GetEnumerator();
            bool hasSymbol;

            // trace and do nothing
            while ((hasSymbol = symbols.MoveNext()))
            {
                State next = Control(state, symbols.Current);
                if (next == NoState || ambiguous.Contains(next))
                {
                    break;
                }

                state = next;
            }

            int previous = state;

            // trace and duplicate the path
            while (hasSymbol)
            {
                State next = Control(state, symbols.Current);
                if (next != NoState)
                {
                    state = next;

                    var newStateInfo = new TdfaState(data);
                    int newState     = data.AddState(newStateInfo);
                    data.DeleteTransition(from: previous, symbol: symbols.Current);
                    data.AddTransition(
                        from: previous,
                        symbol: symbols.Current,
                        to: newState
                        );
                    var S = data.GetState(state);
                    newStateInfo.IsAccepting = S.IsAccepting;
                    newStateInfo.Actions.AddRange(S.Actions);
                    newStateInfo.Tunnel = state;
                    previous            = newState;

                    hasSymbol = symbols.MoveNext();
                }
                else
                {
                    var S = data.GetState(state);
                    if (S.Tunnel == NoState)
                    {
                        break;
                    }

                    state = S.Tunnel;
                }
            }

            // extend the path
            for (; hasSymbol; hasSymbol = symbols.MoveNext())
            {
                var newStateInfo = new TdfaState(data);
                int newState     = data.AddState(newStateInfo);
                data.AddTransition(
                    from: previous,
                    symbol: symbols.Current,
                    to: newState
                    );
                newStateInfo.Tunnel = NoState;
                previous            = newState;
            }

            // process new final state
            var finalState = data.GetState(previous);

            finalState.IsAccepting = true;
            finalState.Actions.Insert(0, scanAction);
        }
        public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
        {
            this.alphabet = alphabet;

            data = new TdfaData(alphabet);

            data.AddState(new TdfaState(data) { Positions = regTree.FirstPos });

            foreach (var st in data.EnumerateStates())
            {
                int Sindex = st.Index;
                var S = st.Positions;
                if (S.Contains(regTree.EoiPosition))
                {
                    data.GetState(Sindex).IsAccepting = true;
                }

                var actionPosList = new SortedList<int, int>();

                foreach (var position in S)
                {
                    var action = regTree.GetPosAction(position);
                    if (action.HasValue)
                    {
                        actionPosList[position] = action.Value;
                    }
                }

                if (actionPosList.Count != 0)
                {
                    st.Actions.AddRange(actionPosList.Values);
                }

                var transitionSymbols = alphabet.SymbolSetType.Union(
                                            st.Positions
                                            .Select(regTree.GetPosSymbols)
                                            .Select(alphabet.Encode));
                foreach (var symbol in transitionSymbols)
                {
                    if (symbol == alphabet.EoiSymbol)
                    {
                        continue;
                    }

                    var U = TdfaData.PositionSetType.Mutable();
                    foreach (var position in S)
                    {
                        var cset = alphabet.Encode(regTree.GetPosSymbols(position));
                        if (cset.Contains(symbol))
                        {
                            U.AddAll(regTree.GetFollowPos(position));
                        }
                    }

                    if (!U.IsEmpty)
                    {
                        int Uindex = data.IndexOfState(U);
                        if (Uindex < 0)
                        {
                            Uindex = data.AddState(new TdfaState(data) { Positions = U });
                        }

                        data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
                    }
                }
            }
        }
Ejemplo n.º 4
0
        public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
        {
            this.alphabet = alphabet;

            data = new TdfaData(alphabet);

            data.AddState(new TdfaState(data)
            {
                Positions = regTree.FirstPos
            });

            foreach (var st in data.EnumerateStates())
            {
                int Sindex = st.Index;
                var S      = st.Positions;
                if (S.Contains(regTree.EoiPosition))
                {
                    data.GetState(Sindex).IsAccepting = true;
                }

                var actionPosList = new SortedList <int, int>();

                foreach (var position in S)
                {
                    var action = regTree.GetPosAction(position);
                    if (action.HasValue)
                    {
                        actionPosList[position] = action.Value;
                    }
                }

                if (actionPosList.Count != 0)
                {
                    st.Actions.AddRange(actionPosList.Values);
                }

                var transitionSymbols = alphabet.SymbolSetType.Union(
                    st.Positions
                    .Select(regTree.GetPosSymbols)
                    .Select(alphabet.Encode));
                foreach (var symbol in transitionSymbols)
                {
                    if (symbol == alphabet.EoiSymbol)
                    {
                        continue;
                    }

                    var U = TdfaData.PositionSetType.Mutable();
                    foreach (var position in S)
                    {
                        var cset = alphabet.Encode(regTree.GetPosSymbols(position));
                        if (cset.Contains(symbol))
                        {
                            U.AddAll(regTree.GetFollowPos(position));
                        }
                    }

                    if (!U.IsEmpty)
                    {
                        int Uindex = data.IndexOfState(U);
                        if (Uindex < 0)
                        {
                            Uindex = data.AddState(new TdfaState(data)
                            {
                                Positions = U
                            });
                        }

                        data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
                    }
                }
            }
        }
        // 1. build full RegularTree (don't build first, following for positions)
        // 2. build regular alphabet (equivalense classes)
        // 3. build first, following for non-literal part of regular tree
        // 4. build non-literal TdfaData
        public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string,Action> literalToAction)
        {
            var equivClasses = noConstRegTree
                                .GetEquivalenceCsets()
                                .Union(new [] { NewLines });
            var alphabet = new EquivalenceClassesAlphabet(equivClasses);

            foreach (var literal in literalToAction.Keys)
            {
                foreach (char ch in literal)
                {
                    alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
                }
            }

            // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
            // algorithms for subset construction and state minimization [ASU86, WaG84].
            var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
            this.data = initialDfa.Data;

            #if false
            using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
            {
                data.DescribeGraph(view);
            }
            #endif

            // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
            // for every state s.
            int initialStateCount = data.StateCount;
            foreach (var S in data.EnumerateStates())
            {
                S.Tunnel = NoState;
            }

            // Step 3: Compute the set of ambiguous states of the tunnel automaton.
            this.ambiguous = FindAmbiguousStates();

            // Step 4: For every constant RE execute Step 5 which incrementally extends
            // the tunnel automaton. Continue with Step 6.
            foreach (var pair in literalToAction)
            {
                ExtendAutomatonWithLiteral(pair.Key, pair.Value);
            }

            var newlines = alphabet.Encode(NewLines);

            // Add new line handling
            foreach (var state in data.EnumerateStates())
            {
                var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
                if (i < 0)
                {
                    continue;
                }

                var newlineTransition = state.Outgoing[i];
                var to = data.GetState(newlineTransition.To);

                TdfaState newlineState;
                if (to.IsNewline)
                {
                    continue;
                }
                else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
                {
                    newlineState = to;
                }
                else
                {
                    newlineState = new TdfaState(data);
                    data.AddState(newlineState);
                    newlineState.Tunnel      = newlineTransition.To;
                    newlineState.IsAccepting = to.IsAccepting;
                    newlineState.Actions.AddRange(to.Actions);

                    data.DeleteTransition(state.Index, newlines);
                    data.AddTransition(state.Index, newlines, newlineState.Index);
                }

                newlineState.IsNewline = true;
            }
        }