示例#1
0
        /// <summary>
        /// Builds the automaton data for <paramref name="regTree"/> over
        /// <paramref name="alphabet"/>: starting from the state holding the tree's
        /// first positions, each state's outgoing transitions are derived from the
        /// follow positions of the positions it contains.
        /// </summary>
        /// <param name="regTree">Regular tree supplying first/follow positions, per-position symbols and actions.</param>
        /// <param name="alphabet">Alphabet used to encode position symbol sets into transition symbols.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="regTree"/> or <paramref name="alphabet"/> is <c>null</c>.
        /// </exception>
        public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
        {
            if (regTree == null)
            {
                throw new System.ArgumentNullException("regTree");
            }

            if (alphabet == null)
            {
                throw new System.ArgumentNullException("alphabet");
            }

            this.alphabet = alphabet;

            data = new TdfaData(alphabet);

            // Seed the automaton with the state made of the tree's first positions.
            data.AddState(new TdfaState(data)
            {
                Positions = regTree.FirstPos
            });

            // NOTE(review): states are added to `data` while this enumeration is running;
            // this presumably relies on EnumerateStates() also yielding states appended
            // during iteration -- confirm against TdfaData.
            foreach (var st in data.EnumerateStates())
            {
                int Sindex = st.Index;
                var S      = st.Positions;

                // A state containing the end-of-input position is accepting.
                if (S.Contains(regTree.EoiPosition))
                {
                    data.GetState(Sindex).IsAccepting = true;
                }

                // Keyed by position so the actions are appended in ascending position order.
                var actionPosList = new SortedList<int, int>();

                foreach (var position in S)
                {
                    var action = regTree.GetPosAction(position);
                    if (action.HasValue)
                    {
                        actionPosList[position] = action.Value;
                    }
                }

                if (actionPosList.Count != 0)
                {
                    st.Actions.AddRange(actionPosList.Values);
                }

                // Encode every position's symbol set once per state instead of once per
                // (symbol, position) pair; the encoding does not depend on the symbol.
                var encodedPositions = S
                    .Select(position => new
                    {
                        Position = position,
                        Symbols  = alphabet.Encode(regTree.GetPosSymbols(position))
                    })
                    .ToList();

                var transitionSymbols = alphabet.SymbolSetType.Union(
                    encodedPositions.Select(entry => entry.Symbols));

                foreach (var symbol in transitionSymbols)
                {
                    // The end-of-input symbol never produces a transition.
                    if (symbol == alphabet.EoiSymbol)
                    {
                        continue;
                    }

                    // U = union of follow positions of all positions in S that accept `symbol`.
                    var U = TdfaData.PositionSetType.Mutable();
                    foreach (var entry in encodedPositions)
                    {
                        if (entry.Symbols.Contains(symbol))
                        {
                            U.AddAll(regTree.GetFollowPos(entry.Position));
                        }
                    }

                    if (U.IsEmpty)
                    {
                        continue;
                    }

                    // Reuse the state with this position set if one exists, otherwise create it.
                    int Uindex = data.IndexOfState(U);
                    if (Uindex < 0)
                    {
                        Uindex = data.AddState(new TdfaState(data)
                        {
                            Positions = U
                        });
                    }

                    data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
                }
            }
        }
        // 1. Build the full RegularTree (without computing first/follow positions).
        // 2. Build the regular alphabet (equivalence classes).
        // 3. Compute first/follow positions for the non-literal part of the regular tree.
        // 4. Build the non-literal TdfaData.
        /// <summary>
        /// Builds the tunnel automaton: creates an equivalence-class alphabet from the
        /// tree's character sets plus <c>NewLines</c>, registers every character of
        /// every literal as an input set, converts <paramref name="noConstRegTree"/>
        /// to the initial DFA, extends it with each literal, and finally marks the
        /// states entered on newline symbols.
        /// </summary>
        /// <param name="noConstRegTree">Regular tree of the non-constant (non-literal) expressions.</param>
        /// <param name="literalToAction">Literal text mapped to the action passed to <c>ExtendAutomatonWithLiteral</c>.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="noConstRegTree"/> or <paramref name="literalToAction"/> is <c>null</c>.
        /// </exception>
        public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary <string, Action> literalToAction)
        {
            if (noConstRegTree == null)
            {
                throw new System.ArgumentNullException("noConstRegTree");
            }

            if (literalToAction == null)
            {
                throw new System.ArgumentNullException("literalToAction");
            }

            var equivClasses = noConstRegTree
                               .GetEquivalenceCsets()
                               .Union(new [] { NewLines });
            var alphabet = new EquivalenceClassesAlphabet(equivClasses);

            // Every literal character is added to the alphabet as its own input set.
            foreach (var literal in literalToAction.Keys)
            {
                foreach (char ch in literal)
                {
                    alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
                }
            }

            // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
            // algorithms for subset construction and state minimization [ASU86, WaG84].
            var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);

            this.data = initialDfa.Data;

#if false
            using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
            {
                data.DescribeGraph(view);
            }
#endif

            // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
            // for every state s.
            foreach (var S in data.EnumerateStates())
            {
                S.Tunnel = NoState;
            }

            // Step 3: Compute the set of ambiguous states of the tunnel automaton.
            this.ambiguous = FindAmbiguousStates();

            // Step 4: For every constant RE execute Step 5 which incrementally extends
            // the tunnel automaton. Continue with Step 6.
            foreach (var pair in literalToAction)
            {
                ExtendAutomatonWithLiteral(pair.Key, pair.Value);
            }

            var newlines = alphabet.Encode(NewLines);

            // Add new line handling: the target of each newline transition ends up
            // flagged with IsNewline, either directly or through a dedicated state.
            foreach (var state in data.EnumerateStates())
            {
                var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
                if (i < 0)
                {
                    // No outgoing transition on a newline symbol.
                    continue;
                }

                var newlineTransition = state.Outgoing[i];
                var to = data.GetState(newlineTransition.To);

                if (to.IsNewline)
                {
                    // Target is already flagged.
                    continue;
                }

                TdfaState newlineState;
                if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
                {
                    // Every incoming transition satisfies HasSingleSymbolFrom(newlines),
                    // so the target itself can be flagged.
                    newlineState = to;
                }
                else
                {
                    // Otherwise insert a new state that tunnels to the original target,
                    // copies its accepting flag and actions, and redirect the newline
                    // transition of the current state to it.
                    newlineState = new TdfaState(data);
                    data.AddState(newlineState);
                    newlineState.Tunnel      = newlineTransition.To;
                    newlineState.IsAccepting = to.IsAccepting;
                    newlineState.Actions.AddRange(to.Actions);

                    data.DeleteTransition(state.Index, newlines);
                    data.AddTransition(state.Index, newlines, newlineState.Index);
                }

                newlineState.IsNewline = true;
            }
        }
示例#3
0
 /// <summary>
 /// Convenience constructor: forwards to the (regTree, alphabet) constructor with an
 /// <see cref="EquivalenceClassesAlphabet"/> built from the tree's equivalence character sets.
 /// </summary>
 /// <param name="regTree">Regular tree to convert; also the source of the alphabet's character sets.</param>
 public RegularToDfaAlgorithm(RegularTree regTree)
     : this(regTree, new EquivalenceClassesAlphabet(regTree.GetEquivalenceCsets()))
     // Disabled alternative: this(regTree, new RegularAlphabet(regTree.Positions.Select(node => node.Characters)))
 {
 }
        /// <summary>
        /// Builds the automaton data for <paramref name="regTree"/> over
        /// <paramref name="alphabet"/>: starting from the state holding the tree's
        /// first positions, each state's outgoing transitions are derived from the
        /// follow positions of the positions it contains.
        /// </summary>
        /// <param name="regTree">Regular tree supplying first/follow positions, per-position symbols and actions.</param>
        /// <param name="alphabet">Alphabet used to encode position symbol sets into transition symbols.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="regTree"/> or <paramref name="alphabet"/> is <c>null</c>.
        /// </exception>
        public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
        {
            if (regTree == null)
            {
                throw new System.ArgumentNullException("regTree");
            }

            if (alphabet == null)
            {
                throw new System.ArgumentNullException("alphabet");
            }

            this.alphabet = alphabet;

            data = new TdfaData(alphabet);

            // Seed the automaton with the state made of the tree's first positions.
            data.AddState(new TdfaState(data) { Positions = regTree.FirstPos });

            // NOTE(review): states are added to `data` while this enumeration is running;
            // this presumably relies on EnumerateStates() also yielding states appended
            // during iteration -- confirm against TdfaData.
            foreach (var st in data.EnumerateStates())
            {
                int Sindex = st.Index;
                var S = st.Positions;

                // A state containing the end-of-input position is accepting.
                if (S.Contains(regTree.EoiPosition))
                {
                    data.GetState(Sindex).IsAccepting = true;
                }

                // Keyed by position so the actions are appended in ascending position order.
                var actionPosList = new SortedList<int, int>();

                foreach (var position in S)
                {
                    var action = regTree.GetPosAction(position);
                    if (action.HasValue)
                    {
                        actionPosList[position] = action.Value;
                    }
                }

                if (actionPosList.Count != 0)
                {
                    st.Actions.AddRange(actionPosList.Values);
                }

                // Encode every position's symbol set once per state instead of once per
                // (symbol, position) pair; the encoding does not depend on the symbol.
                var encodedPositions = S
                    .Select(position => new
                    {
                        Position = position,
                        Symbols = alphabet.Encode(regTree.GetPosSymbols(position))
                    })
                    .ToList();

                var transitionSymbols = alphabet.SymbolSetType.Union(
                                            encodedPositions.Select(entry => entry.Symbols));

                foreach (var symbol in transitionSymbols)
                {
                    // The end-of-input symbol never produces a transition.
                    if (symbol == alphabet.EoiSymbol)
                    {
                        continue;
                    }

                    // U = union of follow positions of all positions in S that accept `symbol`.
                    var U = TdfaData.PositionSetType.Mutable();
                    foreach (var entry in encodedPositions)
                    {
                        if (entry.Symbols.Contains(symbol))
                        {
                            U.AddAll(regTree.GetFollowPos(entry.Position));
                        }
                    }

                    if (U.IsEmpty)
                    {
                        continue;
                    }

                    // Reuse the state with this position set if one exists, otherwise create it.
                    int Uindex = data.IndexOfState(U);
                    if (Uindex < 0)
                    {
                        Uindex = data.AddState(new TdfaState(data) { Positions = U });
                    }

                    data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
                }
            }
        }
 // Disabled alternative: this(regTree, new RegularAlphabet(regTree.Positions.Select(node => node.Characters)))
 /// <summary>
 /// Convenience constructor: forwards to the (regTree, alphabet) constructor with an
 /// <see cref="EquivalenceClassesAlphabet"/> built from the tree's equivalence character sets.
 /// </summary>
 /// <param name="regTree">Regular tree to convert; also the source of the alphabet's character sets.</param>
 public RegularToDfaAlgorithm(RegularTree regTree)
     : this(regTree, new EquivalenceClassesAlphabet(regTree.GetEquivalenceCsets()))
 {
 }
 /// <summary>
 /// Creates a <see cref="RegularTree"/> from <paramref name="astNode"/> and stores it
 /// in the <c>regularTree</c> member.
 /// </summary>
 /// <param name="astNode">AST node passed to the <see cref="RegularTree"/> constructor.</param>
 private void GivenRegularExpression(AstNode astNode)
 {
     var tree = new RegularTree(astNode);
     regularTree = tree;
 }
        // 1. Build the full RegularTree (without computing first/follow positions).
        // 2. Build the regular alphabet (equivalence classes).
        // 3. Compute first/follow positions for the non-literal part of the regular tree.
        // 4. Build the non-literal TdfaData.
        /// <summary>
        /// Builds the tunnel automaton: creates an equivalence-class alphabet from the
        /// tree's character sets plus <c>NewLines</c>, registers every character of
        /// every literal as an input set, converts <paramref name="noConstRegTree"/>
        /// to the initial DFA, extends it with each literal, and finally marks the
        /// states entered on newline symbols.
        /// </summary>
        /// <param name="noConstRegTree">Regular tree of the non-constant (non-literal) expressions.</param>
        /// <param name="literalToAction">Literal text mapped to the action passed to <c>ExtendAutomatonWithLiteral</c>.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="noConstRegTree"/> or <paramref name="literalToAction"/> is <c>null</c>.
        /// </exception>
        public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string,Action> literalToAction)
        {
            if (noConstRegTree == null)
            {
                throw new System.ArgumentNullException("noConstRegTree");
            }

            if (literalToAction == null)
            {
                throw new System.ArgumentNullException("literalToAction");
            }

            var equivClasses = noConstRegTree
                                .GetEquivalenceCsets()
                                .Union(new [] { NewLines });
            var alphabet = new EquivalenceClassesAlphabet(equivClasses);

            // Every literal character is added to the alphabet as its own input set.
            foreach (var literal in literalToAction.Keys)
            {
                foreach (char ch in literal)
                {
                    alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
                }
            }

            // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
            // algorithms for subset construction and state minimization [ASU86, WaG84].
            var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
            this.data = initialDfa.Data;

            #if false
            using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
            {
                data.DescribeGraph(view);
            }
            #endif

            // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
            // for every state s.
            foreach (var S in data.EnumerateStates())
            {
                S.Tunnel = NoState;
            }

            // Step 3: Compute the set of ambiguous states of the tunnel automaton.
            this.ambiguous = FindAmbiguousStates();

            // Step 4: For every constant RE execute Step 5 which incrementally extends
            // the tunnel automaton. Continue with Step 6.
            foreach (var pair in literalToAction)
            {
                ExtendAutomatonWithLiteral(pair.Key, pair.Value);
            }

            var newlines = alphabet.Encode(NewLines);

            // Add new line handling: the target of each newline transition ends up
            // flagged with IsNewline, either directly or through a dedicated state.
            foreach (var state in data.EnumerateStates())
            {
                var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
                if (i < 0)
                {
                    // No outgoing transition on a newline symbol.
                    continue;
                }

                var newlineTransition = state.Outgoing[i];
                var to = data.GetState(newlineTransition.To);

                if (to.IsNewline)
                {
                    // Target is already flagged.
                    continue;
                }

                TdfaState newlineState;
                if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
                {
                    // Every incoming transition satisfies HasSingleSymbolFrom(newlines),
                    // so the target itself can be flagged.
                    newlineState = to;
                }
                else
                {
                    // Otherwise insert a new state that tunnels to the original target,
                    // copies its accepting flag and actions, and redirect the newline
                    // transition of the current state to it.
                    newlineState = new TdfaState(data);
                    data.AddState(newlineState);
                    newlineState.Tunnel      = newlineTransition.To;
                    newlineState.IsAccepting = to.IsAccepting;
                    newlineState.Actions.AddRange(to.Actions);

                    data.DeleteTransition(state.Index, newlines);
                    data.AddTransition(state.Index, newlines, newlineState.Index);
                }

                newlineState.IsNewline = true;
            }
        }