Ejemplo n.º 1
0
        /// <summary>
        /// Appends <paramref name="dfaState"/> to <c>Dstates</c> and stamps it with the
        /// position it was stored at.
        /// </summary>
        /// <param name="dfaState">
        /// State to register. Debug builds assert that it is non-null and that its
        /// <c>Tunnel</c> is still 0.
        /// </param>
        /// <returns>
        /// The value of <c>Dstates.Count</c> taken before the state was added; the same
        /// value is written to <c>dfaState.Index</c>.
        /// </returns>
        public int AddState(TdfaState dfaState)
        {
            Debug.Assert(dfaState != null);
            Debug.Assert(dfaState.Tunnel == 0);

            // The state is appended, so its slot equals the current element count.
            int newIndex = Dstates.Count;

            dfaState.Index = newIndex;
            // NOTE(review): -1 is presumably the "no tunnel" sentinel - confirm against NoState.
            dfaState.Tunnel = -1;
            Dstates.Add(dfaState);

            return newIndex;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Lazily enumerates the outgoing transitions of <paramref name="S"/>, expanded so
        /// that every yielded arrow covers exactly one interval of the decoded symbol set.
        /// </summary>
        /// <param name="S">State whose <c>Outgoing</c> transitions are expanded.</param>
        /// <returns>
        /// One <c>IntArrow&lt;int&gt;</c> per interval, each pointing at the target
        /// (<c>To</c>) of the transition the interval came from.
        /// </returns>
        public IEnumerable <IntArrow <int> > EnumerateRealTransitions(TdfaState S)
        {
            foreach (var t in S.Outgoing)
            {
                // Decode the transition's symbols through the alphabet; the cast requires
                // the decoded set to be a MutableIntervalIntSet.
                var intervalSet = (MutableIntervalIntSet)data.Alphabet.Decode(t.Symbols);
                foreach (var interval in intervalSet.EnumerateIntervals())
                {
                    yield return new IntArrow <int>(interval, t.To);
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Lazily walks every outgoing transition of <paramref name="S"/> and yields one
        /// arrow for each interval of the transition's decoded symbol set.
        /// </summary>
        /// <param name="S">State whose <c>Outgoing</c> transitions are enumerated.</param>
        /// <returns>
        /// A sequence of <c>IntArrow&lt;int&gt;</c> values built from an interval and the
        /// transition target <c>To</c>.
        /// </returns>
        public IEnumerable<IntArrow<int>> EnumerateRealTransitions(TdfaState S)
        {
            foreach (var t in S.Outgoing)
            {
                // The alphabet decodes the transition's symbols; the result is cast to
                // MutableIntervalIntSet so that it can be split into intervals.
                var intervalSet = (MutableIntervalIntSet)data.Alphabet.Decode(t.Symbols);
                foreach (var interval in intervalSet.EnumerateIntervals())
                {
                    yield return new IntArrow<int>(interval, t.To);
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds a display name for a state: its <c>Index</c>, followed by the
        /// comma-separated <c>Actions</c> in square brackets when there are any.
        /// </summary>
        /// <param name="S">State to describe.</param>
        /// <returns>Either <c>"index"</c> or <c>"index [a1,a2,...]"</c>.</returns>
        private static string GetStateName(TdfaState S)
        {
            var name = new StringBuilder();
            name.Append(S.Index);

            // States without actions are named by their index alone.
            if (S.Actions.Count == 0)
            {
                return name.ToString();
            }

            name.Append(" [")
                .Append(string.Join(",", S.Actions))
                .Append("]");

            return name.ToString();
        }
        // 1. build full RegularTree (don't build first, following for positions)
        // 2. build regular alphabet (equivalence classes)
        // 3. build first, following for non-literal part of regular tree
        // 4. build non-literal TdfaData
        /// <summary>
        /// Builds the tunnel automaton: creates a DFA for <paramref name="noConstRegTree"/>
        /// via <c>RegularToDfaAlgorithm</c>, resets every state's tunnel, computes the
        /// ambiguous states, extends the automaton with each literal, and finally marks
        /// (or creates) the states that are entered through a newline transition.
        /// </summary>
        /// <param name="noConstRegTree">
        /// Regular tree handed to <c>RegularToDfaAlgorithm</c>; its equivalence csets seed
        /// the alphabet.
        /// </param>
        /// <param name="literalToAction">
        /// Literal texts and the action passed to <c>ExtendAutomatonWithLiteral</c> for
        /// each of them.
        /// </param>
        public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary <string, Action> literalToAction)
        {
            // The alphabet is built from the tree's equivalence csets plus the NewLines set.
            var equivClasses = noConstRegTree
                               .GetEquivalenceCsets()
                               .Union(new [] { NewLines });
            var alphabet = new EquivalenceClassesAlphabet(equivClasses);

            // Every character of every literal is added to the alphabet as a
            // single-character input set.
            foreach (var literal in literalToAction.Keys)
            {
                foreach (char ch in literal)
                {
                    alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
                }
            }

            // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
            // algorithms for subset construction and state minimization [ASU86, WaG84].
            var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);

            this.data = initialDfa.Data;

#if false
            using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
            {
                data.DescribeGraph(view);
            }
#endif

            // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
            // for every state s.
            foreach (var S in data.EnumerateStates())
            {
                S.Tunnel = NoState;
            }

            // Step 3: Compute the set of ambiguous states of the tunnel automaton.
            this.ambiguous = FindAmbiguousStates();

            // Step 4: For every constant RE execute Step 5 which incrementally extends
            // the tunnel automaton. Continue with Step 6.
            foreach (var pair in literalToAction)
            {
                ExtendAutomatonWithLiteral(pair.Key, pair.Value);
            }

            var newlines = alphabet.Encode(NewLines);

            // Add new line handling.
            // NOTE(review): data.AddState is called below while data.EnumerateStates() is
            // being iterated; whether that is safe depends on how EnumerateStates is
            // implemented (not visible here) - confirm.
            foreach (var state in data.EnumerateStates())
            {
                // Only the first outgoing transition that carries a newline symbol is considered.
                var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
                if (i < 0)
                {
                    continue;
                }

                var newlineTransition = state.Outgoing[i];
                var to = data.GetState(newlineTransition.To);

                TdfaState newlineState;
                if (to.IsNewline)
                {
                    // Target is already flagged.
                    continue;
                }
                else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
                {
                    // Every incoming transition passes the HasSingleSymbolFrom(newlines)
                    // check, so the target itself can be flagged.
                    newlineState = to;
                }
                else
                {
                    // Otherwise insert a dedicated state that copies the target's accepting
                    // flag and actions, tunnels to the original target, and takes over
                    // this state's newline transition.
                    newlineState = new TdfaState(data);
                    data.AddState(newlineState);
                    newlineState.Tunnel      = newlineTransition.To;
                    newlineState.IsAccepting = to.IsAccepting;
                    newlineState.Actions.AddRange(to.Actions);

                    data.DeleteTransition(state.Index, newlines);
                    data.AddTransition(state.Index, newlines, newlineState.Index);
                }

                newlineState.IsNewline = true;
            }
        }
        /// <summary>
        /// Extends the tunnel automaton so that <paramref name="literal"/> is accepted with
        /// <paramref name="scanAction"/> as the first action of its final state.
        /// </summary>
        /// <remarks>
        /// Works in three phases over the literal's symbols: (1) follow existing
        /// transitions from <c>data.Start</c> while the next state exists and is not
        /// ambiguous; (2) duplicate the rest of the existing path into fresh states that
        /// tunnel to the originals; (3) append brand-new states for symbols that have no
        /// path at all.
        /// </remarks>
        /// <param name="literal">Literal text whose symbols are traced through the automaton.</param>
        /// <param name="scanAction">Action inserted at position 0 of the final state's <c>Actions</c>.</param>
        private void ExtendAutomatonWithLiteral(string literal, int scanAction)
        {
            var state = data.Start;
            int previous;

            // The enumerator is disposed as soon as all symbols have been consumed.
            using (var symbols = EnumerateLiteralSymbols(data.Alphabet, literal).GetEnumerator())
            {
                bool hasSymbol;

                // trace and do nothing
                while ((hasSymbol = symbols.MoveNext()))
                {
                    State next = Control(state, symbols.Current);
                    if (next == NoState || ambiguous.Contains(next))
                    {
                        break;
                    }

                    state = next;
                }

                // From here on new transitions are attached to 'previous', while 'state'
                // keeps tracking the position in the existing automaton.
                previous = state;

                // trace and duplicate the path
                while (hasSymbol)
                {
                    State next = Control(state, symbols.Current);
                    if (next != NoState)
                    {
                        state = next;

                        // Re-point the transition at a fresh state that mirrors the
                        // existing target and tunnels to it.
                        var newStateInfo = new TdfaState(data);
                        int newState     = data.AddState(newStateInfo);
                        data.DeleteTransition(from: previous, symbol: symbols.Current);
                        data.AddTransition(
                            from: previous,
                            symbol: symbols.Current,
                            to: newState
                            );
                        var S = data.GetState(state);
                        newStateInfo.IsAccepting = S.IsAccepting;
                        newStateInfo.Actions.AddRange(S.Actions);
                        newStateInfo.Tunnel = state;
                        previous            = newState;

                        hasSymbol = symbols.MoveNext();
                    }
                    else
                    {
                        // No transition on this symbol: retry from the tunnel target, or
                        // stop duplicating when there is no tunnel.
                        var S = data.GetState(state);
                        if (S.Tunnel == NoState)
                        {
                            break;
                        }

                        state = S.Tunnel;
                    }
                }

                // extend the path
                for (; hasSymbol; hasSymbol = symbols.MoveNext())
                {
                    var newStateInfo = new TdfaState(data);
                    int newState     = data.AddState(newStateInfo);
                    data.AddTransition(
                        from: previous,
                        symbol: symbols.Current,
                        to: newState
                        );
                    newStateInfo.Tunnel = NoState;
                    previous            = newState;
                }
            }

            // process new final state
            var finalState = data.GetState(previous);

            finalState.IsAccepting = true;
            finalState.Actions.Insert(0, scanAction);
        }
        /// <summary>
        /// For an accepting state, combines the token producers of its actions, records
        /// the outcome on the state, and replaces the state's actions with the combined
        /// producer's <c>RealActions</c> in sorted order. Non-accepting states are left
        /// untouched.
        /// </summary>
        /// <param name="state">State to register.</param>
        public void RegisterState(TdfaState state)
        {
            if (!state.IsAccepting)
            {
                return;
            }

            // Deferred query over the state's current actions.
            // NOTE(review): state.Actions is cleared further down; this relies on Combine
            // (and RealActions) not re-enumerating the query afterwards - confirm.
            var producers = state.Actions.Select(act => actionToTokenProducer[act]);
            var combined  = TokenProducerInfo.Combine(tokenSetType, producers);

            var possibleTokenCount = combined.PossibleTokens.Count;
            if (possibleTokenCount == 0)
            {
                state.EnvelopeId = -1;
            }
            else if (possibleTokenCount == 1)
            {
                state.EnvelopeId = combined.PossibleTokens.First();
            }
            else
            {
                // Several possible tokens: remember the combined producer for this state
                // instead of assigning an envelope id.
                combined.State = state;
                stateToTokenProducer[state] = combined;
            }

            var actions = state.Actions;
            actions.Clear();
            actions.AddRange(combined.RealActions);
            actions.Sort();
        }
        // 1. build full RegularTree (don't build first, following for positions)
        // 2. build regular alphabet (equivalence classes)
        // 3. build first, following for non-literal part of regular tree
        // 4. build non-literal TdfaData
        /// <summary>
        /// Constructs the tunnel automaton data: a DFA is produced for
        /// <paramref name="noConstRegTree"/> by <c>RegularToDfaAlgorithm</c>, all tunnels
        /// are reset to <c>NoState</c>, ambiguous states are computed, each literal is
        /// woven in, and states entered through a newline transition are flagged.
        /// </summary>
        /// <param name="noConstRegTree">
        /// Regular tree passed to <c>RegularToDfaAlgorithm</c>; also the source of the
        /// equivalence csets for the alphabet.
        /// </param>
        /// <param name="literalToAction">
        /// Maps each literal to the action given to <c>ExtendAutomatonWithLiteral</c>.
        /// </param>
        public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string,Action> literalToAction)
        {
            // Alphabet = equivalence csets of the tree + the NewLines set.
            var equivClasses = noConstRegTree
                                .GetEquivalenceCsets()
                                .Union(new [] { NewLines });
            var alphabet = new EquivalenceClassesAlphabet(equivClasses);

            // Each literal character is registered as a single-character input set.
            foreach (var literal in literalToAction.Keys)
            {
                foreach (char ch in literal)
                {
                    alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
                }
            }

            // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
            // algorithms for subset construction and state minimization [ASU86, WaG84].
            var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
            this.data = initialDfa.Data;

            #if false
            using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
            {
                data.DescribeGraph(view);
            }
            #endif

            // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
            // for every state s.
            foreach (var S in data.EnumerateStates())
            {
                S.Tunnel = NoState;
            }

            // Step 3: Compute the set of ambiguous states of the tunnel automaton.
            this.ambiguous = FindAmbiguousStates();

            // Step 4: For every constant RE execute Step 5 which incrementally extends
            // the tunnel automaton. Continue with Step 6.
            foreach (var pair in literalToAction)
            {
                ExtendAutomatonWithLiteral(pair.Key, pair.Value);
            }

            var newlines = alphabet.Encode(NewLines);

            // Add new line handling.
            // NOTE(review): states are added through data.AddState while
            // data.EnumerateStates() is still being iterated; confirm that
            // EnumerateStates tolerates this (its implementation is not visible here).
            foreach (var state in data.EnumerateStates())
            {
                // Look at the first outgoing transition that has any newline symbol.
                var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
                if (i < 0)
                {
                    continue;
                }

                var newlineTransition = state.Outgoing[i];
                var to = data.GetState(newlineTransition.To);

                TdfaState newlineState;
                if (to.IsNewline)
                {
                    // Already flagged; nothing to do.
                    continue;
                }
                else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
                {
                    // All incoming transitions pass HasSingleSymbolFrom(newlines):
                    // flag the target directly.
                    newlineState = to;
                }
                else
                {
                    // Create a separate state that copies the target's accepting flag and
                    // actions, tunnels to the target, and receives the newline transition.
                    newlineState = new TdfaState(data);
                    data.AddState(newlineState);
                    newlineState.Tunnel      = newlineTransition.To;
                    newlineState.IsAccepting = to.IsAccepting;
                    newlineState.Actions.AddRange(to.Actions);

                    data.DeleteTransition(state.Index, newlines);
                    data.AddTransition(state.Index, newlines, newlineState.Index);
                }

                newlineState.IsNewline = true;
            }
        }
        /// <summary>
        /// Weaves <paramref name="literal"/> into the tunnel automaton and makes the state
        /// reached at its end accepting, with <paramref name="scanAction"/> inserted as
        /// the first of its actions.
        /// </summary>
        /// <remarks>
        /// The literal's symbols are consumed in three consecutive loops: existing
        /// transitions are followed from <c>data.Start</c> until a missing or ambiguous
        /// state is hit; the remainder of any existing path is copied into new states
        /// whose <c>Tunnel</c> points at the originals; symbols left over are attached
        /// as a chain of new states with no tunnel.
        /// </remarks>
        /// <param name="literal">Literal text to add.</param>
        /// <param name="scanAction">Action placed at index 0 of the final state's <c>Actions</c>.</param>
        private void ExtendAutomatonWithLiteral(string literal, int scanAction)
        {
            var state = data.Start;
            int previous;

            // Dispose the symbol enumerator once the literal has been fully consumed.
            using (var symbols = EnumerateLiteralSymbols(data.Alphabet, literal).GetEnumerator())
            {
                bool hasSymbol;

                // trace and do nothing
                while ((hasSymbol = symbols.MoveNext()))
                {
                    State next = Control(state, symbols.Current);
                    if (next == NoState || ambiguous.Contains(next))
                    {
                        break;
                    }

                    state = next;
                }

                // 'previous' is the state new transitions hang off; 'state' continues to
                // follow the pre-existing automaton.
                previous = state;

                // trace and duplicate the path
                while (hasSymbol)
                {
                    State next = Control(state, symbols.Current);
                    if (next != NoState)
                    {
                        state = next;

                        // Replace the transition from 'previous' with one leading to a
                        // copy of the existing target; the copy tunnels to the original.
                        var newStateInfo = new TdfaState(data);
                        int newState = data.AddState(newStateInfo);
                        data.DeleteTransition(from: previous, symbol: symbols.Current);
                        data.AddTransition(
                                from: previous,
                                symbol: symbols.Current,
                                to: newState
                            );
                        var S = data.GetState(state);
                        newStateInfo.IsAccepting = S.IsAccepting;
                        newStateInfo.Actions.AddRange(S.Actions);
                        newStateInfo.Tunnel = state;
                        previous = newState;

                        hasSymbol = symbols.MoveNext();
                    }
                    else
                    {
                        // Nothing to follow on this symbol: fall back to the tunnel
                        // target, or leave the loop if the state has no tunnel.
                        var S = data.GetState(state);
                        if (S.Tunnel == NoState)
                        {
                            break;
                        }

                        state = S.Tunnel;
                    }
                }

                // extend the path
                for (; hasSymbol; hasSymbol = symbols.MoveNext())
                {
                    var newStateInfo = new TdfaState(data);
                    int newState = data.AddState(newStateInfo);
                    data.AddTransition(
                            from: previous,
                            symbol: symbols.Current,
                            to: newState
                        );
                    newStateInfo.Tunnel = NoState;
                    previous = newState;
                }
            }

            // process new final state
            var finalState = data.GetState(previous);
            finalState.IsAccepting = true;
            finalState.Actions.Insert(0, scanAction);
        }