Esempio n. 1
0
        public SRegex(string pattern, SRegexOptions options = SRegexOptions.Default)
        {
            var ast = Language.Parse(new SreSyntax(), pattern).Result.Node;
            switch (options)
            {
                case SRegexOptions.ByteCodeCompilation:
                    var compiler = new NfaVMBytecodeBackend(ast);
                    matcher = (int[] input) =>
                        {
                            var vm = new PikeNfaVM(compiler.Code.ToArray());
                            vm.Feed(input.Select(ch => (int)ch)).Done();
                            return vm.HasMatch;
                        };
                    break;
                case SRegexOptions.ILCompilation:
                    string methodName = "MatchSrePattern" + PatternID++;

                    var builder = new CachedMethod<MatchDelegate>(
                        methodName,
                        (emit, args) => EmitAst(emit, ast, args[0]));

                    matcher = builder.Delegate;
                    break;
                case SRegexOptions.NfaCompilation:
                    var nfa = new Nfa(ast);
                    matcher = nfa.Match;
                    break;
                case SRegexOptions.DfaCompilation:
                    var dfa = new RegularToDfaAlgorithm(new RegularTree(ast));
                    var simulation = new DfaSimulation(dfa.Data);
                    matcher = input => simulation.Match(input);
                    break;
            }
        }
        // 1. build full RegularTree (don't build first, following for positions)
        // 2. build regular alphabet (equivalense classes)
        // 3. build first, following for non-literal part of regular tree
        // 4. build non-literal TdfaData
        public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary <string, Action> literalToAction)
        {
            var equivClasses = noConstRegTree
                               .GetEquivalenceCsets()
                               .Union(new [] { NewLines });
            var alphabet = new EquivalenceClassesAlphabet(equivClasses);

            foreach (var literal in literalToAction.Keys)
            {
                foreach (char ch in literal)
                {
                    alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
                }
            }

            // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
            // algorithms for subset construction and state minimization [ASU86, WaG84].
            var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);

            this.data = initialDfa.Data;

#if false
            using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
            {
                data.DescribeGraph(view);
            }
#endif

            // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
            // for every state s.
            int initialStateCount = data.StateCount;
            foreach (var S in data.EnumerateStates())
            {
                S.Tunnel = NoState;
            }

            // Step 3: Compute the set of ambiguous states of the tunnel automaton.
            this.ambiguous = FindAmbiguousStates();

            // Step 4: For every constant RE execute Step 5 which incrementally extends
            // the tunnel automaton. Continue with Step 6.
            foreach (var pair in literalToAction)
            {
                ExtendAutomatonWithLiteral(pair.Key, pair.Value);
            }

            var newlines = alphabet.Encode(NewLines);

            // Add new line handling
            foreach (var state in data.EnumerateStates())
            {
                var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
                if (i < 0)
                {
                    continue;
                }

                var newlineTransition = state.Outgoing[i];
                var to = data.GetState(newlineTransition.To);

                TdfaState newlineState;
                if (to.IsNewline)
                {
                    continue;
                }
                else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
                {
                    newlineState = to;
                }
                else
                {
                    newlineState = new TdfaState(data);
                    data.AddState(newlineState);
                    newlineState.Tunnel      = newlineTransition.To;
                    newlineState.IsAccepting = to.IsAccepting;
                    newlineState.Actions.AddRange(to.Actions);

                    data.DeleteTransition(state.Index, newlines);
                    data.AddTransition(state.Index, newlines, newlineState.Index);
                }

                newlineState.IsNewline = true;
            }
        }
        private void CreateDfa()
        {
            var dfa = new RegularToDfaAlgorithm(regularTree);
            this.simulation = new DfaSimulation(dfa.Data);
            this.serialization = new DfaSerialization(dfa.Data);

            // DEBUG: Console.WriteLine(dfa);
        }
        // 1. build full RegularTree (don't build first, following for positions)
        // 2. build regular alphabet (equivalense classes)
        // 3. build first, following for non-literal part of regular tree
        // 4. build non-literal TdfaData
        public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string,Action> literalToAction)
        {
            var equivClasses = noConstRegTree
                                .GetEquivalenceCsets()
                                .Union(new [] { NewLines });
            var alphabet = new EquivalenceClassesAlphabet(equivClasses);

            foreach (var literal in literalToAction.Keys)
            {
                foreach (char ch in literal)
                {
                    alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
                }
            }

            // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
            // algorithms for subset construction and state minimization [ASU86, WaG84].
            var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
            this.data = initialDfa.Data;

            #if false
            using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
            {
                data.DescribeGraph(view);
            }
            #endif

            // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
            // for every state s.
            int initialStateCount = data.StateCount;
            foreach (var S in data.EnumerateStates())
            {
                S.Tunnel = NoState;
            }

            // Step 3: Compute the set of ambiguous states of the tunnel automaton.
            this.ambiguous = FindAmbiguousStates();

            // Step 4: For every constant RE execute Step 5 which incrementally extends
            // the tunnel automaton. Continue with Step 6.
            foreach (var pair in literalToAction)
            {
                ExtendAutomatonWithLiteral(pair.Key, pair.Value);
            }

            var newlines = alphabet.Encode(NewLines);

            // Add new line handling
            foreach (var state in data.EnumerateStates())
            {
                var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
                if (i < 0)
                {
                    continue;
                }

                var newlineTransition = state.Outgoing[i];
                var to = data.GetState(newlineTransition.To);

                TdfaState newlineState;
                if (to.IsNewline)
                {
                    continue;
                }
                else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
                {
                    newlineState = to;
                }
                else
                {
                    newlineState = new TdfaState(data);
                    data.AddState(newlineState);
                    newlineState.Tunnel      = newlineTransition.To;
                    newlineState.IsAccepting = to.IsAccepting;
                    newlineState.Actions.AddRange(to.Actions);

                    data.DeleteTransition(state.Index, newlines);
                    data.AddTransition(state.Index, newlines, newlineState.Index);
                }

                newlineState.IsNewline = true;
            }
        }