/**
 * Generate an NFA from the given pattern using Thompson's Construction and
 * add it to the current overall NFA (_nfa).
 *
 * The pattern's parse tree is walked in post order. Each visited node either
 * pushes a new fragment on a work stack or pops the fragments produced by its
 * children and combines them. Exactly one fragment must remain at the end:
 * its out states are registered as final states of _nfa and an empty-AST
 * edge is added from _start_state to its start state.
 *
 * pattern: the pattern whose AstNode is converted; must not be null.
 *
 * Throws ArgumentNullException when pattern is null, and
 * InvalidOperationException when the walk leaves zero or more than one
 * fragment on the stack, or when an attr node does not start with the
 * terminal the construction needs.
 */
public void post2nfa(Pattern pattern)
{
    if (pattern == null)
    {
        throw new ArgumentNullException("pattern");
    }

    var tree = pattern.AstNode as SpecParserParser.PatternContext;
    var fragmentStack = new Stack<Fragment>();

    // NOTE: the order of the tests below is significant if any of the
    // context classes derive from one another, so it is kept as is.
    foreach (var p in ComputePostOrder(tree))
    {
        if (p is SpecParserParser.Simple_basicContext
            || p is SpecParserParser.Kleene_star_basicContext)
        {
            // simple_basic and kleene_star_basic are very special:
            // add in ".*" between each item.
            var last = fragmentStack.Pop();
            bool negated = p.GetChild(0).GetText() == "!";
            for (var i = p.ChildCount - (negated ? 3 : 2); i >= 0; --i)
            {
                // For Piggy, we're going to use a special edge
                // to denote attribute or child node recognition
                // because there is lookahead for the attribute
                // or child node, not one symbol. In addition, once
                // a child is found next, no attributes can occur.
                var f = fragmentStack.Pop();
                if (i > 0)
                {
                    // Skip the ".*" prefix only when the accumulated tail
                    // begins with exactly one edge and that edge is code or text.
                    var successors = last.StartState.Owner.SuccessorEdges(last.StartState);
                    var successorCount = successors.Count();
                    var firstSuccessor = successors.FirstOrDefault();
                    bool startsWithCodeOrText = successorCount == 1
                        && (firstSuccessor.IsCode || firstSuccessor.IsText);
                    if (!startsWithCodeOrText)
                    {
                        last = PrependAnyStar(last);
                    }
                }

                // Chain f in front of everything accumulated so far.
                LinkOutStates(f, last.StartState);
                last = new Fragment(f.StartState, last.OutStates);
            }
            fragmentStack.Push(last);
        }
        else if (p is SpecParserParser.BasicContext)
        {
            // Nothing to build for this node.
        }
        else if (p is SpecParserParser.PatternContext)
        {
            // Nothing to build for this node.
        }
        else if (p is TerminalNodeImpl)
        {
            // Only the four parenthesis tokens get a fragment of their own.
            var terminal = (TerminalNodeImpl)p;
            var symbolType = terminal.Symbol.Type;
            if (symbolType == SpecParserParser.OPEN_PAREN
                || symbolType == SpecParserParser.OPEN_KLEENE_STAR_PAREN
                || symbolType == SpecParserParser.CLOSE_PAREN
                || symbolType == SpecParserParser.CLOSE_KLEENE_STAR_PAREN)
            {
                fragmentStack.Push(NewEdgeFragment(terminal));
            }
        }
        else if (p is SpecParserParser.Id_or_star_or_emptyContext)
        {
            fragmentStack.Push(NewEdgeFragment(p.GetChild(0)));
        }
        else if (p is SpecParserParser.MoreContext)
        {
            // Nothing to build for this node.
        }
        else if (p is SpecParserParser.TextContext)
        {
            fragmentStack.Push(NewEdgeFragment(p, (int)Edge.EdgeModifiersEnum.Text));
        }
        else if (p is SpecParserParser.CodeContext)
        {
            fragmentStack.Push(NewEdgeFragment(p, (int)Edge.EdgeModifiersEnum.Code));
        }
        else if (p is SpecParserParser.Group_rexpContext)
        {
            // Nothing to build for this node.
        }
        else if (p is SpecParserParser.Star_rexpContext)
        {
            // Add in ".*" before the operand.
            var operand = PrependAnyStar(fragmentStack.Pop());

            // A single hub state is both the start and the only out state:
            // it enters the operand, and every operand out state loops back to it.
            var hub = new State(_nfa);
            new Edge(_nfa, hub, operand.StartState, Edge.EmptyAst);
            LinkOutStates(operand, hub);
            fragmentStack.Push(new Fragment(hub, hub));
        }
        else if (p is SpecParserParser.Plus_rexpContext)
        {
            // Add in ".*" before the operand.
            var operand = PrependAnyStar(fragmentStack.Pop());

            // Add a state after the operand's out states, with a back edge
            // from that state to the operand's start.
            var exit = new State(_nfa);
            LinkOutStates(operand, exit);
            new Edge(_nfa, exit, operand.StartState, Edge.EmptyAst);
            fragmentStack.Push(new Fragment(operand.StartState, exit));
        }
        else if (p is SpecParserParser.AttrContext)
        {
            var firstChild = p.GetChild(0);
            if (firstChild.GetText() == "!")
            {
                // "!" form: one edge over the second child, marked Not.
                var negatedTerminal = RequireTerminal(p.GetChild(1));
                fragmentStack.Push(
                    NewEdgeFragment(negatedTerminal, (int)Edge.EdgeModifiersEnum.Not));
            }
            else
            {
                // Three-child form: a chain of three edges, one per child.
                var head = RequireTerminal(firstChild);
                var s1 = new State(_nfa);
                var s2 = new State(_nfa);
                var s3 = new State(_nfa);
                var s4 = new State(_nfa);
                new Edge(_nfa, s1, s2, new List<IParseTree> { head });
                new Edge(_nfa, s2, s3,
                    new List<IParseTree> { p.GetChild(1) as TerminalNodeImpl });
                new Edge(_nfa, s3, s4,
                    new List<IParseTree> { p.GetChild(2) as TerminalNodeImpl });
                fragmentStack.Push(new Fragment(s1, s4));
            }
        }
        else if (p is SpecParserParser.RexpContext)
        {
            // Every odd-indexed child is stepped over; each step joins the two
            // topmost fragments as alternatives between a fork and a join state.
            for (var i = 2; i < p.ChildCount; i += 2)
            {
                var fork = new State(_nfa);
                var right = PrependAnyStar(fragmentStack.Pop());
                var left = PrependAnyStar(fragmentStack.Pop());
                new Edge(_nfa, fork, left.StartState, Edge.EmptyAst);
                new Edge(_nfa, fork, right.StartState, Edge.EmptyAst);
                var join = new State(_nfa);
                LinkOutStates(left, join);
                LinkOutStates(right, join);
                fragmentStack.Push(new Fragment(fork, join));
            }
        }
    }

    if (fragmentStack.Count == 0)
    {
        throw new InvalidOperationException(
            "Fragment stack is empty; the pattern produced no NFA fragment.");
    }
    var completeNfa = fragmentStack.Pop();
    if (fragmentStack.Count > 0)
    {
        throw new InvalidOperationException("Fragment stack not empty.");
    }

    foreach (var s in completeNfa.OutStates)
    {
        _nfa.AddFinalState(s);
    }

    // Add in the NFA for this pattern into overall NFA.
    new Edge(_nfa, _start_state, completeNfa.StartState, Edge.EmptyAst);
}

/**
 * Put a ".*" prefix in front of a fragment: three new states chained by
 * empty-AST edges into the fragment's start state, with a self loop carrying
 * the Any modifier on the middle state. The result starts at the first new
 * state and keeps the fragment's out states.
 */
private Fragment PrependAnyStar(Fragment fragment)
{
    var entry = new State(_nfa);
    var loop = new State(_nfa);
    var exit = new State(_nfa);
    new Edge(_nfa, entry, loop, Edge.EmptyAst);
    new Edge(_nfa, loop, exit, Edge.EmptyAst);
    new Edge(_nfa, loop, loop, Edge.EmptyAst, (int)Edge.EdgeModifiersEnum.Any);
    new Edge(_nfa, exit, fragment.StartState, Edge.EmptyAst);
    return new Fragment(entry, fragment.OutStates);
}

/** Build a two-state fragment joined by one edge that carries the given tree node. */
private Fragment NewEdgeFragment(IParseTree ast)
{
    var source = new State(_nfa);
    var target = new State(_nfa);
    new Edge(_nfa, source, target, new List<IParseTree> { ast });
    return new Fragment(source, target);
}

/** Same as the one-argument overload, but the edge also carries the given modifiers. */
private Fragment NewEdgeFragment(IParseTree ast, int modifiers)
{
    var source = new State(_nfa);
    var target = new State(_nfa);
    new Edge(_nfa, source, target, new List<IParseTree> { ast }, modifiers);
    return new Fragment(source, target);
}

/** Add an empty-AST edge from every out state of the fragment to the target state. */
private void LinkOutStates(Fragment fragment, State target)
{
    foreach (var o in fragment.OutStates)
    {
        new Edge(_nfa, o, target, Edge.EmptyAst);
    }
}

/** Return the node as a terminal, failing with a clear error when it is not one. */
private static TerminalNodeImpl RequireTerminal(IParseTree node)
{
    var terminal = node as TerminalNodeImpl;
    if (terminal == null)
    {
        throw new InvalidOperationException(
            "Expected a terminal node while building the NFA for an attr pattern.");
    }
    return terminal;
}
/*
 * Apply powerset construction to the NFA to convert it to a DFA.
 * Note, the result of this method isn't strictly a DFA because
 * edges in the automaton can be text or code, which function just like
 * epsilon transitions--no input is consumed for the edge.
 *
 * nfa: the automaton to convert.
 * Returns a newly created Automaton holding the merged states and edges.
 * The member table _closure is filled in along the way; this method does
 * not clear it first.
 */
public Automaton Optimize(Automaton nfa)
{
    var dfa = new Automaton();

    // Start with the closure of every single NFA state.
    foreach (var vertex in nfa.Vertices)
    {
        _closure[vertex] = ClosureTaker.GetClosure(new List<State> { vertex }, nfa);
    }

    // Fold each closure into the closure of every one of its members.
    foreach (var entry in _closure)
    {
        var members = entry.Value;
        foreach (var member in members)
        {
            _closure[member].UnionWith(members);
        }
    }

    // Visit the states in reversed Tarjan order and keep merging the sets
    // reached over a common transition until a full pass changes nothing.
    var visitOrder = new TarjanNoBackEdges<State, Edge>(nfa).ToList();
    visitOrder.Reverse();
    bool changed;
    do
    {
        changed = false;
        foreach (var state in visitOrder)
        {
            var outgoing = ClosureTaker.GatherTransitions(_closure[state]);
            foreach (var transition in outgoing)
            {
                // All target states of one transition must share one set.
                var merged = new SmartSet<State>();
                foreach (var edge in transition.Value)
                {
                    merged.UnionWith(_closure[edge.To]);
                }

                foreach (var member in merged)
                {
                    if (_closure[member].Equals(merged))
                    {
                        continue;
                    }
                    _closure[member] = merged;
                    changed = true;
                }
            }
        }
    } while (changed);

    // The returned initial state is not needed in this method.
    CreateInitialState(nfa, dfa);

    // Create one DFA state per distinct state set; it is final when the
    // set contains at least one final NFA state.
    foreach (var entry in _closure)
    {
        var members = entry.Value;
        if (FindHashSetState(dfa, members) != null)
        {
            continue;
        }

        var created = AddHashSetState(dfa, members);
        bool containsFinal = members.Any(m => nfa.FinalStates.Contains(m));
        if (containsFinal && !dfa.FinalStates.Contains(created))
        {
            dfa.AddFinalState(created);
        }
    }

    // Left over from debugging output: the lookup is still performed for
    // every state set, but its result is not used.
    foreach (var entry in _closure)
    {
        FindHashSetState(dfa, entry.Value);
    }

    foreach (var source in dfa.Vertices)
    {
        var outgoing = ClosureTaker.GatherTransitions(FindHashSet(source));
        foreach (var transition in outgoing)
        {
            // Note, a transition is a collection of edges for a given string.
            // For the NFA, an edge has one Ast for the edge because it came
            // from one pattern. But, for the DFA, there could be multiple
            // edges for the same string, each from a different pattern!
            // Compute the set of Asts for all edges.
            var edges = transition.Value;
            var targets = new HashSet<State>(edges.Select(e => e.To));

            // Use the first known state set that covers all targets; when
            // there is none, fall back to the closure of the first target.
            var destinationSet = _hash_sets.FirstOrDefault(hs => targets.IsSubsetOf(hs.Key)).Key;
            if (destinationSet == null)
            {
                destinationSet = _closure[targets.First()];
            }

            var destination = FindHashSetState(dfa, destinationSet);
            var modifiers = edges.First().EdgeModifiers;
            var asts = new List<IParseTree>(edges.SelectMany(e => e.AstList));
            new Edge(dfa, source, destination, asts, modifiers);
        }
    }

    // Add in "any" fragment in order to match tree nodes that aren't in pattern.
    //{
    //    State s3 = new State(dfa); s3.Commit();
    //    var e1 = new Edge(dfa, s3, s3, null, Edge.EmptyAst, (int)Edge.EdgeModifiers.Any); e1.Commit();
    //    var f = new Fragment(s3);
    //}
    return dfa;
}