//
// Thompson's Construction
//
// Public entry point: builds a finite automaton for the given regular
// expression by delegating to the recursive NFA(expression, counter)
// overload with a freshly created Counter. A timestamp is written to the
// console immediately before and immediately after the construction.
//
public static FiniteAutomaton NFA(Bamboo.Parsing.RegularExpressions.Expression expression)
{
    System.Console.WriteLine("START NFA " + System.DateTime.Now.ToString());

    // A new Counter per call, shared by every recursive step of this build.
    Counter stateNumbers = new Counter();
    FiniteAutomaton automaton = NFA(expression, stateNumbers);

    System.Console.WriteLine("END NFA " + System.DateTime.Now.ToString());
    return automaton;
}
// Hopcroft's Algorithm
//
// Builds a new automaton from `dfa` by grouping its states into a partition
// and collapsing each group into a single state:
//   1. The partition is seeded with the groups returned by
//      SplitFinalStates(dfa.Tokens) plus the set of non-final states.
//   2. A worklist loop repeatedly calls Split on a pending group; when Split
//      returns more than one piece, the group is replaced by its pieces.
//   3. Every state, transition, final state and token of `dfa` is translated
//      through PartitionMap(partition), and the result is passed to Reorder.
// Start and end timestamps are written to the console.
//
// An earlier fixed-point formulation (re-splitting every group of the
// partition until no group changed) used to live here as commented-out code;
// the worklist loop below is the active implementation.
public static FiniteAutomaton Minimize(FiniteAutomaton dfa)
{
    System.Console.WriteLine("START Minimize " + System.DateTime.Now.ToString());

    Surf.Set dfaTransitions = Set(dfa.Transitions);

    // Seed the partition: the final-state groups first, then the non-final states.
    Surf.Set pending = new Surf.Set();
    Surf.Set partition = new Surf.Set();
    foreach (Surf.Set finalGroup in SplitFinalStates(dfa.Tokens))
    {
        pending.Add(finalGroup);
        partition.Add(finalGroup);
    }

    Surf.Set allStates = new Surf.Set(dfa.States);
    Surf.Set nonFinalStates = allStates.Difference(new Surf.Set(dfa.FinalStates));
    pending.Add(nonFinalStates);
    partition.Add(nonFinalStates);

    // Keep refining while there are groups left to examine.
    // NOTE(review): only the pieces of a group that was just split are queued
    // again; groups examined earlier are not revisited when the partition is
    // refined later. Confirm that Split's contract makes this sufficient.
    while (pending.Count > 0)
    {
        Surf.Set current = (Surf.Set)pending[0];
        pending.Remove(0);

        Surf.Set pieces = Split(current, dfaTransitions, partition);
        if (pieces.Count <= 1)
        {
            // Nothing to split for this group.
            continue;
        }

        // Drop the group that was split from the partition, locating it by
        // its position in enumeration order.
        int position = 0;
        foreach (Surf.Set candidate in partition)
        {
            if (candidate.Equals(current))
            {
                partition.Remove(position);
                break;
            }
            position++;
        }

        // Its pieces take its place and are queued for further examination.
        foreach (Surf.Set piece in pieces)
        {
            pending.Add(piece);
            partition.Add(piece);
        }
    }

    // Translate the original automaton through the partition map.
    Surf.Set blockOf = PartitionMap(partition);

    Surf.Set states = new Surf.Set();
    foreach (int state in dfa.States)
    {
        states.Add(blockOf.Apply(state));
    }

    Surf.Set alphabet = new Surf.Set(dfa.Alphabet);

    // Each transition is a tuple ((fromState, character), toState).
    Surf.Set transitions = new Surf.Set();
    foreach (Surf.Tuple edge in dfaTransitions)
    {
        Surf.Tuple source = (Surf.Tuple)edge[0];
        int fromBlock = (int)blockOf.Apply(source[0]);
        char symbol = (char)source[1];
        int toBlock = (int)blockOf.Apply(edge[1]);

        Surf.Tuple key = new Surf.Tuple(new object[] { fromBlock, symbol });
        transitions.Add(new Surf.Tuple(new object[] { key, toBlock }));
    }

    int startState = (int)blockOf.Apply(dfa.StartState);

    Surf.Set finalStates = new Surf.Set();
    Surf.Set tokens = new Surf.Set();
    foreach (int state in dfa.FinalStates)
    {
        object block = blockOf.Apply(state);
        finalStates.Add(block);

        // Only the first final state seen for a block contributes its token.
        if (!tokens.IsDefined(block))
        {
            tokens.Add(new Surf.Tuple(new object[] { block, Lookup(dfa.Tokens, state) }));
        }
    }

    System.Console.WriteLine("END Minimize " + System.DateTime.Now.ToString());

    FiniteAutomaton collapsed = new FiniteAutomaton(states, alphabet, transitions, startState, finalStates, tokens);
    return Reorder(collapsed);
}
// Builds a finite automaton for `expression` by working over character
// classes instead of individual characters:
//   1. Build the NFA with NFA(expression).
//   2. Obtain the character classes from Alphabet.Create(expression); each
//      class is a BitArray whose set bits are the character codes it contains.
//   3. Rewrite the NFA so that every non-epsilon transition is labelled with
//      the index of the class containing its character.
//   4. Run DFA(...) on the rewritten NFA.
//   5. Expand each class-labelled DFA transition back into one transition per
//      character of that class.
//
// Throws System.Exception("Already assigned.") when a character belongs to
// more than one class.
public static FiniteAutomaton CreateFA(Bamboo.Parsing.RegularExpressions.Expression expression)
{
    FiniteAutomaton nfa = NFA(expression);

    List<System.Collections.BitArray> alphabet = Alphabet.Create(expression);

    // Character code -> index of the class that contains it.
    // NOTE(review): the table has 128 entries, so character codes are
    // presumably limited to 7-bit ASCII; confirm against Alphabet.Create.
    char[] alphabet_index = new char[128];

    // Tracks which entries have been written. A separate table is required
    // because class index 0 is stored as (char)0, which is also the default
    // value of an unwritten entry; comparing the entry against 0 therefore
    // cannot detect a character that is shared between class 0 and another class.
    bool[] assigned = new bool[alphabet_index.Length];

    for (int i = 0; i < alphabet.Count; i++)
    {
        System.Collections.BitArray bitArray = alphabet[i];
        for (int j = 0; j < bitArray.Count; j++)
        {
            if (bitArray[j])
            {
                if (assigned[j])
                {
                    throw new System.Exception("Already assigned.");
                }
                alphabet_index[j] = (char)i;
                assigned[j] = true;
            }
        }
    }

    // Alphabet of the rewritten NFA: the class index of every NFA character.
    Surf.Set alphabet2 = new Surf.Set();
    foreach (char ch in nfa.Alphabet)
    {
        alphabet2.Add(alphabet_index[ch]);
    }

    // Transitions of the rewritten NFA: epsilon transitions are kept as they
    // are, all others are relabelled with the class index.
    Surf.Set transitions2 = new Surf.Set();
    foreach (Transition transition in nfa.Transitions)
    {
        if (transition.Epsilon)
        {
            transitions2.Add(transition);
        }
        else
        {
            transitions2.Add(new Transition(transition.FromState, alphabet_index[transition.Character], transition.ToState));
        }
    }

    // Assemble the class-labelled NFA; states, start state, final states and
    // tokens are shared with the original NFA.
    FiniteAutomaton nfa2 = new FiniteAutomaton();
    nfa2.States = nfa.States;
    nfa2.StartState = nfa.StartState;
    nfa2.FinalStates = nfa.FinalStates;
    nfa2.Tokens = nfa.Tokens;
    nfa2.Alphabet = new char[alphabet2.Count];
    for (int i = 0; i < alphabet2.Count; i++)
    {
        nfa2.Alphabet[i] = (char)alphabet2[i];
    }
    nfa2.Transitions = new Transition[transitions2.Count];
    for (int i = 0; i < transitions2.Count; i++)
    {
        nfa2.Transitions[i] = (Transition)transitions2[i];
    }

    FiniteAutomaton dfa = DFA(nfa2);

    // Expand the class-labelled DFA back to individual characters.
    FiniteAutomaton fa2 = new FiniteAutomaton();
    fa2.States = dfa.States;
    fa2.StartState = dfa.StartState;
    fa2.FinalStates = dfa.FinalStates;
    fa2.Tokens = dfa.Tokens;

    List<char> alphabet3 = new List<char>();
    foreach (char ch in dfa.Alphabet)
    {
        // Here `ch` is a class index, not a character.
        System.Collections.BitArray bitArray = alphabet[ch];
        for (int i = 0; i < bitArray.Count; i++)
        {
            if (bitArray[i])
            {
                alphabet3.Add((char)i);
            }
        }
    }
    fa2.Alphabet = alphabet3.ToArray();

    List<Bamboo.Parsing.FiniteAutomata.Transition> transitions = new List<Transition>();
    foreach (Bamboo.Parsing.FiniteAutomata.Transition transition in dfa.Transitions)
    {
        if (transition.Epsilon)
        {
            transitions.Add(transition);
        }
        else
        {
            // One concrete transition per character of the class.
            System.Collections.BitArray bitArray = alphabet[transition.Character];
            for (int i = 0; i < bitArray.Count; i++)
            {
                if (bitArray[i])
                {
                    transitions.Add(new Bamboo.Parsing.FiniteAutomata.Transition(transition.FromState, (char)i, transition.ToState));
                }
            }
        }
    }
    fa2.Transitions = transitions.ToArray();

    return fa2;
}
// Recursive worker for the NFA construction. Dispatches on the concrete type
// of `expression` and assembles the automaton for that node from the automata
// of its operands:
//   Literal       - a chain of states, one transition per character of Value.
//   Concatenation - A followed by B, joined by two-argument Transition(...)
//                   calls from every final state of A to the start of B.
//   Alternation   - fresh start/final states wrapped around A and B.
//   Repitition    - fresh start/final states around the operand, with a
//                   bypass link and a loop back to the operand's start.
//   Optional      - fresh start/final states around the operand, with a
//                   bypass link.
// State numbers come from `counter`, so the order of counter.Next() calls and
// recursive calls determines the numbering. When a node's TokenType is
// non-empty, its final states are tagged with it; tokens of the operands are
// carried over as well.
//
// Links are added from every final state of an operand. The original source
// carried TODO notes showing an earlier form that linked only FinalStates[0].
//
// Throws System.Exception for an expression type not listed above.
private static FiniteAutomaton NFA(Bamboo.Parsing.RegularExpressions.Expression expression, Counter counter)
{
    if (expression is Bamboo.Parsing.RegularExpressions.Literal)
    {
        Bamboo.Parsing.RegularExpressions.Literal literal = (Bamboo.Parsing.RegularExpressions.Literal)expression;

        Surf.Set states = new Surf.Set();
        Surf.Set alphabet = new Surf.Set();
        Surf.Set transitions = new Surf.Set();
        Surf.Set finalStates = new Surf.Set();
        Surf.Set tokens = new Surf.Set();

        int startState = counter.Next();

        // Walk the literal, adding one new state and one transition per character.
        int tail = startState;
        foreach (char symbol in literal.Value.ToCharArray())
        {
            states.Add(tail);
            alphabet.Add(symbol);

            int head = counter.Next();
            transitions.Add(Transition(tail, symbol, head));
            tail = head;
        }

        // The last state reached is the only final state.
        states.Add(tail);
        finalStates.Add(tail);

        if (literal.TokenType.Length > 0)
        {
            foreach (int state in finalStates)
            {
                tokens.Add(new Surf.Tuple(new object[] { state, literal.TokenType }));
            }
        }

        return new FiniteAutomaton(states, alphabet, transitions, startState, finalStates, tokens);
    }

    if (expression is Bamboo.Parsing.RegularExpressions.Concatenation)
    {
        Bamboo.Parsing.RegularExpressions.Concatenation concatenation = (Bamboo.Parsing.RegularExpressions.Concatenation)expression;

        FiniteAutomaton left = NFA(concatenation.A, counter);
        FiniteAutomaton right = NFA(concatenation.B, counter);

        Surf.Set states = new Surf.Set()
            .Union(new Surf.Set(left.States))
            .Union(new Surf.Set(right.States));
        Surf.Set alphabet = new Surf.Set()
            .Union(new Surf.Set(left.Alphabet))
            .Union(new Surf.Set(right.Alphabet));
        Surf.Set transitions = new Surf.Set()
            .Union(Set(left.Transitions))
            .Union(Set(right.Transitions));

        // Join the operands: every final state of A links to the start of B.
        foreach (int leftFinal in left.FinalStates)
        {
            transitions.Add(Transition(leftFinal, right.StartState));
        }

        int startState = left.StartState;
        Surf.Set finalStates = new Surf.Set(right.FinalStates);

        Surf.Set tokens = new Surf.Set();
        if (concatenation.TokenType.Length > 0)
        {
            foreach (int state in finalStates)
            {
                tokens.Add(new Surf.Tuple(new object[] { state, concatenation.TokenType }));
            }
        }
        foreach (Token token in left.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }
        foreach (Token token in right.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }

        return new FiniteAutomaton(states, alphabet, transitions, startState, finalStates, tokens);
    }

    if (expression is Bamboo.Parsing.RegularExpressions.Alternation)
    {
        Bamboo.Parsing.RegularExpressions.Alternation alternation = (Bamboo.Parsing.RegularExpressions.Alternation)expression;

        // The wrapper states are numbered before the operands are built.
        int startState = counter.Next();
        int finalState = counter.Next();

        FiniteAutomaton left = NFA(alternation.A, counter);
        FiniteAutomaton right = NFA(alternation.B, counter);

        Surf.Set states = new Surf.Set()
            .Union(new Surf.Set(left.States))
            .Union(new Surf.Set(right.States));
        states.Add(startState);
        states.Add(finalState);

        Surf.Set alphabet = new Surf.Set()
            .Union(new Surf.Set(left.Alphabet))
            .Union(new Surf.Set(right.Alphabet));

        Surf.Set transitions = new Surf.Set()
            .Union(Set(left.Transitions))
            .Union(Set(right.Transitions));

        // Fan out from the new start state into both operands ...
        transitions.Add(Transition(startState, left.StartState));
        transitions.Add(Transition(startState, right.StartState));

        // ... and fan in from every operand final state to the new final state.
        foreach (int leftFinal in left.FinalStates)
        {
            transitions.Add(Transition(leftFinal, finalState));
        }
        foreach (int rightFinal in right.FinalStates)
        {
            transitions.Add(Transition(rightFinal, finalState));
        }

        Surf.Set finalStates = new Surf.Set();
        finalStates.Add(finalState);

        Surf.Set tokens = new Surf.Set();
        if (alternation.TokenType.Length > 0)
        {
            foreach (int state in finalStates)
            {
                tokens.Add(new Surf.Tuple(new object[] { state, alternation.TokenType }));
            }
        }
        foreach (Token token in left.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }
        foreach (Token token in right.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }

        return new FiniteAutomaton(states, alphabet, transitions, startState, finalStates, tokens);
    }

    if (expression is Bamboo.Parsing.RegularExpressions.Repitition)
    {
        Bamboo.Parsing.RegularExpressions.Repitition repitition = (Bamboo.Parsing.RegularExpressions.Repitition)expression;

        // The wrapper states are numbered before the operand is built.
        int startState = counter.Next();
        int finalState = counter.Next();

        FiniteAutomaton inner = NFA(repitition.Expression, counter);

        Surf.Set states = new Surf.Set().Union(new Surf.Set(inner.States));
        states.Add(startState);
        states.Add(finalState);

        Surf.Set alphabet = new Surf.Set().Union(new Surf.Set(inner.Alphabet));

        Surf.Set transitions = new Surf.Set().Union(Set(inner.Transitions));

        // Enter the operand, or bypass it entirely.
        transitions.Add(Transition(startState, inner.StartState));
        transitions.Add(Transition(startState, finalState));

        // Leave the operand ...
        foreach (int innerFinal in inner.FinalStates)
        {
            transitions.Add(Transition(innerFinal, finalState));
        }

        // ... or loop back for another pass.
        foreach (int innerFinal in inner.FinalStates)
        {
            transitions.Add(Transition(innerFinal, inner.StartState));
        }

        Surf.Set finalStates = new Surf.Set();
        finalStates.Add(finalState);

        Surf.Set tokens = new Surf.Set();
        if (repitition.TokenType.Length > 0)
        {
            foreach (int state in finalStates)
            {
                tokens.Add(new Surf.Tuple(new object[] { state, repitition.TokenType }));
            }
        }
        foreach (Token token in inner.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }

        return new FiniteAutomaton(states, alphabet, transitions, startState, finalStates, tokens);
    }

    if (expression is Bamboo.Parsing.RegularExpressions.Optional)
    {
        Bamboo.Parsing.RegularExpressions.Optional optional = (Bamboo.Parsing.RegularExpressions.Optional)expression;

        // The wrapper states are numbered before the operand is built.
        int startState = counter.Next();
        int finalState = counter.Next();

        FiniteAutomaton inner = NFA(optional.Expression, counter);

        Surf.Set states = new Surf.Set().Union(new Surf.Set(inner.States));
        states.Add(startState);
        states.Add(finalState);

        Surf.Set alphabet = new Surf.Set().Union(new Surf.Set(inner.Alphabet));

        Surf.Set transitions = new Surf.Set().Union(Set(inner.Transitions));

        // Enter the operand, or bypass it entirely.
        transitions.Add(Transition(startState, inner.StartState));
        transitions.Add(Transition(startState, finalState));

        // Leave the operand.
        foreach (int innerFinal in inner.FinalStates)
        {
            transitions.Add(Transition(innerFinal, finalState));
        }

        Surf.Set finalStates = new Surf.Set();
        finalStates.Add(finalState);

        Surf.Set tokens = new Surf.Set();
        if (optional.TokenType.Length > 0)
        {
            foreach (int state in finalStates)
            {
                tokens.Add(new Surf.Tuple(new object[] { state, optional.TokenType }));
            }
        }
        foreach (Token token in inner.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }

        return new FiniteAutomaton(states, alphabet, transitions, startState, finalStates, tokens);
    }

    throw new System.Exception("Unknown expression type: " + expression.GetType().FullName);
}