//
// Thompson's Construction
//
/// <summary>
/// Builds the automaton for <paramref name="expression"/> by delegating to the
/// recursive overload with a fresh <c>Counter</c>, writing a timestamped
/// "START NFA" line to the console before the build and an "END NFA" line after it.
/// </summary>
/// <param name="expression">Root of the regular-expression tree to convert.</param>
/// <returns>The automaton produced by the recursive overload.</returns>
public static FiniteAutomaton NFA(Bamboo.Parsing.RegularExpressions.Expression expression)
{
    string startedAt = System.DateTime.Now.ToString();
    System.Console.WriteLine("START NFA " + startedAt);

    // A new counter per top-level call, shared by the whole recursive build.
    FiniteAutomaton result = NFA(expression, new Counter());

    string finishedAt = System.DateTime.Now.ToString();
    System.Console.WriteLine("END NFA " + finishedAt);

    return result;
}
/// <summary>
/// Builds a finite automaton for <paramref name="expression"/> in four steps:
/// build the NFA, rewrite its alphabet and transitions in terms of character-class
/// indices (as returned by <c>Alphabet.Create</c>), run <c>DFA</c> on that rewritten
/// automaton, and finally expand every class index back into the individual
/// characters whose bit is set in the class.
/// </summary>
/// <param name="expression">Root of the regular-expression tree to convert.</param>
/// <returns>
/// An automaton that reuses the states, start state, final states and tokens returned
/// by <c>DFA</c>, with the alphabet and transitions expressed over characters again.
/// </returns>
/// <exception cref="System.Exception">
/// Thrown with "Already assigned." when a character code belongs to more than one class.
/// </exception>
public static FiniteAutomaton CreateFA(Bamboo.Parsing.RegularExpressions.Expression expression)
{
    FiniteAutomaton nfa = NFA(expression);
    List<System.Collections.BitArray> alphabet = Alphabet.Create(expression);

    // alphabet_index[c] is the index of the class that contains character code c.
    // Assignment is tracked in a separate flag array: class index 0 is stored as
    // (char)0, which is also the default element value, so comparing the stored
    // value against 0 cannot detect a character that was already placed in class 0.
    char[] alphabet_index = new char[128];
    bool[] assigned = new bool[alphabet_index.Length];
    for (int i = 0; i < alphabet.Count; i++)
    {
        System.Collections.BitArray bitArray = alphabet[i];
        for (int j = 0; j < bitArray.Count; j++)
        {
            if (bitArray[j])
            {
                if (assigned[j])
                {
                    throw new System.Exception("Already assigned.");
                }
                assigned[j] = true;
                alphabet_index[j] = (char)i;
            }
        }
    }

    // Replace every NFA symbol by the index of its class.
    Surf.Set alphabet2 = new Surf.Set();
    foreach (char ch in nfa.Alphabet)
    {
        alphabet2.Add(alphabet_index[ch]);
    }

    // Epsilon transitions are kept as they are; character transitions are relabelled
    // with the class index of their character.
    Surf.Set transitions2 = new Surf.Set();
    foreach (Transition transition in nfa.Transitions)
    {
        if (transition.Epsilon)
        {
            transitions2.Add(transition);
        }
        else
        {
            transitions2.Add(new Transition(transition.FromState, alphabet_index[transition.Character], transition.ToState));
        }
    }

    // Same states, start state, final states and tokens as the NFA, but over class indices.
    FiniteAutomaton nfa2 = new FiniteAutomaton();
    nfa2.States = nfa.States;
    nfa2.StartState = nfa.StartState;
    nfa2.FinalStates = nfa.FinalStates;
    nfa2.Tokens = nfa.Tokens;
    nfa2.Alphabet = new char[alphabet2.Count];
    for (int i = 0; i < alphabet2.Count; i++)
    {
        nfa2.Alphabet[i] = (char)alphabet2[i];
    }
    nfa2.Transitions = new Transition[transitions2.Count];
    for (int i = 0; i < transitions2.Count; i++)
    {
        nfa2.Transitions[i] = (Transition)transitions2[i];
    }

    FiniteAutomaton dfa = DFA(nfa2);

    // Expand the result back from class indices to individual characters.
    FiniteAutomaton fa2 = new FiniteAutomaton();
    fa2.States = dfa.States;
    fa2.StartState = dfa.StartState;
    fa2.FinalStates = dfa.FinalStates;
    fa2.Tokens = dfa.Tokens;

    List<char> alphabet3 = new List<char>();
    foreach (char ch in dfa.Alphabet)
    {
        // Here ch is a class index; add every character whose bit is set in that class.
        System.Collections.BitArray bitArray = alphabet[ch];
        for (int i = 0; i < bitArray.Count; i++)
        {
            if (bitArray[i])
            {
                alphabet3.Add((char)i);
            }
        }
    }
    fa2.Alphabet = alphabet3.ToArray();

    List<Bamboo.Parsing.FiniteAutomata.Transition> transitions = new List<Transition>();
    foreach (Bamboo.Parsing.FiniteAutomata.Transition transition in dfa.Transitions)
    {
        if (transition.Epsilon)
        {
            transitions.Add(transition);
        }
        else
        {
            // One transition per character contained in the class labelling this edge.
            System.Collections.BitArray bitArray = alphabet[transition.Character];
            for (int i = 0; i < bitArray.Count; i++)
            {
                if (bitArray[i])
                {
                    transitions.Add(new Bamboo.Parsing.FiniteAutomata.Transition(transition.FromState, (char)i, transition.ToState));
                }
            }
        }
    }
    fa2.Transitions = transitions.ToArray();

    return fa2;
}
/// <summary>
/// Recursively builds the automaton fragment for <paramref name="expression"/>.
/// Handles Literal, Concatenation, Alternation, Repitition and Optional nodes;
/// fragments are joined with epsilon transitions (the two-argument <c>Transition</c> helper).
/// </summary>
/// <param name="expression">Expression node to convert.</param>
/// <param name="counter">
/// Source of state numbers; <c>Next()</c> is called once for every state created here.
/// The call order determines the numbering, so it is kept stable.
/// </param>
/// <returns>The automaton fragment for the node, including tokens of its sub-fragments.</returns>
/// <exception cref="System.Exception">Thrown for an expression type not listed above.</exception>
private static FiniteAutomaton NFA(Bamboo.Parsing.RegularExpressions.Expression expression, Counter counter)
{
    Bamboo.Parsing.RegularExpressions.Literal literal = expression as Bamboo.Parsing.RegularExpressions.Literal;
    if (literal != null)
    {
        // A chain of states with one character transition per character of the literal.
        Surf.Set states = new Surf.Set();
        Surf.Set symbols = new Surf.Set();
        Surf.Set edges = new Surf.Set();
        Surf.Set accepting = new Surf.Set();
        Surf.Set tokens = new Surf.Set();

        int start = counter.Next();
        int current = start;
        foreach (char symbol in literal.Value.ToCharArray())
        {
            states.Add(current);
            symbols.Add(symbol);
            int next = counter.Next();
            edges.Add(Transition(current, symbol, next));
            current = next;
        }

        // The last state of the chain is the only final state.
        states.Add(current);
        accepting.Add(current);

        if (literal.TokenType.Length > 0)
        {
            foreach (int state in accepting)
            {
                tokens.Add(new Surf.Tuple(new object[] { state, literal.TokenType }));
            }
        }

        return new FiniteAutomaton(states, symbols, edges, start, accepting, tokens);
    }

    Bamboo.Parsing.RegularExpressions.Concatenation concatenation = expression as Bamboo.Parsing.RegularExpressions.Concatenation;
    if (concatenation != null)
    {
        // Build A before B so state numbers are handed out in the same order.
        FiniteAutomaton left = NFA(concatenation.A, counter);
        FiniteAutomaton right = NFA(concatenation.B, counter);

        Surf.Set states = new Surf.Set().Union(new Surf.Set(left.States)).Union(new Surf.Set(right.States));
        Surf.Set symbols = new Surf.Set().Union(new Surf.Set(left.Alphabet)).Union(new Surf.Set(right.Alphabet));
        Surf.Set edges = new Surf.Set().Union(Set(left.Transitions)).Union(Set(right.Transitions));

        // Every final state of A continues into the start of B.
        foreach (int leftFinal in left.FinalStates)
        {
            edges.Add(Transition(leftFinal, right.StartState));
        }

        int start = left.StartState;
        Surf.Set accepting = new Surf.Set(right.FinalStates);

        Surf.Set tokens = new Surf.Set();
        if (concatenation.TokenType.Length > 0)
        {
            foreach (int state in accepting)
            {
                tokens.Add(new Surf.Tuple(new object[] { state, concatenation.TokenType }));
            }
        }
        foreach (Token token in left.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }
        foreach (Token token in right.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }

        return new FiniteAutomaton(states, symbols, edges, start, accepting, tokens);
    }

    Bamboo.Parsing.RegularExpressions.Alternation alternation = expression as Bamboo.Parsing.RegularExpressions.Alternation;
    if (alternation != null)
    {
        // The new start and final states are numbered before either branch is built.
        int start = counter.Next();
        int final = counter.Next();
        FiniteAutomaton left = NFA(alternation.A, counter);
        FiniteAutomaton right = NFA(alternation.B, counter);

        Surf.Set states = new Surf.Set().Union(new Surf.Set(left.States)).Union(new Surf.Set(right.States));
        states.Add(start);
        states.Add(final);

        Surf.Set symbols = new Surf.Set().Union(new Surf.Set(left.Alphabet)).Union(new Surf.Set(right.Alphabet));

        Surf.Set edges = new Surf.Set().Union(Set(left.Transitions)).Union(Set(right.Transitions));
        edges.Add(Transition(start, left.StartState));
        edges.Add(Transition(start, right.StartState));
        // Every final state of either branch leads to the shared final state.
        foreach (int leftFinal in left.FinalStates)
        {
            edges.Add(Transition(leftFinal, final));
        }
        foreach (int rightFinal in right.FinalStates)
        {
            edges.Add(Transition(rightFinal, final));
        }

        Surf.Set accepting = new Surf.Set();
        accepting.Add(final);

        Surf.Set tokens = new Surf.Set();
        if (alternation.TokenType.Length > 0)
        {
            foreach (int state in accepting)
            {
                tokens.Add(new Surf.Tuple(new object[] { state, alternation.TokenType }));
            }
        }
        foreach (Token token in left.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }
        foreach (Token token in right.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }

        return new FiniteAutomaton(states, symbols, edges, start, accepting, tokens);
    }

    Bamboo.Parsing.RegularExpressions.Repitition repitition = expression as Bamboo.Parsing.RegularExpressions.Repitition;
    if (repitition != null)
    {
        int start = counter.Next();
        int final = counter.Next();
        FiniteAutomaton inner = NFA(repitition.Expression, counter);

        Surf.Set states = new Surf.Set().Union(new Surf.Set(inner.States));
        states.Add(start);
        states.Add(final);

        Surf.Set symbols = new Surf.Set().Union(new Surf.Set(inner.Alphabet));

        Surf.Set edges = new Surf.Set().Union(Set(inner.Transitions));
        edges.Add(Transition(start, inner.StartState));
        // Skipping the inner fragment entirely allows zero occurrences.
        edges.Add(Transition(start, final));
        foreach (int innerFinal in inner.FinalStates)
        {
            edges.Add(Transition(innerFinal, final));
        }
        // Looping back to the inner start allows further occurrences.
        foreach (int innerFinal in inner.FinalStates)
        {
            edges.Add(Transition(innerFinal, inner.StartState));
        }

        Surf.Set accepting = new Surf.Set();
        accepting.Add(final);

        Surf.Set tokens = new Surf.Set();
        if (repitition.TokenType.Length > 0)
        {
            foreach (int state in accepting)
            {
                tokens.Add(new Surf.Tuple(new object[] { state, repitition.TokenType }));
            }
        }
        foreach (Token token in inner.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }

        return new FiniteAutomaton(states, symbols, edges, start, accepting, tokens);
    }

    Bamboo.Parsing.RegularExpressions.Optional optional = expression as Bamboo.Parsing.RegularExpressions.Optional;
    if (optional != null)
    {
        int start = counter.Next();
        int final = counter.Next();
        FiniteAutomaton inner = NFA(optional.Expression, counter);

        Surf.Set states = new Surf.Set().Union(new Surf.Set(inner.States));
        states.Add(start);
        states.Add(final);

        Surf.Set symbols = new Surf.Set().Union(new Surf.Set(inner.Alphabet));

        Surf.Set edges = new Surf.Set().Union(Set(inner.Transitions));
        edges.Add(Transition(start, inner.StartState));
        // Same shape as repetition, but without the loop back to the inner start.
        edges.Add(Transition(start, final));
        foreach (int innerFinal in inner.FinalStates)
        {
            edges.Add(Transition(innerFinal, final));
        }

        Surf.Set accepting = new Surf.Set();
        accepting.Add(final);

        Surf.Set tokens = new Surf.Set();
        if (optional.TokenType.Length > 0)
        {
            foreach (int state in accepting)
            {
                tokens.Add(new Surf.Tuple(new object[] { state, optional.TokenType }));
            }
        }
        foreach (Token token in inner.Tokens)
        {
            tokens.Add(new Surf.Tuple(new object[] { token.Number, token.Name }));
        }

        return new FiniteAutomaton(states, symbols, edges, start, accepting, tokens);
    }

    throw new System.Exception("Unknown expression type: " + expression.GetType().FullName);
}
/// <summary>
/// Extracts the character classes of <paramref name="expression"/> (with the
/// <c>first</c> flag set to true) and returns the result of passing them to <c>Partition</c>.
/// </summary>
/// <param name="expression">Root of the regular-expression tree.</param>
/// <returns>The list of bit arrays returned by <c>Partition</c>.</returns>
public static List<System.Collections.BitArray> Create2(Bamboo.Parsing.RegularExpressions.Expression expression)
{
    List<System.Collections.BitArray> extracted = Extract(expression, true);
    List<System.Collections.BitArray> partitioned = Partition(extracted);
    return partitioned;
}
/// <summary>
/// Collects the character classes used by <paramref name="expression"/> as 128-bit
/// arrays, one bit per character code. Duplicates (as judged by <c>Contains</c>) are
/// dropped within each list this method builds.
/// </summary>
/// <param name="expression">Expression node to inspect.</param>
/// <param name="first">
/// Passed through unchanged to both sides of an Alternation and replaced by false for
/// the children of Concatenation, Repitition and Optional. When it is false and both
/// sides of an Alternation yield exactly one class, the two are OR-ed into one class.
/// </param>
/// <returns>The classes found, in the order they were first encountered.</returns>
/// <exception cref="System.Exception">Thrown for an unknown expression type.</exception>
private static List<System.Collections.BitArray> Extract(Bamboo.Parsing.RegularExpressions.Expression expression, bool first) //TODO this sucks.
{
    Bamboo.Parsing.RegularExpressions.Literal literal = expression as Bamboo.Parsing.RegularExpressions.Literal;
    if (literal != null)
    {
        // One single-bit class per distinct character of the literal.
        List<System.Collections.BitArray> singles = new List<System.Collections.BitArray>();
        foreach (char symbol in literal.Value.ToCharArray())
        {
            System.Collections.BitArray bits = new System.Collections.BitArray(128);
            bits.Set((int)symbol, true);
            if (!Contains(singles, bits))
            {
                singles.Add(bits);
            }
        }
        return singles;
    }

    Bamboo.Parsing.RegularExpressions.Concatenation concatenation = expression as Bamboo.Parsing.RegularExpressions.Concatenation;
    if (concatenation != null)
    {
        List<System.Collections.BitArray> fromA = Extract(concatenation.A, false);
        List<System.Collections.BitArray> fromB = Extract(concatenation.B, false);

        // Classes of A first, then those of B, skipping ones already collected.
        List<System.Collections.BitArray> merged = new List<System.Collections.BitArray>();
        foreach (List<System.Collections.BitArray> part in new List<System.Collections.BitArray>[] { fromA, fromB })
        {
            foreach (System.Collections.BitArray bits in part)
            {
                if (!Contains(merged, bits))
                {
                    merged.Add(bits);
                }
            }
        }
        return merged;
    }

    Bamboo.Parsing.RegularExpressions.Alternation alternation = expression as Bamboo.Parsing.RegularExpressions.Alternation;
    if (alternation != null)
    {
        List<System.Collections.BitArray> fromA = Extract(alternation.A, first);
        List<System.Collections.BitArray> fromB = Extract(alternation.B, first);
        List<System.Collections.BitArray> result = new List<System.Collections.BitArray>();

        //TODO this sucks.
        if (first || fromA.Count != 1 || fromB.Count != 1)
        {
            // General case: keep the classes of both sides separately, without duplicates.
            foreach (List<System.Collections.BitArray> part in new List<System.Collections.BitArray>[] { fromA, fromB })
            {
                foreach (System.Collections.BitArray bits in part)
                {
                    if (!Contains(result, bits))
                    {
                        result.Add(bits);
                    }
                }
            }
        }
        else
        {
            // Nested alternation of two single classes: fold them into one class.
            // BitArray.Or updates fromA[0] in place and returns that same instance.
            result.Add(fromA[0].Or(fromB[0]));
        }
        return result;
    }

    Bamboo.Parsing.RegularExpressions.Repitition repitition = expression as Bamboo.Parsing.RegularExpressions.Repitition;
    if (repitition != null)
    {
        return Extract(repitition.Expression, false);
    }

    Bamboo.Parsing.RegularExpressions.Optional optional = expression as Bamboo.Parsing.RegularExpressions.Optional;
    if (optional != null)
    {
        return Extract(optional.Expression, false);
    }

    throw new System.Exception("Unknown expression type: " + expression.GetType().FullName);
}