/// <summary>
/// Creates a new regular expression by running the following steps:
/// 1) format the regular expression into its explicit form;
/// 2) convert the formatted expression to postfix notation
///    (see http://www.cs.man.ac.uk/~pjj/cs2121/fix.html for the algorithm details);
/// 3) build a Non-Deterministic automata using Thompson's Construction Algorithm
///    (see page 153 of the Dragon Book for the algorithm details);
/// 4) convert the Non-Deterministic automata to an equivalent Deterministic automata;
/// 5) optimize the Deterministic automata (Minimize Automata Algorithm).
/// The duration of each measured step is written to the debug output.
/// </summary>
/// <param name="regularExpress">The regular expression to create</param>
/// <exception cref="RegularExpressionParser.RegularExpressionParserException">
/// Propagated unchanged (original stack trace preserved) when raised by one of the steps above.
/// </exception>
public RegularExpression(string regularExpress)
{
    _parser = new RegularExpressionParser();

    Debug.WriteLine("");
    Stopwatch t = new Stopwatch();

    // Save the original expression
    _originalRegularExpression = regularExpress;

    // Obtain the explicit regex expression; if parsing terminates the string is syntactically correct
    t.Reset();
    t.Start();
    _formattedRegularExpression = _parser.ParseRegEx(_originalRegularExpression);
    t.Stop();
    // Elapsed.TotalMilliseconds is used instead of ElapsedTicks / 10000.0: Stopwatch ticks are
    // expressed in Stopwatch.Frequency units, which are not guaranteed to be 100 ns.
    Debug.WriteLine("Regular expression parsed in " + t.Elapsed.TotalMilliseconds.ToString("0.00000"));

    // Convert the infix expression to its postfix equivalent (not timed)
    _postfixRegularExpression = RegularExpressionParser.ConvertToPostfix(_formattedRegularExpression);

    // Regular expression is now in postfix mode: create a Non-Deterministic automata
    t.Reset();
    t.Start();
    _NDAutomata = AutomataWrapper.CreateNFAutomata(_postfixRegularExpression);
    t.Stop();
    Debug.WriteLine("NFA generated in " + t.Elapsed.TotalMilliseconds.ToString("0.00000"));

    // Create an equivalent Deterministic automata
    t.Reset();
    t.Start();
    _DAutomata = AutomataWrapper.CreateDAutomata(_NDAutomata);
    t.Stop();
    Debug.WriteLine("DFA generated in " + t.Elapsed.TotalMilliseconds.ToString("0.00000"));

    // Optimize the Deterministic automata
    t.Reset();
    t.Start();
    _optimizedDAutomata = AutomataWrapper.MinimizeDAutomata(_DAutomata);
    t.Stop();
    Debug.WriteLine("Optimized DFA generated in " + t.Elapsed.TotalMilliseconds.ToString("0.00000"));

    // No try/catch here on purpose: catching the parser exception only to "throw e;" would
    // reset its stack trace without adding any handling.
}
/// <summary>
/// Converts NDAutomata to DAutomata using the "Subset Construction" algorithm.
/// The algorithm is described at page 153 (Algorithm 3.20) of the Dragon Book.
/// </summary>
/// <param name="_NFAutomata">Non-Deterministic automata to convert</param>
/// <returns>Equivalent Deterministic Automata</returns>
public static DAutomata CreateDAutomata(NDAutomata _NFAutomata)
{
    // Input symbols of the automata, without epsilon: epsilon moves are handled
    // through the epsilon closures, never as a regular input symbol.
    List<string> inputSymbols = _NFAutomata.Chars.ToList();
    inputSymbols.Remove(RegularExpressionParser.MetaCharsTranslations.EpsilonChar);

    // The helper instance keeps the map between each DAutomata state and the
    // set of NDAutomata states it stands for, plus the marked/unmarked flag.
    AutomataWrapper helper = new AutomataWrapper();

    // The start state of the DAutomata is the epsilon closure of the NDAutomata start state.
    List<NDAutomata.NDAutomataState> startClosure = GetEpsilonClosure(_NFAutomata.StartState);
    DAutomata.DAutomataState startDAutomataState = new DAutomata.DAutomataState();
    if (IsFinalGroup(startClosure))
    {
        startDAutomataState.IsFinal = true;
    }
    helper.AddStateToMap(startDAutomataState, startClosure);

    DAutomata.DAutomataState stateT;
    while (helper.GetUnmarkedState(out stateT))
    {
        helper.MarkState(stateT);
        List<NDAutomata.NDAutomataState> setT = helper.GetClosureFromState(stateT);

        foreach (string charS in inputSymbols)
        {
            List<NDAutomata.NDAutomataState> setMove = Move(setT, charS);
            if (setMove.Count == 0)
            {
                // No transition on this symbol from any state of the set.
                continue;
            }

            List<NDAutomata.NDAutomataState> moveClosure = GetEpsilonClosure(setMove);
            DAutomata.DAutomataState stateU = helper.GetStateFromClosure(moveClosure);
            if (stateU == null)
            {
                // The closure is a new one: create a new DAutomata state for it
                // (added to the map as unmarked by default).
                stateU = new DAutomata.DAutomataState();
                if (IsFinalGroup(moveClosure))
                {
                    stateU.IsFinal = true;
                }
                helper.AddStateToMap(stateU, moveClosure);
            }

            stateT.AddTransition(charS, stateU);
        }
    }

    return new DAutomata(startDAutomataState);
}
/// <summary>
/// Applies Thompson's Algorithm to create an automata from a regular expression in postfix form.
/// </summary>
/// <param name="sRegExPosfix">Regular expression in postfix form</param>
/// <returns>Corresponding Non-Deterministic automata</returns>
/// <exception cref="ArgumentNullException"><paramref name="sRegExPosfix"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The postfix expression is empty, or an operator does not have enough operands.
/// </exception>
public static NDAutomata CreateNFAutomata(string sRegExPosfix)
{
    if (sRegExPosfix == null)
    {
        throw new ArgumentNullException("sRegExPosfix");
    }

    string epsilon = RegularExpressionParser.MetaCharsTranslations.EpsilonChar;
    Stack<NDAutomataEdge> NFAutomataStack = new Stack<NDAutomataEdge>();
    NDAutomataEdge tempExpr1, tempExpr2, newExpr;
    bool inEscapeChar = false;

    foreach (char curChar in sRegExPosfix)
    {
        if (inEscapeChar)
        {
            // The previous character was the escape character: take this one literally,
            // even if it is a meta character.
            NFAutomataStack.Push(CreateLiteralEdge(curChar));
            inEscapeChar = false;
            continue;
        }

        if (curChar == RegularExpressionParser.MetaChars.EscapeChar)
        {
            inEscapeChar = true;
            continue;
        }

        switch (curChar)
        {
            case RegularExpressionParser.MetaChars.KleeneStar: // A* Kleene star
                tempExpr1 = PopThompsonOperand(NFAutomataStack, curChar);
                newExpr = new NDAutomataEdge();
                tempExpr1.ToState.AddTransition(epsilon, tempExpr1.FromState); // loop back to repeat A
                tempExpr1.ToState.AddTransition(epsilon, newExpr.ToState);
                newExpr.FromState.AddTransition(epsilon, tempExpr1.FromState);
                newExpr.FromState.AddTransition(epsilon, newExpr.ToState);     // skip A entirely
                NFAutomataStack.Push(newExpr);
                break;

            case RegularExpressionParser.MetaChars.PatternAlternate: // A|B
                tempExpr2 = PopThompsonOperand(NFAutomataStack, curChar);
                tempExpr1 = PopThompsonOperand(NFAutomataStack, curChar);
                newExpr = new NDAutomataEdge();
                tempExpr1.ToState.AddTransition(epsilon, newExpr.ToState);
                tempExpr2.ToState.AddTransition(epsilon, newExpr.ToState);
                newExpr.FromState.AddTransition(epsilon, tempExpr1.FromState);
                newExpr.FromState.AddTransition(epsilon, tempExpr2.FromState);
                NFAutomataStack.Push(newExpr);
                break;

            case RegularExpressionParser.MetaChars.PatternConcatenate: // "a$b" (or "ab" in postfix form)
                tempExpr2 = PopThompsonOperand(NFAutomataStack, curChar);
                tempExpr1 = PopThompsonOperand(NFAutomataStack, curChar);
                tempExpr1.ToState.AddTransition(epsilon, tempExpr2.FromState);
                newExpr = new NDAutomataEdge(tempExpr1.FromState, tempExpr2.ToState);
                NFAutomataStack.Push(newExpr);
                break;

            case RegularExpressionParser.MetaChars.OptionalPattern: // A? => A|empty
                tempExpr1 = PopThompsonOperand(NFAutomataStack, curChar);
                newExpr = new NDAutomataEdge();
                newExpr.FromState.AddTransition(epsilon, tempExpr1.FromState);
                newExpr.FromState.AddTransition(epsilon, newExpr.ToState);
                tempExpr1.ToState.AddTransition(epsilon, newExpr.ToState);
                NFAutomataStack.Push(newExpr);
                break;

            case RegularExpressionParser.MetaChars.JollyChar:
                newExpr = new NDAutomataEdge();
                newExpr.FromState.AddTransition(RegularExpressionParser.MetaCharsTranslations.JollyCharTrans, newExpr.ToState);
                NFAutomataStack.Push(newExpr);
                break;

            default:
                NFAutomataStack.Push(CreateLiteralEdge(curChar));
                break;
        }
    }

    // NOTE(review): a trailing escape character with nothing after it is silently ignored, and
    // any extra operands still on the stack are discarded. Presumably the parser / postfix
    // conversion never produces such input - confirm before tightening this.
    if (NFAutomataStack.Count == 0)
    {
        throw new InvalidOperationException("Cannot build an automata from an empty postfix regular expression.");
    }

    // Pop the very last one: there should be only one left at this point.
    NDAutomataEdge expr = NFAutomataStack.Pop();
    expr.ToState.IsFinal = true; // the very last state is the accepting state of the NFA

    return new NDAutomata(expr.FromState);
}

/// <summary>
/// Builds the two-state fragment that recognizes exactly one literal character.
/// </summary>
private static NDAutomataEdge CreateLiteralEdge(char symbol)
{
    NDAutomataEdge edge = new NDAutomataEdge();
    edge.FromState.AddTransition(symbol.ToString(), edge.ToState);
    return edge;
}

/// <summary>
/// Pops one operand for the given operator, failing with a descriptive message when it is missing.
/// </summary>
private static NDAutomataEdge PopThompsonOperand(Stack<NDAutomataEdge> operands, char operatorChar)
{
    if (operands.Count == 0)
    {
        throw new InvalidOperationException(
            "Malformed postfix regular expression: operator '" + operatorChar + "' is missing an operand.");
    }

    return operands.Pop();
}
/// <summary>
/// Creates an edge whose endpoints are the two given states.
/// The states are stored as passed in; no new state is created here.
/// </summary>
/// <param name="fromState">State the edge starts from</param>
/// <param name="toState">State the edge ends at</param>
public NDAutomataEdge(NDAutomata.NDAutomataState fromState, NDAutomata.NDAutomataState toState)
{
    this._toState = toState;
    this._fromState = fromState;
}
/// <summary>
/// Returns the NDAutomataStates that <paramref name="state"/> has a transition to
/// on the symbol <paramref name="charSymbol"/>.
/// </summary>
/// <param name="state">State to check in</param>
/// <param name="charSymbol">Symbol to check</param>
/// <returns>
/// A new list with the target states; an empty list when the state reports
/// no transitions (null) for the symbol.
/// </returns>
private static List<NDAutomata.NDAutomataState> Move(NDAutomata.NDAutomataState state, string charSymbol)
{
    List<NDAutomata.NDAutomataState> targets = state.GetTransitions(charSymbol);

    // Always hand back a fresh list so callers never hold the state's own collection.
    return targets == null
        ? new List<NDAutomata.NDAutomataState>()
        : new List<NDAutomata.NDAutomataState>(targets);
}
/// <summary>
/// Finds all states reachable from the specified state on Epsilon transitions only
/// (the start state itself is part of the result).
/// For details see the Dragon Book on page 153.
/// </summary>
/// <param name="stateStart">State from which the search begins</param>
/// <returns>
/// The states reachable from <paramref name="stateStart"/> on Epsilon transitions, in the
/// order they were discovered. Each state appears exactly once, even when it is reachable
/// through several Epsilon paths.
/// </returns>
private static List<NDAutomata.NDAutomataState> GetEpsilonClosure(NDAutomata.NDAutomataState stateStart)
{
    List<NDAutomata.NDAutomataState> closure = new List<NDAutomata.NDAutomataState>();
    Queue<NDAutomata.NDAutomataState> pending = new Queue<NDAutomata.NDAutomataState>();

    // Every state ever queued. Checking at enqueue time (not only against the processed
    // states) prevents a state reachable through two paths from being queued - and
    // therefore returned - twice.
    HashSet<NDAutomata.NDAutomataState> discovered = new HashSet<NDAutomata.NDAutomataState>();

    discovered.Add(stateStart);
    pending.Enqueue(stateStart);

    while (pending.Count > 0)
    {
        NDAutomata.NDAutomataState state = pending.Dequeue();
        closure.Add(state);

        List<NDAutomata.NDAutomataState> epsilonTargets =
            state.GetTransitions(RegularExpressionParser.MetaCharsTranslations.EpsilonChar);
        if (epsilonTargets == null)
        {
            continue;
        }

        foreach (NDAutomata.NDAutomataState stateEpsilon in epsilonTargets)
        {
            // HashSet.Add returns false when the state was already discovered.
            if (discovered.Add(stateEpsilon))
            {
                pending.Enqueue(stateEpsilon);
            }
        }
    }

    return closure;
}