/// <summary>
/// Creates a new regular expression matcher by running these steps:
/// 1) format the regular expression into its explicit form;
/// 2) convert the formatted expression from infix to postfix
///    (see http://www.cs.man.ac.uk/~pjj/cs2121/fix.html for the algorithm details);
/// 3) build a non-deterministic automaton using Thompson's construction
///    (see page 153 of the Dragon Book for the algorithm details);
/// 4) convert the non-deterministic automaton to an equivalent deterministic automaton;
/// 5) optimize the deterministic automaton with the minimization algorithm.
/// The duration of each timed step is written to the debug output in milliseconds.
/// </summary>
/// <param name="regularExpress">The regular expression to create</param>
/// <exception cref="RegularExpressionParser.RegularExpressionParserException">
/// Propagated unchanged, with its original stack trace, when raised by any of the steps.
/// </exception>
public RegularExpression(string regularExpress)
{
    _parser = new RegularExpressionParser();

    Debug.WriteLine("");
    Stopwatch t = new Stopwatch();

    // Save the original expression
    _originalRegularExpression = regularExpress;

    // Obtain the explicit regex expression
    t.Restart();
    _formattedRegularExpression = _parser.ParseRegEx(_originalRegularExpression);
    t.Stop();
    // Stopwatch ticks are expressed in Stopwatch.Frequency units, not in 100 ns units,
    // so the elapsed time is read through Elapsed instead of dividing ElapsedTicks.
    Debug.WriteLine("Regular expression parsed in " + t.Elapsed.TotalMilliseconds.ToString("0.00000"));

    // Convert the expression from infix to its postfix equivalent (this step is not timed)
    _postfixRegularExpression = RegularExpressionParser.ConvertToPostfix(_formattedRegularExpression);

    // Regular expression is now in postfix form: create the non-deterministic automaton
    t.Restart();
    _NDAutomata = AutomataWrapper.CreateNFAutomata(_postfixRegularExpression);
    t.Stop();
    Debug.WriteLine("NFA generated in " + t.Elapsed.TotalMilliseconds.ToString("0.00000"));

    // Create the equivalent deterministic automaton
    t.Restart();
    _DAutomata = AutomataWrapper.CreateDAutomata(_NDAutomata);
    t.Stop();
    Debug.WriteLine("DFA generated in " + t.Elapsed.TotalMilliseconds.ToString("0.00000"));

    // Optimize the deterministic automaton
    t.Restart();
    _optimizedDAutomata = AutomataWrapper.MinimizeDAutomata(_DAutomata);
    t.Stop();
    Debug.WriteLine("Optimized DFA generated in " + t.Elapsed.TotalMilliseconds.ToString("0.00000"));

    // No try/catch here: the previous handler only did "throw e;", which rethrew the same
    // exception but reset its stack trace. Letting it propagate keeps the original trace.
}
/// <summary>
/// Sets the Marked flag on the map record stored for the given deterministic state.
/// </summary>
/// <param name="stateT">Deterministic state whose record is flagged as marked</param>
private void MarkState(DAutomata.DAutomataState stateT)
{
    var record = mapDStateToEnclosure[stateT];
    record.Marked = true;
}
/// <summary>
/// Internal worker behind the public IsMatch call: walks the automaton over the input
/// string and reports whether the walk ends on a final state.
/// </summary>
/// <param name="inputString">String to match against this regular expression</param>
/// <param name="startState">State the walk starts from (usually the automaton start state)</param>
/// <param name="startFrom">Index of the first character of inputString to consume</param>
/// <returns>true if inputString matches this regular expression</returns>
private static bool IsMatch(ref string inputString, DAutomata.DAutomataState startState, int startFrom=0)
{
    DAutomata.DAutomataState current = startState;
    int position = startFrom;

    while (position < inputString.Length)
    {
        // Ambiguous state: the current char may also be consumed by the "." pattern.
        // Try that path first, recursively, from the next character (backtracking).
        DAutomata.DAutomataState wildcard = current.GetTransition(RegularExpressionParser.MetaCharsTranslations.JollyCharTrans);
        if (wildcard != null && IsMatch(ref inputString, wildcard, position + 1))
        {
            return true;
        }

        // Otherwise follow the transition labelled with the current character itself.
        DAutomata.DAutomataState next = current.GetTransition(inputString[position].ToString());
        if (next == null)
        {
            return false;
        }

        current = next;
        position++;
    }

    return current.IsFinal;
}
/// <summary>
/// Looks up the closure stored in the map for the given deterministic state.
/// </summary>
/// <param name="state">Deterministic state used as the map key</param>
/// <returns>The Closure list of the record mapped to the state</returns>
private List<NDAutomata.NDAutomataState> GetClosureFromState(DAutomata.DAutomataState state)
{
    var record = mapDStateToEnclosure[state];
    return record.Closure;
}
/// <summary>
/// Searches the current map for the first state whose record is not marked.
/// </summary>
/// <param name="state">
/// Receives the first unmarked state found, or the default value when every record is marked
/// </param>
/// <returns>true if an unmarked state was found; otherwise false</returns>
private bool GetUnmarkedState(out DAutomata.DAutomataState state)
{
    state = default(DAutomata.DAutomataState);

    foreach (var entry in mapDStateToEnclosure)
    {
        if (entry.Value.Marked)
        {
            continue;
        }

        state = entry.Key;
        return true;
    }

    return false;
}
/// <summary>
/// Stores (or overwrites) the association between a deterministic state and its
/// epsilon closure in the map.
/// </summary>
/// <param name="stateDfa">Deterministic automata state used as the key</param>
/// <param name="setEpsilonClosure">Epsilon closure saved in the new record</param>
private void AddStateToMap(DAutomata.DAutomataState stateDfa, List<NDAutomata.NDAutomataState> setEpsilonClosure)
{
    mapDStateToEnclosure[stateDfa] = new EpsilonClosure { Closure = setEpsilonClosure };
}
/// <summary>
/// Locates the group that contains a specific state.
/// </summary>
/// <param name="groups">List of groups to search, in order</param>
/// <param name="state">State to search for</param>
/// <returns>The first group containing the state, or null when no group contains it</returns>
private static List<DAutomata.DAutomataState> FindGroup(List<List<DAutomata.DAutomataState>> groups, DAutomata.DAutomataState state)
{
    return groups.Find(candidate => candidate.Contains(state));
}
/// <summary>
/// Appends a state to the list mapped to <paramref name="setFound"/>, creating the
/// association (setFound --> {state}) first when the key is not in the table yet.
/// </summary>
/// <param name="hash">The hash table containing the map (group; {states})</param>
/// <param name="state">State to add to the mapped list</param>
/// <param name="setFound">Key of the map</param>
private static void CreateOrAddToMap(Hashtable hash, DAutomata.DAutomataState state, List<DAutomata.DAutomataState> setFound)
{
    // Single lookup: the Hashtable indexer yields null when the key is absent.
    var members = (List<DAutomata.DAutomataState>)hash[setFound];
    if (members == null)
    {
        members = new List<DAutomata.DAutomataState>();
        hash[setFound] = members;
    }

    // The table stores a reference to the list, so appending in place is enough;
    // removing and re-adding the same key/value pair is not needed.
    members.Add(state);
}
/// <summary>
/// Minimizes a deterministic automaton using the minimization algorithm
/// (described here http://www.cs.engr.uky.edu/~lewis/essays/compilers/min-fa.html).
/// The states are partitioned into groups; the first state of each group is kept as
/// its representative and every transition towards another member of the group is
/// redirected to that representative.
/// </summary>
/// <param name="_originalDAutomata">Deterministic automaton to reduce</param>
/// <returns>A new automaton built from the representative of the start state's group</returns>
public static DAutomata MinimizeDAutomata(DAutomata _originalDAutomata)
{
    List<string> transSymbols = _originalDAutomata.Chars.ToList();
    List<DAutomata.DAutomataState> DAutomataStates = _originalDAutomata.States.ToList();
    DAutomata.DAutomataState startMinimizedState = null;

    var arr = PartitionGroupsDAutomata(DAutomataStates, transSymbols);
    foreach (var setGroup in arr)
    {
        // An empty group has no representative; skip it instead of failing on setGroup[0].
        if (setGroup.Count == 0)
        {
            continue;
        }

        bool finalStInGroup = GroupContainsFinalGroup(setGroup);
        bool startDFAinGroup = setGroup.Contains(_originalDAutomata.StartState);

        // The first state of the group stands in for the whole group.
        DAutomata.DAutomataState examinatedState = setGroup[0];

        if (startDFAinGroup)
        {
            startMinimizedState = examinatedState;
        }

        if (finalStInGroup)
        {
            examinatedState.IsFinal = true;
        }

        if (setGroup.Count == 1)
        {
            continue;
        }

        // Drop the representative so that only the states to be merged remain in the group.
        setGroup.Remove(examinatedState);

        foreach (var stateToBeReplaced in setGroup)
        {
            DAutomataStates.Remove(stateToBeReplaced);

            // Redirect every transition that targeted the merged state to the representative.
            foreach (var objState in DAutomataStates)
            {
                objState.ReplaceTransitionState(stateToBeReplaced, examinatedState);
            }
        }
    }

    // RemoveAll is used because removing items from a List<T> inside a foreach over the
    // same list throws InvalidOperationException as soon as one dead state is removed.
    // NOTE(review): the pruned list is not handed to the DAutomata constructor below, so
    // this pruning currently only affects the local list - confirm the intended behavior.
    DAutomataStates.RemoveAll(candidate => candidate.IsDeadState());

    DAutomata newOptimizedDFA = new DAutomata(startMinimizedState);
    return newOptimizedDFA;
}
/// <summary>
/// Generates the C# source of a class (Exercise1.MyRegExExecutor) that hard-codes the
/// given automaton as nested switch statements, compiles it in memory and instantiates it.
/// On success the compiled assembly and the created instance are stored in the
/// <c>assembly</c> and <c>compiledClass</c> fields.
/// </summary>
/// <param name="automata">A deterministic automaton to convert into an automaton class</param>
/// <returns>The generated source code that was compiled</returns>
/// <exception cref="CompilerException">The generated source does not compile</exception>
public string CompileCode(DAutomata automata)
{
    string sourceCode = BuildExecutorSource(automata);

    // Compiler parameters: in-memory compilation (no EXE/DLL file is requested)
    CompilerParameters parameters = new CompilerParameters();
    parameters.GenerateInMemory = true;
    parameters.GenerateExecutable = false;

    // Add all the DLL references needed to perform the compilation
    parameters.ReferencedAssemblies.Add("System.dll");
    parameters.ReferencedAssemblies.Add("System.Data.dll");
    parameters.ReferencedAssemblies.Add("System.Core.dll");
    parameters.ReferencedAssemblies.Add("MidTermAP.dll");

    // The code provider is disposable, so it is released once the compilation is done.
    using (CSharpCodeProvider codeProvider = new CSharpCodeProvider())
    {
        CompilerResults results = codeProvider.CompileAssemblyFromSource(parameters, sourceCode);

        if (results.Errors.HasErrors)
        {
            // There are compile errors: report them to the caller.
            throw new CompilerException(results.Errors);
        }

        // No errors => instantiate the compiled class "MyRegExExecutor"
        this.assembly = results.CompiledAssembly;
        compiledClass = (ICompilable)Activator.CreateInstance(assembly.GetTypes()[0]);
        return sourceCode;
    }
}

/// <summary>
/// Builds the source code of the MyRegExExecutor class for the given automaton.
/// </summary>
private static string BuildExecutorSource(DAutomata automata)
{
    StringBuilder generateClass = new StringBuilder();
    DAutomata.DAutomataState[] allStates = automata.States;
    string[] inputSymbols = automata.Chars;

    generateClass.AppendLine("using System;");
    generateClass.AppendLine("using System.Data;");
    generateClass.AppendLine("using Exercise1;");
    generateClass.AppendLine("");
    generateClass.AppendLine("namespace Exercise1");
    generateClass.AppendLine("{");
    generateClass.AppendLine(" public class MyRegExExecutor: Exercise1.ICompilable");
    generateClass.AppendLine(" {");
    generateClass.AppendLine("");
    generateClass.AppendLine(" public bool IsMatch(string str)");
    generateClass.AppendLine(" {");
    generateClass.AppendLine(" return IsMatch(ref str, " + automata.StartState.ID + ", 0);");
    generateClass.AppendLine(" }");
    generateClass.AppendLine("");
    generateClass.AppendLine(" private bool IsMatch(ref string str, int stState = 0, int stIndex = 0)");
    generateClass.AppendLine(" {");
    generateClass.AppendLine(" int curState = stState;");
    generateClass.AppendLine(" for(int i = stIndex; i<str.Length; i++)");
    generateClass.AppendLine(" {");
    generateClass.AppendLine(" switch(curState)");
    generateClass.AppendLine(" {");

    foreach (var st in allStates)
    {
        // The wildcard (".") transition depends only on the state, so it is looked up
        // once per state rather than once per symbol.
        DAutomata.DAutomataState dotS = st.GetTransition(RegularExpressionParser.MetaCharsTranslations.JollyCharTrans);

        generateClass.AppendLine(" case " + st.ID + ":");
        generateClass.AppendLine(" switch(str[i])");
        generateClass.AppendLine(" {");

        foreach (string chr in inputSymbols)
        {
            DAutomata.DAutomataState nextS = st.GetTransition(chr);
            if (nextS == null || chr == RegularExpressionParser.MetaCharsTranslations.JollyCharTrans)
            {
                continue;
            }

            // Only the first character of the symbol labels the case.
            // NOTE(review): assumes ordinary symbols are one character long - confirm.
            generateClass.AppendLine(" case '" + ToCharLiteral(chr[0]) + "':");

            if (dotS != null)
            {
                // The char could also be consumed by ".": try that path first (backtracking).
                generateClass.AppendLine(" if(IsMatch(ref str, " + dotS.ID + ", i+1)) return true;");
            }

            if (st.ID != nextS.ID)
            {
                generateClass.AppendLine(" curState = " + nextS.ID + ";");
            }

            generateClass.AppendLine(" break;");
        }

        generateClass.AppendLine(" default:");
        if (dotS == null)
        {
            generateClass.AppendLine(" return false;");
        }
        else
        {
            generateClass.AppendLine(" curState = " + dotS.ID + ";");
            generateClass.AppendLine(" break;");
        }

        generateClass.AppendLine(" }");
        generateClass.AppendLine(" break;");
    }

    generateClass.AppendLine(" }");
    generateClass.AppendLine(" }");

    // The match succeeds when the walk ends on any final state.
    StringBuilder orStates = new StringBuilder();
    foreach (var st in allStates)
    {
        if (!st.IsFinal)
        {
            continue;
        }

        if (orStates.Length > 0)
        {
            orStates.Append(" || ");
        }

        orStates.Append("curState == " + st.ID);
    }

    // Without any final state an empty expression would produce "return ();",
    // which does not compile; such an automaton can never match.
    if (orStates.Length == 0)
    {
        orStates.Append("false");
    }

    generateClass.AppendLine(" return (" + orStates + ");");
    generateClass.AppendLine(" }");
    generateClass.AppendLine(" }");
    generateClass.AppendLine("}");

    return generateClass.ToString();
}

/// <summary>
/// Returns the text to place between single quotes so that the generated C# char
/// literal denotes exactly the given character.
/// </summary>
private static string ToCharLiteral(char c)
{
    if (c == '\\')
    {
        return "\\\\";
    }

    if (c == '\'')
    {
        return "\\'";
    }

    // Control characters, line terminators and non-ASCII characters are written as
    // \uXXXX escapes so the literal never contains a raw newline and does not depend
    // on the encoding of the generated source.
    if (c < ' ' || c > '~')
    {
        return "\\u" + ((int)c).ToString("X4");
    }

    return c.ToString();
}