/// <summary>
/// Builds three sample regular expressions and prints, for each one, the source
/// text next to the result of its <c>ToString()</c> method.
/// </summary>
public static void CreateExampleRegExp()
{
    // Sample 1: (aa)*(aa)+
    RegExpression firstSample = new RegExpression("(aa)*(aa)+");
    string firstReport = "Language (aa)*(aa)+ \n"
                         + "Language via toString method = " + firstSample.ToString() + "\n";
    Console.WriteLine(firstReport);

    // Sample 2: a* (aa+ | ba*b ) * (abba | baab | bbbb)+
    RegExpression secondSample = new RegExpression("a* (aa+ | ba*b ) * (abba | baab | bbbb)+");
    string secondReport = "Language a* (aa+ | ba*b ) * (abba | baab | bbbb)+ \n"
                          + "Language via toString method = " + secondSample.ToString() + "\n"
                          + "Word asdasdsd \n\n";
    Console.WriteLine(secondReport);

    // Sample 3: (a*b*)+ (bb*b | ab*baa)+
    RegExpression thirdSample = new RegExpression("(a*b*)+ (bb*b | ab*baa)+");
    string thirdReport = "Language (a*b*)+ (bb*b | ab*baa)+ \n"
                         + "Language via toString method = " + thirdSample.ToString() + "\n"
                         + "Word asdadsad \n\n";
    Console.WriteLine(thirdReport);
}
/// <summary>
/// Runs the Thompson construction on three sample regular expressions. For each
/// sample the resulting NDFA is written to a graph file, after which the dot
/// engine is run on that file to produce a picture.
/// </summary>
public static void CreateExampleThompsonConstruction()
{
    // The four arrays are parallel: entry k of each one belongs to sample k.
    string[] patterns =
    {
        "(aa)*(aa)+",
        "a(a|b)*",
        "a*(aa+|(ba))* (abba|baab|bbbb)+"
    };
    string[] graphNames =
    {
        "ThompsonNDFA1",
        "ThompsonNDFA2",
        "ThompsonNDFA3"
    };
    string[] dotFiles =
    {
        @".\..\..\graphviz\dotfiles\ThompsonNDFA1",
        @".\..\..\graphviz\dotfiles\ThompsonNDFA2",
        @".\..\..\graphviz\dotfiles\ThompsonNDFA3"
    };
    string[] pictureNames =
    {
        "ThompsonNDFA1Pic",
        "ThompsonNDFA2Pic",
        "ThompsonNDFA3Pic"
    };

    for (int sample = 0; sample < patterns.Length; sample++)
    {
        RegExpression expression = new RegExpression(patterns[sample]);
        Automata<string> ndfa = ThompsonConstruction.RegExpToNDFA(expression);

        ndfa.GenerateGraphFile(graphNames[sample]);
        FileDotEngine.Run(dotFiles[sample], pictureNames[sample]);
    }
}
/// <summary>
/// Dispatches a regular expression node to the construction routine that matches
/// its operator (PLUS, STAR, OR, DOT or ONE). Any other operator value is ignored.
/// </summary>
/// <param name="regExp">The expression node to convert.</param>
/// <param name="ndfa">The automaton that receives the generated transitions.</param>
/// <param name="stateCounter">Running state counter, passed on to the construction routine.</param>
/// <param name="leftState">State on the left side of the fragment being built.</param>
/// <param name="rightState">State on the right side of the fragment being built.</param>
public static void Convert(RegExpression regExp, ref Automata<string> ndfa, ref int stateCounter, int leftState, int rightState)
{
    RegExpression.Operator kind = regExp.o;

    if (kind == RegExpression.Operator.PLUS)
    {
        Plus(regExp, ref ndfa, ref stateCounter, leftState, rightState);
    }
    else if (kind == RegExpression.Operator.STAR)
    {
        Star(regExp, ref ndfa, ref stateCounter, leftState, rightState);
    }
    else if (kind == RegExpression.Operator.OR)
    {
        Or(regExp, ref ndfa, ref stateCounter, leftState, rightState);
    }
    else if (kind == RegExpression.Operator.DOT)
    {
        Dot(regExp, ref ndfa, ref stateCounter, leftState, rightState);
    }
    else if (kind == RegExpression.Operator.ONE)
    {
        One(regExp, ref ndfa, ref stateCounter, leftState, rightState);
    }
}
/// <summary>
/// Initializes a new, empty instance of the <see cref="RegExpression"/> class:
/// operator ONE, an empty terminal string and no child expressions.
/// </summary>
public RegExpression()
{
    this.left = null;
    this.right = null;
    this.terminals = "";
    this.o = Operator.ONE;
}
/// <summary>
/// Adds the transitions for a terminal node: a chain that reads the characters of
/// <c>regExp.terminals</c> one by one, starting in <paramref name="leftState"/> and
/// ending in <paramref name="rightState"/>.
/// </summary>
/// <remarks>
/// A terminal string of n characters consumes n - 1 fresh states from
/// <paramref name="stateCounter"/>. An empty (or null) terminal string is translated
/// to a single epsilon ('$') transition instead of failing on the first character.
/// </remarks>
/// <param name="regExp">The terminal node whose <c>terminals</c> are converted.</param>
/// <param name="automaat">The automaton that receives the transitions.</param>
/// <param name="stateCounter">Next unused state number; advanced for every intermediate state created.</param>
/// <param name="leftState">State the chain starts in.</param>
/// <param name="rightState">State the chain ends in.</param>
public static void One(RegExpression regExp, ref Automata<string> automaat, ref int stateCounter, int leftState, int rightState)
{
    string terminals = regExp.terminals;

    if (string.IsNullOrEmpty(terminals))
    {
        // Nothing to read: connect both ends with an epsilon transition
        // ('$' is the epsilon symbol used by the other construction routines).
        automaat.AddTransition(
            new Transition<string>(leftState.ToString(), '$', rightState.ToString()));
        return;
    }

    int lastIndex = terminals.Length - 1;
    string fromState = leftState.ToString();

    // Every character except the last one leads into a freshly numbered state.
    for (int i = 0; i < lastIndex; i++)
    {
        string toState = stateCounter.ToString();
        stateCounter++;

        automaat.AddTransition(new Transition<string>(fromState, terminals[i], toState));
        fromState = toState;
    }

    // The last character closes the chain on the right-hand state.
    automaat.AddTransition(
        new Transition<string>(fromState, terminals[lastIndex], rightState.ToString()));
}
/// <summary>
/// Builds the fragment for a concatenation node: the left operand is converted
/// between <paramref name="leftState"/> and a newly numbered middle state, the
/// right operand between that middle state and <paramref name="rightState"/>.
/// </summary>
/// <param name="regExp">The concatenation node.</param>
/// <param name="automaat">The automaton that receives the transitions.</param>
/// <param name="stateCounter">Next unused state number; one state is taken for the middle.</param>
/// <param name="leftState">State the fragment starts in.</param>
/// <param name="rightState">State the fragment ends in.</param>
public static void Dot(RegExpression regExp, ref Automata<string> automaat, ref int stateCounter, int leftState, int rightState)
{
    // Reserve the state that joins the two halves.
    int bridgeState = stateCounter++;

    Convert(regExp.left, ref automaat, ref stateCounter, leftState, bridgeState);
    Convert(regExp.right, ref automaat, ref stateCounter, bridgeState, rightState);
}
/// <summary>
/// Initializes a new instance of the <see cref="RegExpression"/> class by parsing
/// the given text and copying the parsed root node into this instance.
/// </summary>
/// <param name="regex">The regular expression text to parse.</param>
public RegExpression(string regex)
{
    RegExpression parsed = StringToRegExpression(new RegExpression(), regex);

    // Take over the parsed root node field by field.
    this.left = parsed.left;
    this.right = parsed.right;
    this.terminals = parsed.terminals;
    this.o = parsed.o;
}
/// <summary>
/// Wraps this expression in a STAR node.
/// </summary>
/// <returns>A new expression with operator STAR and this instance as its left child.</returns>
public RegExpression Star()
{
    RegExpression starred = new RegExpression();
    starred.o = Operator.STAR;
    starred.left = this;
    return starred;
}
/// <summary>
/// Wraps this expression in a PLUS node.
/// </summary>
/// <returns>A new expression with operator PLUS and this instance as its left child.</returns>
public RegExpression Plus()
{
    RegExpression repeated = new RegExpression();
    repeated.o = Operator.PLUS;
    repeated.left = this;
    return repeated;
}
/// <summary>
/// Combines this expression with another one in an OR node.
/// </summary>
/// <param name="e2">The expression that becomes the right child.</param>
/// <returns>A new expression with operator OR, this instance as left child and <paramref name="e2"/> as right child.</returns>
public RegExpression Or(RegExpression e2)
{
    RegExpression choice = new RegExpression();
    choice.o = Operator.OR;
    choice.left = this;
    choice.right = e2;
    return choice;
}
/// <summary>
/// Builds the fragment for a PLUS node. Two new states enclose the operand;
/// epsilon ('$') transitions lead from <paramref name="leftState"/> into the
/// operand, from the operand's end back to its start, and from the operand's end
/// to <paramref name="rightState"/>.
/// </summary>
/// <param name="regExp">The PLUS node; its left child is the repeated operand.</param>
/// <param name="automaat">The automaton that receives the transitions.</param>
/// <param name="stateCounter">Next unused state number; two states are taken here.</param>
/// <param name="leftState">State the fragment starts in.</param>
/// <param name="rightState">State the fragment ends in.</param>
public static void Plus(RegExpression regExp, ref Automata<string> automaat, ref int stateCounter, int leftState, int rightState)
{
    // Reserve the entry and exit state of the repeated operand.
    int entryState = stateCounter;
    int exitState = stateCounter + 1;
    stateCounter += 2;

    string entry = entryState.ToString();
    string exit = exitState.ToString();

    // Epsilon transitions: enter the operand, loop back, and leave.
    automaat.AddTransition(new Transition<string>(leftState.ToString(), '$', entry));
    automaat.AddTransition(new Transition<string>(exit, '$', entry));
    automaat.AddTransition(new Transition<string>(exit, '$', rightState.ToString()));

    // The operand itself sits between the two reserved states.
    Convert(regExp.left, ref automaat, ref stateCounter, entryState, exitState);
}
/// <summary>
/// Builds an NDFA for a regular expression. State "0" is defined as start state,
/// state "1" as final state, and the expression is converted between those two.
/// Afterwards the automaton's symbol set is filled from the symbols found on its
/// transitions, with the epsilon symbol '$' removed.
/// </summary>
/// <param name="regExpression">The expression to convert.</param>
/// <returns>The constructed automaton.</returns>
public static Automata<string> RegExpToNDFA(RegExpression regExpression)
{
    const int startState = 0;
    const int finalState = 1;

    Automata<string> result = new Automata<string>();
    result.DefineAsStartState("0");
    result.DefineAsFinalState("1");

    // States 0 and 1 are taken, so numbering of new states starts at 2.
    int nextState = 2;
    Convert(regExpression, ref result, ref nextState, startState, finalState);

    // Collect the symbols that occur on the transitions.
    var usedSymbols = result.transitions
        .Distinct()
        .Select(transition => transition.GetSymbol())
        .ToList();
    result.symbols = new SortedSet<char>(usedSymbols);

    // Epsilon is not a symbol of the alphabet.
    result.symbols.Remove('$');

    return result;
}
/// <summary>
/// Builds the fragment for an OR node. Each operand gets its own pair of new
/// states; epsilon ('$') transitions fan out from <paramref name="leftState"/> to
/// both operands and join again in <paramref name="rightState"/>.
/// </summary>
/// <param name="regExp">The OR node; its left and right children are the alternatives.</param>
/// <param name="automaat">The automaton that receives the transitions.</param>
/// <param name="stateCounter">Next unused state number; four states are taken here.</param>
/// <param name="leftState">State the fragment starts in.</param>
/// <param name="rightState">State the fragment ends in.</param>
public static void Or(RegExpression regExp, ref Automata<string> automaat, ref int stateCounter, int leftState, int rightState)
{
    // Reserve an entry/exit pair for each alternative.
    int upperEntry = stateCounter;
    int upperExit = stateCounter + 1;
    int lowerEntry = stateCounter + 2;
    int lowerExit = stateCounter + 3;
    stateCounter += 4;

    string source = leftState.ToString();
    string target = rightState.ToString();

    // Epsilon transitions: split into both alternatives, then merge again.
    automaat.AddTransition(new Transition<string>(source, '$', upperEntry.ToString()));
    automaat.AddTransition(new Transition<string>(source, '$', lowerEntry.ToString()));
    automaat.AddTransition(new Transition<string>(upperExit.ToString(), '$', target));
    automaat.AddTransition(new Transition<string>(lowerExit.ToString(), '$', target));

    // Each alternative sits between its own pair of states.
    Convert(regExp.left, ref automaat, ref stateCounter, upperEntry, upperExit);
    Convert(regExp.right, ref automaat, ref stateCounter, lowerEntry, lowerExit);
}
/// <summary>
/// Sets up the test expressions, building the composite ones from the simple
/// ones with the Or, Star, Plus and Dot builders.
/// </summary>
public TestRegExp()
{
    // Single-letter building blocks.
    this.a = new RegExpression("a");
    this.b = new RegExpression("b");

    // expr1: "baa", expr2: "bb"
    this.expr1 = new RegExpression("baa");
    this.expr2 = new RegExpression("bb");

    // expr3: "baa | bb"
    this.expr3 = this.expr1.Or(this.expr2);

    // all: "(a|b)*"
    this.all = this.a.Or(this.b).Star();

    // expr4: "(baa | bb)+"
    this.expr4 = this.expr3.Plus();

    // expr5: "(baa | bb)+ (a|b)*"
    this.expr5 = this.expr4.Dot(this.all);
}
/// <summary>
/// Parses regular expression text into a <see cref="RegExpression"/> tree.
/// </summary>
/// <remarks>
/// Spaces are removed before parsing. The text is processed strictly from left to right:
/// <list type="bullet">
/// <item><description>A parenthesised group is parsed recursively; a '+' or '*' directly behind the
/// closing bracket applies to the group only. The group is concatenated to whatever was parsed before it.</description></item>
/// <item><description>A '+' or '*' elsewhere wraps everything parsed so far.</description></item>
/// <item><description>A '|' combines everything parsed so far with the next terminal run as right operand.</description></item>
/// <item><description>The first terminal run fills an empty expression. A terminal run that follows an
/// operator or a group is currently ignored (concatenation of loose terminals is not implemented).</description></item>
/// </list>
/// </remarks>
/// <param name="regex">The expression to build on. When it is empty (operator ONE, no terminals) it is
/// filled in place by the first terminal run, or replaced by the first group.</param>
/// <param name="regexString">The regular expression text.</param>
/// <returns>The root of the parsed expression tree.</returns>
/// <exception cref="ArgumentNullException"><paramref name="regexString"/> is null.</exception>
/// <exception cref="ArgumentException">An opening bracket has no matching closing bracket, or a '|'
/// has no terminal run left to use as its right operand.</exception>
public RegExpression StringToRegExpression(RegExpression regex, string regexString)
{
    if (regexString == null)
    {
        throw new ArgumentNullException(nameof(regexString));
    }

    // Remove spaces
    regexString = regexString.Replace(" ", String.Empty);

    // All terminal runs of the text, in order of appearance.
    char[] separators = { '+', '*', '|', '(', ')' };
    List<string> terminalParts = regexString.Split(separators).ToList();
    terminalParts.RemoveAll(part => part == String.Empty);
    int terminalIndex = 0;

    int i = 0;
    while (i < regexString.Length)
    {
        char currentChar = regexString[i];

        if (currentChar == '(')
        {
            int closingBracketPosition = FindClosingBracket(regexString, i);
            if (closingBracketPosition < 0)
            {
                throw new ArgumentException(
                    "The regular expression contains an opening bracket without a matching closing bracket.",
                    nameof(regexString));
            }

            // Parse the part between the brackets on its own.
            string between = regexString.Substring(i + 1, closingBracketPosition - 1 - i);
            RegExpression group = StringToRegExpression(new RegExpression(), between);

            // The terminal runs inside the group were handled by the recursive call;
            // skip them so a later '|' picks up the terminal that follows the group.
            terminalIndex += between.Split(separators).Count(part => part != String.Empty);

            // Continue behind the group, never inside it.
            i = closingBracketPosition;

            // A postfix operator directly behind the bracket belongs to the group.
            if (i + 1 < regexString.Length)
            {
                char next = regexString[i + 1];
                if (next == '+')
                {
                    group = group.Plus();
                    i++;
                }
                else if (next == '*')
                {
                    group = group.Star();
                    i++;
                }
            }

            if (regex.terminals == "" && regex.o == Operator.ONE)
            {
                regex = group;
            }
            else
            {
                regex = regex.Dot(group);
            }
        }
        else if (currentChar == '+')
        {
            regex = regex.Plus();
        }
        else if (currentChar == '*')
        {
            regex = regex.Star();
        }
        else if (currentChar == '|')
        {
            if (terminalIndex >= terminalParts.Count)
            {
                throw new ArgumentException(
                    "The regular expression contains a '|' without a terminal to use as its right operand.",
                    nameof(regexString));
            }

            regex = regex.Or(new RegExpression(terminalParts[terminalIndex]));
            terminalIndex++;

            // Step over the character that follows the '|'.
            i++;
        }
        else if (regex.terminals == "" && regex.o == Operator.ONE)
        {
            // First terminal run: store it and skip the rest of its characters.
            regex.terminals = terminalParts[terminalIndex];
            i += terminalParts[terminalIndex].Length - 1;
            terminalIndex++;
        }

        // Any other character (a terminal behind an operator or group) is ignored.
        i++;
    }

    return regex;
}

/// <summary>
/// Finds the closing bracket that matches the opening bracket at <paramref name="openIndex"/>.
/// </summary>
/// <param name="text">The text to search.</param>
/// <param name="openIndex">Index of the opening bracket.</param>
/// <returns>The index of the matching ')', or -1 when there is none.</returns>
private static int FindClosingBracket(string text, int openIndex)
{
    int nestedOpen = 0;

    for (int j = openIndex + 1; j < text.Length; j++)
    {
        if (text[j] == '(')
        {
            nestedOpen++;
        }
        else if (text[j] == ')')
        {
            if (nestedOpen == 0)
            {
                return j;
            }

            nestedOpen--;
        }
    }

    return -1;
}