Example #1
0
        /// <summary>
        /// Parses three sample regular expressions and prints, for each one, the
        /// source pattern next to the text returned by <c>ToString()</c> on the
        /// parsed <c>RegExpression</c>.
        /// </summary>
        public static void CreateExampleRegExp()
        {
            // Sample 1: (aa)*(aa)+
            var first = new RegExpression("(aa)*(aa)+");
            string firstReport = "Language (aa)*(aa)+ \n"
                                 + $"Language via toString method = {first.ToString()}\n";
            Console.WriteLine(firstReport);

            // Sample 2: a* (aa+ | ba*b ) * (abba | baab | bbbb)+
            var second = new RegExpression("a* (aa+ | ba*b ) * (abba | baab | bbbb)+");
            string secondReport = "Language a* (aa+ | ba*b ) * (abba | baab | bbbb)+ \n"
                                  + $"Language via toString method = {second.ToString()}\n"
                                  + "Word asdasdsd \n\n";
            Console.WriteLine(secondReport);

            // Sample 3: (a*b*)+ (bb*b | ab*baa)+
            var third = new RegExpression("(a*b*)+ (bb*b | ab*baa)+");
            string thirdReport = "Language (a*b*)+ (bb*b | ab*baa)+ \n"
                                 + $"Language via toString method = {third.ToString()}\n"
                                 + "Word asdadsad \n\n";
            Console.WriteLine(thirdReport);
        }
Example #2
0
        /// <summary>
        /// Runs the Thompson construction on three sample regular expressions.
        /// For each resulting automaton a graph file is generated and then passed
        /// to <c>FileDotEngine.Run</c> together with the name of the picture to produce.
        /// </summary>
        public static void CreateExampleThompsonConstruction()
        {
            // Sample 1: (aa)*(aa)+
            var firstExpression = new RegExpression("(aa)*(aa)+");
            var firstNdfa       = ThompsonConstruction.RegExpToNDFA(firstExpression);
            firstNdfa.GenerateGraphFile("ThompsonNDFA1");
            FileDotEngine.Run(@".\..\..\graphviz\dotfiles\ThompsonNDFA1", "ThompsonNDFA1Pic");

            // Sample 2: a(a|b)*
            var secondExpression = new RegExpression("a(a|b)*");
            var secondNdfa       = ThompsonConstruction.RegExpToNDFA(secondExpression);
            secondNdfa.GenerateGraphFile("ThompsonNDFA2");
            FileDotEngine.Run(@".\..\..\graphviz\dotfiles\ThompsonNDFA2", "ThompsonNDFA2Pic");

            // Sample 3: a*(aa+|(ba))* (abba|baab|bbbb)+
            var thirdExpression = new RegExpression("a*(aa+|(ba))* (abba|baab|bbbb)+");
            var thirdNdfa       = ThompsonConstruction.RegExpToNDFA(thirdExpression);
            thirdNdfa.GenerateGraphFile("ThompsonNDFA3");
            FileDotEngine.Run(@".\..\..\graphviz\dotfiles\ThompsonNDFA3", "ThompsonNDFA3Pic");
        }
        /// <summary>
        /// Dispatches a regular expression node to the construction routine that
        /// matches its operator (<c>PLUS</c>, <c>STAR</c>, <c>OR</c>, <c>DOT</c> or <c>ONE</c>).
        /// A node with any other operator value is left untouched.
        /// </summary>
        /// <param name="regExp">The expression node to convert.</param>
        /// <param name="ndfa">The automaton that receives the transitions.</param>
        /// <param name="stateCounter">The next unused state number; advanced by the called routine.</param>
        /// <param name="leftState">The state the fragment starts in.</param>
        /// <param name="rightState">The state the fragment ends in.</param>
        public static void Convert(RegExpression regExp, ref Automata <string> ndfa, ref int stateCounter, int leftState, int rightState)
        {
            var op = regExp.o;

            if (op == RegExpression.Operator.PLUS)
            {
                Plus(regExp, ref ndfa, ref stateCounter, leftState, rightState);
            }
            else if (op == RegExpression.Operator.STAR)
            {
                Star(regExp, ref ndfa, ref stateCounter, leftState, rightState);
            }
            else if (op == RegExpression.Operator.OR)
            {
                Or(regExp, ref ndfa, ref stateCounter, leftState, rightState);
            }
            else if (op == RegExpression.Operator.DOT)
            {
                Dot(regExp, ref ndfa, ref stateCounter, leftState, rightState);
            }
            else if (op == RegExpression.Operator.ONE)
            {
                One(regExp, ref ndfa, ref stateCounter, leftState, rightState);
            }
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="RegExpression"/> class as an
 /// empty terminal node: operator <c>ONE</c>, an empty terminal string and no
 /// left or right operand.
 /// </summary>
 public RegExpression()
 {
     left      = null;
     right     = null;
     terminals = "";
     o         = Operator.ONE;
 }
 /// <summary>
 /// Adds the transitions for a terminal node (operator <c>ONE</c>): a chain that
 /// reads the characters of <c>regExp.terminals</c> one by one, starting in
 /// <paramref name="leftState"/> and ending in <paramref name="rightState"/>.
 /// </summary>
 /// <remarks>
 /// A terminal of n characters uses n - 1 intermediate states, which are taken
 /// from <paramref name="stateCounter"/>. An empty (or null) terminal string is
 /// translated to a single epsilon transition ('$') instead of throwing, so an
 /// empty expression accepts the empty word.
 /// </remarks>
 /// <param name="regExp">The terminal expression node.</param>
 /// <param name="automaat">The automaton that receives the transitions.</param>
 /// <param name="stateCounter">The next unused state number; advanced once per intermediate state.</param>
 /// <param name="leftState">The state the chain starts in.</param>
 /// <param name="rightState">The state the chain ends in.</param>
 public static void One(RegExpression regExp, ref Automata <string> automaat, ref int stateCounter, int leftState, int rightState)
 {
     string terminals = regExp.terminals;

     if (string.IsNullOrEmpty(terminals))
     {
         // Nothing to read: connect both ends with an epsilon transition
         // ('$' is the epsilon symbol used by the other construction routines).
         automaat.AddTransition(
             new Transition <string>(leftState.ToString(), '$', rightState.ToString()));
         return;
     }

     string from = leftState.ToString();
     int    last = terminals.Length - 1;

     // Every character except the last one leads into a freshly numbered state.
     for (int i = 0; i < last; i++)
     {
         string to = stateCounter.ToString();
         stateCounter++;

         automaat.AddTransition(new Transition <string>(from, terminals[i], to));
         from = to;
     }

     // The last character closes the chain in the requested end state.
     automaat.AddTransition(new Transition <string>(from, terminals[last], rightState.ToString()));
 }
        /// <summary>
        /// Converts a concatenation node: the left operand is built between
        /// <paramref name="leftState"/> and a newly numbered middle state, the right
        /// operand between that middle state and <paramref name="rightState"/>.
        /// </summary>
        /// <param name="regExp">The concatenation node.</param>
        /// <param name="automaat">The automaton that receives the transitions.</param>
        /// <param name="stateCounter">The next unused state number; one state is taken for the middle.</param>
        /// <param name="leftState">The state the fragment starts in.</param>
        /// <param name="rightState">The state the fragment ends in.</param>
        public static void Dot(RegExpression regExp, ref Automata <string> automaat, ref int stateCounter, int leftState, int rightState)
        {
            // Reserve the joining state before either operand claims state numbers.
            int joint = stateCounter++;

            Convert(regExp.left, ref automaat, ref stateCounter, leftState, joint);
            Convert(regExp.right, ref automaat, ref stateCounter, joint, rightState);
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="RegExpression"/> class from
        /// its textual form. The text is parsed with <c>StringToRegExpression</c> and
        /// the operator, terminals and operands of the parsed root are copied into
        /// this instance.
        /// </summary>
        /// <param name="regex">The regular expression text to parse.</param>
        public RegExpression(string regex)
        {
            RegExpression parsed = StringToRegExpression(new RegExpression(), regex);

            left      = parsed.left;
            right     = parsed.right;
            terminals = parsed.terminals;
            o         = parsed.o;
        }
        /// <summary>
        /// Wraps this expression in a new <c>STAR</c> node.
        /// </summary>
        /// <returns>A new expression whose operator is <c>STAR</c> and whose left operand is this instance.</returns>
        public RegExpression Star()
        {
            var starred = new RegExpression();

            starred.o    = Operator.STAR;
            starred.left = this;
            return starred;
        }
        /// <summary>
        /// Wraps this expression in a new <c>PLUS</c> node.
        /// </summary>
        /// <returns>A new expression whose operator is <c>PLUS</c> and whose left operand is this instance.</returns>
        public RegExpression Plus()
        {
            var repeated = new RegExpression();

            repeated.o    = Operator.PLUS;
            repeated.left = this;
            return repeated;
        }
        /// <summary>
        /// Combines this expression and <paramref name="e2"/> in a new <c>OR</c> node.
        /// </summary>
        /// <param name="e2">The expression that becomes the right operand.</param>
        /// <returns>A new expression whose operator is <c>OR</c>, with this instance as left operand and <paramref name="e2"/> as right operand.</returns>
        public RegExpression Or(RegExpression e2)
        {
            var choice = new RegExpression();

            choice.o     = Operator.OR;
            choice.left  = this;
            choice.right = e2;
            return choice;
        }
        /// <summary>
        /// Converts a <c>PLUS</c> node. Two new states are taken; the operand is
        /// built between them, and epsilon ('$') transitions lead from
        /// <paramref name="leftState"/> into the first new state, from the second new
        /// state back to the first, and from the second new state to
        /// <paramref name="rightState"/>.
        /// </summary>
        /// <param name="regExp">The <c>PLUS</c> node; its left operand is the repeated part.</param>
        /// <param name="automaat">The automaton that receives the transitions.</param>
        /// <param name="stateCounter">The next unused state number; advanced by two before the operand is converted.</param>
        /// <param name="leftState">The state the fragment starts in.</param>
        /// <param name="rightState">The state the fragment ends in.</param>
        public static void Plus(RegExpression regExp, ref Automata <string> automaat, ref int stateCounter, int leftState, int rightState)
        {
            // Reserve the entry and exit state of the repeated part.
            int bodyEntry = stateCounter++;
            int bodyExit  = stateCounter++;

            string entryName = bodyEntry.ToString();
            string exitName  = bodyExit.ToString();

            // Epsilon transitions: enter the body, loop back, leave the body.
            automaat.AddTransition(new Transition <string>(leftState.ToString(), '$', entryName));
            automaat.AddTransition(new Transition <string>(exitName, '$', entryName));
            automaat.AddTransition(new Transition <string>(exitName, '$', rightState.ToString()));

            // Build the repeated part between the two reserved states.
            Convert(regExp.left, ref automaat, ref stateCounter, bodyEntry, bodyExit);
        }
        /// <summary>
        /// Builds an automaton for a regular expression. State "0" is defined as the
        /// start state and state "1" as the final state; the expression is converted
        /// between those two, with new states numbered from 2 upwards.
        /// </summary>
        /// <param name="regExpression">The expression to convert.</param>
        /// <returns>
        /// The automaton, with <c>symbols</c> set to the sorted set of symbols that occur
        /// on its transitions, excluding the epsilon symbol '$'.
        /// </returns>
        public static Automata <string> RegExpToNDFA(RegExpression regExpression)
        {
            var result = new Automata <string>();

            result.DefineAsStartState("0");
            result.DefineAsFinalState("1");

            // States 0 and 1 are taken by the start and final state.
            int nextFreeState = 2;
            Convert(regExpression, ref result, ref nextFreeState, 0, 1);

            // Collect the symbols used on the transitions.
            var usedSymbols = result.transitions.Distinct().Select(transition => transition.GetSymbol()).ToList();
            result.symbols = new SortedSet <char>(usedSymbols);

            // Epsilon is not a symbol of the alphabet.
            result.symbols.Remove('$');
            return result;
        }
        /// <summary>
        /// Converts an <c>OR</c> node. Four new states are taken: an entry and exit
        /// state for each alternative. Epsilon ('$') transitions lead from
        /// <paramref name="leftState"/> to both entry states and from both exit states
        /// to <paramref name="rightState"/>; each operand is built between its own pair.
        /// </summary>
        /// <param name="regExp">The <c>OR</c> node with a left and a right operand.</param>
        /// <param name="automaat">The automaton that receives the transitions.</param>
        /// <param name="stateCounter">The next unused state number; advanced by four before the operands are converted.</param>
        /// <param name="leftState">The state the fragment starts in.</param>
        /// <param name="rightState">The state the fragment ends in.</param>
        public static void Or(RegExpression regExp, ref Automata <string> automaat, ref int stateCounter, int leftState, int rightState)
        {
            // Reserve entry/exit states for the left and the right alternative.
            int leftEntry  = stateCounter++;
            int leftExit   = stateCounter++;
            int rightEntry = stateCounter++;
            int rightExit  = stateCounter++;

            string source = leftState.ToString();
            string target = rightState.ToString();

            // Epsilon transitions that fork into and join out of the alternatives.
            automaat.AddTransition(new Transition <string>(source, '$', leftEntry.ToString()));
            automaat.AddTransition(new Transition <string>(source, '$', rightEntry.ToString()));
            automaat.AddTransition(new Transition <string>(leftExit.ToString(), '$', target));
            automaat.AddTransition(new Transition <string>(rightExit.ToString(), '$', target));

            // Build each alternative between its own pair of states.
            Convert(regExp.left, ref automaat, ref stateCounter, leftEntry, leftExit);
            Convert(regExp.right, ref automaat, ref stateCounter, rightEntry, rightExit);
        }
Example #14
0
        /// <summary>
        /// Builds the regular expressions used as test data, combining parsed
        /// terminals through the <c>Or</c>, <c>Star</c>, <c>Plus</c> and <c>Dot</c> methods.
        /// </summary>
        public TestRegExp()
        {
            // Single-letter building blocks
            a = new RegExpression("a");
            b = new RegExpression("b");

            // all: "(a|b)*"
            RegExpression aOrB = a.Or(b);
            all = aOrB.Star();

            // expr1: "baa"
            expr1 = new RegExpression("baa");

            // expr2: "bb"
            expr2 = new RegExpression("bb");

            // expr3: "baa | bb"
            expr3 = expr1.Or(expr2);

            // expr4: "(baa | bb)+"
            expr4 = expr3.Plus();

            // expr5: "(baa | bb)+ (a|b)*"
            expr5 = expr4.Dot(all);
        }
        /// <summary>
        /// Parses a textual regular expression and appends it to <paramref name="regex"/>.
        /// </summary>
        /// <remarks>
        /// Spaces are removed first. The characters <c>+ * | ( )</c> are operators; every
        /// other run of characters is one terminal.
        /// <list type="bullet">
        /// <item>A bracketed group is parsed recursively; a <c>+</c> or <c>*</c> directly after
        /// the closing bracket applies to the group only. The group replaces
        /// <paramref name="regex"/> when that is still the empty terminal, otherwise it is
        /// concatenated with <c>Dot</c>.</item>
        /// <item>A <c>+</c> or <c>*</c> elsewhere wraps the whole expression built so far.</item>
        /// <item>A <c>|</c> combines the expression built so far with the next terminal found
        /// after it (operator characters in between are skipped).</item>
        /// </list>
        /// NOTE(review): a bare terminal that follows an already populated expression
        /// (for example the <c>b</c> in <c>a*b</c>) is ignored, as is a stray <c>)</c>.
        /// Concatenation of bare terminals is not implemented here.
        /// </remarks>
        /// <param name="regex">The expression built so far; pass a new instance to parse from scratch.
        /// Its <c>terminals</c> field is assigned when it is still the empty terminal.</param>
        /// <param name="regexString">The regex string.</param>
        /// <returns>The root of the resulting expression.</returns>
        /// <exception cref="ArgumentException">
        /// An opening bracket has no matching closing bracket, or a <c>|</c> is not
        /// followed by any terminal.
        /// </exception>
        public RegExpression StringToRegExpression(RegExpression regex, string regexString)
        {
            // Remove spaces
            regexString = regexString.Replace(" ", String.Empty);

            int i = 0;

            while (i < regexString.Length)
            {
                char currentChar = regexString[i];

                // For everything between ( )
                if (currentChar == '(')
                {
                    int closingBracketPosition = FindMatchingBracket(regexString, i);
                    if (closingBracketPosition < 0)
                    {
                        throw new ArgumentException(
                                  $"No matching ')' for the '(' at position {i} in \"{regexString}\".",
                                  nameof(regexString));
                    }

                    // Get the regex for the part between ()
                    string        between       = regexString.Substring(i + 1, closingBracketPosition - 1 - i);
                    RegExpression regExpression = StringToRegExpression(new RegExpression(), between);

                    // The whole group is consumed; continue after its closing bracket
                    // so the group's contents are never scanned a second time.
                    i = closingBracketPosition;

                    // A postfix operator directly after the bracket belongs to the group.
                    // Any other character is left for the next loop iteration.
                    int next = i + 1;
                    if (next < regexString.Length)
                    {
                        if (regexString[next] == '+')
                        {
                            regExpression = regExpression.Plus();
                            i             = next;
                        }
                        else if (regexString[next] == '*')
                        {
                            regExpression = regExpression.Star();
                            i             = next;
                        }
                    }

                    if (regex.terminals == "" && regex.o == Operator.ONE)
                    {
                        regex = regExpression;
                    }
                    else
                    {
                        regex = regex.Dot(regExpression);
                    }
                }

                // For all the operators not related to ()
                else if (currentChar == '+')
                {
                    regex = regex.Plus();
                }
                else if (currentChar == '*')
                {
                    regex = regex.Star();
                }
                else if (currentChar == '|')
                {
                    // The right-hand operand is the next terminal after the '|'.
                    // It is located by position, so it cannot drift out of sync
                    // with terminals or groups that were handled earlier.
                    int operandStart = i + 1;
                    while (operandStart < regexString.Length && IsRegexSeparator(regexString[operandStart]))
                    {
                        operandStart++;
                    }

                    string operand = ReadTerminalRun(regexString, operandStart);
                    if (operand.Length == 0)
                    {
                        throw new ArgumentException(
                                  $"The '|' at position {i} in \"{regexString}\" has no right-hand operand.",
                                  nameof(regexString));
                    }

                    regex = regex.Or(new RegExpression(operand));

                    // Skip the characters of the operand that was just consumed
                    i = operandStart + operand.Length - 1;
                }
                else if (currentChar != ')' && regex.terminals == "" && regex.o == Operator.ONE)
                {
                    // First terminal of the expression: take the complete run
                    string terminal = ReadTerminalRun(regexString, i);
                    regex.terminals = terminal;

                    // Skip rest of the terminal part in the loop
                    i += terminal.Length - 1;
                }

                // Anything else (a stray ')' or a terminal after a populated
                // expression) is ignored; see the remarks above.
                i++;
            }

            return(regex);
        }

        /// <summary>
        /// Finds the closing bracket that matches the opening bracket at <paramref name="openPosition"/>.
        /// </summary>
        /// <returns>The index of the matching ')', or -1 when there is none.</returns>
        private static int FindMatchingBracket(string text, int openPosition)
        {
            int nestedBrackets = 0;

            for (int j = openPosition + 1; j < text.Length; j++)
            {
                if (text[j] == '(')
                {
                    nestedBrackets++;
                }
                else if (text[j] == ')')
                {
                    if (nestedBrackets == 0)
                    {
                        return(j);
                    }
                    nestedBrackets--;
                }
            }

            return(-1);
        }

        /// <summary>
        /// Tells whether a character is one of the operator characters + * | ( ).
        /// </summary>
        private static bool IsRegexSeparator(char c)
        {
            return(c == '+' || c == '*' || c == '|' || c == '(' || c == ')');
        }

        /// <summary>
        /// Returns the run of non-operator characters that starts at <paramref name="start"/>
        /// (empty when <paramref name="start"/> is at the end of the text or on an operator).
        /// </summary>
        private static string ReadTerminalRun(string text, int start)
        {
            int end = start;

            while (end < text.Length && !IsRegexSeparator(text[end]))
            {
                end++;
            }

            return(text.Substring(start, end - start));
        }