Ejemplo n.º 1
0
        /// <summary>
        /// Builds the NDFA that corresponds to the given <see cref="RegExp"/>.
        /// </summary>
        /// <remarks>
        /// The alphabet consists of every character of the expression's string form that
        /// <c>IsUsableCharacter</c> accepts. States and transitions are taken from the part
        /// produced by <c>GenerateThompsonPart</c> for that same string.
        /// </remarks>
        /// <param name="expressionToTranslate">The <see cref="RegExp"/> to translate.</param>
        /// <returns>
        /// The resulting <see cref="Automata{string}"/>, or <c>null</c> when the generated part
        /// compares equal to a freshly constructed <see cref="ThompsonPart"/>.
        /// </returns>
        public Automata<string> GenerateNDFA(RegExp expressionToTranslate)
        {
            string pattern = expressionToTranslate.ToString();

            // Collect the alphabet: only characters accepted by IsUsableCharacter count as symbols.
            var symbols = new HashSet<char>();
            for (int position = 0; position < pattern.Length; position++)
            {
                char candidate = pattern[position];
                if (IsUsableCharacter(candidate))
                {
                    symbols.Add(candidate);
                }
            }

            ThompsonPart thompson = GenerateThompsonPart(pattern);
            if (thompson.Equals(new ThompsonPart()))
            {
                return null;
            }

            // Copy the Thompson construction into an automaton over the collected alphabet.
            var automaton = new Automata<string>(symbols.ToArray());
            foreach (Transition<string> edge in thompson.transitions)
            {
                automaton.AddTransition(edge);
            }

            automaton.DefineAsStartState(thompson.startState);
            automaton.DefineAsFinalState(thompson.finalState);
            return automaton;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Turns a run of plain characters into a chain of transitions, one per character.
        /// </summary>
        /// <remarks>
        /// The first character produces the part returned by <c>GenerateSingleSymbolTransition</c>
        /// (q0 -> q1). Every following character extends the chain by exactly one new state:
        /// the n-th character (counting from zero) becomes the transition q{n} -> q{n+1}.
        /// The junction state between two characters already exists in the chain and is not
        /// added a second time, and the numbering does not depend on the size of the state
        /// collection, so the result is a connected chain regardless of whether that
        /// collection keeps duplicates.
        /// </remarks>
        /// <param name="symbols">The characters to translate.</param>
        /// <returns>
        /// A <see cref="ThompsonPart"/> holding the chained states and transitions, or a freshly
        /// constructed (empty) part when <paramref name="symbols"/> contains no characters.
        /// </returns>
        private ThompsonPart GenerateMultipleSymbolTransition(string symbols)
        {
            ThompsonPart generatedPart = new ThompsonPart();
            int symbolIndex = 0;

            foreach (char c in symbols)
            {
                if (symbolIndex == 0)
                {
                    // The first symbol defines the start state and the initial final state.
                    generatedPart = GenerateSingleSymbolTransition(c);
                }
                else
                {
                    // Continue from the current final state so the chain stays connected;
                    // only the target state is new.
                    string fromState = generatedPart.finalState;
                    string toState   = "q" + (symbolIndex + 1);

                    generatedPart.states.Add(toState);
                    generatedPart.transitions.Add(new Transition<string>(fromState, toState, c));
                    generatedPart.finalState = toState;
                }

                symbolIndex++;
            }

            return generatedPart;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the NDFA part for the asterisk (<c>*</c>) operator applied to the given substring.
        /// </summary>
        /// <remarks>
        /// The part generated for <paramref name="subString"/> is renumbered starting at <c>q2</c>,
        /// because <c>q0</c> and <c>q1</c> are used as the new start and final state. Four
        /// transitions without an identifier are then added: q0 -> inner start,
        /// inner final -> q1, inner final -> inner start, and q0 -> q1.
        /// </remarks>
        /// <param name="subString">The part of the regex the operator applies to.</param>
        /// <returns>A <see cref="ThompsonPart"/> that has all states and transitions.</returns>
        private ThompsonPart GenerateAsterixOperatorConstruction(string subString)
        {
            ThompsonPart wrapper = new ThompsonPart();
            ThompsonPart inner   = GenerateThompsonPart(subString);

            int expectedInnerCount = inner.states.Count;

            // Rename the inner states so they cannot clash with the wrapper's q0 and q1.
            var renamed = new Dictionary<string, string>();
            int nextIndex = 2;
            foreach (string oldName in inner.states)
            {
                string newName = "q" + nextIndex;
                nextIndex++;
                renamed.Add(oldName, newName);
                wrapper.states.Add(newName);
            }

            string innerStart = renamed[inner.startState];
            string innerFinal = renamed[inner.finalState];

            // Carry over the inner transitions under their new state names.
            foreach (Transition<string> edge in inner.transitions)
            {
                wrapper.transitions.Add(new Transition<string>(renamed[edge.FromState], renamed[edge.ToState], edge.Identifier));
            }

            if (inner.states.Count != expectedInnerCount)
            {
                throw new Exception("Mismatch in list sizes of left part");
            }

            // The wrapper's own states are registered after the renamed inner states.
            wrapper.startState = "q0";
            wrapper.finalState = "q1";
            wrapper.states.Add(wrapper.startState);
            wrapper.states.Add(wrapper.finalState);

            // Enter the inner part, leave it, repeat it, or skip it altogether.
            wrapper.transitions.Add(new Transition<string>("q0", innerStart));
            wrapper.transitions.Add(new Transition<string>(innerFinal, "q1"));
            wrapper.transitions.Add(new Transition<string>(innerFinal, innerStart));
            wrapper.transitions.Add(new Transition<string>(wrapper.startState, wrapper.finalState));

            return wrapper;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Creates the smallest NDFA part: one transition from <c>q0</c> to <c>q1</c> labelled with the symbol.
        /// </summary>
        /// <param name="symbol">The symbol that labels the transition.</param>
        /// <returns>
        /// A <see cref="ThompsonPart"/> with two states, one transition, the transition's source as
        /// start state and its target as final state.
        /// </returns>
        private ThompsonPart GenerateSingleSymbolTransition(char symbol)
        {
            var part = new ThompsonPart();
            var edge = new Transition<string>("q0", "q1", symbol);

            // State names are read back from the transition so both always agree.
            part.startState = edge.FromState;
            part.finalState = edge.ToState;

            part.states.Add(edge.FromState);
            part.states.Add(edge.ToState);
            part.transitions.Add(edge);

            return part;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Splits a regex around an operator and builds the matching Thompson construction.
        /// </summary>
        /// <param name="regex">The regex to translate.</param>
        /// <param name="regexOperator">The operator found: <c>.</c>, <c>|</c>, <c>+</c> or <c>*</c>.</param>
        /// <param name="operatorPosition">Index of the operator inside <paramref name="regex"/>; the text before it is the left operand, the text after it the right operand.</param>
        /// <param name="regexHasBrackets">When <c>true</c>, both operands are passed through <c>StripRegexOfOuterBrackets</c> before translation (the right one only if it is not empty).</param>
        /// <returns>
        /// The <see cref="ThompsonPart"/> for the operator, or a freshly constructed part when
        /// <paramref name="regexOperator"/> is none of the four handled operators.
        /// </returns>
        private ThompsonPart TranslateOperator(string regex, char regexOperator, int operatorPosition, bool regexHasBrackets)
        {
            // Everything before the operator; for the unary operators this is the whole operand.
            string leftOperand = regex.Substring(0, operatorPosition);

            // Everything after the operator, or empty when the operator is the last character.
            string rightOperand = "";
            int rightStart = operatorPosition + 1;
            if (rightStart < regex.Length)
            {
                rightOperand = regex.Substring(rightStart);
            }

            if (regexHasBrackets)
            {
                leftOperand = StripRegexOfOuterBrackets(leftOperand);
                if (rightOperand.Length != 0)
                {
                    rightOperand = StripRegexOfOuterBrackets(rightOperand);
                }
            }

            // '.' and '|' are binary and use both operands; '+' and '*' only use the left one.
            switch (regexOperator)
            {
            case '.':
                return GenerateDotOperatorConstruction(leftOperand, rightOperand);

            case '|':
                return GenerateOrOperatorConstruction(leftOperand, rightOperand);

            case '+':
                return GeneratePlusOperatorConstruction(leftOperand);

            case '*':
                return GenerateAsterixOperatorConstruction(leftOperand);

            default:
                return new ThompsonPart();
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Turns a given string into a part of the NDFA.
        /// </summary>
        /// <remarks>
        /// The string is scanned once from left to right. While scanning, the bracket depth is tracked
        /// and the first operator character (<c>.</c>, <c>|</c>, <c>+</c> or <c>*</c>) seen while no
        /// operator is recorded is remembered together with its position. <c>'$'</c> is the marker
        /// for "no operator recorded".
        /// Once a <c>(</c> has been seen, an operator recorded while inside a bracketed section is
        /// discarded again; an operator recorded outside brackets is handed to
        /// <c>TranslateOperator</c> right away. If no <c>(</c> has been seen, the decision is made at
        /// the last character: translate the recorded operator, or, if there is none, treat the
        /// whole string as plain symbols.
        /// </remarks>
        /// <param name="regexAsString">The string to translate. This can be the whole <see cref="RegExp"/> or a sub string</param>
        /// <returns>A single <see cref="ThompsonPart"/> containing all data or an empty thompson construction if the regex can't be parsed.</returns>
        private ThompsonPart GenerateThompsonPart(string regexAsString)
        {
            //Reject input for which the bracket check fails by returning an empty part.
            if (!CheckForEvenBrackets(regexAsString))
            {
                return(new ThompsonPart());
            }

            //Stays an empty part unless one of the branches below assigns a result (e.g. for an empty string the loop never runs).
            ThompsonPart generatedPart = new ThompsonPart();

            bool regexHasBrackets       = false;
            int  bracketBalance         = 0; //Number of currently open brackets: incremented on '(' and decremented on ')'.
            bool foundEnclosedSubstring = false;

            char regexOperator    = '$'; //'$' means that no operator is currently recorded.
            int  operatorPosition = -1;

            //Iterate over string, check for brackets
            for (int i = 0; i < regexAsString.Length; i++)
            {
                //Special character checks
                //An opening bracket starts an enclosed section. regexHasBrackets is never reset, so it stays true for the rest of the scan.
                if (regexAsString[i] == '(')
                {
                    foundEnclosedSubstring = true;
                    regexHasBrackets       = true;
                    bracketBalance++;
                }
                //A closing bracket ends the enclosed section only when it brings the balance back to zero.
                if (regexAsString[i] == ')')
                {
                    bracketBalance--;
                    if (bracketBalance == 0)
                    {
                        foundEnclosedSubstring = false;
                    }
                }

                //Record the operator and its position, but only while no operator is recorded yet (regexOperator == '$').
                if (regexAsString[i] == '.' && regexOperator == '$')
                {
                    regexOperator = '.'; operatorPosition = i;
                }
                if (regexAsString[i] == '|' && regexOperator == '$')
                {
                    regexOperator = '|'; operatorPosition = i;
                }
                if (regexAsString[i] == '+' && regexOperator == '$')
                {
                    regexOperator = '+'; operatorPosition = i;
                }
                if (regexAsString[i] == '*' && regexOperator == '$')
                {
                    regexOperator = '*'; operatorPosition = i;
                }

                if (regexHasBrackets) //A '(' has been seen at or before this position
                {
                    //An operator recorded while inside an enclosed section is discarded here.
                    //This also discards an operator recorded earlier when a later '(' opens a new section.
                    if (regexOperator != '$' && foundEnclosedSubstring)//This means that we have a contained operator
                    {
                        regexOperator    = '$';
                        operatorPosition = -1;
                    }
                    //NOTE(review): there is no break after translating, so this call is repeated on every following
                    //iteration in which an operator is still recorded, and an operator recorded later can overwrite
                    //generatedPart. Confirm whether that is intended.
                    else if (regexOperator != '$') //Translates the operator that has been found
                    {
                        generatedPart = TranslateOperator(regexAsString, regexOperator, operatorPosition, regexHasBrackets);
                    }
                    //NOTE(review): when brackets are present but no operator is ever recorded outside of them,
                    //nothing is assigned and the empty part is returned.
                }
                else if (i == regexAsString.Length - 1)//No bracket seen so far and this is the last character
                {
                    //Translates the operator that has been found when there are no brackets
                    if (regexOperator != '$')
                    {
                        generatedPart = TranslateOperator(regexAsString, regexOperator, operatorPosition, regexHasBrackets);
                    }
                    //We've found no brackets and operators, thus we can turn the characters into transitions
                    else
                    {
                        generatedPart = GenerateMultipleSymbolTransition(regexAsString);
                    }
                }
            }
            return(generatedPart);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds the NDFA part for the dot (concatenation) operator from two regex substrings.
        /// </summary>
        /// <remarks>
        /// Both operands are translated separately. Their states are then renumbered into one
        /// continuous range: the left part's states first, starting at <c>q0</c>, followed by the
        /// right part's states. The two parts are joined by a transition without an identifier
        /// from the left part's final state to the right part's start state.
        /// </remarks>
        /// <param name="leftSubstring">The substring that forms the first half of the concatenation.</param>
        /// <param name="rightSubstring">The substring that forms the second half of the concatenation.</param>
        /// <returns>
        /// A <see cref="ThompsonPart"/> that starts at the left part's start state and ends at the
        /// right part's final state.
        /// </returns>
        private ThompsonPart GenerateDotOperatorConstruction(string leftSubstring, string rightSubstring)
        {
            ThompsonPart combined = new ThompsonPart();
            ThompsonPart left     = GenerateThompsonPart(leftSubstring);
            ThompsonPart right    = GenerateThompsonPart(rightSubstring);

            int expectedLeftCount  = left.states.Count;
            int expectedRightCount = right.states.Count;

            // One running index for both halves keeps all state names unique.
            int nextIndex = 0;

            // Left half: rename states, then copy transitions under the new names.
            var leftNames = new Dictionary<string, string>();
            foreach (string oldName in left.states)
            {
                string newName = "q" + nextIndex;
                nextIndex++;
                leftNames.Add(oldName, newName);
                combined.states.Add(newName);
            }

            string leftStart = leftNames[left.startState];
            string leftFinal = leftNames[left.finalState];

            foreach (Transition<string> edge in left.transitions)
            {
                combined.transitions.Add(new Transition<string>(leftNames[edge.FromState], leftNames[edge.ToState], edge.Identifier));
            }

            if (left.states.Count != expectedLeftCount)
            {
                throw new Exception("Mismatch in list sizes of left part");
            }

            // Right half: same procedure, continuing the numbering where the left half stopped.
            var rightNames = new Dictionary<string, string>();
            foreach (string oldName in right.states)
            {
                string newName = "q" + nextIndex;
                nextIndex++;
                rightNames.Add(oldName, newName);
                combined.states.Add(newName);
            }

            string rightStart = rightNames[right.startState];
            string rightFinal = rightNames[right.finalState];

            foreach (Transition<string> edge in right.transitions)
            {
                combined.transitions.Add(new Transition<string>(rightNames[edge.FromState], rightNames[edge.ToState], edge.Identifier));
            }

            if (right.states.Count != expectedRightCount)
            {
                throw new Exception("Mismatch in list sizes of right part");
            }

            // Epsilon transition that glues the end of the left half to the start of the right half.
            combined.transitions.Add(new Transition<string>(leftFinal, rightStart));

            combined.startState = leftStart;
            combined.finalState = rightFinal;

            return combined;
        }