Ejemplo n.º 1
0
        /// <summary>
        /// Builds the optional ("?") form of this automaton.
        /// </summary>
        /// <returns>
        /// A copy of this NFA whose start state is additionally marked as an end state.
        /// </returns>
        public NFA maybe()
        {
            NFA optional = new NFA(this);
            int entry    = optional.startState;

            // With the start state accepting, the empty input is matched as well.
            optional.endStates.Add(entry);
            return optional;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses a bracketed character class such as <c>[abc]</c>, <c>[a-z]</c> or <c>[^x]</c>.
        /// </summary>
        /// <param name="str">The complete regex text.</param>
        /// <param name="loc">
        /// On entry, the index of the opening '['. On return, the index of the first
        /// character after the closing ']'.
        /// </param>
        /// <returns>The NFA built for the character class.</returns>
        /// <exception cref="RegexException">
        /// The class has no closing ']', ends in the middle of a range, or contains no members.
        /// </exception>
        private NFA parseAlternates(String str, ref int loc)
        {
            //str[loc] == '['
            loc++;
            bool exclusion = false;

            // Check the bound before peeking, so a regex ending in '[' is reported
            // by the loop below instead of failing on the indexer.
            if (loc < str.Length && str[loc] == '^')
            {
                exclusion = true;
                loc++;
            }
            NFA builtNFA = null;

            while (true)
            {
                // The bound must be tested before str[loc] is read; otherwise the
                // indexer throws first and this error can never be raised.
                if (loc >= str.Length)
                {
                    throw new RegexException("Regex has a [ without a matching ]");
                }
                if (str[loc] == ']')
                {
                    break;
                }
                NFA nextNFA;
                if (str[loc] == '\\')
                {
                    nextNFA = parseSpecial(str, ref loc);
                }
                else if (loc < str.Length - 1 && str[loc + 1] == '-')
                {
                    char start = str[loc];
                    loc += 2;
                    if (loc >= str.Length)
                    {
                        // The regex stops right after the '-' of a range.
                        throw new RegexException("Regex has a [ without a matching ]");
                    }
                    char          end   = str[loc];
                    HashSet<char> chars = new HashSet<char>();
                    // Count with an int: a char counter wraps around at char.MaxValue,
                    // so a range ending in '\uffff' would never terminate.
                    for (int code = start; code <= end; code++)
                    {
                        chars.Add((char)code);
                    }
                    nextNFA = new NFA(chars);
                    loc++;
                }
                else
                {
                    nextNFA = new NFA(str.Substring(loc, 1));
                    loc++;
                }
                if (builtNFA == null)
                {
                    builtNFA = nextNFA;
                }
                else
                {
                    builtNFA = builtNFA.or(nextNFA);
                }
            }
            if (builtNFA == null)
            {
                // "[]" and "[^]" have no members; fail with a regex error rather
                // than dereferencing null below.
                throw new RegexException("Regex has an empty [] character class");
            }
            if (exclusion)
            {
                builtNFA = builtNFA.complement();
            }
            // By De Morgan this is the intersection of builtNFA with new NFA(1),
            // the automaton parseOne uses for '.'.
            builtNFA = builtNFA.complement().or(new NFA(1).complement()).complement();
            loc++;
            //loc is first character after ]
            return builtNFA;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Copy constructor: initialises this automaton from <paramref name="second"/>
 /// without sharing its connection list or end-state set.
 /// </summary>
 /// <param name="second">The automaton to copy.</param>
 public NFA(NFA second)
 {
     // Plain values are copied as they are.
     this.startState  = second.startState;
     this.numStates   = second.numStates;
     this.hasEpsilons = second.hasEpsilons;

     // Collections are rebuilt so later edits to this instance do not touch the source.
     this.endStates   = new HashSet<int>(second.endStates);
     this.connections = second.connections.ConvertAll(source => source.copy());

     // Runs last, once every field above has been populated.
     this.calculateConnectionPerState();
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Parses <paramref name="str"/> from <paramref name="loc"/> to its end as a
        /// sequence of elements, concatenating them in order.
        /// </summary>
        /// <param name="str">The regex text.</param>
        /// <param name="loc">Current read position; equals <c>str.Length</c> or more on return.</param>
        /// <returns>The concatenation of every parsed element, seeded with the NFA for "".</returns>
        private NFA parseRegex(String str, ref int loc)
        {
            // Seed for the concatenation chain.
            NFA sequence = new NFA("");

            for (;;)
            {
                if (loc >= str.Length)
                {
                    return sequence;
                }
                NFA element = parseOne(str, ref loc);
                sequence = sequence.conc(element);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Returns a copy of this automaton with the end-state marking inverted:
        /// every state index in <c>[0, numStates)</c> that was not an end state
        /// becomes one, and every former end state stops being one.
        /// </summary>
        /// <returns>The copy with flipped end states.</returns>
        public NFA not()
        {
            NFA          inverted     = new NFA(this);
            HashSet<int> nonAccepting = new HashSet<int>();

            int state = 0;
            while (state < inverted.numStates)
            {
                bool wasAccepting = inverted.endStates.Contains(state);
                if (!wasAccepting)
                {
                    nonAccepting.Add(state);
                }
                state++;
            }

            inverted.endStates = nonAccepting;
            return inverted;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Parses a parenthesised group, including '|' alternation inside it.
        /// </summary>
        /// <param name="str">The complete regex text.</param>
        /// <param name="loc">
        /// On entry, the index of the opening '('. On return, the index of the first
        /// character after the matching ')'.
        /// </param>
        /// <returns>
        /// The union of the group's alternatives. An empty alternative, as in
        /// <c>(a|)</c> or <c>()</c>, is represented by <c>new NFA("")</c>.
        /// </returns>
        /// <exception cref="RegexException">The group has no closing ')'.</exception>
        private NFA parseParenthesis(string str, ref int loc)
        {
            //str[loc] == '('
            loc++;
            NFA totalNFA = null;    // union of the alternatives completed so far
            NFA builtNFA = null;    // concatenation for the alternative being read

            while (true)
            {
                // The bound must be tested before str[loc] is read; otherwise the
                // indexer throws first and this error can never be raised.
                if (loc >= str.Length)
                {
                    throw new RegexException("Regex has a ( without a matching )");
                }
                if (str[loc] == ')')
                {
                    break;
                }
                if (str[loc] == '|')
                {
                    // Close the current alternative. An alternative with no
                    // elements stands for the empty string.
                    NFA finished = builtNFA ?? new NFA("");
                    totalNFA = (totalNFA == null) ? finished : totalNFA.or(finished);
                    builtNFA = null;
                    loc++;
                    // Re-run the checks above so that ')' , '|' or the end of the
                    // input directly after '|' is not handed to parseOne as a literal.
                    continue;
                }
                NFA nextNFA = parseOne(str, ref loc);
                builtNFA = (builtNFA == null) ? nextNFA : builtNFA.conc(nextNFA);
            }

            // Fold in the last alternative (empty if nothing followed '(' or '|').
            NFA last = builtNFA ?? new NFA("");
            totalNFA = (totalNFA == null) ? last : totalNFA.or(last);
            loc++;
            //loc is first character after )
            return totalNFA;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds the Kleene-star ("*") form of this automaton on a copy.
        /// </summary>
        /// <returns>The starred copy, flagged as containing epsilon connections.</returns>
        public NFA closure()
        {
            NFA starred = new NFA(this);

            // A freshly added state becomes the entry point and leads into the previous one.
            int innerStart = starred.startState;
            int outerStart = starred.addState(false);
            starred.startState = outerStart;
            starred.connect(outerStart, innerStart, new EpsilonConnection(0, 0));

            // Loop back from the existing end states to the previous start state.
            // This is done before the fresh start state joins the end-state set.
            starred.addConnectionToEndStates(innerStart, new EpsilonConnection(0, 0));

            // The fresh start state is itself an end state.
            starred.endStates.Add(outerStart);
            starred.hasEpsilons = true;
            return starred;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Concatenates this automaton with <paramref name="second"/> on a copy.
        /// </summary>
        /// <param name="second">The automaton that follows this one.</param>
        /// <returns>
        /// A copy of this NFA extended with <paramref name="second"/>'s connections,
        /// whose end states are exactly those of <paramref name="second"/>.
        /// </returns>
        public NFA conc(NFA second)
        {
            NFA joined = new NFA(this);

            // Shift the copied indices by second's state count, then bring in
            // copies of second's connections.
            joined.incrementIndices(second.numStates);
            joined.connections.AddRange(second.connections.ConvertAll(edge => edge.copy()));
            joined.calculateConnectionPerState();

            // Bridge from the end states of the first part to the start of the second.
            joined.addConnectionToEndStates(second.startState, new EpsilonConnection(0, 0));

            // Only second's end states remain end states.
            joined.endStates   = new HashSet<int>(second.endStates);
            joined.hasEpsilons = true;
            return joined;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Parses a single regex element at <paramref name="loc"/> (a group, a character
        /// class, an escape, '.', or a literal character) together with an optional
        /// '*' or '?' that immediately follows it.
        /// </summary>
        /// <param name="str">The complete regex text.</param>
        /// <param name="loc">Read position; advanced past everything that was consumed.</param>
        /// <returns>The NFA for the element, with the suffix operator applied if present.</returns>
        private NFA parseOne(String str, ref int loc)
        {
            NFA atom;

            // Dispatch on the leading character.
            switch (str[loc])
            {
            case '(':
                atom = parseParenthesis(str, ref loc);
                break;

            case '[':
                atom = parseAlternates(str, ref loc);
                break;

            case '\\':
                atom = parseSpecial(str, ref loc);
                break;

            case '.':
                atom = new NFA(1);
                loc++;
                break;

            default:
                // Anything else is taken as a one-character literal.
                atom = new NFA(str.Substring(loc, 1));
                loc++;
                break;
            }

            // Nothing left to inspect for a suffix operator.
            if (loc >= str.Length)
            {
                return atom;
            }

            switch (str[loc])
            {
            case '*':
                atom = atom.closure();
                loc++;
                break;

            case '?':
                atom = atom.maybe();
                loc++;
                break;
            }
            return atom;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds the alternation of this automaton and <paramref name="second"/> on a copy.
        /// </summary>
        /// <param name="second">The other alternative.</param>
        /// <returns>
        /// A copy of this NFA that also holds <paramref name="second"/>'s end states and
        /// connections, with an epsilon connection from its start state to
        /// <paramref name="second"/>'s start state.
        /// </returns>
        public NFA or(NFA second)
        {
            NFA merged = new NFA(this);

            // Shift the copied indices by second's state count.
            merged.incrementIndices(second.numStates);

            // Either side's end states are end states of the result.
            merged.endStates.UnionWith(second.endStates);

            // Bring in copies of second's connections.
            merged.connections.AddRange(second.connections.ConvertAll(edge => edge.copy()));

            // Branch from the existing start state into second's start state.
            merged.connect(merged.startState, second.startState, new EpsilonConnection(-1, -1));
            merged.hasEpsilons = true;
            merged.calculateConnectionPerState();
            return merged;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Parses a backslash escape and returns the NFA for the single character it denotes.
        /// </summary>
        /// <param name="str">The complete regex text.</param>
        /// <param name="loc">
        /// On entry, the index of the backslash. On return, the index of the first
        /// character after the escape.
        /// </param>
        /// <returns>The NFA matching the escaped character.</returns>
        /// <exception cref="RegexException">
        /// The regex ends right after the backslash, or the escaped character is not supported.
        /// </exception>
        private NFA parseSpecial(String str, ref int loc)
        {
            //str[loc] == '\\'
            loc++;
            if (loc >= str.Length)
            {
                throw new RegexException("Regex ends with a special character marker");
            }

            char   escaped = str[loc];
            string literal;

            switch (escaped)
            {
            // Control-character escapes.
            case 'n':
                literal = "\n";
                break;

            case 't':
                literal = "\t";
                break;

            case 'r':
                literal = "\r";
                break;

            // Characters that simply stand for themselves once escaped.
            case '|':
            case '\\':
            case '\'':
            case '\"':
            case '*':
            case '(':
            case ')':
            case '[':
            case ']':
            case '.':
            case '?':
            case '^':
            case '-':
                literal = escaped.ToString();
                break;

            default:
                throw new RegexException("Unknown escape character in regex");
            }

            NFA result = new NFA(literal);
            loc++;
            //loc is first character after special
            return result;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Compiles the regex text <paramref name="str"/> into the <c>nfa</c> field.
        /// </summary>
        /// <param name="str">The regex text to parse.</param>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public Regex(String str)
        {
            // Reject null here so the caller gets a clear argument error instead of
            // a NullReferenceException from inside the parser.
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }

            // Parsing starts at the first character; parseRegex advances loc itself.
            int loc = 0;

            nfa = parseRegex(str, ref loc);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Builds a new automaton in which every state stands for a set of states of a
        /// de-epsilonated copy of this one (a subset construction), and then adds a
        /// "garbage" state that receives the characters a state does not otherwise handle.
        /// This instance is not modified; all work happens on a copy and on the result.
        /// </summary>
        /// <returns>The newly built automaton.</returns>
        public NFA toDFA()
        {
            NFA temp = new NFA(this);
            NFA ret  = new NFA();

            // NOTE(review): presumably removes epsilon connections from the copy - confirm in deEpsilonate.
            temp.deEpsilonate();
            // newStates[k] is the set of temp states represented by state k of ret.
            // NOTE(review): this relies on ret.addState returning indices that line up with
            // positions in newStates (i.e. new NFA() already owning a state 0) - confirm.
            List <HashSet <int> > newStates = new List <HashSet <int> >();

            // Index 0 holds the empty set, so an empty reachable set resolves to state 0.
            newStates.Add(new HashSet <int>());
            // Work list of state sets whose outgoing connections still have to be built.
            Queue <HashSet <int> > statesThatNeedProcessing = new Queue <HashSet <int> >();
            HashSet <int>          startState = new HashSet <int>();

            // The initial set contains only temp's start state; it is an end state of
            // ret exactly when temp's start state is an end state.
            startState.Add(temp.startState);
            ret.startState = ret.addState(temp.endStates.Contains(temp.startState));
            newStates.Add(startState);
            statesThatNeedProcessing.Enqueue(startState);
            while (statesThatNeedProcessing.Count > 0)
            {
                HashSet <int>     newState                 = statesThatNeedProcessing.Dequeue();
                // Position of the dequeued set in newStates; used below as its state index in ret.
                int               hasState                 = newStates.FindIndex(h => newState.SetEquals(h));
                List <Connection> allConnections           = new List <Connection>();
                List <Connection> uninterestingConnections = new List <Connection>();
                // Filled below but never read again in this method.
                HashSet <int>     anyReachable             = new HashSet <int>();
                // Gather the connections registered for every member state of the set.
                foreach (int i in newState)
                {
                    allConnections.AddRange(temp.connectionPerState[i]);
                }
                // Characters that at least one connection names explicitly.
                HashSet <char> interestingChars = new HashSet <char>();
                foreach (Connection c in allConnections)
                {
                    if (c is AnythingConnection)
                    {
                        anyReachable.Add(c.end);
                    }
                    // Wildcard-style connections are remembered for the "every other character" case.
                    if (c is AnythingConnection || c is ConnectionAnythingBut)
                    {
                        uninterestingConnections.Add(c);
                    }
                    interestingChars.UnionWith(c.interestingCharacters());
                }
                // One exact-character connection per explicitly named character.
                foreach (char ch in interestingChars)
                {
                    // Every target reached by a connection that accepts ch.
                    HashSet <int> reachable = new HashSet <int>();
                    foreach (Connection c in allConnections)
                    {
                        if (c.accepts(ch))
                        {
                            reachable.Add(c.end);
                        }
                    }
                    // Reuse the state for this target set if one exists, otherwise create and queue it.
                    int newAddIndex = newStates.FindIndex(h => reachable.SetEquals(h));
                    if (newAddIndex == -1)
                    {
                        // The new state is an end state if any member is an end state of temp.
                        bool endState = false;
                        foreach (int i in reachable)
                        {
                            if (temp.endStates.Contains(i))
                            {
                                endState = true;
                                break;
                            }
                        }
                        newAddIndex = ret.addState(endState);
                        newStates.Add(reachable);
                        statesThatNeedProcessing.Enqueue(reachable);
                    }
                    ret.connect(hasState, newAddIndex, new ConnectionExactCharacter(hasState, newAddIndex, ch));
                }
                // Characters not named explicitly follow all wildcard-style connections at once.
                if (uninterestingConnections.Count > 0)
                {
                    HashSet <int> reachable = new HashSet <int>();
                    foreach (Connection c in uninterestingConnections)
                    {
                        reachable.Add(c.end);
                    }
                    // Same find-or-create step as for the explicit characters above.
                    int newAddIndex = newStates.FindIndex(h => reachable.SetEquals(h));
                    if (newAddIndex == -1)
                    {
                        bool endState = false;
                        foreach (int i in reachable)
                        {
                            if (temp.endStates.Contains(i))
                            {
                                endState = true;
                                break;
                            }
                        }
                        newAddIndex = ret.addState(endState);
                        newStates.Add(reachable);
                        statesThatNeedProcessing.Enqueue(reachable);
                    }
                    // Explicit characters already have their own connections, so exclude them here.
                    if (interestingChars.Count > 0)
                    {
                        ret.connect(hasState, newAddIndex, new ConnectionAnythingBut(hasState, newAddIndex, interestingChars));
                    }
                    else
                    {
                        ret.connect(hasState, newAddIndex, new AnythingConnection(hasState, newAddIndex));
                    }
                }
            }
            // Non-end state that receives whatever the other states do not handle.
            int garbageState = ret.addState(false);

            ret.calculateConnectionPerState();
            // State 0 is skipped; the loop covers the states added above and the garbage state itself.
            for (int i = 1; i < ret.numStates; i++)
            {
                // Union of acceptSet() over this state's connections.
                // NOTE(review): presumably the characters already handled explicitly - confirm in acceptSet.
                HashSet <char> unUsableConnections = new HashSet <char>();
                foreach (Connection c in ret.connectionPerState[i])
                {
                    unUsableConnections.UnionWith(c.acceptSet());
                }
                // canUseAll stays true while no ConnectionAnythingBut has been seen;
                // canUseAny becomes false as soon as an AnythingConnection is seen.
                bool           canUseAll         = true;
                bool           canUseAny         = true;
                HashSet <char> usableConnections = new HashSet <char>();
                foreach (Connection c in ret.connectionPerState[i])
                {
                    if (c is AnythingConnection)
                    {
                        // An AnythingConnection is present, so no garbage connection is added for this state.
                        canUseAny = false;
                        break;
                    }
                    if (c is ConnectionAnythingBut)
                    {
                        // Keep the intersection of the forbidden sets of all ConnectionAnythingBut connections.
                        if (canUseAll)
                        {
                            usableConnections = new HashSet <char>(((ConnectionAnythingBut)c).forbidden);
                            canUseAll         = false;
                        }
                        else
                        {
                            usableConnections.IntersectWith(((ConnectionAnythingBut)c).forbidden);
                        }
                    }
                }
                if (canUseAny)
                {
                    if (!canUseAll)
                    {
                        // Send the commonly forbidden characters, minus those in unUsableConnections, to the garbage state.
                        usableConnections.ExceptWith(unUsableConnections);
                        if (usableConnections.Count > 0)
                        {
                            ret.connect(i, garbageState, new ConnectionAnyOf(i, garbageState, usableConnections));
                        }
                    }
                    else
                    {
                        // No wildcard-style connection at all: everything outside unUsableConnections goes to the garbage state.
                        if (unUsableConnections.Count > 0)
                        {
                            ret.connect(i, garbageState, new ConnectionAnythingBut(i, garbageState, unUsableConnections));
                        }
                        else
                        {
                            ret.connect(i, garbageState, new AnythingConnection(i, garbageState));
                        }
                    }
                }
            }
            // Unconditional self-loop on the garbage state.
            ret.connect(garbageState, garbageState, new AnythingConnection(garbageState, garbageState));
            ret.calculateConnectionPerState();
            return(ret);
        }