/// <summary>
/// Builds the "optional" form of this NFA: a copy in which the start state is
/// additionally registered as an end state.
/// </summary>
/// <returns>A new NFA built from a copy of this one.</returns>
public NFA maybe() {
    NFA optional = new NFA(this);

    // Marking the entry state as an end state lets the copy stop before consuming anything.
    int entry = optional.startState;
    optional.endStates.Add(entry);

    return optional;
}
/// <summary>
/// Parses a bracketed character class such as <c>[abc]</c>, <c>[a-z]</c> or <c>[^x]</c>.
/// </summary>
/// <param name="str">The regex text being parsed.</param>
/// <param name="loc">
/// On entry, the index of the opening '['. On return, the index of the first
/// character after the closing ']'.
/// </param>
/// <returns>An NFA combining every alternative listed inside the brackets.</returns>
/// <exception cref="RegexException">
/// The class is not closed by a ']' before the end of <paramref name="str"/>, or it
/// contains no alternatives at all.
/// </exception>
private NFA parseAlternates(String str, ref int loc) {
    const string unterminated = "Regex has a [ without a matching ]";

    //str[loc] == '['
    loc++;
    if (loc >= str.Length) {
        throw new RegexException(unterminated);
    }

    bool exclusion = false;
    if (str[loc] == '^') {
        exclusion = true;
        loc++;
    }

    NFA builtNFA = null;
    while (true) {
        // The bounds check must come BEFORE str[loc] is read; otherwise running off
        // the end surfaces as an IndexOutOfRangeException instead of a RegexException.
        if (loc >= str.Length) {
            throw new RegexException(unterminated);
        }
        if (str[loc] == ']') {
            break;
        }

        NFA nextNFA;
        if (str[loc] == '\\') {
            nextNFA = parseSpecial(str, ref loc);
        } else if (loc < str.Length - 1 && str[loc + 1] == '-') {
            // Range of the form "a-z": str[loc] is the first character, str[loc + 2] the last.
            char start = str[loc];
            loc += 2;
            if (loc >= str.Length) {
                throw new RegexException(unterminated);
            }
            char end = str[loc];

            HashSet<char> chars = new HashSet<char>();
            // Count with an int: a char counter wraps around at char.MaxValue and
            // would never terminate when the range ends on that character.
            for (int code = start; code <= end; code++) {
                chars.Add((char)code);
            }
            nextNFA = new NFA(chars);
            loc++;
        } else {
            nextNFA = new NFA(str.Substring(loc, 1));
            loc++;
        }

        if (builtNFA == null) {
            builtNFA = nextNFA;
        } else {
            builtNFA = builtNFA.or(nextNFA);
        }
    }

    // "[]" and "[^]" contain no alternatives; report that instead of dereferencing null below.
    if (builtNFA == null) {
        throw new RegexException("Regex has an empty [] character class");
    }

    if (exclusion) {
        builtNFA = builtNFA.complement();
    }

    // complement(complement(A) or complement(B)) keeps only what both A and B accept.
    // B is new NFA(1), the same NFA parseOne builds for '.'.
    builtNFA = builtNFA.complement().or(new NFA(1).complement()).complement();

    loc++;
    //loc is first character after ]
    return builtNFA;
}
/// <summary>
/// Copy constructor. Builds a new NFA from <paramref name="second"/> without sharing
/// its connection list or end-state set, so later edits do not touch the original.
/// </summary>
/// <param name="second">The NFA to copy.</param>
public NFA(NFA second) {
    // Plain values are copied directly.
    this.numStates = second.numStates;
    this.startState = second.startState;
    this.hasEpsilons = second.hasEpsilons;

    // Collections are duplicated; every connection is copied individually.
    this.endStates = new HashSet<int>(second.endStates);
    this.connections = second.connections.ConvertAll(original => original.copy());

    // Rebuild the per-state lookup from the freshly copied connections.
    this.calculateConnectionPerState();
}
/// <summary>
/// Parses <paramref name="str"/> from <paramref name="loc"/> to its end, concatenating
/// the NFA of each element returned by <c>parseOne</c> in order.
/// </summary>
/// <param name="str">The regex text being parsed.</param>
/// <param name="loc">Current parse position; advanced to <c>str.Length</c> on return.</param>
/// <returns>The concatenation of all parsed elements, seeded with <c>new NFA("")</c>.</returns>
private NFA parseRegex(String str, ref int loc) {
    NFA sequence = new NFA("");

    while (loc < str.Length) {
        sequence = sequence.conc(parseOne(str, ref loc));
    }

    return sequence;
}
/// <summary>
/// Returns a copy of this NFA whose end states are inverted: every state index in
/// <c>[0, numStates)</c> that was not an end state becomes one, and vice versa.
/// </summary>
/// <returns>A new NFA built from a copy of this one.</returns>
public NFA not() {
    NFA flipped = new NFA(this);
    HashSet<int> accepting = new HashSet<int>();

    int state = 0;
    while (state < flipped.numStates) {
        bool wasAccepting = flipped.endStates.Contains(state);
        if (wasAccepting == false) {
            accepting.Add(state);
        }
        state++;
    }

    flipped.endStates = accepting;
    return flipped;
}
/// <summary>
/// Parses a parenthesised group such as <c>(ab|c)</c>: elements are concatenated
/// within a branch and the branches, separated by '|', are combined with <c>or</c>.
/// </summary>
/// <param name="str">The regex text being parsed.</param>
/// <param name="loc">
/// On entry, the index of the opening '('. On return, the index of the first
/// character after the closing ')'.
/// </param>
/// <returns>
/// The NFA for the whole group. An empty branch (as in <c>()</c>, <c>(a|)</c> or
/// <c>(|a)</c>) is represented by <c>new NFA("")</c>.
/// </returns>
/// <exception cref="RegexException">
/// The group is not closed by a ')' before the end of <paramref name="str"/>.
/// </exception>
private NFA parseParenthesis(string str, ref int loc) {
    const string unterminated = "Regex has a ( without a matching )";

    //str[loc] == '('
    loc++;

    NFA totalNFA = null; // union of the branches completed so far
    NFA builtNFA = null; // concatenation of the elements of the current branch

    while (true) {
        // The bounds check must come BEFORE str[loc] is read; otherwise running off
        // the end surfaces as an IndexOutOfRangeException instead of a RegexException.
        if (loc >= str.Length) {
            throw new RegexException(unterminated);
        }

        char current = str[loc];
        if (current == ')') {
            break;
        }

        if (current == '|') {
            // Close the current branch; a branch with no elements matches the empty string.
            NFA branch = builtNFA ?? new NFA("");
            totalNFA = (totalNFA == null) ? branch : totalNFA.or(branch);
            builtNFA = null;
            loc++;
            // Re-enter the loop so that a ')' , another '|' or the end of input right
            // after this '|' is recognised instead of being handed to parseOne.
            continue;
        }

        NFA nextNFA = parseOne(str, ref loc);
        builtNFA = (builtNFA == null) ? nextNFA : builtNFA.conc(nextNFA);
    }

    NFA lastBranch = builtNFA ?? new NFA("");
    totalNFA = (totalNFA == null) ? lastBranch : totalNFA.or(lastBranch);

    loc++;
    //loc is first character after )
    return totalNFA;
}
/// <summary>
/// Builds the repetition ('*') form of this NFA on a copy: a fresh start state is
/// added in front, and every end state is linked back to the previous start state.
/// </summary>
/// <returns>A new NFA built from a copy of this one.</returns>
public NFA closure() {
    NFA star = new NFA(this);

    // Put a brand-new, initially non-accepting state in front of the previous start state.
    int innerStart = star.startState;
    int outerStart = star.addState(false);
    star.startState = outerStart;
    star.connect(outerStart, innerStart, new EpsilonConnection(0, 0));

    // Loop back: each current end state gets an epsilon connection to the previous start state.
    // This must happen before the new start state joins the end states below.
    star.addConnectionToEndStates(innerStart, new EpsilonConnection(0, 0));

    // The new start state is itself an end state, so zero repetitions are accepted.
    star.endStates.Add(outerStart);

    star.hasEpsilons = true;
    return star;
}
/// <summary>
/// Concatenates this NFA with <paramref name="second"/>: the result contains copies of
/// both, with every end state of the first linked by an epsilon connection to the
/// start state of <paramref name="second"/>.
/// </summary>
/// <param name="second">The NFA that follows this one.</param>
/// <returns>A new NFA whose end states are those of <paramref name="second"/>.</returns>
public NFA conc(NFA second) {
    NFA joined = new NFA(this);

    // Make room for second's states, then bring in copies of its connections.
    joined.incrementIndices(second.numStates);
    joined.connections.AddRange(second.connections.ConvertAll(edge => edge.copy()));
    joined.calculateConnectionPerState();

    // Bridge: the first part's end states lead into second's start state.
    joined.addConnectionToEndStates(second.startState, new EpsilonConnection(0, 0));

    // Only second's end states remain end states of the concatenation.
    joined.endStates = new HashSet<int>(second.endStates);
    joined.hasEpsilons = true;

    return joined;
}
/// <summary>
/// Parses a single regex element starting at <paramref name="loc"/> — a group, a
/// character class, an escape, '.', or a literal character — followed by an optional
/// '*' or '?' suffix.
/// </summary>
/// <param name="str">The regex text being parsed.</param>
/// <param name="loc">Current parse position; advanced past the element and its suffix.</param>
/// <returns>The NFA for the parsed element.</returns>
private NFA parseOne(String str, ref int loc) {
    NFA atom;

    switch (str[loc]) {
        case '(':
            atom = parseParenthesis(str, ref loc);
            break;
        case '[':
            atom = parseAlternates(str, ref loc);
            break;
        case '\\':
            atom = parseSpecial(str, ref loc);
            break;
        case '.':
            atom = new NFA(1);
            loc++;
            break;
        default:
            // Any other character stands for itself.
            atom = new NFA(str.Substring(loc, 1));
            loc++;
            break;
    }

    // Nothing left to inspect: there cannot be a suffix.
    if (loc >= str.Length) {
        return atom;
    }

    switch (str[loc]) {
        case '*':
            atom = atom.closure();
            loc++;
            break;
        case '?':
            atom = atom.maybe();
            loc++;
            break;
    }

    return atom;
}
/// <summary>
/// Builds the alternation of this NFA and <paramref name="second"/>: the result holds
/// copies of both, keeps the end states of both, and links this NFA's start state to
/// the start state of <paramref name="second"/> with an epsilon connection.
/// </summary>
/// <param name="second">The alternative NFA.</param>
/// <returns>A new NFA built from a copy of this one.</returns>
public NFA or(NFA second) {
    NFA union = new NFA(this);

    // Make room for second's states and adopt its end states.
    union.incrementIndices(second.numStates);
    union.endStates.UnionWith(second.endStates);

    // Bring in copies of second's connections.
    union.connections.AddRange(second.connections.ConvertAll(edge => edge.copy()));

    // From the shared start state, second's start state is reachable without input.
    union.connect(union.startState, second.startState, new EpsilonConnection(-1, -1));
    union.hasEpsilons = true;

    union.calculateConnectionPerState();
    return union;
}
/// <summary>
/// Parses a backslash escape. <c>\n</c>, <c>\t</c> and <c>\r</c> produce the matching
/// control character; <c>| \ ' " * ( ) [ ] . ? ^ -</c> produce themselves as literals.
/// </summary>
/// <param name="str">The regex text being parsed.</param>
/// <param name="loc">
/// On entry, the index of the backslash. On return, the index of the first character
/// after the escaped character.
/// </param>
/// <returns>An NFA built from the one-character string the escape stands for.</returns>
/// <exception cref="RegexException">
/// The backslash is the last character of <paramref name="str"/>, or the character
/// after it is not a recognised escape.
/// </exception>
private NFA parseSpecial(String str, ref int loc) {
    //str[loc] == '\\'
    loc++;
    if (loc >= str.Length) {
        throw new RegexException("Regex ends with a special character marker");
    }

    char escaped = str[loc];
    string literal;
    switch (escaped) {
        // Escapes that name a control character.
        case 'n':
            literal = "\n";
            break;
        case 't':
            literal = "\t";
            break;
        case 'r':
            literal = "\r";
            break;

        // Escapes that simply strip the special meaning from the character.
        case '|':
        case '\\':
        case '\'':
        case '"':
        case '*':
        case '(':
        case ')':
        case '[':
        case ']':
        case '.':
        case '?':
        case '^':
        case '-':
            literal = escaped.ToString();
            break;

        default:
            throw new RegexException("Unknown escape character in regex");
    }

    NFA ret = new NFA(literal);
    loc++;
    //loc is first character after special
    return ret;
}
/// <summary>
/// Creates a regex by parsing <paramref name="str"/> from its first character and
/// storing the resulting NFA in <c>nfa</c>.
/// </summary>
/// <param name="str">The regex text to parse.</param>
public Regex(String str) {
    int cursor = 0;
    nfa = parseRegex(str, ref cursor);
}
/// <summary>
/// Builds a new NFA in which every state stands for a SET of states of a copy of this
/// NFA (the copy has deEpsilonate() applied first), with an extra non-accepting
/// "garbage" state that receives the characters a state has no connection for.
/// </summary>
/// <returns>The newly built NFA; this instance is only copied, not modified.</returns>
// Outline of the code below:
//  1. newStates[k] is the set of copied-NFA states that state k of the result stands for.
//     Index 0 is seeded with the empty set before the start state is added.
//     NOTE(review): this presumably mirrors a state 0 that `new NFA()` already contains — confirm.
//  2. Work-queue loop: for each pending set, gather all outgoing connections of its members.
//     - For every character reported by interestingCharacters(), collect the ends of the
//       connections that accept it, look that set up in newStates (creating a new state if it
//       is unseen; it is an end state when any member is an end state of the copy), and add a
//       ConnectionExactCharacter.
//     - If any AnythingConnection / ConnectionAnythingBut exists, the ends of all of them form
//       one more target set, connected with ConnectionAnythingBut(interestingChars), or with
//       AnythingConnection when there are no interesting characters.
//     - anyReachable is filled but never read in this method.
//  3. Garbage pass, for states 1 .. numStates-1: skip a state that already has an
//     AnythingConnection; otherwise connect to garbageState whatever is left over:
//       * with ConnectionAnythingBut connections present: the intersection of their forbidden
//         sets minus the union of all acceptSet() values (ConnectionAnyOf, only if non-empty);
//       * without them: ConnectionAnythingBut(union of acceptSet()), or AnythingConnection when
//         that union is empty.
//     NOTE(review): state 0 is skipped by this pass — confirm that is intended.
//     NOTE(review): garbageState is added before the pass; if addState() grows numStates, the
//     pass also gives garbageState an AnythingConnection self-loop and the connect() after the
//     pass adds a second one — confirm whether the duplicate matters.
public NFA toDFA() { NFA temp = new NFA(this); NFA ret = new NFA(); temp.deEpsilonate(); List <HashSet <int> > newStates = new List <HashSet <int> >(); newStates.Add(new HashSet <int>()); Queue <HashSet <int> > statesThatNeedProcessing = new Queue <HashSet <int> >(); HashSet <int> startState = new HashSet <int>(); startState.Add(temp.startState); ret.startState = ret.addState(temp.endStates.Contains(temp.startState)); newStates.Add(startState); statesThatNeedProcessing.Enqueue(startState); while (statesThatNeedProcessing.Count > 0) { HashSet <int> newState = statesThatNeedProcessing.Dequeue(); int hasState = newStates.FindIndex(h => newState.SetEquals(h)); List <Connection> allConnections = new List <Connection>(); List <Connection> uninterestingConnections = new List <Connection>(); HashSet <int> anyReachable = new HashSet <int>(); foreach (int i in newState) { allConnections.AddRange(temp.connectionPerState[i]); } HashSet <char> interestingChars = new HashSet <char>(); foreach (Connection c in allConnections) { if (c is AnythingConnection) { anyReachable.Add(c.end); } if (c is AnythingConnection || c is ConnectionAnythingBut) { uninterestingConnections.Add(c); } interestingChars.UnionWith(c.interestingCharacters()); } foreach (char ch in interestingChars) { HashSet <int> reachable = new HashSet <int>(); foreach (Connection c in allConnections) { if (c.accepts(ch)) { reachable.Add(c.end); } } int newAddIndex = newStates.FindIndex(h => reachable.SetEquals(h)); if (newAddIndex == -1) { bool endState = false; foreach (int i in reachable) { if (temp.endStates.Contains(i)) { endState = true; break; } } newAddIndex = ret.addState(endState); newStates.Add(reachable); statesThatNeedProcessing.Enqueue(reachable); } ret.connect(hasState, newAddIndex, new ConnectionExactCharacter(hasState, newAddIndex, ch)); } if (uninterestingConnections.Count > 0) { HashSet <int> reachable = new HashSet <int>(); foreach (Connection c in uninterestingConnections) { reachable.Add(c.end); } 
// `reachable` now holds the ends of every catch-all connection; resolve it to a state index, then run the garbage pass.
int newAddIndex = newStates.FindIndex(h => reachable.SetEquals(h)); if (newAddIndex == -1) { bool endState = false; foreach (int i in reachable) { if (temp.endStates.Contains(i)) { endState = true; break; } } newAddIndex = ret.addState(endState); newStates.Add(reachable); statesThatNeedProcessing.Enqueue(reachable); } if (interestingChars.Count > 0) { ret.connect(hasState, newAddIndex, new ConnectionAnythingBut(hasState, newAddIndex, interestingChars)); } else { ret.connect(hasState, newAddIndex, new AnythingConnection(hasState, newAddIndex)); } } } int garbageState = ret.addState(false); ret.calculateConnectionPerState(); for (int i = 1; i < ret.numStates; i++) { HashSet <char> unUsableConnections = new HashSet <char>(); foreach (Connection c in ret.connectionPerState[i]) { unUsableConnections.UnionWith(c.acceptSet()); } bool canUseAll = true; bool canUseAny = true; HashSet <char> usableConnections = new HashSet <char>(); foreach (Connection c in ret.connectionPerState[i]) { if (c is AnythingConnection) { canUseAny = false; break; } if (c is ConnectionAnythingBut) { if (canUseAll) { usableConnections = new HashSet <char>(((ConnectionAnythingBut)c).forbidden); canUseAll = false; } else { usableConnections.IntersectWith(((ConnectionAnythingBut)c).forbidden); } } } if (canUseAny) { if (!canUseAll) { usableConnections.ExceptWith(unUsableConnections); if (usableConnections.Count > 0) { ret.connect(i, garbageState, new ConnectionAnyOf(i, garbageState, usableConnections)); } } else { if (unUsableConnections.Count > 0) { ret.connect(i, garbageState, new ConnectionAnythingBut(i, garbageState, unUsableConnections)); } else { ret.connect(i, garbageState, new AnythingConnection(i, garbageState)); } } } } ret.connect(garbageState, garbageState, new AnythingConnection(garbageState, garbageState)); ret.calculateConnectionPerState(); return(ret); }