/// <summary>
/// Records, under the row <paramref name="stateName"/> of <paramref name="states"/>, every symbol
/// transition that leaves <paramref name="origin"/>. Transitions without accepted symbols are not
/// recorded themselves; their target node is explored instead, still writing into the same row.
/// </summary>
/// <param name="origin">Node whose outgoing transitions are inspected.</param>
/// <param name="states">Table (state name, symbol, set of target state names); updated in place.</param>
/// <param name="stateName">Key of the row in <paramref name="states"/> that receives the entries.</param>
private static void addChildrenOfBeginNodesIterative(AutomatonNodeCore origin, Dictionary<string, Dictionary<char, HashSet<string>>> states, string stateName)
{
    addChildrenOfBeginNodesGuarded(origin, states, stateName, new HashSet<AutomatonNodeCore>());
}

/// <summary>
/// Worker for <see cref="addChildrenOfBeginNodesIterative"/>. Remembers the nodes that were already
/// expanded so a cycle of symbol-less transitions terminates instead of recursing forever.
/// </summary>
private static void addChildrenOfBeginNodesGuarded(AutomatonNodeCore origin, Dictionary<string, Dictionary<char, HashSet<string>>> states, string stateName, HashSet<AutomatonNodeCore> visited)
{
    // NOTE(review): relies on AutomatonNodeCore having stable equality (default reference equality assumed) - confirm.
    if (!visited.Add(origin))
    {
        // Already expanded for this row; expanding again could only add duplicates to the sets.
        return;
    }

    foreach (AutomatonTransition trans in origin.children)
    {
        // The row is created lazily: a node without outgoing transitions never creates one.
        Dictionary<char, HashSet<string>> row;
        if (!states.TryGetValue(stateName, out row))
        {
            row = new Dictionary<char, HashSet<string>>();
            states[stateName] = row;
        }

        if (trans.acceptedSymbols.Count > 0)
        {
            foreach (char alpha in trans.acceptedSymbols)
            {
                HashSet<string> targets;
                if (!row.TryGetValue(alpha, out targets))
                {
                    targets = new HashSet<string>();
                    row[alpha] = targets;
                }
                targets.Add(trans.automatonNode.stateName);
            }
        }
        else
        {
            // No accepted symbols: follow the transition and keep collecting for the same row.
            addChildrenOfBeginNodesGuarded(trans.automatonNode, states, stateName, visited);
        }
    }
}
/// <summary>
/// Builds a new automaton in which every transition of <paramref name="automatonCore"/> points the
/// other way and the begin/end roles of the states are swapped.
/// </summary>
/// <param name="automatonCore">Automaton to reverse; it is not modified.</param>
/// <returns>A new automaton (constructed with <c>new AutomatonCore(true)</c>) holding the reversed graph.</returns>
public static AutomatonCore reverseDFA(AutomatonCore automatonCore)
{
    AutomatonCore reverse = new AutomatonCore(true);

    // Copy the states; begin states become end states and vice versa.
    // Assigning both flags directly keeps a state that is begin AND end in both roles.
    foreach (AutomatonNodeCore node in automatonCore.nodes)
    {
        AutomatonNodeCore newNode = new AutomatonNodeCore();
        newNode.stateName = node.stateName;
        newNode.isBeginNode = node.isEndNode;
        newNode.isEndNode = node.isBeginNode;
        reverse.nodes.Add(newNode);
    }

    foreach (AutomatonNodeCore node in automatonCore.nodes)
    {
        foreach (AutomatonTransition trans in node.children)
        {
            // First reversed node carrying the name of the original parent.
            AutomatonNodeCore reversedParent = null;
            foreach (AutomatonNodeCore candidate in reverse.nodes)
            {
                if (candidate.stateName == node.stateName)
                {
                    reversedParent = candidate;
                    break;
                }
            }
            if (reversedParent == null)
            {
                continue;
            }

            // First reversed node carrying the name of the original child.
            AutomatonNodeCore reversedChild = null;
            foreach (AutomatonNodeCore candidate in reverse.nodes)
            {
                if (candidate.stateName == trans.automatonNode.stateName)
                {
                    reversedChild = candidate;
                    break;
                }
            }
            if (reversedChild == null)
            {
                continue;
            }

            // Original edge parent -> child becomes child -> parent. The symbols are copied so the
            // reversed automaton does not share a collection with the source transition.
            AutomatonTransition reversedEdge = new AutomatonTransition(reversedParent);
            foreach (char alpha in trans.acceptedSymbols)
            {
                reversedEdge.acceptedSymbols.Add(alpha);
            }
            reversedChild.children.Add(reversedEdge);

            AutomatonTransition reversedBackEdge = new AutomatonTransition(reversedChild);
            foreach (char alpha in trans.acceptedSymbols)
            {
                reversedBackEdge.acceptedSymbols.Add(alpha);
            }
            reversedParent.parents.Add(reversedBackEdge);
        }
    }

    return(reverse);
}
/// <summary>
/// For every (state, symbol) entry of <paramref name="states"/>, derives the combined target state name
/// (target names sorted and joined with ",") and appends a node with that name to
/// <paramref name="iterationNodes"/> when no node with that name is present yet. Afterwards every node in
/// <paramref name="iterationNodes"/> is given an (empty) row in <paramref name="states"/> if it has none.
/// </summary>
/// <param name="iterationNodes">Node list that is extended in place.</param>
/// <param name="states">Table (state name, symbol, set of target state names); rows may be added.</param>
private static void makeNewStatesAfterBeginNodes(List<AutomatonNodeCore> iterationNodes, Dictionary<string, Dictionary<char, HashSet<string>>> states)
{
    foreach (Dictionary<char, HashSet<string>> row in states.Values)
    {
        foreach (HashSet<string> targets in row.Values)
        {
            // Sort the parts so the same set of targets always yields the same name.
            SortedSet<string> orderedParts = new SortedSet<string>();
            foreach (string target in targets)
            {
                orderedParts.Add(target);
            }

            string combinedName = "";
            foreach (string piece in orderedParts)
            {
                combinedName = (combinedName == "" ? "" : combinedName + ",") + piece;
            }

            bool known = iterationNodes.Exists(existing => existing.stateName == combinedName);
            if (known)
            {
                continue;
            }

            AutomatonNodeCore freshNode = new AutomatonNodeCore();
            freshNode.stateName = combinedName;
            iterationNodes.Add(freshNode);
        }
    }

    // Make sure every node, including the ones just created, owns a row in the table.
    foreach (AutomatonNodeCore node in iterationNodes)
    {
        if (states.ContainsKey(node.stateName))
        {
            continue;
        }
        states.Add(node.stateName, new Dictionary<char, HashSet<string>>());
    }
}
/// <summary>
/// Assistant method for the recursive Thompson construction.
/// Do not call it outside of constructNDFA.
/// </summary>
/// <remarks>
/// The returned list has a number of requirements:
/// 1) the first state is the part that connects on the entry end of a black box;
/// 2) the last state is the part that connects to the exit end of a black box;
/// 3) all black boxes are resolved on returning (recursive).
/// </remarks>
/// <param name="parsedRegex">Operation (sub)tree to turn into a fragment.</param>
/// <returns>
/// The states of the fragment, or null when the fragment cannot be built: the tree (or a required
/// operand) is missing, an operand could not be built, or a leaf is not a terminal.
/// </returns>
private static LinkedList<AutomatonNodeCore> thompsonSubset(OperationTree parsedRegex)
{
    if (parsedRegex == null)
    {
        return(null); //Missing operand: nothing to build
    }

    LinkedList<AutomatonNodeCore> subset = new LinkedList<AutomatonNodeCore>();

    //Assume the characters are parsed correctly
    switch (parsedRegex.Op.Character)
    {
    case '|':
    {
        /*                          BLACK BOX A
         *                        ε/           \ε
         * OR construction  -->[A]               [B]-->
         *                        ε\           /ε
         *                          BLACK BOX B
         */
        //An OR operation should always have a left branch and a right branch
        LinkedList<AutomatonNodeCore> orBlackBoxA = thompsonSubset(parsedRegex.OpLeft);
        LinkedList<AutomatonNodeCore> orBlackBoxB = thompsonSubset(parsedRegex.OpRight);
        if (orBlackBoxA == null || orBlackBoxB == null)
        {
            return(null); //A branch could not be built, so neither can the OR
        }

        AutomatonNodeCore orStateA = new AutomatonNodeCore(); //Divergence point
        AutomatonNodeCore orStateB = new AutomatonNodeCore(); //Convergence point

        //The parts are ready, put them together
        orStateA.children.Add(new AutomatonTransition(orBlackBoxA.First.Value));
        orStateA.children.Add(new AutomatonTransition(orBlackBoxB.First.Value));
        orBlackBoxA.Last.Value.children.Add(new AutomatonTransition(orStateB));
        orBlackBoxB.Last.Value.children.Add(new AutomatonTransition(orStateB));

        subset.AddFirst(orStateA);
        foreach (AutomatonNodeCore n in orBlackBoxA)
        {
            subset.AddLast(n);
        }
        foreach (AutomatonNodeCore n in orBlackBoxB)
        {
            subset.AddLast(n);
        }
        subset.AddLast(orStateB);
        break;
    }

    case '?':
    case '*':
    case '+':
    {
        /* Shared chain for all three:  -->[A] -ε-> [B] -ε-> BLACK BOX -ε-> [C] -ε-> [D]-->
         *
         * ZERO-OR-ONE  (?): chain plus the skip edge  [A] -ε-> [D]
         * ZERO-OR-MORE (*): chain plus the skip edge  [A] -ε-> [D]  and the loop edge  [C] -ε-> [B]
         * ONE-OR-MORE  (+): chain plus the loop edge  [C] -ε-> [B]
         */
        //a #-OR-# op contains ONLY a left branch
        LinkedList<AutomatonNodeCore> thisBlackBox = thompsonSubset(parsedRegex.OpLeft);
#if DEBUG
        if (parsedRegex.OpRight != null)
        {
            Console.INSTANCE.WriteLine("WARN: Right hand side of ZERO-OR-MORE operation present");
            Console.INSTANCE.WriteLine("Duly noted, thoroughly ignored.");
        }
#endif
        if (thisBlackBox == null)
        {
            return(null); //The operand could not be built
        }

        char repetition = parsedRegex.Op.Character;
        AutomatonNodeCore thisStateA = new AutomatonNodeCore(),
                          thisStateB = new AutomatonNodeCore(),
                          thisStateC = new AutomatonNodeCore(),
                          thisStateD = new AutomatonNodeCore();

        thisStateA.children.Add(new AutomatonTransition(thisStateB));
        if (repetition == '*' || repetition == '?') //The skip edge doesn't exist in ONE-OR-MORE
        {
            thisStateA.children.Add(new AutomatonTransition(thisStateD));
        }
        thisStateB.children.Add(new AutomatonTransition(thisBlackBox.First.Value));
        thisBlackBox.Last.Value.children.Add(new AutomatonTransition(thisStateC));
        thisStateC.children.Add(new AutomatonTransition(thisStateD));
        if (repetition == '*' || repetition == '+') //The loop edge doesn't exist in ZERO-OR-ONE
        {
            thisStateC.children.Add(new AutomatonTransition(thisStateB));
        }

        subset.AddLast(thisStateA);
        subset.AddLast(thisStateB);
        foreach (AutomatonNodeCore n in thisBlackBox)
        {
            subset.AddLast(n);
        }
        subset.AddLast(thisStateC);
        subset.AddLast(thisStateD);
        break;
    }

    case '.':
    {
        /*
         * DOT construction:  --> BLACK BOX A -ε-> BLACK BOX B -->
         */
        //A DOT operation should always have a left branch and a right branch
        LinkedList<AutomatonNodeCore> dotBlackBoxA = thompsonSubset(parsedRegex.OpLeft);
        LinkedList<AutomatonNodeCore> dotBlackBoxB = thompsonSubset(parsedRegex.OpRight);
        if (dotBlackBoxA == null || dotBlackBoxB == null)
        {
            return(null); //A branch could not be built, so neither can the concatenation
        }

        dotBlackBoxA.Last.Value.children.Add(new AutomatonTransition(dotBlackBoxB.First.Value));

        //The left fragment's list is reused as the result and extended with the right fragment
        subset = dotBlackBoxA;
        foreach (AutomatonNodeCore n in dotBlackBoxB)
        {
            subset.AddLast(n);
        }
        break;
    }

    default:
    {
        /*
         * TERMINAL construction  -'character'->
         */
        if (!parsedRegex.isTerminal)
        {
            return(null); //Terminal that's not a terminal
        }

        AutomatonNodeCore termStateA = new AutomatonNodeCore();
        AutomatonNodeCore termStateB = new AutomatonNodeCore();
        AutomatonTransition termTransition = new AutomatonTransition(termStateB);
        termTransition.acceptedSymbols.Add(parsedRegex.Op.Character);
        termStateA.children.Add(termTransition);

        subset.AddLast(termStateA);
        subset.AddLast(termStateB);
        break;
    }
    }

    if (subset.Count == 0)
    {
        return(null);
    }
    return(subset);
}
/// <summary>
/// Builds the product graph of two automata: one node per pair of states, named
/// "name1,name2", with a transition on a symbol wherever both components have a transition on it.
/// </summary>
/// <remarks>
/// <c>convertNames</c> is applied to both inputs first. Begin/end flags of the product nodes are not
/// set here. A symbol that only one of the two component states can read produces no product
/// transition.
/// </remarks>
/// <param name="core1">Left automaton; supplies the first half of every product state name.</param>
/// <param name="core2">Right automaton; supplies the second half of every product state name.</param>
/// <returns>A new automaton (constructed with <c>new AutomatonCore(false)</c>) holding the product nodes.</returns>
private static AutomatonCore prepareForOperator(AutomatonCore core1, AutomatonCore core2)
{
    convertNames(core1);
    convertNames(core2);
    AutomatonCore returnCore = new AutomatonCore(false);

    // One product node per (state of core1, state of core2) pair.
    List<AutomatonNodeCore> newStatesAsNodes = new List<AutomatonNodeCore>();
    Dictionary<string, AutomatonNodeCore> productByName = new Dictionary<string, AutomatonNodeCore>();
    foreach (AutomatonNodeCore nodeCore1 in core1.nodes)
    {
        foreach (AutomatonNodeCore nodeCore2 in core2.nodes)
        {
            AutomatonNodeCore nodeCore = new AutomatonNodeCore();
            nodeCore.stateName = nodeCore1.stateName + "," + nodeCore2.stateName;
            newStatesAsNodes.Add(nodeCore);

            // First node wins on a duplicate name, matching a first-match linear search.
            if (!productByName.ContainsKey(nodeCore.stateName))
            {
                productByName.Add(nodeCore.stateName, nodeCore);
            }
        }
    }

    foreach (AutomatonNodeCore productNode in newStatesAsNodes)
    {
        // NOTE(review): splitting assumes the component names contain no ',' after convertNames - confirm.
        string[] origins = productNode.stateName.Split(',');
        Dictionary<char, string> childs = new Dictionary<char, string>();

        // Left component: symbol -> name of the left target state.
        foreach (AutomatonNodeCore ogCore1 in core1.nodes)
        {
            if (ogCore1.stateName != origins[0])
            {
                continue;
            }
            foreach (AutomatonTransition ogTrans1 in ogCore1.children)
            {
                foreach (char alpha in ogTrans1.acceptedSymbols)
                {
                    childs[alpha] = ogTrans1.automatonNode.stateName;
                }
            }
        }

        // Right component: extend the entry to "leftTarget,rightTarget".
        foreach (AutomatonNodeCore ogCore2 in core2.nodes)
        {
            if (ogCore2.stateName != origins[1])
            {
                continue;
            }
            foreach (AutomatonTransition ogTrans2 in ogCore2.children)
            {
                foreach (char alpha in ogTrans2.acceptedSymbols)
                {
                    // The left state may have no move on this symbol; reading childs[alpha] directly
                    // would then throw KeyNotFoundException. Such a symbol has no product move.
                    string leftTarget;
                    if (childs.TryGetValue(alpha, out leftTarget))
                    {
                        childs[alpha] = leftTarget + "," + ogTrans2.automatonNode.stateName;
                    }
                }
            }
        }

        // Connect to the product node carrying the combined target name (if there is one).
        foreach (KeyValuePair<char, string> child in childs)
        {
            AutomatonNodeCore target;
            if (child.Value == null || !productByName.TryGetValue(child.Value, out target))
            {
                continue;
            }

            AutomatonTransition trans3 = new AutomatonTransition(target);
            trans3.acceptedSymbols.Add(child.Key);
            productNode.children.Add(trans3);

            AutomatonTransition trans4 = new AutomatonTransition(productNode);
            trans4.acceptedSymbols.Add(child.Key);
            target.parents.Add(trans4);
        }
    }

    returnCore.nodes.AddRange(newStatesAsNodes);
    return(returnCore);
}
/// <summary>
/// Turns a transition table into an automaton: one node per row of <paramref name="states"/>, one
/// transition per (symbol, target set) entry, and a shared error state "ø" that receives every
/// symbol a node has no transition for.
/// </summary>
/// <param name="originalAutomatonNodes">
/// Nodes of the source automaton. They supply the alphabet (all symbols on their outgoing
/// transitions) and the begin/end flags: a new node is flagged begin (end) when any comma-separated
/// part of its name equals the name of an original begin (end) node.
/// </param>
/// <param name="states">Table (state name, symbol, set of target state names).</param>
/// <returns>The resulting automaton, with <c>nondeterministic</c> set to false.</returns>
private static AutomatonCore statesToAutomatonCore(List<AutomatonNodeCore> originalAutomatonNodes, Dictionary<string, Dictionary<char, HashSet<string>>> states)
{
    AutomatonCore automatonCore = new AutomatonCore(true);
    Dictionary<string, AutomatonNodeCore> nodesByName = new Dictionary<string, AutomatonNodeCore>();

    // 1) One node per table row; flags are inherited from the original nodes named in the row key.
    foreach (KeyValuePair<string, Dictionary<char, HashSet<string>>> state in states)
    {
        AutomatonNodeCore node = new AutomatonNodeCore();
        node.stateName = state.Key;

        HashSet<string> parts = new HashSet<string>(state.Key.Trim().Split(','));
        foreach (AutomatonNodeCore ogNode in originalAutomatonNodes)
        {
            if (!parts.Contains(ogNode.stateName))
            {
                continue;
            }
            if (ogNode.isBeginNode)
            {
                node.isBeginNode = true;
            }
            if (ogNode.isEndNode)
            {
                node.isEndNode = true;
            }
        }

        automatonCore.nodes.Add(node);
        nodesByName[state.Key] = node; // row keys are unique, so this is an exact name -> node map
    }

    // 2) Transitions between the new nodes.
    foreach (AutomatonNodeCore node in automatonCore.nodes)
    {
        foreach (KeyValuePair<char, HashSet<string>> alphaStatePair in states[node.stateName])
        {
            // Look the target up under the name in the set's own order first; when that is not a
            // known state, retry with the parts sorted, which is how makeNewStatesAfterBeginNodes
            // names combined states.
            AutomatonNodeCore target;
            if (!nodesByName.TryGetValue(string.Join(",", alphaStatePair.Value), out target) &&
                !nodesByName.TryGetValue(string.Join(",", new SortedSet<string>(alphaStatePair.Value)), out target))
            {
                continue; // no such state; the symbol is routed to the error state below
            }

            AutomatonTransition toChild = new AutomatonTransition(target);
            toChild.acceptedSymbols.Add(alphaStatePair.Key);
            node.children.Add(toChild);

            AutomatonTransition toParent = new AutomatonTransition(node);
            toParent.acceptedSymbols.Add(alphaStatePair.Key);
            target.parents.Add(toParent);
        }
    }

    // 3) The alphabet is every symbol used by the original automaton.
    HashSet<char> alphabet = new HashSet<char>();
    foreach (AutomatonNodeCore node in originalAutomatonNodes)
    {
        foreach (AutomatonTransition trans in node.children)
        {
            foreach (char alpha in trans.acceptedSymbols)
            {
                alphabet.Add(alpha);
            }
        }
    }

    // 4) Complete every node: symbols without a transition go to the shared error state.
    AutomatonNodeCore errorState = null;
    foreach (AutomatonNodeCore node1 in automatonCore.nodes)
    {
        HashSet<char> handled = new HashSet<char>();
        foreach (AutomatonTransition trans in node1.children)
        {
            foreach (char alpha in trans.acceptedSymbols)
            {
                handled.Add(alpha);
            }
        }

        foreach (char alpha in alphabet)
        {
            if (handled.Contains(alpha))
            {
                continue;
            }

            if (errorState == null)
            {
                // Created on first use; it loops to itself on the whole alphabet.
                errorState = new AutomatonNodeCore();
                errorState.stateName = "ø";

                AutomatonTransition selfLoop = new AutomatonTransition(errorState);
                AutomatonTransition selfLoopParent = new AutomatonTransition(errorState);
                foreach (char symbol in alphabet)
                {
                    selfLoop.acceptedSymbols.Add(symbol);
                    selfLoopParent.acceptedSymbols.Add(symbol);
                }
                errorState.children.Add(selfLoop);
                // Mirror the loop in parents, like every other transition built in this method.
                errorState.parents.Add(selfLoopParent);
            }

            AutomatonTransition fromNode = new AutomatonTransition(node1);
            fromNode.acceptedSymbols.Add(alpha);
            errorState.parents.Add(fromNode);

            AutomatonTransition toError = new AutomatonTransition(errorState);
            toError.acceptedSymbols.Add(alpha);
            node1.children.Add(toError);
        }
    }

    // Added after the loop: the node list must not change while it is being enumerated.
    if (errorState != null)
    {
        automatonCore.nodes.Add(errorState);
    }

    automatonCore.nondeterministic = false;
    return(automatonCore);
}
/// <summary>
/// Converts this grammar into an automaton: every entry of <c>symbols</c> becomes a state, and every
/// production line becomes a transition from its <c>fromSymbol</c> state to its <c>toSymbol</c> state
/// on the first character of its <c>letter</c>.
/// </summary>
/// <remarks>
/// Several production lines between the same two states are merged into one transition that accepts
/// all their letters, both in the source state's <c>children</c> and in the target state's
/// <c>parents</c>.
/// </remarks>
/// <returns>A new automaton constructed with <c>new AutomatonCore(true)</c>.</returns>
/// <exception cref="System.InvalidOperationException">
/// A production line refers to a symbol that is not listed in <c>symbols</c>.
/// </exception>
public AutomatonCore changeToNDFA()
{
    AutomatonCore automatonCore = new AutomatonCore(true);

    foreach (string symbol in symbols)
    {
        AutomatonNodeCore ac = new AutomatonNodeCore();
        ac.stateName = symbol;
        // Set independently: a symbol may be a start symbol and an end symbol at the same time.
        ac.isBeginNode = startSymbols.Contains(symbol);
        ac.isEndNode = endSymbols.Contains(symbol);
        automatonCore.nodes.Add(ac);
    }

    foreach (ProductLine pl in productionLines)
    {
        foreach (AutomatonNodeCore node in automatonCore.nodes)
        {
            // Two separate checks (not else-if): a self-loop production needs both entries.
            if (pl.fromSymbol == node.stateName)
            {
                //add to children
                //If there already is a transition to the same state, only add the extra letter
                bool newTrans = true;
                foreach (AutomatonTransition tr in node.children)
                {
                    if (tr.automatonNode.stateName == pl.toSymbol)
                    {
                        tr.acceptedSymbols.Add(pl.letter[0]);
                        newTrans = false;
                    }
                }

                if (newTrans)
                {
                    AutomatonNodeCore endNode = null;
                    foreach (AutomatonNodeCore candidate in automatonCore.nodes)
                    {
                        if (candidate.stateName == pl.toSymbol)
                        {
                            endNode = candidate; // the last match wins
                        }
                    }
                    if (endNode == null)
                    {
                        throw new System.InvalidOperationException("Production line targets unknown symbol '" + pl.toSymbol + "'.");
                    }

                    AutomatonTransition trans = new AutomatonTransition(endNode);
                    trans.acceptedSymbols.Add(pl.letter[0]);
                    node.children.Add(trans);
                }
            }

            if (pl.toSymbol == node.stateName)
            {
                //add to parent
                //Parent transitions point back at the source state, so that is the name to match
                bool newTrans = true;
                foreach (AutomatonTransition tr in node.parents)
                {
                    if (tr.automatonNode.stateName == pl.fromSymbol)
                    {
                        tr.acceptedSymbols.Add(pl.letter[0]);
                        newTrans = false;
                    }
                }

                if (newTrans)
                {
                    AutomatonNodeCore firstNode = null;
                    foreach (AutomatonNodeCore candidate in automatonCore.nodes)
                    {
                        if (candidate.stateName == pl.fromSymbol)
                        {
                            firstNode = candidate; // the last match wins
                        }
                    }
                    if (firstNode == null)
                    {
                        throw new System.InvalidOperationException("Production line starts at unknown symbol '" + pl.fromSymbol + "'.");
                    }

                    AutomatonTransition trans = new AutomatonTransition(firstNode);
                    trans.acceptedSymbols.Add(pl.letter[0]);
                    node.parents.Add(trans);
                }
            }
        }
    }

    return(automatonCore);
}