/// <summary>
/// Compiles the regular expression <paramref name="re"/> through the full pipeline
/// (tokens, AST, NFA, DFA) and runs the DFA's submatch simulation on <paramref name="input"/>.
/// </summary>
/// <param name="re">Regular expression source text.</param>
/// <param name="input">Text passed to <c>DFAGraph.SimulateSubmatch</c>.</param>
/// <returns>The list produced by <c>DFAGraph.SimulateSubmatch</c> for <paramref name="input"/>.</returns>
private IList<string> DoSubmatch(string re, string input)
{
    // StringReader is IDisposable; keep it alive for the whole pipeline and
    // release it once the result has been computed.
    using (var reader = new StringReader(re))
    {
        var tokenStream = new TokenStream(reader);
        var parser = new RDParser(tokenStream);
        var ast = parser.Parse();

        var nfa = new NFAGraph(ast);
        var dfa = new DFAGraph(nfa);
        return dfa.SimulateSubmatch(input);
    }
}
/// <summary>
/// Builds a DFA from <paramref name="nfa"/> using subset construction: every DFA state
/// wraps a set of NFA states, starting from the epsilon-closure of the NFA start state.
/// </summary>
/// <param name="nfa">
/// Source NFA. Its <c>Alphabet</c> is reused, and its <c>EClosure</c> and <c>Move</c>
/// operations drive the construction.
/// </param>
public DFAGraph(NFAGraph nfa)
{
    Alphabet = nfa.Alphabet;
    FinalStates = new HashSet<DFAState>();
    AdjList = new Dictionary<DFAState, IDictionary<CharacterClassElement, DFAState>>();

    // The DFA start state is the epsilon-closure of the NFA start state.
    StartState = new DFAState(nfa.EClosure(nfa.StartState));
    if (StartState.Tags.Contains(FAStateTags.Final))
    {
        FinalStates.Add(StartState);
    }
    AdjList[StartState] = new Dictionary<CharacterClassElement, DFAState>();

    // Work list of DFA states whose outgoing transitions still have to be computed.
    // A state is given its AdjList entry at the moment it is queued, so the presence
    // of a key in AdjList doubles as the "already discovered" check.
    var pending = new Queue<DFAState>();
    pending.Enqueue(StartState);

    while (pending.Count > 0)
    {
        var current = pending.Dequeue();

        foreach (var symbol in Alphabet)
        {
            // NFA states reachable from any constituent of `current` on `symbol`,
            // extended by epsilon moves.
            var reachable = nfa.EClosure(nfa.Move(current.Constituents.ToList(), symbol));
            if (reachable.Count > 0)
            {
                var target = new DFAState(reachable);

                // Key lookup relies on DFAState's own equality, defined outside this block.
                if (!AdjList.ContainsKey(target))
                {
                    if (target.Tags.Contains(FAStateTags.Final))
                    {
                        FinalStates.Add(target);
                    }
                    pending.Enqueue(target);
                    AdjList[target] = new Dictionary<CharacterClassElement, DFAState>();
                }

                LinkStates(current, target, symbol);
            }
        }
    }
}
/// <summary>
/// Parses the regular expression <paramref name="re"/>, builds an NFA from the resulting
/// AST and asks that NFA whether it matches <paramref name="input"/>.
/// </summary>
/// <param name="re">Regular expression source text.</param>
/// <param name="input">Text passed to <c>NFAGraph.RecursiveMatch</c>.</param>
/// <returns>The result of <c>NFAGraph.RecursiveMatch</c> for <paramref name="input"/>.</returns>
private bool DoMatch(string re, string input)
{
    // StringReader is IDisposable; keep it alive for the whole pipeline and
    // release it once the result has been computed.
    using (var reader = new StringReader(re))
    {
        var tokenStream = new TokenStream(reader);
        var parser = new RDParser(tokenStream);
        var ast = parser.Parse();

        var nfa = new NFAGraph(ast);
        return nfa.RecursiveMatch(input);
    }
}
/// <summary>
/// Builds an NFA for the regex AST rooted at <paramref name="rxNode"/>.
/// Each alternative becomes its own path from <c>StartState</c> to <c>FinalState</c>;
/// the factors of an alternative are chained one after another along that path.
/// </summary>
/// <param name="rxNode">
/// Root of the AST. Alternatives are walked via <c>FirstAlternative</c>/<c>Next</c>,
/// and the factors of each alternative via <c>FirstFactor</c>/<c>Next</c>.
/// </param>
/// <param name="fac">
/// Factory stored in <c>StateFac</c>; a new <c>StateFactory</c> is created when this is
/// <c>null</c>. Graphs built for nested groups are given this graph's factory.
/// </param>
public NFAGraph(RegexNode rxNode, StateFactory fac = null)
{
    Alphabet = CharacterClassMapper.NormaliseAST(rxNode);
    StateFac = fac ?? new StateFactory();
    AdjList = new Dictionary<NFAState, IDictionary<CharacterClassElement, ISet<NFAState>>>();

    // Single entry and single exit state shared by all alternatives.
    StartState = MakeState();
    StartState.Tags.Add(FAStateTags.Start);
    FinalState = MakeState();
    FinalState.Tags.Add(FAStateTags.Final);

    for (var altNode = rxNode.FirstAlternative; altNode != null; altNode = altNode.Next)
    {
        // `tail` is the state reached after everything consumed so far in this alternative.
        var tail = MakeState();
        LinkStates(StartState, tail, Empty);

        for (var matchNode = altNode.FirstFactor; matchNode != null; matchNode = matchNode.Next)
        {
            // State in front of this factor; the unary operators below link to and from it.
            var head = tail;

            var classNode = matchNode as CharacterClassMatchNode;
            var groupNode = matchNode as GroupMatchNode;

            if (classNode != null)
            {
                // One transition labelled with the character class.
                var next = MakeState();
                LinkStates(tail, next, classNode.MatchingCharacterClass);
                tail = next;
            }
            else if (groupNode != null)
            {
                // Build a sub-NFA for the group body with the same state factory,
                // then splice it into this graph.
                var groupGraph = new NFAGraph(groupNode.Body, StateFac);

                // Adopt the sub-graph's adjacency entries (the inner maps are shared, not copied).
                foreach (var entry in groupGraph.AdjList)
                {
                    AdjList[entry.Key] = entry.Value;
                }

                LinkStates(tail, groupGraph.StartState, Empty);

                // The group's boundary states stop being start/final states and
                // instead carry the submatch push/pop tags.
                groupGraph.StartState.Tags.Remove(FAStateTags.Start);
                groupGraph.StartState.Tags.Add(FAStateTags.PushSubmatch);
                groupGraph.FinalState.Tags.Remove(FAStateTags.Final);
                groupGraph.FinalState.Tags.Add(FAStateTags.PopSubmatch);

                tail = groupGraph.FinalState;
            }

            switch (matchNode.OpType)
            {
                case UnaryOperatorType.NoneMany:
                {
                    // Kleene star: loop back to the head after each repetition, and let
                    // the head skip straight to a fresh exit state.
                    LinkStates(tail, head, Empty);
                    var exit = MakeState();
                    LinkStates(head, exit, Empty);
                    tail = exit;
                    break;
                }
                case UnaryOperatorType.Optional:
                    // Epsilon edge that bypasses the factor just built.
                    LinkStates(head, tail, Empty);
                    break;
                case UnaryOperatorType.OneMany:
                    // Epsilon edge back to the head so the factor can repeat.
                    LinkStates(tail, head, Empty);
                    break;
            }
        }

        // End of the alternative: join its path into the shared final state.
        LinkStates(tail, FinalState, Empty);
    }
}