/// <summary>
/// Walks the regex tree rooted at <paramref name="rootNode"/> (descending into group bodies),
/// gathers the character class of every <see cref="CharacterClassMatchNode"/>, passes the distinct
/// classes to <c>NormaliseCharacterClasses</c>, and then rewrites each match node in place so that
/// it refers to the class the normalisation result maps its original class to.
/// </summary>
/// <param name="rootNode">Root of the tree to normalise. The tree is modified in place.</param>
/// <returns>The <c>Alphabet</c> of the normalisation result.</returns>
/// <exception cref="ArgumentNullException"><paramref name="rootNode"/> is null.</exception>
public static ISet<CharacterClassElement> NormaliseAST(RegexNode rootNode) {
    if (rootNode == null) {
        throw new ArgumentNullException("rootNode");
    }

    // Several match nodes may share one class key, so remember all of them per class;
    // keeping a single node per key would leave the others pointing at the old class.
    var nodesByClass = new Dictionary<CharacterClass, List<CharacterClassMatchNode>>();
    CollectCharacterClassNodes(rootNode, nodesByClass);

    // Only classes that actually occur in the tree are normalised; no catch-all
    // [char.MinValue-char.MaxValue] class is injected into the input.
    var rStruct = NormaliseCharacterClasses(nodesByClass.Keys.ToList());

    foreach (var entry in nodesByClass) {
        var normalisedClass = rStruct.Mapping[entry.Key];
        foreach (var matchNode in entry.Value) {
            matchNode.MatchingCharacterClass = normalisedClass;
        }
    }

    return rStruct.Alphabet;
}

/// <summary>
/// Depth-first walk over the alternatives and factors of <paramref name="node"/> that records every
/// character class match node under its current class, recursing into group bodies.
/// </summary>
private static void CollectCharacterClassNodes(
    RegexNode node,
    IDictionary<CharacterClass, List<CharacterClassMatchNode>> nodesByClass) {
    if (node == null) {
        // GroupMatchNode's constructor defaults its body to null, so a group may have nothing to walk.
        return;
    }

    for (var alternative = node.FirstAlternative; alternative != null; alternative = alternative.Next) {
        for (var factor = alternative.FirstFactor; factor != null; factor = factor.Next) {
            if (factor is GroupMatchNode) {
                CollectCharacterClassNodes(((GroupMatchNode) factor).Body, nodesByClass);
            } else if (factor is CharacterClassMatchNode) {
                var classNode = (CharacterClassMatchNode) factor;

                List<CharacterClassMatchNode> sharers;
                if (!nodesByClass.TryGetValue(classNode.MatchingCharacterClass, out sharers)) {
                    sharers = new List<CharacterClassMatchNode>();
                    nodesByClass.Add(classNode.MatchingCharacterClass, sharers);
                }
                sharers.Add(classNode);
            }
        }
    }
}
/// <summary>
/// Creates a group node around an optional sub-expression.
/// </summary>
/// <param name="body">Sub-tree stored in <c>Body</c>; null when omitted.</param>
/// <param name="capturing">Value stored in <c>Capturing</c>; true when omitted.</param>
public GroupMatchNode(RegexNode body = null, bool capturing = true) {
    this.Body = body;
    this.Capturing = capturing;
}
/// <summary>
/// Builds an NFA from the regex tree rooted at <paramref name="rxNode"/>.
/// The tree is first normalised through <c>CharacterClassMapper.NormaliseAST</c> (which supplies
/// <c>Alphabet</c>); then a start and a final state are created and every alternative becomes a
/// chain of states running from the start state to the final state, joined with <c>Empty</c> links.
/// </summary>
/// <param name="rxNode">Root of the regex tree to compile.</param>
/// <param name="fac">State factory to use; a new <c>StateFactory</c> is created when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="rxNode"/> is null.</exception>
public NFAGraph(RegexNode rxNode, StateFactory fac = null) {
    if (rxNode == null) {
        // Fail here with a named argument instead of a NullReferenceException further down.
        throw new ArgumentNullException("rxNode");
    }

    Alphabet = CharacterClassMapper.NormaliseAST(rxNode);
    StateFac = fac ?? new StateFactory();
    AdjList = new Dictionary<NFAState, IDictionary<CharacterClassElement, ISet<NFAState>>>();

    StartState = MakeState();
    StartState.Tags.Add(FAStateTags.Start);
    FinalState = MakeState();
    FinalState.Tags.Add(FAStateTags.Final);

    // Each alternative gets its own chain hanging off the start state.
    for (var altNode = rxNode.FirstAlternative; altNode != null; altNode = altNode.Next) {
        // "tail" is the state reached after everything consumed so far in this alternative.
        var tail = MakeState();
        LinkStates(StartState, tail, Empty);

        for (var matchNode = altNode.FirstFactor; matchNode != null; matchNode = matchNode.Next) {
            // "head" is where this factor starts; the unary operators below link relative to it.
            NFAState head = tail;

            if (matchNode is CharacterClassMatchNode) {
                var classNode = (CharacterClassMatchNode) matchNode;
                var consumed = MakeState();
                LinkStates(tail, consumed, classNode.MatchingCharacterClass);
                tail = consumed;
            } else if (matchNode is GroupMatchNode) {
                var groupNode = (GroupMatchNode) matchNode;

                // Compile the group body on its own, sharing our state factory.
                // NOTE(review): this runs NormaliseAST on the group body a second time - confirm that is harmless.
                var groupGraph = new NFAGraph(groupNode.Body, StateFac);

                // Adopt all of the sub-graph's states and their outgoing edges.
                foreach (var entry in groupGraph.AdjList) {
                    AdjList[entry.Key] = entry.Value;
                }
                LinkStates(tail, groupGraph.StartState, Empty);

                // The sub-graph's start/final states become submatch boundaries inside this graph.
                // NOTE(review): groupNode.Capturing is not consulted, so every group gets
                // Push/PopSubmatch tags - confirm that is intended for non-capturing groups.
                groupGraph.StartState.Tags.Remove(FAStateTags.Start);
                groupGraph.StartState.Tags.Add(FAStateTags.PushSubmatch);
                groupGraph.FinalState.Tags.Remove(FAStateTags.Final);
                groupGraph.FinalState.Tags.Add(FAStateTags.PopSubmatch);

                tail = groupGraph.FinalState;
            }

            switch (matchNode.OpType) {
                case UnaryOperatorType.NoneMany:
                    // Kleene star: loop back to the head, and leave from the head through a fresh exit state.
                    LinkStates(tail, head, Empty);
                    var exit = MakeState();
                    LinkStates(head, exit, Empty);
                    tail = exit;
                    break;
                case UnaryOperatorType.Optional:
                    // Allow skipping the factor that was just added.
                    LinkStates(head, tail, Empty);
                    break;
                case UnaryOperatorType.OneMany:
                    // Allow repeating the factor by cycling back to its head.
                    LinkStates(tail, head, Empty);
                    break;
            }
        }

        // The end of the alternative's chain flows into the shared final state.
        LinkStates(tail, FinalState, Empty);
    }
}