//Appends a one-character transition for the given control code after the previous component.
//controlCode is cast to char and used as the transition character.
//Returns the node reached through that transition, which the caller continues building from.
private RegexTrie<T> addSpecialTransition(int controlCode, RegexTrie<T> endNodeOfPreviousComponent)
{
    //Fresh entry node for this component, reachable from the previous end node via epsilon
    var anchorEntry = new RegexTrie<T>();
    endNodeOfPreviousComponent.AddEpsilonTransition(anchorEntry);

    //Transition on the control character itself
    char controlChar = (char)controlCode;
    return anchorEntry.GetOrAddChildNode(controlChar, 0);
}
//Runs the quantifier check for a finished component and returns the node that the next
//component should be joined to (the Value of the check result). The Key of the check result
//is added to stringIndex (presumably the number of pattern characters the quantifier used).
private RegexTrie<T> applyQuantifierCheck(
    string source,
    ref int stringIndex,
    RegexTrie<T> endNodeOfCurrentComponent,
    RegexTrie<T> startNodeOfCurrentComponent,
    RegexTrie<T> endNodeOfPreviousComponent,
    Stack<RegexTrie<T>> commonDestination)
{
    KeyValuePair<int, RegexTrie<T>> quantifierCheckResult = checkForQuantifier(
        source.Substring(stringIndex + 1),
        endNodeOfCurrentComponent,
        startNodeOfCurrentComponent,
        endNodeOfPreviousComponent,
        commonDestination);
    stringIndex += quantifierCheckResult.Key;
    return quantifierCheckResult.Value;
}

//Builds the pattern in source onto trie and stores matchobject on the node where the pattern ends.
//
//This has to keep track of four nodes: end of the previous component, start of the current component,
//end of the current component and the node returned by the quantifier check (which becomes the end of
//the previous component for the next component).
//
//Pattern syntax handled here:
//  \x      the character x taken literally (\t is a tab, \\ is a backslash)
//  ( )     grouping; top-level groups are numbered via addGroupNumbers, starting from 1
//  [ ]     character set, delegated to handleSquareBracketGroup
//  ^ $     transitions on control characters 2 and 3 respectively
//  .       complement transition excluding only the null character
//  |       alternation within the current group (or the whole pattern)
//Quantifiers following a component are delegated to checkForQuantifier.
//
//Parameters:
//  trie               node the pattern is attached to
//  source             the pattern text
//  matchobject        stored as the only entry of the final node's matches list (any earlier list is replaced)
//  endStack           holds the previous end node saved at each '('; a group counts as top-level when this
//                     stack is empty after its ')' - assumes callers pass it in empty (TODO confirm)
//  startStack         holds the start node of each open group; trie is pushed first and is not popped here
//  commonDestination  holds the shared end node of the alternatives of each open group (null until a '|'
//                     is seen); the initial entry is only popped at the end if it is non-null
//
//A backslash that is the last character of source has nothing to escape and is ignored.
public void AddToRegexTrie(
    RegexTrie<T> trie,
    string source,
    T matchobject,
    Stack<RegexTrie<T>> endStack,
    Stack<RegexTrie<T>> startStack,
    Stack<RegexTrie<T>> commonDestination)
{
    //End of the previous component, to which the next component is joined
    RegexTrie<T> endNodeOfPreviousComponent = trie;

    //Push trie on the start stack so a top-level '|' can restart from it
    startStack.Push(trie);
    //Push a null on the commonDestination stack (defined later if a pipe is found)
    commonDestination.Push(null);

    //Matching group counter
    byte groupCount = 1;
    //True if the previous character was an unescaped escape char
    bool escapedCharacter = false;

    for (int stringIndex = 0; stringIndex < source.Length; stringIndex++)
    {
        char currentChar = source[stringIndex];

        //An escaped character is always a literal. This is tested before the backslash test
        //so that an escaped backslash becomes a literal backslash instead of re-arming the escape.
        if (escapedCharacter)
        {
            escapedCharacter = false;

            //Special char handling
            if (currentChar == 't')
            {
                currentChar = '\t';
            }

            RegexTrie<T> escapedStart = new RegexTrie<T>();
            RegexTrie<T> escapedEnd = escapedStart.GetOrAddChildNode(currentChar, 0);
            endNodeOfPreviousComponent = applyQuantifierCheck(
                source, ref stringIndex, escapedEnd, escapedStart, endNodeOfPreviousComponent, commonDestination);
            continue;
        }

        //Check for escape character
        if (currentChar == '\\')
        {
            escapedCharacter = true;
            continue;
        }

        //Check character for special meaning
        switch (currentChar)
        {
            //Open a new group
            case '(':
            {
                //Remember where the group is attached
                endStack.Push(endNodeOfPreviousComponent);
                //The group is built on a fresh node, which is also its start node
                endNodeOfPreviousComponent = new RegexTrie<T>();
                startStack.Push(endNodeOfPreviousComponent);
                //Push a null common destination (to be defined if pipes are found)
                commonDestination.Push(null);
                break;
            }

            //Close a group
            case ')':
            {
                RegexTrie<T> groupEnd;
                if (commonDestination.Peek() != null)
                {
                    //Connect the end node and common destination as part of the same epsilon closure
                    commonDestination.Peek().AddEpsilonTransition(endNodeOfPreviousComponent);
                    endNodeOfPreviousComponent.AddEpsilonTransition(commonDestination.Peek());
                    //Building continues from the common destination; it is not needed on the stack anymore
                    groupEnd = commonDestination.Pop();
                }
                else
                {
                    groupEnd = endNodeOfPreviousComponent;
                    //Pop the null destination
                    commonDestination.Pop();
                }

                RegexTrie<T> groupStart = startStack.Pop();
                endNodeOfPreviousComponent = endStack.Pop();
                endNodeOfPreviousComponent = applyQuantifierCheck(
                    source, ref stringIndex, groupEnd, groupStart, endNodeOfPreviousComponent, commonDestination);

                //If no enclosing group remains, this was a top-level group: number its nodes
                if (endStack.Count == 0)
                {
                    addGroupNumbers(groupStart, groupCount);
                    groupCount++;
                }
                break;
            }

            //Handle square bracket set
            case '[':
            {
                RegexTrie<T> setStart = new RegexTrie<T>();
                RegexTrie<T> setEnd = new RegexTrie<T>();
                //This skips over the closing square bracket, so there's no need for closing square bracket handling
                stringIndex += handleSquareBracketGroup(setStart, setEnd, source.Substring(stringIndex + 1));
                endNodeOfPreviousComponent = applyQuantifierCheck(
                    source, ref stringIndex, setEnd, setStart, endNodeOfPreviousComponent, commonDestination);
                break;
            }

            //Caret: add a transition with a control character that won't exist in text.
            //Feed the control character when finding matches.
            case '^':
            {
                endNodeOfPreviousComponent = addSpecialTransition(2, endNodeOfPreviousComponent);
                break;
            }

            //Dollar: add a transition with a control character that won't exist in text.
            //Feed the control character when finding matches.
            case '$':
            {
                endNodeOfPreviousComponent = addSpecialTransition(3, endNodeOfPreviousComponent);
                break;
            }

            //Period handling
            case '.':
            {
                RegexTrie<T> anyStart = new RegexTrie<T>();
                RegexTrie<T> anyEnd = new RegexTrie<T>();
                //Add complement of null character
                anyStart.complementTransitions.Add(
                    new ComplementTransition<T>(
                        0,
                        anyEnd,
                        new List<char>(),
                        new List<KeyValuePair<char, char>>()
                        {
                            new KeyValuePair<char, char>((char)0, (char)0)
                        }));
                endNodeOfPreviousComponent = applyQuantifierCheck(
                    source, ref stringIndex, anyEnd, anyStart, endNodeOfPreviousComponent, commonDestination);
                break;
            }

            //Change previous end to common destination and move back to the start of the current group
            case '|':
            {
                if (commonDestination.Peek() == null)
                {
                    //First alternative: its end node becomes the common destination
                    commonDestination.Pop();
                    commonDestination.Push(endNodeOfPreviousComponent);
                }
                else
                {
                    //Connect the end node and common destination as part of the same epsilon closure
                    commonDestination.Peek().AddEpsilonTransition(endNodeOfPreviousComponent);
                    endNodeOfPreviousComponent.AddEpsilonTransition(commonDestination.Peek());
                }
                //The next alternative is built from the start of the current group
                endNodeOfPreviousComponent = startStack.Peek();
                break;
            }

            //Any other character is a literal
            default:
            {
                RegexTrie<T> literalStart = new RegexTrie<T>();
                RegexTrie<T> literalEnd = literalStart.GetOrAddChildNode(currentChar, 0);
                endNodeOfPreviousComponent = applyQuantifierCheck(
                    source, ref stringIndex, literalEnd, literalStart, endNodeOfPreviousComponent, commonDestination);
                break;
            }
        }
    }

    //If the pattern had top-level alternatives, link the last one to the common destination and finish there
    if (commonDestination.Peek() != null)
    {
        endNodeOfPreviousComponent.AddEpsilonTransition(commonDestination.Peek());
        endNodeOfPreviousComponent = commonDestination.Pop();
    }

    //Store the match object on the final node
    endNodeOfPreviousComponent.matches = new List<T> { matchobject };
}