/**
 * Parses one factor of the regular expression, i.e. the Fact
 * production in the grammar (see regexp.grammar). The atom is
 * parsed from a temporary state and, when the next character is
 * '?', '*', '+' or '{', the atom modifier is parsed as well. The
 * temporary state is finally attached to the start state.
 *
 * @param start          the initial NFA state
 *
 * @return the terminating NFA state
 *
 * @throws RegExpException if an error was encountered in the
 *             pattern string
 */
private NFAState ParseFact(NFAState start) {
    NFAState atomStart = new NFAState();
    NFAState atomEnd = ParseAtom(atomStart);

    switch (PeekChar(0)) {
    case '?':
    case '*':
    case '+':
    case '{':
        atomEnd = ParseAtomModifier(atomStart, atomEnd);
        break;
    }

    // When the temporary state has no incoming transitions, or the
    // start state has no outgoing ones, the temporary state is merged
    // into the start state instead of being linked to it.
    if (atomStart.incoming.Length <= 0 || start.outgoing.Length <= 0) {
        atomStart.MergeInto(start);
        if (atomEnd == atomStart) {
            return start;
        }
        return atomEnd;
    }

    start.AddOut(new NFAEpsilonTransition(atomStart));
    return atomEnd;
}
/**
 * Parses a whole expression, i.e. the Expr production in the
 * grammar (see regexp.grammar). ParseTerm is called repeatedly for
 * as long as a '|' character follows, and every parsed part is
 * attached between the start state and one shared, newly created
 * end state.
 *
 * @param start          the initial NFA state
 *
 * @return the terminating NFA state
 *
 * @throws RegExpException if an error was encountered in the
 *             pattern string
 */
private NFAState ParseExpr(NFAState start) {
    NFAState end = new NFAState();

    while (true) {
        if (PeekChar(0) == '|') {
            ReadChar('|');
        }

        NFAState branchStart = new NFAState();
        NFAState branchEnd = ParseTerm(branchStart);

        // Attach the head of the branch to the start state
        if (branchStart.incoming.Length != 0) {
            start.AddOut(new NFAEpsilonTransition(branchStart));
        } else {
            branchStart.MergeInto(start);
        }

        // Attach the tail of the branch to the shared end state
        if (branchEnd.outgoing.Length == 0
         || (!end.HasTransitions() && PeekChar(0) != '|')) {
            branchEnd.MergeInto(end);
        } else {
            branchEnd.AddOut(new NFAEpsilonTransition(end));
        }

        if (PeekChar(0) != '|') {
            return end;
        }
    }
}
/// <summary>
/// Parses a whole expression, i.e. the <c>Expr</c> production in the
/// grammar (see regexp.grammar). <c>ParseTerm</c> is called repeatedly
/// for as long as a '|' character follows, and every parsed part is
/// attached between <paramref name="start"/> and one shared, newly
/// created end state.
/// </summary>
/// <param name="start">The initial NFA state</param>
/// <returns>The terminating NFA state</returns>
/// <exception cref="RegExpException">
/// If an error was encountered in the pattern string
/// </exception>
private NFAState ParseExpr(NFAState start)
{
    NFAState end = new NFAState();

    do
    {
        if (this.PeekChar(0) == '|')
        {
            this.ReadChar('|');
        }

        NFAState termStart = new NFAState();
        NFAState termEnd = this.ParseTerm(termStart);

        // Attach the head of the parsed part to the start state
        bool headUnreferenced = termStart.Incoming.Count == 0;
        if (headUnreferenced)
        {
            termStart.MergeInto(start);
        }
        else
        {
            start.AddOut(new NFAEpsilonTransition(termStart));
        }

        // Attach the tail of the parsed part to the shared end state
        bool mergeTail = termEnd.Outgoing.Count == 0
            || (!end.HasTransitions && this.PeekChar(0) != '|');
        if (mergeTail)
        {
            termEnd.MergeInto(end);
        }
        else
        {
            termEnd.AddOut(new NFAEpsilonTransition(end));
        }
    }
    while (this.PeekChar(0) == '|');

    return end;
}
/**
 * Parses an atom modifier, i.e. the AtomModifier production in the
 * grammar (see regexp.grammar). The modifier is one of '?', '*',
 * '+' or a '{min}', '{min,}' or '{min,max}' repeat count. Only
 * repeat counts equivalent to '?', '*' and '+' are turned into NFA
 * transitions; any other count is reported as invalid.
 *
 * @param start          the initial NFA state
 * @param end            the terminal NFA state
 *
 * @return the terminating NFA state
 *
 * @throws RegExpException if an error was encountered in the
 *             pattern string
 */
private NFAState ParseAtomModifier(NFAState start, NFAState end) {
    int min;
    int max;                // -1 means no upper bound
    int firstPos = pos;

    // Read min and max
    switch (ReadChar()) {
    case '?':
        min = 0;
        max = 1;
        break;
    case '*':
        min = 0;
        max = -1;
        break;
    case '+':
        min = 1;
        max = -1;
        break;
    case '{':
        min = ReadNumber();
        max = min;
        if (PeekChar(0) == ',') {
            ReadChar(',');
            max = (PeekChar(0) != '}') ? ReadNumber() : -1;
        }
        ReadChar('}');
        if (max == 0 || (max > 0 && min > max)) {
            throw new RegExpException(
                RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                firstPos,
                pattern);
        }
        break;
    default:
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            pos - 1,
            pattern);
    }

    // Reluctant ('?') and possessive ('+') suffixes are not supported
    if (PeekChar(0) == '?' || PeekChar(0) == '+') {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
            pos,
            pattern);
    }

    // Handle supported repeaters
    bool unbounded = (max == -1);

    if (min == 0 && max == 1) {
        // Zero or one: the atom can be skipped
        return start.AddOut(new NFAEpsilonTransition(end));
    }
    if (min == 0 && unbounded) {
        // Zero or more: loop the end back onto the start
        if (end.outgoing.Length == 0) {
            end.MergeInto(start);
        } else {
            end.AddOut(new NFAEpsilonTransition(start));
        }
        return start;
    }
    if (min == 1 && unbounded) {
        // One or more: a lone transition is copied onto the end state,
        // otherwise the end is linked back to the start
        bool loneTransition = start.outgoing.Length == 1
                           && end.outgoing.Length == 0
                           && end.incoming.Length == 1
                           && start.outgoing[0] == end.incoming[0];
        if (loneTransition) {
            end.AddOut(start.outgoing[0].Copy(end));
        } else {
            end.AddOut(new NFAEpsilonTransition(start));
        }
        return end;
    }

    throw new RegExpException(
        RegExpException.ErrorType.INVALID_REPEAT_COUNT,
        firstPos,
        pattern);
}
/**
 * Parses one factor of the regular expression, i.e. the Fact
 * production in the grammar (see regexp.grammar). An atom is parsed
 * from a temporary state, followed by an atom modifier when the
 * next character is '?', '*', '+' or '{'.
 *
 * @param start          the initial NFA state
 *
 * @return the terminating NFA state
 *
 * @throws RegExpException if an error was encountered in the
 *             pattern string
 */
private NFAState ParseFact(NFAState start) {
    NFAState tempStart = new NFAState();
    NFAState result = ParseAtom(tempStart);
    bool linkByEpsilon;

    switch (PeekChar(0)) {
    case '?':
    case '*':
    case '+':
    case '{':
        result = ParseAtomModifier(tempStart, result);
        break;
    }

    // The temporary state is linked by an epsilon transition only if it
    // has incoming transitions and the start state has outgoing
    // ones; otherwise it is merged into the start state.
    linkByEpsilon = tempStart.incoming.Length > 0
                 && start.outgoing.Length > 0;
    if (linkByEpsilon) {
        start.AddOut(new NFAEpsilonTransition(tempStart));
    } else {
        tempStart.MergeInto(start);
        if (result == tempStart) {
            result = start;
        }
    }
    return result;
}
/**
 * Parses a whole expression, i.e. the Expr production in the
 * grammar (see regexp.grammar). Each pass of the loop consumes an
 * optional '|' character and calls ParseTerm; the result is then
 * attached between the start state and a shared end state. The
 * loop repeats while a '|' character follows.
 *
 * @param start          the initial NFA state
 *
 * @return the terminating NFA state
 *
 * @throws RegExpException if an error was encountered in the
 *             pattern string
 */
private NFAState ParseExpr(NFAState start) {
    NFAState end = new NFAState();
    bool moreAlternatives;

    do {
        if (PeekChar(0) == '|') {
            ReadChar('|');
        }

        NFAState altStart = new NFAState();
        NFAState altEnd = ParseTerm(altStart);

        // Attach the head of the parsed part to the start state
        if (altStart.incoming.Length == 0) {
            altStart.MergeInto(start);
        } else {
            start.AddOut(new NFAEpsilonTransition(altStart));
        }

        // Attach the tail of the parsed part to the shared end state
        if (altEnd.outgoing.Length != 0
         && (end.HasTransitions() || PeekChar(0) == '|')) {
            altEnd.AddOut(new NFAEpsilonTransition(end));
        } else {
            altEnd.MergeInto(end);
        }

        moreAlternatives = (PeekChar(0) == '|');
    } while (moreAlternatives);

    return end;
}
/**
 * Parses an atom modifier, i.e. the AtomModifier production in the
 * grammar (see regexp.grammar). Accepted modifiers are '?', '*',
 * '+' and the '{min}', '{min,}' and '{min,max}' repeat counts, but
 * only counts equivalent to '?', '*' or '+' are supported. All
 * other counts cause an invalid repeat count error.
 *
 * @param start          the initial NFA state
 * @param end            the terminal NFA state
 *
 * @return the terminating NFA state
 *
 * @throws RegExpException if an error was encountered in the
 *             pattern string
 */
private NFAState ParseAtomModifier(NFAState start, NFAState end) {
    int firstPos = pos;
    int min;
    int max;                // -1 means no upper bound

    // Read min and max
    switch (ReadChar()) {
    case '?':
        min = 0;
        max = 1;
        break;
    case '*':
        min = 0;
        max = -1;
        break;
    case '+':
        min = 1;
        max = -1;
        break;
    case '{':
        min = ReadNumber();
        max = min;
        if (PeekChar(0) == ',') {
            ReadChar(',');
            if (PeekChar(0) == '}') {
                max = -1;
            } else {
                max = ReadNumber();
            }
        }
        ReadChar('}');
        if (max == 0 || (max > 0 && min > max)) {
            throw new RegExpException(
                RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                firstPos,
                pattern);
        }
        break;
    default:
        throw new RegExpException(
            RegExpException.ErrorType.UNEXPECTED_CHARACTER,
            pos - 1,
            pattern);
    }

    // A trailing '?' (reluctant) or '+' (possessive) is rejected
    if (PeekChar(0) == '?' || PeekChar(0) == '+') {
        throw new RegExpException(
            RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
            pos,
            pattern);
    }

    // Handle supported repeaters
    if (max == 1 && min == 0) {
        // '?': the atom can be skipped
        return start.AddOut(new NFAEpsilonTransition(end));
    }
    if (max == -1) {
        if (min == 0) {
            // '*': loop the end back onto the start
            if (end.outgoing.Length != 0) {
                end.AddOut(new NFAEpsilonTransition(start));
            } else {
                end.MergeInto(start);
            }
            return start;
        }
        if (min == 1) {
            // '+': copy a lone transition onto the end state, or link
            // the end back to the start
            if (!(start.outgoing.Length == 1 &&
                  end.outgoing.Length == 0 &&
                  end.incoming.Length == 1 &&
                  start.outgoing[0] == end.incoming[0])) {

                end.AddOut(new NFAEpsilonTransition(start));
            } else {
                end.AddOut(start.outgoing[0].Copy(end));
            }
            return end;
        }
    }

    throw new RegExpException(
        RegExpException.ErrorType.INVALID_REPEAT_COUNT,
        firstPos,
        pattern);
}
/// <summary>
/// Parses an atom modifier, i.e. the AtomModifier production in the
/// grammar (see regexp.grammar). Accepted modifiers are '?', '*', '+'
/// and the '{min}', '{min,}' and '{min,max}' repeat counts, but only
/// counts equivalent to '?', '*' or '+' are supported. All other
/// counts cause an invalid repeat count error.
/// </summary>
/// <param name="start">The initial NFA state</param>
/// <param name="end">The terminal NFA state</param>
/// <returns>The terminating NFA state</returns>
/// <exception cref="RegExpException">
/// If an error was encountered in the pattern string
/// </exception>
private NFAState ParseAtomModifier(
    NFAState start,
    NFAState end)
{
    int min;
    int max;                // -1 means no upper bound
    int firstPos = this.pos;

    // Read min and max
    switch (this.ReadChar())
    {
        case '?':
            min = 0;
            max = 1;
            break;

        case '*':
            min = 0;
            max = -1;
            break;

        case '+':
            min = 1;
            max = -1;
            break;

        case '{':
            min = this.ReadNumber();
            max = min;
            if (this.PeekChar(0) == ',')
            {
                this.ReadChar(',');
                max = (this.PeekChar(0) != '}') ? this.ReadNumber() : -1;
            }

            this.ReadChar('}');
            if (max == 0 || (max > 0 && min > max))
            {
                throw new RegExpException(
                    RegExpException.ErrorType.InvalidRepeatCount,
                    firstPos,
                    this.pattern);
            }

            break;

        default:
            throw new RegExpException(
                RegExpException.ErrorType.UnexpectedCharacter,
                this.pos - 1,
                this.pattern);
    }

    // A trailing '?' (reluctant) or '+' (possessive) is rejected
    if (this.PeekChar(0) == '?' || this.PeekChar(0) == '+')
    {
        throw new RegExpException(
            RegExpException.ErrorType.UnsupportedSpecialCharacter,
            this.pos,
            this.pattern);
    }

    // Handle supported repeaters
    bool zeroOrOne = min == 0 && max == 1;
    bool zeroOrMore = min == 0 && max == -1;
    bool oneOrMore = min == 1 && max == -1;

    if (zeroOrOne)
    {
        // The atom can be skipped
        return start.AddOut(new NFAEpsilonTransition(end));
    }

    if (zeroOrMore)
    {
        // Loop the end back onto the start
        if (end.Outgoing.Count == 0)
        {
            end.MergeInto(start);
        }
        else
        {
            end.AddOut(new NFAEpsilonTransition(start));
        }

        return start;
    }

    if (oneOrMore)
    {
        // Copy a lone transition onto the end state, or link the end
        // back to the start
        bool loneTransition = start.Outgoing.Count == 1
            && end.Outgoing.Count == 0
            && end.Incoming.Count == 1
            && start.Outgoing[0] == end.Incoming[0];

        if (loneTransition)
        {
            end.AddOut(start.Outgoing[0].Copy(end));
        }
        else
        {
            end.AddOut(new NFAEpsilonTransition(start));
        }

        return end;
    }

    throw new RegExpException(
        RegExpException.ErrorType.InvalidRepeatCount,
        firstPos,
        this.pattern);
}