示例#1
0
        /**
         * Parses a regular expression factor: one atom, optionally
         * followed by a repetition modifier. This method handles the
         * Fact production in the grammar (see regexp.grammar).
         *
         * The atom is parsed from a fresh scratch state instead of from
         * start itself. Afterwards the scratch state is either linked
         * from start through an epsilon transition (when the scratch
         * state has incoming transitions and start already has outgoing
         * ones) or merged into start.
         *
         * @param start          the initial NFA state
         *
         * @return the terminating NFA state; start itself when the
         *         scratch state was merged and was also the end state
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private NFAState ParseFact(NFAState start)
        {
            NFAState atomStart = new NFAState();
            NFAState atomEnd = ParseAtom(atomStart);

            // A modifier character right after the atom turns it into a
            // repetition; the modifier parser may hand back a new end state.
            switch (PeekChar(0))
            {
            case '?':
            case '*':
            case '+':
            case '{':
                atomEnd = ParseAtomModifier(atomStart, atomEnd);
                break;
            }

            // Merging is chosen unless both sides already carry transitions
            // (incoming on the scratch state, outgoing on start).
            // NOTE(review): presumably merging in that case would mix the
            // atom's loop with start's other exits -- confirm.
            if (atomStart.incoming.Length == 0 || start.outgoing.Length == 0)
            {
                atomStart.MergeInto(start);
                if (atomEnd == atomStart)
                {
                    // The scratch state was also the end state, so the
                    // caller must continue from the state it was merged into.
                    return start;
                }
                return atomEnd;
            }

            start.AddOut(new NFAEpsilonTransition(atomStart));
            return atomEnd;
        }
示例#2
0
        /**
         * Parses a regular expression made of one or more terms
         * separated by '|'. This method handles the Expr production in
         * the grammar (see regexp.grammar).
         *
         * Every term is parsed from its own fresh state. The head of the
         * term is then merged into start (or linked from it with an
         * epsilon transition), and the tail of the term is merged into
         * the common end state (or linked to it with an epsilon
         * transition).
         *
         * @param start          the initial NFA state
         *
         * @return the terminating NFA state shared by all alternatives
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private NFAState ParseExpr(NFAState start)
        {
            NFAState exprEnd = new NFAState();

            while (true)
            {
                // Skip the alternative separator when it is the next character
                if (PeekChar(0) == '|')
                {
                    ReadChar('|');
                }

                NFAState termStart = new NFAState();
                NFAState termEnd = ParseTerm(termStart);

                // Head of the term: a state that already has incoming
                // transitions is linked by epsilon instead of being merged.
                if (termStart.incoming.Length > 0)
                {
                    start.AddOut(new NFAEpsilonTransition(termStart));
                }
                else
                {
                    termStart.MergeInto(start);
                }

                // Tail of the term: merge when it has no outgoing
                // transitions, or when the shared end state has no
                // transitions yet and no further alternative follows.
                bool mergeTail = termEnd.outgoing.Length == 0 ||
                                 (!exprEnd.HasTransitions() && PeekChar(0) != '|');
                if (mergeTail)
                {
                    termEnd.MergeInto(exprEnd);
                }
                else
                {
                    termEnd.AddOut(new NFAEpsilonTransition(exprEnd));
                }

                // Keep going only while another alternative is pending
                if (PeekChar(0) != '|')
                {
                    break;
                }
            }

            return exprEnd;
        }
        /// <summary>
        /// Parses a regular expression made of one or more terms separated
        /// by <c>|</c>. This method handles the <c>Expr</c> production in
        /// the grammar (see regexp.grammar).
        /// </summary>
        /// <remarks>
        /// Every term is parsed from its own fresh state. The head of the
        /// term is merged into <paramref name="start"/> (or linked from it
        /// with an epsilon transition), and the tail of the term is merged
        /// into the common end state (or linked to it with an epsilon
        /// transition).
        /// </remarks>
        /// <param name="start">The initial NFA state</param>
        /// <returns>
        /// The terminating NFA state shared by all alternatives
        /// </returns>
        /// <exception cref="RegExpException">
        /// If an error was encountered in the pattern string
        /// </exception>
        private NFAState ParseExpr(NFAState start)
        {
            NFAState exprEnd = new NFAState();
            bool moreAlternatives = true;

            while (moreAlternatives)
            {
                // Skip the alternative separator when it is the next character
                if (this.PeekChar(0) == '|')
                {
                    this.ReadChar('|');
                }

                NFAState termStart = new NFAState();
                NFAState termEnd = this.ParseTerm(termStart);

                // Head of the term: a state that already has incoming
                // transitions is linked by epsilon instead of being merged.
                if (termStart.Incoming.Count > 0)
                {
                    start.AddOut(new NFAEpsilonTransition(termStart));
                }
                else
                {
                    termStart.MergeInto(start);
                }

                // Tail of the term: merge when it has no outgoing
                // transitions, or when the shared end state has no
                // transitions yet and no further alternative follows.
                bool mergeTail = termEnd.Outgoing.Count == 0 ||
                                 (!exprEnd.HasTransitions && this.PeekChar(0) != '|');

                if (mergeTail)
                {
                    termEnd.MergeInto(exprEnd);
                }
                else
                {
                    termEnd.AddOut(new NFAEpsilonTransition(exprEnd));
                }

                // Another pass is needed only when a separator comes next
                moreAlternatives = this.PeekChar(0) == '|';
            }

            return exprEnd;
        }
示例#4
0
        /**
         * Parses a regular expression atom modifier and wires the
         * matching repetition into the NFA. This method handles the
         * AtomModifier production in the grammar (see regexp.grammar).
         *
         * The modifier is first reduced to a (lowest, highest) repeat
         * range, where a highest value of -1 stands for "no upper
         * bound". Only the ranges (0,1), (0,unbounded) and (1,unbounded)
         * are wired into the NFA; any other range is rejected with
         * INVALID_REPEAT_COUNT. A '?' or '+' directly after the modifier
         * (reluctant or possessive form) is rejected with
         * UNSUPPORTED_SPECIAL_CHARACTER.
         *
         * @param start          the initial NFA state
         * @param end            the terminal NFA state
         *
         * @return the terminating NFA state
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private NFAState ParseAtomModifier(NFAState start, NFAState end) {
            int lowest;
            int highest;
            int modifierPos = pos;

            // Reduce the modifier to a repeat range
            switch (ReadChar()) {
            case '?':
                lowest = 0;
                highest = 1;
                break;
            case '*':
                lowest = 0;
                highest = -1;
                break;
            case '+':
                lowest = 1;
                highest = -1;
                break;
            case '{':
                lowest = ReadNumber();
                if (PeekChar(0) != ',') {
                    // "{n}": exact count
                    highest = lowest;
                } else {
                    ReadChar(',');
                    // "{n,}" leaves the range open, "{n,m}" closes it
                    highest = (PeekChar(0) == '}') ? -1 : ReadNumber();
                }
                ReadChar('}');
                if (highest == 0 || (highest > 0 && lowest > highest)) {
                    throw new RegExpException(
                        RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                        modifierPos,
                        pattern);
                }
                break;
            default:
                throw new RegExpException(
                    RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                    pos - 1,
                    pattern);
            }

            // A trailing '?' (reluctant) or '+' (possessive) is not supported
            if (PeekChar(0) == '?' || PeekChar(0) == '+') {
                throw new RegExpException(
                    RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
                    pos,
                    pattern);
            }

            // Zero or one: let the automaton bypass the atom
            if (lowest == 0 && highest == 1) {
                return start.AddOut(new NFAEpsilonTransition(end));
            }

            // Zero or more: fold the end back onto the start state
            if (lowest == 0 && highest == -1) {
                if (end.outgoing.Length > 0) {
                    end.AddOut(new NFAEpsilonTransition(start));
                } else {
                    end.MergeInto(start);
                }
                return start;
            }

            // One or more: loop from the end state
            if (lowest == 1 && highest == -1) {
                // When the atom is exactly one transition from start to
                // end, a copy of it is attached to end; otherwise an
                // epsilon transition leads back to start.
                bool singleTransition = start.outgoing.Length == 1 &&
                                        end.outgoing.Length == 0 &&
                                        end.incoming.Length == 1 &&
                                        start.outgoing[0] == end.incoming[0];
                if (singleTransition) {
                    end.AddOut(start.outgoing[0].Copy(end));
                } else {
                    end.AddOut(new NFAEpsilonTransition(start));
                }
                return end;
            }

            // Every other range (e.g. "{2}" or "{2,5}") is rejected
            throw new RegExpException(
                RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                modifierPos,
                pattern);
        }
示例#5
0
        /**
         * Parses a regular expression factor: one atom, optionally
         * followed by a repetition modifier. This method handles the
         * Fact production in the grammar (see regexp.grammar).
         *
         * The atom is parsed from a fresh scratch state instead of from
         * start itself. Afterwards the scratch state is either linked
         * from start through an epsilon transition (when the scratch
         * state has incoming transitions and start already has outgoing
         * ones) or merged into start.
         *
         * @param start          the initial NFA state
         *
         * @return the terminating NFA state; start itself when the
         *         scratch state was merged and was also the end state
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private NFAState ParseFact(NFAState start) {
            NFAState scratch = new NFAState();
            NFAState last = ParseAtom(scratch);

            // A modifier character right after the atom turns it into a
            // repetition; the modifier parser may hand back a new end state.
            switch (PeekChar(0)) {
            case '?':
            case '*':
            case '+':
            case '{':
                last = ParseAtomModifier(scratch, last);
                break;
            }

            // An epsilon link is used only when both sides already carry
            // transitions; in every other case the scratch state is merged.
            bool linkByEpsilon = scratch.incoming.Length > 0 &&
                                 start.outgoing.Length > 0;
            if (!linkByEpsilon) {
                scratch.MergeInto(start);
                // If the scratch state was also the end state, the caller
                // must continue from the state it was merged into.
                return (last == scratch) ? start : last;
            }

            start.AddOut(new NFAEpsilonTransition(scratch));
            return last;
        }
示例#6
0
        /**
         * Parses a regular expression made of one or more terms
         * separated by '|'. This method handles the Expr production in
         * the grammar (see regexp.grammar).
         *
         * Every term is parsed from its own fresh state. The head of the
         * term is then merged into start (or linked from it with an
         * epsilon transition), and the tail of the term is merged into
         * the common end state (or linked to it with an epsilon
         * transition).
         *
         * @param start          the initial NFA state
         *
         * @return the terminating NFA state shared by all alternatives
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private NFAState ParseExpr(NFAState start) {
            NFAState commonEnd = new NFAState();

            for (;;) {
                // Skip the alternative separator when it is the next character
                if (PeekChar(0) == '|') {
                    ReadChar('|');
                }

                NFAState head = new NFAState();
                NFAState tail = ParseTerm(head);

                // A head that already has incoming transitions is linked by
                // epsilon instead of being merged into start.
                if (head.incoming.Length > 0) {
                    start.AddOut(new NFAEpsilonTransition(head));
                } else {
                    head.MergeInto(start);
                }

                // The tail is merged when it has no outgoing transitions,
                // or when the common end state has no transitions yet and
                // no further alternative follows.
                bool mergeTail = tail.outgoing.Length == 0 ||
                                 (!commonEnd.HasTransitions() && PeekChar(0) != '|');
                if (mergeTail) {
                    tail.MergeInto(commonEnd);
                } else {
                    tail.AddOut(new NFAEpsilonTransition(commonEnd));
                }

                // Done as soon as no further alternative is pending
                if (PeekChar(0) != '|') {
                    return commonEnd;
                }
            }
        }
示例#7
0
        /**
         * Parses a regular expression atom modifier and wires the
         * matching repetition into the NFA. This method handles the
         * AtomModifier production in the grammar (see regexp.grammar).
         *
         * The modifier is first reduced to a (minCount, maxCount) repeat
         * range, where a maxCount of -1 stands for "no upper bound".
         * Only the ranges (0,1), (0,unbounded) and (1,unbounded) are
         * wired into the NFA; any other range is rejected with
         * INVALID_REPEAT_COUNT. A '?' or '+' directly after the modifier
         * (reluctant or possessive form) is rejected with
         * UNSUPPORTED_SPECIAL_CHARACTER.
         *
         * @param start          the initial NFA state
         * @param end            the terminal NFA state
         *
         * @return the terminating NFA state
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private NFAState ParseAtomModifier(NFAState start, NFAState end)
        {
            int minCount;
            int maxCount;
            int modifierPos = pos;

            // Reduce the modifier to a repeat range
            switch (ReadChar())
            {
            case '?':
                minCount = 0;
                maxCount = 1;
                break;

            case '*':
                minCount = 0;
                maxCount = -1;
                break;

            case '+':
                minCount = 1;
                maxCount = -1;
                break;

            case '{':
                // "{n}" by default; a comma opens or widens the range
                minCount = ReadNumber();
                maxCount = minCount;
                if (PeekChar(0) == ',')
                {
                    ReadChar(',');
                    if (PeekChar(0) == '}')
                    {
                        maxCount = -1;
                    }
                    else
                    {
                        maxCount = ReadNumber();
                    }
                }
                ReadChar('}');

                bool invalidRange = maxCount == 0 ||
                                    (maxCount > 0 && minCount > maxCount);
                if (invalidRange)
                {
                    throw new RegExpException(
                              RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                              modifierPos,
                              pattern);
                }
                break;

            default:
                throw new RegExpException(
                          RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                          pos - 1,
                          pattern);
            }

            // A trailing '?' (reluctant) or '+' (possessive) is not supported
            if (PeekChar(0) == '?' || PeekChar(0) == '+')
            {
                throw new RegExpException(
                          RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
                          pos,
                          pattern);
            }

            // Zero or one: let the automaton bypass the atom
            if (minCount == 0 && maxCount == 1)
            {
                return start.AddOut(new NFAEpsilonTransition(end));
            }

            if (maxCount == -1)
            {
                // Zero or more: fold the end back onto the start state
                if (minCount == 0)
                {
                    if (end.outgoing.Length > 0)
                    {
                        end.AddOut(new NFAEpsilonTransition(start));
                    }
                    else
                    {
                        end.MergeInto(start);
                    }
                    return start;
                }

                // One or more: loop from the end state
                if (minCount == 1)
                {
                    // When the atom is exactly one transition from start
                    // to end, a copy of it is attached to end; otherwise
                    // an epsilon transition leads back to start.
                    bool singleTransition = start.outgoing.Length == 1 &&
                                            end.outgoing.Length == 0 &&
                                            end.incoming.Length == 1 &&
                                            start.outgoing[0] == end.incoming[0];
                    if (singleTransition)
                    {
                        end.AddOut(start.outgoing[0].Copy(end));
                    }
                    else
                    {
                        end.AddOut(new NFAEpsilonTransition(start));
                    }
                    return end;
                }
            }

            // Every other range (e.g. "{2}" or "{2,5}") is rejected
            throw new RegExpException(
                      RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                      modifierPos,
                      pattern);
        }
        /// <summary>
        /// Parses a regular expression atom modifier and wires the matching
        /// repetition into the NFA. This method handles the
        /// <c>AtomModifier</c> production in the grammar (see regexp.grammar).
        /// </summary>
        /// <remarks>
        /// The modifier is first reduced to a repeat range, where an upper
        /// bound of -1 stands for "no upper bound". Only the ranges (0,1),
        /// (0,unbounded) and (1,unbounded) are wired into the NFA; any other
        /// range is rejected with <c>InvalidRepeatCount</c>. A <c>?</c> or
        /// <c>+</c> directly after the modifier (reluctant or possessive
        /// form) is rejected with <c>UnsupportedSpecialCharacter</c>.
        /// </remarks>
        /// <param name="start">The initial NFA state</param>
        /// <param name="end">The terminal NFA state</param>
        /// <returns>The terminating NFA state</returns>
        /// <exception cref="RegExpException">
        /// If an error was encountered in the pattern string
        /// </exception>
        private NFAState ParseAtomModifier(
            NFAState start,
            NFAState end)
        {
            int lowerBound;
            int upperBound;
            int modifierPos = this.pos;

            // Reduce the modifier to a repeat range
            switch (this.ReadChar())
            {
            case '?':
                lowerBound = 0;
                upperBound = 1;
                break;

            case '*':
                lowerBound = 0;
                upperBound = -1;
                break;

            case '+':
                lowerBound = 1;
                upperBound = -1;
                break;

            case '{':
                lowerBound = this.ReadNumber();

                if (this.PeekChar(0) != ',')
                {
                    // "{n}": exact count
                    upperBound = lowerBound;
                }
                else
                {
                    this.ReadChar(',');

                    // "{n,}" leaves the range open, "{n,m}" closes it
                    upperBound = (this.PeekChar(0) == '}') ? -1 : this.ReadNumber();
                }

                this.ReadChar('}');

                if (upperBound == 0 || (upperBound > 0 && lowerBound > upperBound))
                {
                    throw new RegExpException(
                              RegExpException.ErrorType.InvalidRepeatCount,
                              modifierPos,
                              this.pattern);
                }

                break;

            default:
                throw new RegExpException(
                          RegExpException.ErrorType.UnexpectedCharacter,
                          this.pos - 1,
                          this.pattern);
            }

            // A trailing '?' (reluctant) or '+' (possessive) is not supported
            if (this.PeekChar(0) == '?' || this.PeekChar(0) == '+')
            {
                throw new RegExpException(
                          RegExpException.ErrorType.UnsupportedSpecialCharacter,
                          this.pos,
                          this.pattern);
            }

            bool unbounded = upperBound == -1;

            // Zero or one: let the automaton bypass the atom
            if (lowerBound == 0 && upperBound == 1)
            {
                return start.AddOut(new NFAEpsilonTransition(end));
            }

            // Zero or more: fold the end back onto the start state
            if (lowerBound == 0 && unbounded)
            {
                if (end.Outgoing.Count > 0)
                {
                    end.AddOut(new NFAEpsilonTransition(start));
                }
                else
                {
                    end.MergeInto(start);
                }

                return start;
            }

            // One or more: loop from the end state
            if (lowerBound == 1 && unbounded)
            {
                // When the atom is exactly one transition from start to end,
                // a copy of it is attached to end; otherwise an epsilon
                // transition leads back to start.
                bool singleTransition = start.Outgoing.Count == 1 &&
                                        end.Outgoing.Count == 0 &&
                                        end.Incoming.Count == 1 &&
                                        start.Outgoing[0] == end.Incoming[0];

                if (singleTransition)
                {
                    end.AddOut(start.Outgoing[0].Copy(end));
                }
                else
                {
                    end.AddOut(new NFAEpsilonTransition(start));
                }

                return end;
            }

            // Every other range (e.g. "{2}" or "{2,5}") is rejected
            throw new RegExpException(
                      RegExpException.ErrorType.InvalidRepeatCount,
                      modifierPos,
                      this.pattern);
        }