예제 #1
0
        /**
         * Parses a regular expression factor, i.e. the Fact production
         * in the grammar (see regexp.grammar). The atom is parsed from
         * a freshly created scratch state. If the next character is one
         * of '?', '*', '+' or '{', the atom modifier is parsed as well.
         * Finally the scratch state is attached to the supplied start
         * state, either through an epsilon transition or by merging.
         *
         * @param start          the initial NFA state
         *
         * @return the terminating NFA state
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private NFAState ParseFact(NFAState start)
        {
            NFAState atomStart = new NFAState();
            NFAState atomEnd = ParseAtom(atomStart);
            int lookahead = PeekChar(0);

            // An atom may be followed by a repeat modifier
            if (lookahead == '?' ||
                lookahead == '*' ||
                lookahead == '+' ||
                lookahead == '{')
            {
                atomEnd = ParseAtomModifier(atomStart, atomEnd);
            }

            // An epsilon link is only used when the scratch state has
            // incoming transitions and start already has outgoing ones
            bool linkWithEpsilon = atomStart.incoming.Length > 0
                                && start.outgoing.Length > 0;

            if (!linkWithEpsilon)
            {
                // Merge first, then compare: when the factor ended on
                // the scratch state itself, start is the result
                atomStart.MergeInto(start);
                if (atomEnd == atomStart)
                {
                    return start;
                }
                return atomEnd;
            }

            start.AddOut(new NFAEpsilonTransition(atomStart));
            return atomEnd;
        }
예제 #2
0
        /**
         * Parses a regular expression, i.e. the Expr production in the
         * grammar (see regexp.grammar). Terms are parsed one after the
         * other for as long as the next character is '|'. Each term is
         * built from its own new state, which is then attached to the
         * supplied start state, and the end of each term is attached
         * to a single end state shared by all the terms.
         *
         * @param start          the initial NFA state
         *
         * @return the terminating NFA state
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private NFAState ParseExpr(NFAState start)
        {
            NFAState exprEnd = new NFAState();

            while (true)
            {
                // Consume the separator in front of the term, if any
                if (PeekChar(0) == '|')
                {
                    ReadChar('|');
                }

                NFAState termStart = new NFAState();
                NFAState termEnd = ParseTerm(termStart);

                // Attach the term start to the expression start
                if (termStart.incoming.Length != 0)
                {
                    start.AddOut(new NFAEpsilonTransition(termStart));
                }
                else
                {
                    termStart.MergeInto(start);
                }

                // Attach the term end to the shared end state. Merging
                // is chosen when the term end has no outgoing
                // transitions, or when the shared end has no
                // transitions yet and no further '|' follows.
                bool mergeIntoEnd = termEnd.outgoing.Length == 0
                                 || (!exprEnd.HasTransitions() &&
                                     PeekChar(0) != '|');

                if (mergeIntoEnd)
                {
                    termEnd.MergeInto(exprEnd);
                }
                else
                {
                    termEnd.AddOut(new NFAEpsilonTransition(exprEnd));
                }

                // Stop as soon as no further alternative follows
                if (PeekChar(0) != '|')
                {
                    return exprEnd;
                }
            }
        }
예제 #3
0
        /**
         * Parses a regular expression atom modifier, i.e. the
         * AtomModifier production in the grammar (see regexp.grammar).
         * The modifiers '?', '*', '+' and the '{min}', '{min,}' and
         * '{min,max}' forms are read, but only the repeat ranges
         * zero-or-one, zero-or-more and one-or-more are wired into the
         * NFA. Any other range is reported as an invalid repeat count.
         * A modifier directly followed by '?' or '+' (reluctant or
         * possessive) is reported as an unsupported special character.
         *
         * @param start          the initial NFA state
         * @param end            the terminal NFA state
         *
         * @return the terminating NFA state
         *
         * @throws RegExpException if an error was encountered in the
         *             pattern string
         */
        private NFAState ParseAtomModifier(NFAState start, NFAState end)
        {
            // Marker value used for a repeat range without upper bound
            const int unbounded = -1;

            // Position of the modifier, kept for error reporting
            int modifierPos = pos;
            int lower;
            int upper;

            // Read the lower and upper repeat bounds
            switch (ReadChar())
            {
            case '?':
                lower = 0;
                upper = 1;
                break;

            case '*':
                lower = 0;
                upper = unbounded;
                break;

            case '+':
                lower = 1;
                upper = unbounded;
                break;

            case '{':
                lower = ReadNumber();
                if (PeekChar(0) != ',')
                {
                    // "{n}" form, both bounds are identical
                    upper = lower;
                }
                else
                {
                    // "{n,}" or "{n,m}" form
                    ReadChar(',');
                    upper = (PeekChar(0) == '}') ? unbounded : ReadNumber();
                }
                ReadChar('}');

                // Reject a zero upper bound and inverted ranges
                if (upper == 0 || (upper > 0 && lower > upper))
                {
                    throw new RegExpException(
                              RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                              modifierPos,
                              pattern);
                }
                break;

            default:
                // The character was already consumed, hence pos - 1
                throw new RegExpException(
                          RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                          pos - 1,
                          pattern);
            }

            // Reluctant ('?') and possessive ('+') suffixes are rejected
            if (PeekChar(0) == '?' || PeekChar(0) == '+')
            {
                throw new RegExpException(
                          RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
                          pos,
                          pattern);
            }

            // Zero or one: add an epsilon transition from start to end
            if (lower == 0 && upper == 1)
            {
                return start.AddOut(new NFAEpsilonTransition(end));
            }

            // Zero or more: join end back onto start, which is returned
            if (lower == 0 && upper == unbounded)
            {
                if (end.outgoing.Length != 0)
                {
                    end.AddOut(new NFAEpsilonTransition(start));
                }
                else
                {
                    end.MergeInto(start);
                }
                return start;
            }

            // One or more: add a way back from end, which is returned
            if (lower == 1 && upper == unbounded)
            {
                // True when the only transition leaving start is also
                // the only one entering end, and end has no outgoing
                // transitions
                bool singleSharedTransition = start.outgoing.Length == 1
                                           && end.outgoing.Length == 0
                                           && end.incoming.Length == 1
                                           && start.outgoing[0] == end.incoming[0];

                if (singleSharedTransition)
                {
                    // NOTE(review): presumably Copy(end) yields the same
                    // transition re-targeted at end (a self-loop) --
                    // confirm against the transition class
                    end.AddOut(start.outgoing[0].Copy(end));
                }
                else
                {
                    end.AddOut(new NFAEpsilonTransition(start));
                }
                return end;
            }

            // Every other repeat range is unsupported
            throw new RegExpException(
                      RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                      modifierPos,
                      pattern);
        }