Example #1
0
        /// <summary>
        /// Parses one atom plus an optional repeat modifier ('?', '*', '+'
        /// or '{') and connects the resulting fragment to <paramref name="start"/>.
        /// </summary>
        /// <param name="start">The state the parsed fragment is attached to.</param>
        /// <returns>The state at which the parsed fragment ends.</returns>
        private NFAState ParseFact(NFAState start)
        {
            // The atom is built on a temporary state first. Afterwards that
            // state is either linked from start through an epsilon
            // transition or merged into start.
            NFAState scratch = new NFAState();
            NFAState tail = ParseAtom(scratch);

            var next = PeekChar(0);
            if (next == '?' || next == '*' || next == '+' || next == '{')
            {
                tail = ParseAtomModifier(scratch, tail);
            }

            bool keepScratch = scratch.Incoming.Length > 0 &&
                               start.Outgoing.Length > 0;
            if (!keepScratch)
            {
                scratch.MergeInto(start);
                if (tail == scratch)
                {
                    // The fragment ended on the temporary state, which has
                    // just been merged into start.
                    return start;
                }
                return tail;
            }

            // The temporary state has incoming transitions and start already
            // has outgoing ones, so the two are kept apart and only linked.
            start.AddOut(new NFAEpsilonTransition(scratch));
            return tail;
        }
Example #2
0
        /// <summary>
        /// Parses one or more terms separated by '|' and wires every term
        /// between <paramref name="start"/> and a single, newly created end state.
        /// </summary>
        /// <param name="start">The state every alternative starts from.</param>
        /// <returns>The end state shared by all parsed alternatives.</returns>
        private NFAState ParseExpr(NFAState start)
        {
            NFAState exit = new NFAState();

            while (true)
            {
                // Consume the separator in front of the second and later terms.
                if (PeekChar(0) == '|')
                {
                    ReadChar('|');
                }

                NFAState branchStart = new NFAState();
                var branchEnd = ParseTerm(branchStart);

                // Attach the beginning of the term: link with an epsilon
                // transition when the term's first state has incoming
                // transitions, otherwise merge it into start.
                if (branchStart.Incoming.Length != 0)
                {
                    start.AddOut(new NFAEpsilonTransition(branchStart));
                }
                else
                {
                    branchStart.MergeInto(start);
                }

                // Attach the end of the term: an epsilon link is needed only
                // when the term's last state has outgoing transitions and the
                // exit state either has transitions already or another
                // alternative follows; otherwise the last state is merged.
                bool linkToExit = branchEnd.Outgoing.Length != 0 &&
                                  (exit.HasTransitions() || PeekChar(0) == '|');
                if (linkToExit)
                {
                    branchEnd.AddOut(new NFAEpsilonTransition(exit));
                }
                else
                {
                    branchEnd.MergeInto(exit);
                }

                if (PeekChar(0) != '|')
                {
                    break;
                }
            }
            return exit;
        }
Example #3
0
        /// <summary>
        /// Adds a chain of character transitions spelling out
        /// <paramref name="str"/> and stores <paramref name="value"/> on the
        /// state reached after the last character.
        /// </summary>
        /// <param name="str">The text to match; must contain at least one character.</param>
        /// <param name="ignoreCase">Passed on to every AddOut call; also decides
        /// whether the per-character initial-state table may be used.</param>
        /// <param name="value">The value assigned to the final state.</param>
        /// <exception cref="System.ArgumentNullException">If <paramref name="str"/> is null.</exception>
        /// <exception cref="System.ArgumentException">If <paramref name="str"/> is empty.</exception>
        public void AddTextMatch(string str, bool ignoreCase, TokenPattern value)
        {
            // Reading str[0] below requires a non-null, non-empty string.
            // Fail with a descriptive argument exception instead of a
            // NullReferenceException or IndexOutOfRangeException.
            if (str == null)
            {
                throw new System.ArgumentNullException("str");
            }
            if (str.Length == 0)
            {
                throw new System.ArgumentException(
                          "The text to match must contain at least one character.",
                          "str");
            }

            NFAState state;
            char     ch = str[0];

            if (ch < 128 && !ignoreCase)
            {
                // Case-sensitive first characters below 128 use the
                // _initialChar table, creating the entry on first use.
                state = _initialChar[ch];
                if (state == null)
                {
                    state = _initialChar[ch] = new NFAState();
                }
            }
            else
            {
                // Every other first character becomes a transition out of
                // the _initial state.
                state = _initial.AddOut(ch, ignoreCase, null);
            }

            // Chain the remaining characters, one transition per character.
            for (int i = 1; i < str.Length; i++)
            {
                state = state.AddOut(str[i], ignoreCase, null);
            }
            state.Value = value;
        }
Example #4
0
 /// <summary>
 /// Moves every incoming and outgoing transition of this state over to
 /// <paramref name="state"/>. Afterwards this state's Incoming and
 /// Outgoing members are both null.
 /// </summary>
 /// <param name="state">The state that receives the transitions.</param>
 public void MergeInto(NFAState state)
 {
     // Incoming transitions are registered on the target and re-pointed
     // at it. The length is re-read on every pass, as in a plain for loop.
     int index = 0;
     while (index < Incoming.Length)
     {
         state.AddIn(Incoming[index]);
         Incoming[index].State = state;
         index++;
     }
     Incoming = null;

     // Outgoing transitions are handed to the target through AddOut.
     index = 0;
     while (index < Outgoing.Length)
     {
         state.AddOut(Outgoing[index]);
         index++;
     }
     Outgoing = null;
 }
Example #5
0
        /// <summary>
        /// Parses the contents of a character set up to, but not including,
        /// the closing ']' and adds a single character-range transition from
        /// <paramref name="start"/> to a new end state.
        /// </summary>
        /// <param name="start">The state the range transition leaves from.</param>
        /// <returns>The new state the range transition leads to.</returns>
        private NFAState ParseCharSet(NFAState start)
        {
            NFAState target = new NFAState();

            // A leading '^' is consumed and selects the 'true' variant of
            // the transition's first constructor argument
            // (presumably an inverted set; confirm against NFACharRangeTransition).
            bool leadingCaret = PeekChar(0) == '^';
            if (leadingCaret)
            {
                ReadChar('^');
            }
            var set = new NFACharRangeTransition(leadingCaret, _ignoreCase, target);
            start.AddOut(set);

            while (PeekChar(0) > 0)
            {
                char first = (char)PeekChar(0);

                if (first == ']')
                {
                    // The closing bracket is left for the caller to consume.
                    return target;
                }
                if (first == '\\')
                {
                    set.AddCharacter(ReadEscapeChar());
                    continue;
                }

                ReadChar(first);

                // "a-z" is a range only when the '-' is followed by a
                // character other than the closing bracket.
                bool isRange = PeekChar(0) == '-' &&
                               PeekChar(1) > 0 &&
                               PeekChar(1) != ']';
                if (!isRange)
                {
                    set.AddCharacter(first);
                    continue;
                }

                ReadChar('-');
                set.AddRange(first, ReadChar());
            }
            return target;
        }
Example #6
0
        /// <summary>
        /// Parses a single character, or delegates to
        /// <see cref="ParseEscapeChar"/> when the next character is a backslash.
        /// </summary>
        /// <param name="start">The state the new transition leaves from.</param>
        /// <returns>The state reached through the new transition.</returns>
        /// <exception cref="RegExpException">If the next character is '^' or '$'.</exception>
        private NFAState ParseChar(NFAState start)
        {
            var next = PeekChar(0);

            if (next == '\\')
            {
                return ParseEscapeChar(start);
            }

            if (next == '^' || next == '$')
            {
                // These characters are rejected as unsupported.
                throw new RegExpException(
                          RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
                          _pos,
                          _pattern);
            }

            return start.AddOut(ReadChar(), _ignoreCase, new NFAState());
        }
Example #7
0
        /// <summary>
        /// Parses a single atom: '.', a parenthesised expression, a
        /// bracketed character set, or a single (possibly escaped) character.
        /// </summary>
        /// <param name="start">The state the atom is attached to.</param>
        /// <returns>The state at which the atom ends.</returns>
        /// <exception cref="RegExpException">
        /// If the input has ended (PeekChar returns -1) or the next character
        /// is one of ')', ']', '{', '}', '?', '*', '+' or '|'.
        /// </exception>
        private NFAState ParseAtom(NFAState start)
        {
            var next = PeekChar(0);

            if (next == '.')
            {
                ReadChar('.');
                return start.AddOut(new NFADotTransition(new NFAState()));
            }

            if (next == '(')
            {
                ReadChar('(');
                NFAState groupEnd = ParseExpr(start);
                ReadChar(')');
                return groupEnd;
            }

            if (next == '[')
            {
                ReadChar('[');
                NFAState setEnd = ParseCharSet(start);
                ReadChar(']');
                return setEnd;
            }

            // Characters that cannot begin an atom.
            switch (next)
            {
            case -1:
            case ')':
            case ']':
            case '{':
            case '}':
            case '?':
            case '*':
            case '+':
            case '|':
                throw new RegExpException(
                          RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                          _pos,
                          _pattern);
            }

            return ParseChar(start);
        }
Example #8
0
        /// <summary>
        /// Parses an escape sequence. The class escapes \d, \D, \s, \S, \w
        /// and \W each get their own transition type; anything else is read
        /// through ReadEscapeChar and added as a plain character transition.
        /// </summary>
        /// <param name="start">The state the new transition leaves from.</param>
        /// <returns>The state reached through the new transition.</returns>
        private NFAState ParseEscapeChar(NFAState start)
        {
            NFAState target = new NFAState();

            if (PeekChar(0) == '\\' && PeekChar(1) > 0)
            {
                char kind = (char)PeekChar(1);

                if ("dDsSwW".IndexOf(kind) >= 0)
                {
                    // Consume the backslash and the class letter before
                    // building the transition.
                    ReadChar();
                    ReadChar();

                    switch (kind)
                    {
                    case 'd':
                        return start.AddOut(new NFADigitTransition(target));

                    case 'D':
                        return start.AddOut(new NFANonDigitTransition(target));

                    case 's':
                        return start.AddOut(new NFAWhitespaceTransition(target));

                    case 'S':
                        return start.AddOut(new NFANonWhitespaceTransition(target));

                    case 'w':
                        return start.AddOut(new NFAWordTransition(target));

                    case 'W':
                        return start.AddOut(new NFANonWordTransition(target));
                    }
                }
            }

            return start.AddOut(ReadEscapeChar(), _ignoreCase, target);
        }
Example #9
0
        /// <summary>
        /// Parses a repeat modifier ('?', '*', '+' or '{min[,[max]]}') and
        /// rewires the fragment between <paramref name="start"/> and
        /// <paramref name="end"/> accordingly. Only repeat counts equal to
        /// 0..1, 0..unbounded and 1..unbounded are supported.
        /// </summary>
        /// <param name="start">The first state of the fragment being repeated.</param>
        /// <param name="end">The last state of the fragment being repeated.</param>
        /// <returns>The state at which the modified fragment ends.</returns>
        /// <exception cref="RegExpException">
        /// If the modifier character is unexpected, the repeat count is
        /// invalid or unsupported, or the modifier is followed by '?' or '+'.
        /// </exception>
        private NFAState ParseAtomModifier(NFAState start, NFAState end)
        {
            int modifierPos = _pos;
            int min;
            int max;    // -1 means "no upper bound"

            // Read min and max
            var symbol = ReadChar();
            if (symbol == '?')
            {
                min = 0;
                max = 1;
            }
            else if (symbol == '*')
            {
                min = 0;
                max = -1;
            }
            else if (symbol == '+')
            {
                min = 1;
                max = -1;
            }
            else if (symbol == '{')
            {
                min = ReadNumber();
                max = min;
                if (PeekChar(0) == ',')
                {
                    ReadChar(',');
                    // "{n,}" leaves the upper bound open, "{n,m}" reads it.
                    max = (PeekChar(0) != '}') ? ReadNumber() : -1;
                }
                ReadChar('}');
                if (max == 0 || (max > 0 && min > max))
                {
                    throw new RegExpException(
                              RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                              modifierPos,
                              _pattern);
                }
            }
            else
            {
                // The offending character has already been consumed, hence
                // the position one step back.
                throw new RegExpException(
                          RegExpException.ErrorType.UNEXPECTED_CHARACTER,
                          _pos - 1,
                          _pattern);
            }

            // A '?' or '+' directly after the modifier is rejected
            // (reluctant and possessive modifiers).
            if (PeekChar(0) == '?' || PeekChar(0) == '+')
            {
                throw new RegExpException(
                          RegExpException.ErrorType.UNSUPPORTED_SPECIAL_CHARACTER,
                          _pos,
                          _pattern);
            }

            // Handle supported repeaters
            bool zeroOrOne  = (min == 0 && max == 1);
            bool zeroOrMore = (min == 0 && max == -1);
            bool oneOrMore  = (min == 1 && max == -1);

            if (zeroOrOne)
            {
                // An epsilon transition lets the fragment be skipped.
                return start.AddOut(new NFAEpsilonTransition(end));
            }

            if (zeroOrMore)
            {
                if (end.Outgoing.Length != 0)
                {
                    end.AddOut(new NFAEpsilonTransition(start));
                }
                else
                {
                    // With no outgoing transitions on end, it is merged into
                    // start instead of being linked back to it.
                    end.MergeInto(start);
                }
                return start;
            }

            if (oneOrMore)
            {
                // When the fragment is exactly one transition from start to
                // end, end gets a copy of that transition created with
                // Copy(end); otherwise end loops back to start.
                bool singleTransition = start.Outgoing.Length == 1 &&
                                        end.Outgoing.Length == 0 &&
                                        end.Incoming.Length == 1 &&
                                        start.Outgoing[0] == end.Incoming[0];
                if (singleTransition)
                {
                    end.AddOut(start.Outgoing[0].Copy(end));
                }
                else
                {
                    end.AddOut(new NFAEpsilonTransition(start));
                }
                return end;
            }

            // Any other min/max combination is not supported.
            throw new RegExpException(
                      RegExpException.ErrorType.INVALID_REPEAT_COUNT,
                      modifierPos,
                      _pattern);
        }