Ejemplo n.º 1
0
        /// <summary>
        /// Combines two character-set nodes into a new node that holds the ranges of both.
        /// </summary>
        /// <param name="node1">Left operand; its <c>Exp</c> is expected to be a non-reversed <c>CharSetExpression</c>.</param>
        /// <param name="node2">Right operand; its <c>Exp</c> is expected to be a non-reversed <c>CharSetExpression</c>.</param>
        /// <returns>A new <c>RegexNode</c> wrapping a fresh, non-reversed <c>CharSetExpression</c>.</returns>
        /// <remarks>
        /// The operand expectations and the result of every range insertion are checked with
        /// <c>Debug.Assert</c> only; nothing is thrown explicitly by this operator.
        /// </remarks>
        public static RegexNode operator %(RegexNode node1, RegexNode node2)
        {
            var lhsSet = node1.Exp as CharSetExpression;
            var rhsSet = node2.Exp as CharSetExpression;

            // Both operands must be plain (non-reversed) character sets.
            Debug.Assert(!(lhsSet == null || rhsSet == null || lhsSet.IsReverse || rhsSet.IsReverse));

            var merged = new CharSetExpression { IsReverse = false };

            // Ranges of the left operand are appended directly to the new set.
            foreach (var range in lhsSet.Ranges)
            {
                merged.Ranges.Add(range);
            }

            // Ranges of the right operand go through AddRangeWithConflict, whose result is asserted.
            foreach (var range in rhsSet.Ranges)
            {
                bool added = merged.AddRangeWithConflict(range.Begin, range.End);
                Debug.Assert(added, "Failed");
            }

            return new RegexNode(merged);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a node whose expression is a new character set populated from the pair (a, b).
        /// </summary>
        /// <param name="a">First bound handed to <c>AddRangeWithConflict</c>.</param>
        /// <param name="b">Second bound handed to <c>AddRangeWithConflict</c>.</param>
        /// <returns>A new <c>RegexNode</c> wrapping the character set.</returns>
        public static RegexNode GetCharSetExpression(char a, char b)
        {
            var charSet = new CharSetExpression();

            // The value returned by AddRangeWithConflict is not inspected here.
            charSet.AddRangeWithConflict(a, b);

            return new RegexNode(charSet);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Produces a node containing the ranges of two character-set nodes.
        /// </summary>
        /// <param name="node1">First operand; expected to wrap a non-reversed <c>CharSetExpression</c>.</param>
        /// <param name="node2">Second operand; expected to wrap a non-reversed <c>CharSetExpression</c>.</param>
        /// <returns>A newly created <c>RegexNode</c> around a non-reversed character set.</returns>
        /// <remarks>
        /// Violated expectations and failed insertions are reported through <c>Debug.Assert</c> only.
        /// </remarks>
        public static RegexNode operator %(RegexNode node1, RegexNode node2)
        {
            CharSetExpression first = node1.Exp as CharSetExpression;
            CharSetExpression second = node2.Exp as CharSetExpression;

            Debug.Assert(first != null && second != null && !first.IsReverse && !second.IsReverse);

            CharSetExpression combined = new CharSetExpression();
            combined.IsReverse = false;

            // Start from the first operand's ranges, appended without any conflict check.
            foreach (var existing in first.Ranges)
            {
                combined.Ranges.Add(existing);
            }

            // Then fold in the second operand's ranges through AddRangeWithConflict.
            foreach (var extra in second.Ranges)
            {
                if (combined.AddRangeWithConflict(extra.Begin, extra.End))
                {
                    continue;
                }

                Debug.Assert(false, "Failed");
            }

            return new RegexNode(combined);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Parses a single atom at the current position of the source window: the anchors
        /// ^ and $, the wildcard '.', an escape sequence introduced by '\' or '/',
        /// a bracketed class such as [a-z], or one literal character.
        /// </summary>
        /// <returns>
        /// The parsed expression, or <c>null</c> when <c>PeekChar</c> reports
        /// <c>SlidingTextWindow.InvalidCharacter</c> or when the next character is one of
        /// ( ) + * ? { } | (the window is stepped back by one after that match).
        /// </returns>
        /// <exception cref="ArgumentException">
        /// An escape sequence is not recognised or a bracketed class is malformed.
        /// </exception>
        public Expression ParseCharSet()
        {
            if (_sourceWindow.PeekChar() == SlidingTextWindow.InvalidCharacter)
            {
                return null;
            }

            if (_sourceWindow.AdvanceIfMatches('^'))
            {
                return new BeginExpression();
            }

            if (_sourceWindow.AdvanceIfMatches('$'))
            {
                return new EndExpression();
            }

            if (_sourceWindow.AdvanceIfMatches('.'))
            {
                // '.' is represented as the range (char)1 .. char.MaxValue.
                var anyChar = new CharSetExpression();
                anyChar.Add((char)1, char.MaxValue);
                return anyChar;
            }

            // Both '\' and '/' introduce an escape sequence (\d, \s, \n, ...).
            if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
            {
                return ParseEscapedCharSet();
            }

            // Bracketed class such as [a-z].
            if (_sourceWindow.AdvanceIfMatches('['))
            {
                return ParseBracketedCharSet();
            }

            char op = default(char);
            if (_sourceWindow.AdvanceIfOneOf("()+*?{}|", out op))
            {
                // Operators are not handled here: undo the advance and report "no atom".
                _sourceWindow.AdvanceChar(-1);
                return null;
            }

            // Anything else stands for itself.
            var literal = new CharSetExpression();
            literal.IsReverse = false;
            literal.Add(_sourceWindow.NextChar());
            return literal;
        }

        // Parses the character following an already consumed '\' or '/' into a character set.
        // Upper-case class letters (S, D, L, W) produce the reversed form of the lower-case class.
        private CharSetExpression ParseEscapedCharSet()
        {
            var set = new CharSetExpression();
            var escaped = _sourceWindow.PeekChar();

            char literal;
            if (TryUnescapeLiteral(escaped, out literal))
            {
                set.Add(literal);
            }
            else
            {
                switch (escaped)
                {
                    case 'S':
                        set.IsReverse = true;
                        goto case 's';

                    case 's':
                        // whitespace: space, CR, LF, tab
                        set.Add(' ');
                        set.Add('\r');
                        set.Add('\n');
                        set.Add('\t');
                        break;

                    case 'D':
                        set.IsReverse = true;
                        goto case 'd';

                    case 'd':
                        set.Add('0', '9');
                        break;

                    case 'L':
                        set.IsReverse = true;
                        goto case 'l';

                    case 'l':
                        // underscore and ASCII letters
                        set.Add('_');
                        set.Add('A', 'Z');
                        set.Add('a', 'z');
                        break;

                    case 'W':
                        set.IsReverse = true;
                        goto case 'w';

                    case 'w':
                        // digits, underscore and ASCII letters
                        set.Add('0', '9');
                        set.Add('_');
                        set.Add('A', 'Z');
                        set.Add('a', 'z');
                        break;

                    default:
                        throw new ArgumentException("Error character after \\");
                }
            }

            // The escape character was only peeked so far; consume it now.
            _sourceWindow.AdvanceChar();
            return set;
        }

        // Parses the body of a bracketed class after the opening '[' has been consumed,
        // up to and including the closing ']'.
        private CharSetExpression ParseBracketedCharSet()
        {
            var set = new CharSetExpression();
            set.IsReverse = _sourceWindow.AdvanceIfMatches('^');

            // rangeOpen is true while 'lower' has been read but not yet added to the set,
            // i.e. the next element read becomes 'upper'.
            bool rangeOpen = false;
            char lower = default(char), upper = default(char);

            while (true)
            {
                // Step 1: read one element (escaped or plain).
                char element;
                if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
                {
                    if (!TryUnescapeLiteral(_sourceWindow.PeekChar(), out element))
                    {
                        throw new ArgumentException("Error syntax in []");
                    }

                    _sourceWindow.AdvanceChar();
                    // 'element' now holds the unescaped value, so [\n] contains a line feed
                    // and not the letter 'n'.
                }
                else if (_sourceWindow.AdvanceIfMatches("-]"))
                {
                    throw new ArgumentException("-] occurred.");
                }
                else
                {
                    element = _sourceWindow.NextChar();
                    if (!_sourceWindow.IsValid(element))
                    {
                        throw new ArgumentException("Error in []");
                    }
                }

                if (rangeOpen)
                {
                    upper = element;
                }
                else
                {
                    lower = element;
                }
                rangeOpen = !rangeOpen;

                // Step 2: decide what follows the element.
                if (_sourceWindow.AdvanceIfMatches(']'))
                {
                    if (rangeOpen)
                    {
                        // single character: range of length one
                        upper = lower;
                    }
                    if (!set.AddRangeWithConflict(lower, upper))
                    {
                        throw new ArgumentException();
                    }
                    break;
                }
                else if (_sourceWindow.AdvanceIfMatches('-'))
                {
                    // '-' is only legal directly after the lower bound.
                    if (!rangeOpen)
                    {
                        throw new ArgumentException("Invalid - in []");
                    }
                }
                else
                {
                    // Look ahead without consuming: the character belongs to the next
                    // element and is read by step 1 of the next iteration.
                    var upcoming = _sourceWindow.PeekChar();
                    if (_sourceWindow.IsValid(upcoming))
                    {
                        if (rangeOpen)
                        {
                            upper = lower;
                        }
                        if (set.AddRangeWithConflict(lower, upper))
                        {
                            rangeOpen = false;
                        }
                        else
                        {
                            throw new ArgumentException();
                        }
                    }
                }
            }

            return set;
        }

        // Maps the character after an escape introducer to the literal it denotes:
        // r, n, t become CR, LF, tab; the listed punctuation characters stand for themselves.
        // Returns false for every other character.
        private static bool TryUnescapeLiteral(char escaped, out char literal)
        {
            switch (escaped)
            {
                case 'r':
                    literal = '\r';
                    return true;

                case 'n':
                    literal = '\n';
                    return true;

                case 't':
                    literal = '\t';
                    return true;

                // characters that must be escaped to be used literally
                case '\\':
                case '/':
                case '(':
                case ')':
                case '+':
                case '*':
                case '?':
                case '|':
                case '{':
                case '}':
                case '[':
                case ']':
                case '<':
                case '>':
                case '^':
                case '$':
                case '!':
                case '=':
                case '.':
                    literal = escaped;
                    return true;

                default:
                    literal = default(char);
                    return false;
            }
        }
Ejemplo n.º 5
0
        // Parses one atom at the current position of the source window:
        // ^  $  .  an escape sequence (\d, \s, \n, ...)  a bracketed class like [a-z]  or a literal character.
        // Returns null when PeekChar reports SlidingTextWindow.InvalidCharacter, or when the next
        // character is one of ( ) + * ? { } | (the window is stepped back by one after that match).
        // Throws ArgumentException for an unknown escape or a malformed bracketed class.
        public Expression ParseCharSet()
        {
            if (_sourceWindow.PeekChar() == SlidingTextWindow.InvalidCharacter)
                return null;
            if (_sourceWindow.AdvanceIfMatches('^'))
                return new BeginExpression();
            else if (_sourceWindow.AdvanceIfMatches('$'))
                return new EndExpression();
            else if (_sourceWindow.AdvanceIfMatches('.'))
            {
                // '.' is represented as the range (char)1 .. char.MaxValue
                var ret = new CharSetExpression();
                ret.Add((char)1,char.MaxValue);
                return ret;
            }
            else if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
            {
                // escape sequence: \d etc. ('/' is accepted as an introducer as well)
                var exp = new CharSetExpression();
                var c2 = _sourceWindow.PeekChar();
                switch (c2)
                {
                    case 'r':
                        exp.Add('\r');
                        break;
                    case 'n':
                        exp.Add('\n');
                        break;
                    case 't':
                        exp.Add('\t');
                        break;
                    // characters that must be escaped to be used literally
                    case '\\':
                    case '/':
                    case '(':
                    case ')':
                    case '+':
                    case '*':
                    case '?':
                    case '|':
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case '<':
                    case '>':
                    case '^':
                    case '$':
                    case '!':
                    case '=':
                    case '.':
                        exp.Add(c2);
                        break;
                    // upper-case class letters produce the reversed form of the lower-case class
                    case 'S':
                        exp.IsReverse = true;
                        goto case 's';
                    case 's':
                        // whitespace: space, CR, LF, tab
                        exp.Add(' ');
                        exp.Add('\r');
                        exp.Add('\n');
                        exp.Add('\t');
                        break;
                    case 'D':
                        exp.IsReverse = true;
                        goto case 'd';
                    case 'd':
                        exp.Add('0', '9');
                        break;
                    case 'L':
                        exp.IsReverse = true;
                        goto case 'l';
                    case 'l':
                        // underscore and ASCII letters
                        exp.Add('_');
                        exp.Add('A', 'Z');
                        exp.Add('a', 'z');
                        break;
                    case 'W':
                        exp.IsReverse = true;
                        goto case 'w';
                    case 'w':
                        // digits, underscore and ASCII letters
                        exp.Add('0', '9');
                        exp.Add('_');
                        exp.Add('A', 'Z');
                        exp.Add('a', 'z');
                        break;
                    default:
                        throw new ArgumentException("Error character after \\");
                }
                // the escape character was only peeked so far; consume it now
                _sourceWindow.AdvanceChar();
                return exp;
            }
            //stuff like [a-z]
            else if (_sourceWindow.AdvanceIfMatches('['))
            {
                var exp = new CharSetExpression();
                exp.IsReverse = _sourceWindow.AdvanceIfMatches('^');
                // midState is true while lhs has been read but not yet added to the set,
                // i.e. the next element read becomes rhs
                bool midState = false;
                char lhs = default(char), rhs = default(char);
                while (true)
                {
                    // step 1: read one element (escaped or plain)
                    if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
                    {
                        var c = _sourceWindow.PeekChar();
                        char tmp = default(char);
                        switch (c)
                        {
                            case 'r':
                                tmp = '\r';
                                break;
                            case 'n':
                                tmp = '\n';
                                break;
                            case 't':
                                tmp = '\t';
                                break;
                            // characters that must be escaped to be used literally
                            case '\\':
                            case '/':
                            case '(':
                            case ')':
                            case '+':
                            case '*':
                            case '?':
                            case '|':
                            case '{':
                            case '}':
                            case '[':
                            case ']':
                            case '<':
                            case '>':
                            case '^':
                            case '$':
                            case '!':
                            case '=':
                            case '.':
                                tmp = c;
                                break;
                            default:
                                throw new ArgumentException("Error syntax in []");
                        }
                        _sourceWindow.AdvanceChar();
                        // store the unescaped value (tmp), not the raw escape letter,
                        // so [\n] contains a line feed and not the letter 'n'
                        if (midState)
                            rhs = tmp;
                        else
                            lhs = tmp;
                        midState = !midState;
                    }
                    else if (_sourceWindow.AdvanceIfMatches("-]"))
                        throw new ArgumentException("-] occurred.");
                    else
                    {
                        var c2 = _sourceWindow.NextChar();
                        if (_sourceWindow.IsValid(c2))
                        {
                            if (midState)
                                rhs = c2;
                            else
                                lhs = c2;
                            midState = !midState;
                        }
                        else throw new ArgumentException("Error in []");
                    }
                    // step 2: decide what follows the element
                    if (_sourceWindow.AdvanceIfMatches(']'))
                    {
                        // single character: range of length one
                        if (midState)
                            rhs = lhs;
                        if (!exp.AddRangeWithConflict(lhs, rhs))
                            throw new ArgumentException();
                        break;
                    }
                    else if (_sourceWindow.AdvanceIfMatches('-'))
                    {
                        // '-' is only legal directly after the lower bound
                        if (!midState)
                            throw new ArgumentException("Invalid - in []");
                    }
                    else
                    {
                        // look ahead without consuming: the character belongs to the next
                        // element and is read by step 1 of the next iteration
                        var c2 = _sourceWindow.PeekChar();
                        if (_sourceWindow.IsValid(c2))
                        {
                            if (midState)
                            {
                                rhs = lhs;
                            }
                            if (exp.AddRangeWithConflict(lhs, rhs))
                                midState = false;
                            else throw new ArgumentException();

                        }
                    }
                }
                return exp;
            }
            else
            {
                char c = default(char);
                if (_sourceWindow.AdvanceIfOneOf("()+*?{}|",out c))
                {
                    // operators are not handled here: undo the advance and report "no atom"
                    _sourceWindow.AdvanceChar(-1);
                    return null;
                }
                else
                {
                    //the character itself
                    var exp = new CharSetExpression();
                    exp.IsReverse = false;
                    exp.Add(_sourceWindow.NextChar());
                    return exp;
                }
            }
        }
Ejemplo n.º 6
0
 // Wraps a new character set, populated from the pair (a, b), in a RegexNode.
 // The boolean result of AddRangeWithConflict is not inspected.
 public static RegexNode GetCharSetExpression(char a, char b)
 {
     CharSetExpression rangeSet = new CharSetExpression();

     rangeSet.AddRangeWithConflict(a, b);

     RegexNode node = new RegexNode(rangeSet);
     return node;
 }