Exemple #1
0
        /// <summary>
        /// Combines two character-set nodes into a new node that holds the ranges of both.
        /// </summary>
        /// <param name="node1">Left operand; its ranges are copied into the result as-is.</param>
        /// <param name="node2">Right operand; its ranges are added through <c>AddRangeWithConflict</c>.</param>
        /// <returns>A new <c>RegexNode</c> wrapping the combined, non-reversed character set.</returns>
        /// <remarks>
        /// Both operands are expected to wrap non-reversed <c>CharSetExpression</c> instances.
        /// This expectation, and the success of every range insertion, is only checked with
        /// <c>Debug.Assert</c>.
        /// </remarks>
        public static RegexNode operator %(RegexNode node1, RegexNode node2)
        {
            var first  = node1.Exp as CharSetExpression;
            var second = node2.Exp as CharSetExpression;

            Debug.Assert(first != null && second != null && !first.IsReverse && !second.IsReverse);

            var union = new CharSetExpression { IsReverse = false };

            // Ranges of the left operand are taken over verbatim.
            foreach (var range in first.Ranges)
            {
                union.Ranges.Add(range);
            }

            // Ranges of the right operand go through the conflict-aware insertion.
            foreach (var range in second.Ranges)
            {
                bool added = union.AddRangeWithConflict(range.Begin, range.End);
                Debug.Assert(added, "Failed");
            }

            return new RegexNode(union);
        }
Exemple #2
0
        /// <summary>
        /// Builds a node wrapping a fresh character set to which the range
        /// (<paramref name="a"/>, <paramref name="b"/>) has been added.
        /// </summary>
        /// <param name="a">First bound passed to <c>AddRangeWithConflict</c>.</param>
        /// <param name="b">Second bound passed to <c>AddRangeWithConflict</c>.</param>
        /// <returns>A new <c>RegexNode</c> around the character set.</returns>
        public static RegexNode GetCharSetExpression(char a, char b)
        {
            var charSet = new CharSetExpression();

            // The result of the insertion is intentionally not inspected here.
            charSet.AddRangeWithConflict(a, b);

            var node = new RegexNode(charSet);
            return node;
        }
Exemple #3
0
        /// <summary>
        /// Builds an epsilon-NFA fragment for a character set.
        /// </summary>
        /// <param name="expression">The character set whose ranges are turned into edges.</param>
        /// <param name="param">The automaton that owns the fragment and receives the edges.</param>
        /// <returns>The newly created fragment.</returns>
        public override EpsilonNfa Apply(CharSetExpression expression, Automaton param)
        {
            // A set such as [a-z A-Z] is an alternation: the same pair of states is
            // connected by one range edge per range in the set.
            var fragment = new EpsilonNfa(param);

            foreach (var range in expression.Ranges)
            {
                param.AddCharRange(fragment.Start, fragment.End, range);
            }

            return fragment;
        }
Exemple #4
0
 /// <summary>
 /// Produces the complement of a character-set node by copying its ranges and
 /// flipping the <c>IsReverse</c> flag.
 /// </summary>
 /// <param name="node">The node to negate.</param>
 /// <returns>
 /// A new <c>RegexNode</c> with the flipped set, or <c>null</c> when
 /// <paramref name="node"/> does not wrap a <c>CharSetExpression</c>.
 /// </returns>
 public static RegexNode operator !(RegexNode node)
 {
     var source = node.Exp as CharSetExpression;
     if (source == null)
     {
         // Only character sets can be negated.
         return null;
     }

     var flipped = new CharSetExpression();
     foreach (var range in source.Ranges)
     {
         flipped.Ranges.Add(range);
     }
     flipped.IsReverse = !source.IsReverse;

     return new RegexNode(flipped);
 }
Exemple #5
0
        /// <summary>
        /// Negates a character-set node: the result carries the same ranges with the
        /// opposite <c>IsReverse</c> flag.
        /// </summary>
        /// <param name="node">The node to negate.</param>
        /// <returns>
        /// A new <c>RegexNode</c>, or <c>null</c> if <paramref name="node"/> does not
        /// wrap a <c>CharSetExpression</c>.
        /// </returns>
        public static RegexNode operator !(RegexNode node)
        {
            var original = node.Exp as CharSetExpression;
            if (original == null)
            {
                return null;
            }

            // Copy the ranges one by one, then invert the flag on the copy.
            var negated = new CharSetExpression();
            foreach (var range in original.Ranges)
            {
                negated.Ranges.Add(range);
            }
            negated.IsReverse = !original.IsReverse;

            return new RegexNode(negated);
        }
Exemple #6
0
 /// <summary>
 /// Determines whether another character set has the same <c>IsReverse</c> flag and
 /// the same ranges in the same order.
 /// </summary>
 /// <param name="obj">The character set to compare with; may be <c>null</c>.</param>
 /// <returns>
 /// <c>true</c> when both sets agree on <c>IsReverse</c>, on the number of ranges and on
 /// every range at the same index; <c>false</c> otherwise, including when
 /// <paramref name="obj"/> is <c>null</c>.
 /// </returns>
 public bool Equals(CharSetExpression obj)
 {
     // A null argument can never equal this instance. Without this guard the member
     // accesses below would throw NullReferenceException. ReferenceEquals is used so
     // the check does not depend on any user-defined == operator.
     if (object.ReferenceEquals(obj, null))
     {
         return false;
     }

     if (IsReverse != obj.IsReverse || Ranges.Count != obj.Ranges.Count)
     {
         return false;
     }

     // Same length: compare the ranges position by position.
     for (int i = 0; i < Ranges.Count; ++i)
     {
         if (Ranges[i] != obj.Ranges[i])
         {
             return false;
         }
     }

     return true;
 }
Exemple #7
0
        /// <summary>
        /// Merges the ranges of two character-set nodes into a single new node.
        /// </summary>
        /// <param name="node1">Left operand; expected to wrap a non-reversed character set.</param>
        /// <param name="node2">Right operand; expected to wrap a non-reversed character set.</param>
        /// <returns>A new <c>RegexNode</c> wrapping the merged, non-reversed set.</returns>
        /// <remarks>
        /// The operand expectations and the success of each insertion are verified with
        /// <c>Debug.Assert</c> only.
        /// </remarks>
        public static RegexNode operator %(RegexNode node1, RegexNode node2)
        {
            var lhsSet = node1.Exp as CharSetExpression;
            var rhsSet = node2.Exp as CharSetExpression;

            Debug.Assert(lhsSet != null && rhsSet != null && !lhsSet.IsReverse && !rhsSet.IsReverse);

            var merged = new CharSetExpression { IsReverse = false };

            // Left ranges are appended directly to the result's range collection.
            foreach (var item in lhsSet.Ranges)
            {
                merged.Ranges.Add(item);
            }

            // Right ranges are inserted through AddRangeWithConflict; a rejected
            // insertion is reported via a debug assertion.
            foreach (var item in rhsSet.Ranges)
            {
                bool inserted = merged.AddRangeWithConflict(item.Begin, item.End);
                Debug.Assert(inserted, "Failed");
            }

            var result = new RegexNode(merged);
            return result;
        }
Exemple #8
0
 /// <summary>
 /// Handles a character set by returning a new <c>CharSetExpression</c> constructed
 /// from <paramref name="expression"/>.
 /// </summary>
 /// <param name="expression">The character set to duplicate.</param>
 /// <param name="param">Not used by this overload.</param>
 /// <returns>The newly constructed character set.</returns>
 public override Expression Apply(CharSetExpression expression, MergeParameter param)
 {
     var duplicate = new CharSetExpression(expression);
     return duplicate;
 }
Exemple #9
0
        /// <summary>
        /// Parses one atom at the current position of the source window: the anchors
        /// <c>^</c> and <c>$</c>, the wildcard <c>.</c>, an escape sequence introduced by
        /// <c>\</c> or <c>/</c>, a bracketed set such as <c>[a-z]</c>, or a single literal
        /// character.
        /// </summary>
        /// <returns>
        /// The parsed expression, or <c>null</c> when the window is at
        /// <c>SlidingTextWindow.InvalidCharacter</c> or the next character is one of
        /// <c>()+*?{}|</c> (in which case the window is moved back with <c>AdvanceChar(-1)</c>).
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown for an unsupported escape selector or a malformed bracketed set.
        /// </exception>
        public Expression ParseCharSet()
        {
            if (_sourceWindow.PeekChar() == SlidingTextWindow.InvalidCharacter)
            {
                return null;
            }
            if (_sourceWindow.AdvanceIfMatches('^'))
            {
                return new BeginExpression();
            }
            else if (_sourceWindow.AdvanceIfMatches('$'))
            {
                return new EndExpression();
            }
            else if (_sourceWindow.AdvanceIfMatches('.'))
            {
                // The wildcard covers every character from (char)1 up to char.MaxValue.
                var ret = new CharSetExpression();
                ret.Add((char)1, char.MaxValue);
                return ret;
            }
            else if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
            {
                // Escape sequences such as \d. The selector is only peeked here and is
                // consumed after the switch, once it is known to be supported.
                var exp = new CharSetExpression();
                var c2 = _sourceWindow.PeekChar();
                switch (c2)
                {
                    case 'r':
                        exp.Add('\r');
                        break;

                    case 'n':
                        exp.Add('\n');
                        break;

                    case 't':
                        exp.Add('\t');
                        break;

                    // Characters that must be escaped to be taken literally.
                    case '\\':
                    case '/':
                    case '(':
                    case ')':
                    case '+':
                    case '*':
                    case '?':
                    case '|':
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case '<':
                    case '>':
                    case '^':
                    case '$':
                    case '!':
                    case '=':
                    case '.':
                        exp.Add(c2);
                        break;

                    // Upper-case selectors are the reversed form of the lower-case ones.
                    case 'S':
                        exp.IsReverse = true;
                        goto case 's';

                    case 's':
                        // Whitespace: space, CR, LF and tab.
                        exp.Add(' ');
                        exp.Add('\r');
                        exp.Add('\n');
                        exp.Add('\t');
                        break;

                    case 'D':
                        exp.IsReverse = true;
                        goto case 'd';

                    case 'd':
                        exp.Add('0', '9');
                        break;

                    case 'L':
                        exp.IsReverse = true;
                        goto case 'l';

                    case 'l':
                        // Underscore and ASCII letters.
                        exp.Add('_');
                        exp.Add('A', 'Z');
                        exp.Add('a', 'z');
                        break;

                    case 'W':
                        exp.IsReverse = true;
                        goto case 'w';

                    case 'w':
                        // Digits, underscore and ASCII letters.
                        exp.Add('0', '9');
                        exp.Add('_');
                        exp.Add('A', 'Z');
                        exp.Add('a', 'z');
                        break;

                    default:
                        throw new ArgumentException("Error character after \\");
                }
                _sourceWindow.AdvanceChar();
                return exp;
            }
            // Bracketed sets such as [a-z].
            else if (_sourceWindow.AdvanceIfMatches('['))
            {
                var exp = new CharSetExpression();
                exp.IsReverse = _sourceWindow.AdvanceIfMatches('^');

                // midState is true while a lower bound (lhs) has been read and the
                // matching upper bound (rhs) is still pending.
                bool midState = false;
                char lhs = default(char), rhs = default(char);
                while (true)
                {
                    // Step 1: read one character (escaped or plain) into lhs or rhs.
                    if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
                    {
                        var c = _sourceWindow.PeekChar();
                        char tmp = default(char);
                        switch (c)
                        {
                            case 'r':
                                tmp = '\r';
                                break;

                            case 'n':
                                tmp = '\n';
                                break;

                            case 't':
                                tmp = '\t';
                                break;

                            // Characters that must be escaped to be taken literally.
                            case '\\':
                            case '/':
                            case '(':
                            case ')':
                            case '+':
                            case '*':
                            case '?':
                            case '|':
                            case '{':
                            case '}':
                            case '[':
                            case ']':
                            case '<':
                            case '>':
                            case '^':
                            case '$':
                            case '!':
                            case '=':
                            case '.':
                                tmp = c;
                                break;

                            default:
                                throw new ArgumentException("Error syntax in []");
                        }
                        _sourceWindow.AdvanceChar();

                        // Store the translated character. Storing the raw selector would
                        // make "\n" inside brackets denote the letter 'n' instead of a
                        // line feed (likewise for \r and \t).
                        if (midState)
                        {
                            rhs = tmp;
                        }
                        else
                        {
                            lhs = tmp;
                        }
                        midState = !midState;
                    }
                    else if (_sourceWindow.AdvanceIfMatches("-]"))
                    {
                        throw new ArgumentException("-] occurred.");
                    }
                    else
                    {
                        var c2 = _sourceWindow.NextChar();
                        if (_sourceWindow.IsValid(c2))
                        {
                            if (midState)
                            {
                                rhs = c2;
                            }
                            else
                            {
                                lhs = c2;
                            }
                            midState = !midState;
                        }
                        else
                        {
                            throw new ArgumentException("Error in []");
                        }
                    }

                    // Step 2: decide what follows the character just read.
                    if (_sourceWindow.AdvanceIfMatches(']'))
                    {
                        // End of the set: a pending lower bound becomes a single character.
                        if (midState)
                        {
                            rhs = lhs;
                        }
                        if (!exp.AddRangeWithConflict(lhs, rhs))
                        {
                            throw new ArgumentException();
                        }
                        break;
                    }
                    else if (_sourceWindow.AdvanceIfMatches('-'))
                    {
                        // A dash is only legal directly after a lower bound.
                        if (!midState)
                        {
                            throw new ArgumentException("Invalid - in []");
                        }
                    }
                    else
                    {
                        // NOTE(review): NextChar() is also what step 1 uses to read a
                        // character. If it advances the window, the character read here is
                        // consumed without being stored - confirm against SlidingTextWindow.
                        var c2 = _sourceWindow.NextChar();
                        if (_sourceWindow.IsValid(c2))
                        {
                            // Flush what has been collected so far before the next item.
                            if (midState)
                            {
                                rhs = lhs;
                            }
                            if (exp.AddRangeWithConflict(lhs, rhs))
                            {
                                midState = false;
                            }
                            else
                            {
                                throw new ArgumentException();
                            }
                        }
                    }
                }
                return exp;
            }
            else
            {
                char c = default(char);
                if (_sourceWindow.AdvanceIfOneOf("()+*?{}|", out c))
                {
                    // Operators and grouping characters are not handled here: step back
                    // and report that nothing was parsed.
                    _sourceWindow.AdvanceChar(-1);
                    return null;
                }
                else
                {
                    // Any other character stands for itself.
                    var exp = new CharSetExpression();
                    exp.IsReverse = false;
                    exp.Add(_sourceWindow.NextChar());
                    return exp;
                }
            }
        }
Exemple #10
0
 /// <summary>
 /// Initializes a new character set from an existing one: a new <c>FlatSet</c> is
 /// constructed from the source's ranges and the <c>IsReverse</c> flag is taken over.
 /// </summary>
 /// <param name="expression">The character set to copy from.</param>
 public CharSetExpression(CharSetExpression expression)
 {
     var copiedRanges = new FlatSet<CharRange>(expression.Ranges);

     this.IsReverse = expression.IsReverse;
     this.Ranges    = copiedRanges;
 }
 /// <summary>
 /// Visits a character set by forwarding it, together with the stored parameter
 /// value, to <c>Apply</c> and remembering the result.
 /// </summary>
 /// <param name="expression">The character set being visited.</param>
 public void Visit(CharSetExpression expression)
 {
     var result = Apply(expression, _paramValue);
     _returnValue = result;
 }
 /// <summary>
 /// Processes a <c>CharSetExpression</c> with the supplied parameter; implemented by
 /// concrete subclasses.
 /// </summary>
 /// <param name="expression">The character set to process.</param>
 /// <param name="param">The parameter value passed along with the expression.</param>
 /// <returns>The result produced for <paramref name="expression"/>.</returns>
 public abstract ReturnT Apply(CharSetExpression expression, ParamT param);
Exemple #13
0
        /// <summary>
        /// Parses one atom at the current position of the source window: the anchors
        /// <c>^</c> and <c>$</c>, the wildcard <c>.</c>, an escape sequence introduced by
        /// <c>\</c> or <c>/</c>, a bracketed set such as <c>[a-z]</c>, or a single literal
        /// character.
        /// </summary>
        /// <returns>
        /// The parsed expression, or <c>null</c> when the window is at
        /// <c>SlidingTextWindow.InvalidCharacter</c> or the next character is one of
        /// <c>()+*?{}|</c> (in which case the window is moved back with <c>AdvanceChar(-1)</c>).
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown for an unsupported escape selector or a malformed bracketed set.
        /// </exception>
        public Expression ParseCharSet()
        {
            if (_sourceWindow.PeekChar() == SlidingTextWindow.InvalidCharacter)
            {
                return null;
            }
            if (_sourceWindow.AdvanceIfMatches('^'))
            {
                return new BeginExpression();
            }
            else if (_sourceWindow.AdvanceIfMatches('$'))
            {
                return new EndExpression();
            }
            else if (_sourceWindow.AdvanceIfMatches('.'))
            {
                // The wildcard covers every character from (char)1 up to char.MaxValue.
                var ret = new CharSetExpression();
                ret.Add((char)1, char.MaxValue);
                return ret;
            }
            else if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
            {
                // Escape sequences such as \d. The selector is only peeked here and is
                // consumed after the switch, once it is known to be supported.
                var exp = new CharSetExpression();
                var c2 = _sourceWindow.PeekChar();
                switch (c2)
                {
                    case 'r':
                        exp.Add('\r');
                        break;
                    case 'n':
                        exp.Add('\n');
                        break;
                    case 't':
                        exp.Add('\t');
                        break;
                    // Characters that must be escaped to be taken literally.
                    case '\\':
                    case '/':
                    case '(':
                    case ')':
                    case '+':
                    case '*':
                    case '?':
                    case '|':
                    case '{':
                    case '}':
                    case '[':
                    case ']':
                    case '<':
                    case '>':
                    case '^':
                    case '$':
                    case '!':
                    case '=':
                    case '.':
                        exp.Add(c2);
                        break;
                    // Upper-case selectors are the reversed form of the lower-case ones.
                    case 'S':
                        exp.IsReverse = true;
                        goto case 's';
                    case 's':
                        // Whitespace: space, CR, LF and tab.
                        exp.Add(' ');
                        exp.Add('\r');
                        exp.Add('\n');
                        exp.Add('\t');
                        break;
                    case 'D':
                        exp.IsReverse = true;
                        goto case 'd';
                    case 'd':
                        exp.Add('0', '9');
                        break;
                    case 'L':
                        exp.IsReverse = true;
                        goto case 'l';
                    case 'l':
                        // Underscore and ASCII letters.
                        exp.Add('_');
                        exp.Add('A', 'Z');
                        exp.Add('a', 'z');
                        break;
                    case 'W':
                        exp.IsReverse = true;
                        goto case 'w';
                    case 'w':
                        // Digits, underscore and ASCII letters.
                        exp.Add('0', '9');
                        exp.Add('_');
                        exp.Add('A', 'Z');
                        exp.Add('a', 'z');
                        break;
                    default:
                        throw new ArgumentException("Error character after \\");
                }
                _sourceWindow.AdvanceChar();
                return exp;
            }
            // Bracketed sets such as [a-z].
            else if (_sourceWindow.AdvanceIfMatches('['))
            {
                var exp = new CharSetExpression();
                exp.IsReverse = _sourceWindow.AdvanceIfMatches('^');

                // midState is true while a lower bound (lhs) has been read and the
                // matching upper bound (rhs) is still pending.
                bool midState = false;
                char lhs = default(char), rhs = default(char);
                while (true)
                {
                    // Step 1: read one character (escaped or plain) into lhs or rhs.
                    if (_sourceWindow.AdvanceIfMatches('\\') || _sourceWindow.AdvanceIfMatches('/'))
                    {
                        var c = _sourceWindow.PeekChar();
                        char tmp = default(char);
                        switch (c)
                        {
                            case 'r':
                                tmp = '\r';
                                break;
                            case 'n':
                                tmp = '\n';
                                break;
                            case 't':
                                tmp = '\t';
                                break;
                            // Characters that must be escaped to be taken literally.
                            case '\\':
                            case '/':
                            case '(':
                            case ')':
                            case '+':
                            case '*':
                            case '?':
                            case '|':
                            case '{':
                            case '}':
                            case '[':
                            case ']':
                            case '<':
                            case '>':
                            case '^':
                            case '$':
                            case '!':
                            case '=':
                            case '.':
                                tmp = c;
                                break;
                            default:
                                throw new ArgumentException("Error syntax in []");
                        }
                        _sourceWindow.AdvanceChar();

                        // Store the translated character. Storing the raw selector would
                        // make "\n" inside brackets denote the letter 'n' instead of a
                        // line feed (likewise for \r and \t).
                        if (midState)
                        {
                            rhs = tmp;
                        }
                        else
                        {
                            lhs = tmp;
                        }
                        midState = !midState;
                    }
                    else if (_sourceWindow.AdvanceIfMatches("-]"))
                    {
                        throw new ArgumentException("-] occurred.");
                    }
                    else
                    {
                        var c2 = _sourceWindow.NextChar();
                        if (_sourceWindow.IsValid(c2))
                        {
                            if (midState)
                            {
                                rhs = c2;
                            }
                            else
                            {
                                lhs = c2;
                            }
                            midState = !midState;
                        }
                        else
                        {
                            throw new ArgumentException("Error in []");
                        }
                    }

                    // Step 2: decide what follows the character just read.
                    if (_sourceWindow.AdvanceIfMatches(']'))
                    {
                        // End of the set: a pending lower bound becomes a single character.
                        if (midState)
                        {
                            rhs = lhs;
                        }
                        if (!exp.AddRangeWithConflict(lhs, rhs))
                        {
                            throw new ArgumentException();
                        }
                        break;
                    }
                    else if (_sourceWindow.AdvanceIfMatches('-'))
                    {
                        // A dash is only legal directly after a lower bound.
                        if (!midState)
                        {
                            throw new ArgumentException("Invalid - in []");
                        }
                    }
                    else
                    {
                        // NOTE(review): NextChar() is also what step 1 uses to read a
                        // character. If it advances the window, the character read here is
                        // consumed without being stored - confirm against SlidingTextWindow.
                        var c2 = _sourceWindow.NextChar();
                        if (_sourceWindow.IsValid(c2))
                        {
                            // Flush what has been collected so far before the next item.
                            if (midState)
                            {
                                rhs = lhs;
                            }
                            if (exp.AddRangeWithConflict(lhs, rhs))
                            {
                                midState = false;
                            }
                            else
                            {
                                throw new ArgumentException();
                            }
                        }
                    }
                }
                return exp;
            }
            else
            {
                char c = default(char);
                if (_sourceWindow.AdvanceIfOneOf("()+*?{}|", out c))
                {
                    // Operators and grouping characters are not handled here: step back
                    // and report that nothing was parsed.
                    _sourceWindow.AdvanceChar(-1);
                    return null;
                }
                else
                {
                    // Any other character stands for itself.
                    var exp = new CharSetExpression();
                    exp.IsReverse = false;
                    exp.Add(_sourceWindow.NextChar());
                    return exp;
                }
            }
        }
Exemple #14
0
 /// <summary>
 /// Creates a node around a new character set to which the range
 /// (<paramref name="a"/>, <paramref name="b"/>) has been added.
 /// </summary>
 /// <param name="a">First bound passed to <c>AddRangeWithConflict</c>.</param>
 /// <param name="b">Second bound passed to <c>AddRangeWithConflict</c>.</param>
 /// <returns>A new <c>RegexNode</c> wrapping the character set.</returns>
 public static RegexNode GetCharSetExpression(char a,char b)
 {
     var rangeSet = new CharSetExpression();

     // The outcome of the insertion is not checked.
     rangeSet.AddRangeWithConflict(a, b);

     var wrapped = new RegexNode(rangeSet);
     return wrapped;
 }
Exemple #15
0
 /// <summary>
 /// Determines whether another character set has the same <c>IsReverse</c> flag and
 /// the same ranges in the same order.
 /// </summary>
 /// <param name="obj">The character set to compare with; may be <c>null</c>.</param>
 /// <returns>
 /// <c>true</c> when the flag, the number of ranges and every range at the same index
 /// match; <c>false</c> otherwise, including when <paramref name="obj"/> is <c>null</c>.
 /// </returns>
 public bool Equals(CharSetExpression obj)
 {
     // Comparing against null yields false instead of a NullReferenceException.
     // ReferenceEquals keeps the check independent of any user-defined == operator.
     if (object.ReferenceEquals(obj, null))
     {
         return false;
     }
     if (IsReverse != obj.IsReverse)
     {
         return false;
     }
     if (Ranges.Count != obj.Ranges.Count)
     {
         return false;
     }

     // Equal counts: compare the ranges index by index.
     for (int i = 0; i < Ranges.Count; ++i)
     {
         if (Ranges[i] != obj.Ranges[i])
         {
             return false;
         }
     }
     return true;
 }
Exemple #16
0
 /// <summary>
 /// Creates a character set from another one, constructing a new <c>FlatSet</c> from
 /// its ranges and taking over its <c>IsReverse</c> flag.
 /// </summary>
 /// <param name="expression">The character set to copy from.</param>
 public CharSetExpression(CharSetExpression expression)
 {
     var sourceRanges = expression.Ranges;
     var rangeCopy = new FlatSet<CharRange>(sourceRanges);

     this.IsReverse = expression.IsReverse;
     this.Ranges = rangeCopy;
 }