Exemple #1
0
 /// <summary>
 /// Builds a pattern object: the pattern is validated and preparsed, then a
 /// <see cref="Regex"/> is constructed from the preparsed text.
 /// </summary>
 /// <param name="pattern">Pattern value; passed through ValidatePattern before preparsing.</param>
 /// <param name="flags">Flag bits; converted with FlagsToOption and stored unchanged as the compile flags.</param>
 public RE_Pattern(object pattern, int flags)
 {
     pre = PreParseRegex(ValidatePattern(pattern));

     try
     {
         RegexOptions options = FlagsToOption(flags);
         this.re = new Regex(pre.Pattern, options);
     }
     catch (ArgumentException ex)
     {
         // Re-raise argument failures from the block above through
         // ExceptionConverter using `error`, keeping the original message.
         throw ExceptionConverter.CreateThrowable(error, ex.Message);
     }

     this.compileFlags = flags;
 }
Exemple #2
0
        /// <summary>
        /// Tries each entry of <c>Parsers</c> in order against the glob at the given
        /// position and returns the first non-null result.
        /// </summary>
        /// <param name="glob">The glob characters being parsed.</param>
        /// <param name="index">Position in <paramref name="glob"/> handed to each parser.</param>
        /// <returns>The first non-null result, or null when every parser returned null.</returns>
        private ParsedRegex ParseSpecial(char [] glob, int index)
        {
            ParsedRegex firstMatch = null;

            foreach (var tryParse in Parsers)
            {
                firstMatch = tryParse(glob, index);
                if (firstMatch != null)
                {
                    // Stop at the first parser that produced a result.
                    break;
                }
            }

            return firstMatch;
        }
Exemple #3
0
        /// <summary>
        /// Converts a glob to its regular-expression text by parsing it from index 0.
        /// </summary>
        /// <param name="glob">The glob to convert.</param>
        /// <returns>The <c>Regex</c> text of the parse result.</returns>
        /// <exception cref="ConfigurationException">
        /// Thrown when parsing yields null or the parse result's index is not the
        /// length of the glob.
        /// </exception>
        private string GetRegexStringFromGlob(string glob)
        {
            ParsedRegex parsed = ParseGlob(glob.ToCharArray(), 0);

            // Reject a failed parse, or one whose index does not equal the glob length.
            if (parsed == null || parsed.Index != glob.Length)
            {
                throw new ConfigurationException(string.Format("syntax error in glob `{0}'", glob));
            }

            return parsed.Regex;
        }
Exemple #4
0
        /// <summary>
        /// Preparses a regular expression text returning a ParsedRegex class
        /// that can be used for further regular expressions.
        /// </summary>
        /// <remarks>
        /// Two passes are made over the pattern.  The first walks every unescaped
        /// '(' and handles the "(?" extension syntax: the 'P' of "(?P" is dropped,
        /// inline option letters are recorded on the result's Options, and once a
        /// "(?P" group has been seen every later plain group is given an artificial
        /// name.  The second pass walks every backslash escape and removes the
        /// backslash in front of word characters that are not recognized escapes.
        /// </remarks>
        /// <param name="pattern">The regular expression text to preparse.</param>
        /// <returns>A ParsedRegex whose Pattern is the rewritten text.</returns>
        private static ParsedRegex PreParseRegex(string pattern)
        {
            ParsedRegex res = new ParsedRegex(pattern);

            int  cur = 0, nameIndex;
            bool containsNamedGroup = false;

            for (; ;)
            {
                nameIndex = pattern.IndexOf('(', cur);

                if (nameIndex == -1)
                {
                    break;
                }

                // A paren preceded by an odd number of backslashes is an escaped
                // literal, not a group opener; step over it without rewriting.
                int backslashCount = 0;
                for (int i = nameIndex - 1; i >= 0 && pattern[i] == '\\'; i--)
                {
                    backslashCount++;
                }
                if ((backslashCount & 0x01) != 0)
                {
                    cur = nameIndex + 1;
                    continue;
                }

                if (nameIndex == pattern.Length - 1)
                {
                    break;
                }

                // NOTE(review): a '(' inside a character class such as [(] is still
                // treated as a group opener here; "(?P=name)" references and removal
                // of the 'L'/'u' inline options are not handled either -- confirm
                // whether callers rely on those forms.
                switch (pattern[++nameIndex])
                {
                case '?':
                    // extension syntax
                    if (nameIndex == pattern.Length - 1)
                    {
                        throw ExceptionConverter.CreateThrowable(error, "unexpected end of regex");
                    }
                    switch (pattern[++nameIndex])
                    {
                    case 'P':
                        //  named regex, .NET doesn't expect the P so we'll remove it;
                        //  also, once we see a named group i.e. ?P then we need to start artificially
                        //  naming all unnamed groups from then on---this is to get around the fact that
                        //  the CLR RegEx support orders all the unnamed groups before all the named
                        //  groups, even if the named groups are before the unnamed ones in the pattern;
                        //  the artificial naming preserves the order of the groups and thus the order of
                        //  the matches
                        containsNamedGroup = true;
                        pattern            = pattern.Remove(nameIndex, 1);
                        break;

                    case 'i': res.Options |= RegexOptions.IgnoreCase; break;

                    case 'L': res.Options &= ~(RegexOptions.CultureInvariant); break;

                    case 'm': res.Options |= RegexOptions.Multiline; break;

                    case 's': res.Options |= RegexOptions.Singleline; break;

                    case 'u': break;

                    case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;

                    case ':': break;   // non-capturing

                    case '=': break;   // look ahead assertion

                    case '<': break;   // positive look behind assertion

                    case '!': break;   // negative look ahead assertion

                    case '(':          // yes/no if group exists, we don't support this
                    default: throw ExceptionConverter.CreateThrowable(error, "Unrecognized extension " + pattern[nameIndex]);
                    }
                    break;

                default:
                    // just another group
                    if (containsNamedGroup)
                    {
                        // need to name this unnamed group
                        pattern = pattern.Insert(nameIndex, "?<Named" + GetRandomString() + ">");
                    }
                    break;
                }

                cur = nameIndex;
            }

            cur = 0;
            for (; ;)
            {
                nameIndex = pattern.IndexOf('\\', cur);

                if (nameIndex == -1 || nameIndex == pattern.Length - 1)
                {
                    break;
                }

                // nameIndex now addresses the escaped character.  By default the scan
                // resumes after it, so that the second backslash of an escaped
                // backslash is never mistaken for the start of a new escape.
                char curChar = pattern[++nameIndex];
                cur = nameIndex + 1;

                switch (curChar)
                {
                case 'x':
                case 'u':
                case 'a':
                case 'b':
                case 'e':
                case 'f':
                case 'n':
                case 'r':
                case 't':
                case 'v':
                case 'c':
                case 's':
                case 'W':
                case 'w':
                case 'p':
                case 'P':
                case 'S':
                case 'd':
                case 'D':
                case 'A':
                case 'B':
                case 'Z':
                case '\\':
                    // known escape sequences, leave escaped.  \A, \B and \Z are
                    // anchors; stripping the backslash would turn them into
                    // literal letters.
                    break;

                default:
                    System.Globalization.UnicodeCategory charClass = Char.GetUnicodeCategory(curChar);
                    switch (charClass)
                    {
                    // recognized word characters, always unescape.
                    case System.Globalization.UnicodeCategory.ModifierLetter:
                    case System.Globalization.UnicodeCategory.LowercaseLetter:
                    case System.Globalization.UnicodeCategory.UppercaseLetter:
                    case System.Globalization.UnicodeCategory.TitlecaseLetter:
                    case System.Globalization.UnicodeCategory.OtherLetter:
                    case System.Globalization.UnicodeCategory.LetterNumber:
                    case System.Globalization.UnicodeCategory.OtherNumber:
                    case System.Globalization.UnicodeCategory.ConnectorPunctuation:
                        pattern = pattern.Remove(nameIndex - 1, 1);
                        // the unescaped character moved one slot to the left, so the
                        // next unscanned character now sits at nameIndex.
                        cur = nameIndex;
                        break;

                    case System.Globalization.UnicodeCategory.DecimalDigitNumber:
                        // '\1', '\2' etc. are references to groups; unescaping them
                        // would turn the reference into a literal digit.
                        break;
                    }
                    break;
                }
            }

            res.Pattern = pattern;
            return(res);
        }
Exemple #5
0
        /// <summary>
        /// Preparses a regular expression text returning a ParsedRegex class
        /// that can be used for further regular expressions.
        /// </summary>
        /// <remarks>
        /// Two passes are made over the pattern.  The first visits every unescaped
        /// character found via <c>_preParsedChars</c>: it tracks whether the scan is
        /// inside a character class, inserts the missing lower bound into a "{,"
        /// quantifier, and handles the "(?" extension syntax (named groups, named
        /// back references, inline options).  The second pass visits every backslash
        /// escape and removes the backslash in front of word characters that are not
        /// recognized escapes.
        /// </remarks>
        /// <param name="context">Code context used to obtain the error type for thrown exceptions.</param>
        /// <param name="pattern">The regular expression text to preparse.</param>
        /// <returns>A ParsedRegex whose Pattern is the rewritten text.</returns>
        private static ParsedRegex PreParseRegex(CodeContext/*!*/ context, string pattern) {
            ParsedRegex res = new ParsedRegex(pattern);

            int cur = 0, nameIndex;
            bool isCharList = false;
            bool containsNamedGroup = false;

            for (; ; ) {
                nameIndex = pattern.IndexOfAny(_preParsedChars, cur);
                if (nameIndex > 0 && pattern[nameIndex - 1] == '\\') {
                    int curIndex = nameIndex - 2;
                    int backslashCount = 1;
                    while (curIndex >= 0 && pattern[curIndex] == '\\') {
                        backslashCount++;
                        curIndex--;
                    }
                    // odd number of back slashes, this is an escaped
                    // character that we should ignore; resume right after it
                    // instead of rescanning up to it one position at a time.
                    if ((backslashCount & 0x01) != 0) {
                        cur = nameIndex + 1;
                        continue;
                    }
                }

                if (nameIndex == -1) break;
                if (nameIndex == pattern.Length - 1) break;

                switch (pattern[nameIndex]) {
                    case '{':
                        nameIndex++;
                        // inside a character class "{," is just two literal
                        // characters; inserting a 0 there would add '0' to the class.
                        if (!isCharList && pattern[nameIndex] == ',') {
                            // no beginning specified for the n-m quantifier, add the
                            // default 0 value.
                            pattern = pattern.Insert(nameIndex, "0");
                        }
                        break;
                    case '[':
                        nameIndex++;
                        isCharList = true;
                        break;
                    case ']':
                        nameIndex++;
                        isCharList = false;
                        break;
                    case '(':
                        // make sure we're not dealing with [(]
                        if (!isCharList) {
                            switch (pattern[++nameIndex]) {
                                case '?':
                                    // extension syntax
                                    if (nameIndex == pattern.Length - 1) throw PythonExceptions.CreateThrowable(error(context), "unexpected end of regex");
                                    switch (pattern[++nameIndex]) {
                                        case 'P':
                                            //  named regex, .NET doesn't expect the P so we'll remove it;
                                            //  also, once we see a named group i.e. ?P then we need to start artificially
                                            //  naming all unnamed groups from then on---this is to get around the fact that
                                            //  the CLR RegEx support orders all the unnamed groups before all the named
                                            //  groups, even if the named groups are before the unnamed ones in the pattern;
                                            //  the artificial naming preserves the order of the groups and thus the order of
                                            //  the matches
                                            if (nameIndex + 1 < pattern.Length && pattern[nameIndex + 1] == '=') {
                                                // match whatever was previously matched by the named group

                                                // remove the (?P=
                                                pattern = pattern.Remove(nameIndex - 2, 4);
                                                pattern = pattern.Insert(nameIndex - 2, "\\k<");
                                                int tmpIndex = nameIndex;
                                                while (tmpIndex < pattern.Length && pattern[tmpIndex] != ')')
                                                    tmpIndex++;

                                                if (tmpIndex == pattern.Length) throw PythonExceptions.CreateThrowable(error(context), "unexpected end of regex");

                                                // swap the closing ')' for the '>' that ends \k<name>
                                                pattern = pattern.Substring(0, tmpIndex) + ">" + pattern.Substring(tmpIndex + 1);
                                            } else {
                                                containsNamedGroup = true;
                                                pattern = pattern.Remove(nameIndex, 1);
                                            }
                                            break;
                                        case 'i': res.Options |= RegexOptions.IgnoreCase; break;
                                        case 'L':
                                            res.Options &= ~(RegexOptions.CultureInvariant);
                                            RemoveOption(ref pattern, ref nameIndex);
                                            break;
                                        case 'm': res.Options |= RegexOptions.Multiline; break;
                                        case 's': res.Options |= RegexOptions.Singleline; break;
                                        case 'u':
                                            // specify unicode; not relevant and not valid under .NET as we're always unicode
                                            // -- so the option needs to be removed
                                            RemoveOption(ref pattern, ref nameIndex);
                                            break;
                                        case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;
                                        case ':': break; // non-capturing
                                        case '=': break; // look ahead assertion
                                        case '<': break; // positive look behind assertion
                                        case '!': break; // negative look ahead assertion
                                        case '#': break; // inline comment
                                        case '(':  // yes/no if group exists, we don't support this
                                        default: throw PythonExceptions.CreateThrowable(error(context), "Unrecognized extension " + pattern[nameIndex]);
                                    }
                                    break;
                                default:
                                    // just another group
                                    if (containsNamedGroup) {
                                        // need to name this unnamed group
                                        pattern = pattern.Insert(nameIndex, "?<Named" + GetRandomString() + ">");
                                    }
                                    break;
                            }
                        } else {
                            nameIndex++;
                        }
                        break;
                }

                cur = nameIndex;
            }

            cur = 0;
            for (; ; ) {
                nameIndex = pattern.IndexOf('\\', cur);

                if (nameIndex == -1 || nameIndex == pattern.Length - 1) break;
                // cur and nameIndex both address the escaped character from here on
                cur = ++nameIndex;
                char curChar = pattern[cur];
                switch (curChar) {
                    case 'x':
                    case 'u':
                    case 'a':
                    case 'b':
                    case 'e':
                    case 'f':
                    case 'k':
                    case 'n':
                    case 'r':
                    case 't':
                    case 'v':
                    case 'c':
                    case 's':
                    case 'W':
                    case 'w':
                    case 'p':
                    case 'P':
                    case 'S':
                    case 'd':
                    case 'D':
                    case 'A':
                    case 'B':
                    case 'Z':
                    case '\\':
                        // known escape sequences, leave escaped.  \A and \B are
                        // anchors; stripping the backslash would turn them into
                        // the literal letters 'A' and 'B'.
                        break;
                    default:
                        System.Globalization.UnicodeCategory charClass = Char.GetUnicodeCategory(curChar);
                        switch (charClass) {
                            // recognized word characters, always unescape.
                            case System.Globalization.UnicodeCategory.ModifierLetter:
                            case System.Globalization.UnicodeCategory.LowercaseLetter:
                            case System.Globalization.UnicodeCategory.UppercaseLetter:
                            case System.Globalization.UnicodeCategory.TitlecaseLetter:
                            case System.Globalization.UnicodeCategory.OtherLetter:
                            case System.Globalization.UnicodeCategory.LetterNumber:
                            case System.Globalization.UnicodeCategory.OtherNumber:
                            case System.Globalization.UnicodeCategory.ConnectorPunctuation:
                                pattern = pattern.Remove(nameIndex - 1, 1);
                                // the character shifted left by one; compensate so the
                                // increment below lands on the next unscanned character
                                cur--;
                                break;
                            case System.Globalization.UnicodeCategory.DecimalDigitNumber:
                                //  actually don't want to unescape '\1', '\2' etc. which are references to groups
                                break;
                        }
                        break;
                }
                if (++cur >= pattern.Length) {
                    break;
                }
            }

            res.Pattern = pattern;
            return res;
        }
Exemple #6
0
            /// <summary>
            /// Builds a pattern object: the pattern is validated and preparsed, the
            /// options recorded by the preparse step are merged into the flags, and a
            /// <see cref="Regex"/> is constructed from the preparsed text.
            /// </summary>
            /// <param name="context">Code context used for preparsing and for obtaining the error type.</param>
            /// <param name="pattern">Pattern value; passed through ValidatePattern before preparsing.</param>
            /// <param name="flags">Flag bits; the merged value is stored as the compile flags.</param>
            /// <param name="compiled">When true (and not building for SILVERLIGHT), RegexOptions.Compiled is added.</param>
            internal RE_Pattern(CodeContext/*!*/ context, object pattern, int flags, bool compiled) {
                _pre = PreParseRegex(context, ValidatePattern(pattern));
                try {
                    // fold the options found while preparsing into the caller's flags
                    flags |= OptionToFlags(_pre.Options);
                    RegexOptions options = FlagsToOption(flags);
#if !SILVERLIGHT
                    if (compiled) {
                        options |= RegexOptions.Compiled;
                    }
#endif
                    this._re = new Regex(_pre.Pattern, options);
                } catch (ArgumentException ex) {
                    // re-raise argument failures from the block above through
                    // PythonExceptions using error(context), keeping the message
                    throw PythonExceptions.CreateThrowable(error(context), ex.Message);
                }
                this._compileFlags = flags;
            }
Exemple #7
0
 /// <summary>
 /// Builds a pattern object: the pattern is validated and preparsed, then a
 /// <see cref="Regex"/> is constructed from the preparsed text.
 /// </summary>
 /// <param name="pattern">Pattern value; passed through ValidatePattern before preparsing.</param>
 /// <param name="flags">Flag bits; converted with FlagsToOption and stored unchanged as the compile flags.</param>
 public RE_Pattern(object pattern, int flags)
 {
     pre = PreParseRegex(ValidatePattern(pattern));

     try
     {
         RegexOptions options = FlagsToOption(flags);
         this.re = new Regex(pre.Pattern, options);
     }
     catch (ArgumentException ex)
     {
         // Re-raise argument failures from the block above through
         // ExceptionConverter using `error`, keeping the original message.
         throw ExceptionConverter.CreateThrowable(error, ex.Message);
     }

     this.compileFlags = flags;
 }