/// <summary>
/// Builds a pattern object from a Python-style pattern and flag bits.
/// </summary>
/// <param name="pattern">The pattern; it is passed through ValidatePattern before preparsing.</param>
/// <param name="flags">Flag bits; converted with FlagsToOption and stored unchanged as the compile flags.</param>
public RE_Pattern(object pattern, int flags) {
    this.pre = PreParseRegex(ValidatePattern(pattern));

    try {
        RegexOptions regexOptions = FlagsToOption(flags);
        Regex compiledRegex = new Regex(this.pre.Pattern, regexOptions);
        this.re = compiledRegex;
    } catch (ArgumentException argumentError) {
        // Surface the Regex/flag-conversion failure through the module's own error type,
        // carrying the original message.
        throw ExceptionConverter.CreateThrowable(error, argumentError.Message);
    }

    this.compileFlags = flags;
}
/// <summary>
/// Runs each entry of Parsers, in order, against the glob at the given position
/// and returns the first non-null result.
/// </summary>
/// <param name="glob">The glob characters handed to every parser.</param>
/// <param name="index">The position handed to every parser.</param>
/// <returns>The first non-null parser result, or null when every parser returned null.</returns>
private ParsedRegex ParseSpecial(char[] glob, int index) {
    ParsedRegex firstMatch = null;

    foreach (var tryParse in Parsers) {
        firstMatch = tryParse(glob, index);
        if (firstMatch != null) {
            // Stop at the first parser that produced something.
            break;
        }
    }

    return firstMatch;
}
/// <summary>
/// Converts a glob to its regex text by calling ParseGlob from position 0.
/// </summary>
/// <param name="glob">The glob text to convert.</param>
/// <returns>The Regex text of the parse result.</returns>
/// <exception cref="ConfigurationException">
/// ParseGlob returned null, or its reported Index is not the glob's length.
/// </exception>
private string GetRegexStringFromGlob(string glob) {
    ParsedRegex parsed = ParseGlob(glob.ToCharArray(), 0);

    // Only a result whose Index equals the full glob length is accepted.
    bool consumedWholeGlob = parsed != null && parsed.Index == glob.Length;
    if (!consumedWholeGlob) {
        throw new ConfigurationException(string.Format("syntax error in glob `{0}'", glob));
    }

    return parsed.Regex;
}
/// <summary>
/// Preparses a regular expression text returning a ParsedRegex class
/// that can be used for further regular expressions.
/// </summary>
/// <remarks>
/// Two passes are made over the pattern text:
/// 1. Every unescaped '(' is inspected. "(?P" has its 'P' removed, the inline option
///    letters i/L/m/s/x are recorded in the result's Options, and once a "(?P" group has
///    been seen every later plain group is given an artificial name.
/// 2. Every backslash escape is inspected. Escapes of word characters that are not in the
///    known-escape list are unescaped; everything else is left as written.
/// </remarks>
/// <param name="pattern">The pattern text to rewrite.</param>
/// <returns>A ParsedRegex whose Pattern is the rewritten text and whose Options reflect inline flags.</returns>
private static ParsedRegex PreParseRegex(string pattern) {
    ParsedRegex res = new ParsedRegex(pattern);

    int cur = 0, nameIndex;
    bool containsNamedGroup = false;

    // Pass 1: group openers.
    for (; ; ) {
        nameIndex = pattern.IndexOf('(', cur);
        if (nameIndex == -1 || nameIndex == pattern.Length - 1) {
            break;
        }

        // A paren preceded by an odd number of backslashes is a literal "\(" and must not
        // be treated as a group opener (it would otherwise be renamed or parsed as "(?x").
        int backslashCount = 0;
        for (int i = nameIndex - 1; i >= 0 && pattern[i] == '\\'; i--) {
            backslashCount++;
        }
        if ((backslashCount & 0x01) != 0) {
            cur = nameIndex + 1;
            continue;
        }

        switch (pattern[++nameIndex]) {
            case '?':
                // extension syntax
                if (nameIndex == pattern.Length - 1) {
                    throw ExceptionConverter.CreateThrowable(error, "unexpected end of regex");
                }
                switch (pattern[++nameIndex]) {
                    case 'P':
                        // named regex, .NET doesn't expect the P so we'll remove it;
                        // also, once we see a named group i.e. ?P then we need to start artificially
                        // naming all unnamed groups from then on---this is to get around the fact that
                        // the CLR RegEx support orders all the unnamed groups before all the named
                        // groups, even if the named groups are before the unnamed ones in the pattern;
                        // the artificial naming preserves the order of the groups and thus the order of
                        // the matches
                        containsNamedGroup = true;
                        pattern = pattern.Remove(nameIndex, 1);
                        break;
                    case 'i': res.Options |= RegexOptions.IgnoreCase; break;
                    case 'L': res.Options &= ~(RegexOptions.CultureInvariant); break;
                    case 'm': res.Options |= RegexOptions.Multiline; break;
                    case 's': res.Options |= RegexOptions.Singleline; break;
                    case 'u': break;
                    case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;
                    case ':': break; // non-capturing
                    case '=': break; // look ahead assertion
                    case '<': break; // look behind assertion
                    case '!': break; // negative look ahead assertion
                    case '(': // yes/no if group exists, we don't support this
                    default:
                        throw ExceptionConverter.CreateThrowable(error, "Unrecognized extension " + pattern[nameIndex]);
                }
                break;
            default:
                // just another group
                if (containsNamedGroup) {
                    // need to name this unnamed group
                    pattern = pattern.Insert(nameIndex, "?<Named" + GetRandomString() + ">");
                }
                break;
        }

        cur = nameIndex;
    }

    // Pass 2: backslash escapes. Each escape pair is examined exactly once; the scan
    // resumes after the pair so the second backslash of "\\" is never mistaken for the
    // start of a new escape.
    cur = 0;
    while (cur < pattern.Length) {
        nameIndex = pattern.IndexOf('\\', cur);
        if (nameIndex == -1 || nameIndex == pattern.Length - 1) {
            break;
        }

        char escapedChar = pattern[nameIndex + 1];
        bool unescape = false;

        switch (escapedChar) {
            case 'x':
            case 'u':
            case 'a':
            case 'b':
            case 'e':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case 'c':
            case 's':
            case 'W':
            case 'w':
            case 'p':
            case 'P':
            case 'S':
            case 'd':
            case 'D':
                // known escape sequences, leave escaped.
                break;
            default:
                switch (Char.GetUnicodeCategory(escapedChar)) {
                    // recognized word characters, always unescape.
                    case System.Globalization.UnicodeCategory.ModifierLetter:
                    case System.Globalization.UnicodeCategory.LowercaseLetter:
                    case System.Globalization.UnicodeCategory.UppercaseLetter:
                    case System.Globalization.UnicodeCategory.TitlecaseLetter:
                    case System.Globalization.UnicodeCategory.OtherLetter:
                    case System.Globalization.UnicodeCategory.LetterNumber:
                    case System.Globalization.UnicodeCategory.OtherNumber:
                    case System.Globalization.UnicodeCategory.ConnectorPunctuation:
                        unescape = true;
                        break;
                    // DecimalDigitNumber is deliberately not unescaped: "\1", "\2", ...
                    // are references to groups and must keep their backslash.
                }
                break;
        }

        if (unescape) {
            pattern = pattern.Remove(nameIndex, 1);
            // the formerly escaped character now sits at nameIndex; continue after it
            cur = nameIndex + 1;
        } else {
            // skip the backslash and the character it escapes
            cur = nameIndex + 2;
        }
    }

    res.Pattern = pattern;
    return res;
}
/// <summary>
/// Preparses a regular expression text returning a ParsedRegex class
/// that can be used for further regular expressions.
/// </summary>
/// <remarks>
/// Two passes are made over the pattern text:
/// 1. Each unescaped character from _preParsedChars is inspected: "{," quantifiers get an
///    explicit 0 lower bound, '[' / ']' toggle character-class tracking, and '(' outside a
///    character class is examined for extension syntax ("(?P&lt;name&gt;", "(?P=name)", inline
///    option letters) or, after a named group has been seen, given an artificial name.
/// 2. Each backslash escape is inspected; escapes of word characters that are neither in
///    the known-escape list nor decimal digits are unescaped.
/// </remarks>
/// <param name="context">Code context used to obtain the error type for thrown exceptions.</param>
/// <param name="pattern">The pattern text to rewrite.</param>
/// <returns>A ParsedRegex whose Pattern is the rewritten text and whose Options reflect inline flags.</returns>
private static ParsedRegex PreParseRegex(CodeContext/*!*/ context, string pattern) {
    ParsedRegex res = new ParsedRegex(pattern);

    int cur = 0, nameIndex;
    bool isCharList = false;
    bool containsNamedGroup = false;

    // Pass 1: special characters.
    for (; ; ) {
        nameIndex = pattern.IndexOfAny(_preParsedChars, cur);
        if (nameIndex > 0 && pattern[nameIndex - 1] == '\\') {
            int curIndex = nameIndex - 2;
            int backslashCount = 1;
            while (curIndex >= 0 && pattern[curIndex] == '\\') {
                backslashCount++;
                curIndex--;
            }
            // odd number of back slashes: the special character itself is escaped and
            // should be ignored. Resume directly after it rather than creeping forward
            // one position at a time and rediscovering the same character.
            if ((backslashCount & 0x01) != 0) {
                cur = nameIndex + 1;
                continue;
            }
        }

        if (nameIndex == -1) break;
        if (nameIndex == pattern.Length - 1) break;

        switch (pattern[nameIndex]) {
            case '{':
                nameIndex++;
                // Inside a character class '{' is a literal, so "[{,]" must be left alone;
                // inserting a 0 there would add '0' to the class.
                if (!isCharList && pattern[nameIndex] == ',') {
                    // no beginning specified for the n-m quantifier, add the
                    // default 0 value.
                    pattern = pattern.Insert(nameIndex, "0");
                }
                break;
            case '[':
                nameIndex++;
                isCharList = true;
                break;
            case ']':
                nameIndex++;
                isCharList = false;
                break;
            case '(':
                // make sure we're not dealing with [(]
                if (!isCharList) {
                    switch (pattern[++nameIndex]) {
                        case '?':
                            // extension syntax
                            if (nameIndex == pattern.Length - 1) throw PythonExceptions.CreateThrowable(error(context), "unexpected end of regex");
                            switch (pattern[++nameIndex]) {
                                case 'P':
                                    // named regex, .NET doesn't expect the P so we'll remove it;
                                    // also, once we see a named group i.e. ?P then we need to start artificially
                                    // naming all unnamed groups from then on---this is to get around the fact that
                                    // the CLR RegEx support orders all the unnamed groups before all the named
                                    // groups, even if the named groups are before the unnamed ones in the pattern;
                                    // the artificial naming preserves the order of the groups and thus the order of
                                    // the matches
                                    if (nameIndex + 1 < pattern.Length && pattern[nameIndex + 1] == '=') {
                                        // match whatever was previously matched by the named group

                                        // remove the (?P=
                                        pattern = pattern.Remove(nameIndex - 2, 4);
                                        pattern = pattern.Insert(nameIndex - 2, "\\k<");
                                        int tmpIndex = nameIndex;
                                        while (tmpIndex < pattern.Length && pattern[tmpIndex] != ')')
                                            tmpIndex++;

                                        if (tmpIndex == pattern.Length) throw PythonExceptions.CreateThrowable(error(context), "unexpected end of regex");

                                        // swap the closing ')' for '>' to finish the \k<name> reference
                                        pattern = pattern.Substring(0, tmpIndex) + ">" + pattern.Substring(tmpIndex + 1);
                                    } else {
                                        containsNamedGroup = true;
                                        pattern = pattern.Remove(nameIndex, 1);
                                    }
                                    break;
                                case 'i': res.Options |= RegexOptions.IgnoreCase; break;
                                case 'L':
                                    res.Options &= ~(RegexOptions.CultureInvariant);
                                    // NOTE(review): RemoveOption is defined elsewhere; presumably it strips
                                    // the option letter from the pattern and adjusts nameIndex -- confirm.
                                    RemoveOption(ref pattern, ref nameIndex);
                                    break;
                                case 'm': res.Options |= RegexOptions.Multiline; break;
                                case 's': res.Options |= RegexOptions.Singleline; break;
                                case 'u':
                                    // specify unicode; not relevant and not valid under .NET as we're always unicode
                                    // -- so the option needs to be removed
                                    RemoveOption(ref pattern, ref nameIndex);
                                    break;
                                case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;
                                case ':': break; // non-capturing
                                case '=': break; // look ahead assertion
                                case '<': break; // look behind assertion
                                case '!': break; // negative look ahead assertion
                                case '#': break; // inline comment
                                case '(': // yes/no if group exists, we don't support this
                                default: throw PythonExceptions.CreateThrowable(error(context), "Unrecognized extension " + pattern[nameIndex]);
                            }
                            break;
                        default:
                            // just another group
                            if (containsNamedGroup) {
                                // need to name this unnamed group
                                pattern = pattern.Insert(nameIndex, "?<Named" + GetRandomString() + ">");
                            }
                            break;
                    }
                } else {
                    nameIndex++;
                }
                break;
        }

        cur = nameIndex;
    }

    // Pass 2: backslash escapes.
    cur = 0;
    for (; ; ) {
        nameIndex = pattern.IndexOf('\\', cur);
        if (nameIndex == -1 || nameIndex == pattern.Length - 1) break;

        // step onto the escaped character
        cur = ++nameIndex;
        char curChar = pattern[cur];
        switch (curChar) {
            case 'x':
            case 'u':
            case 'a':
            case 'b':
            case 'e':
            case 'f':
            case 'k':
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case 'c':
            case 's':
            case 'W':
            case 'w':
            case 'p':
            case 'P':
            case 'S':
            case 'd':
            case 'D':
            case 'Z':
            case '\\':
                // known escape sequences, leave escaped.
                break;
            default:
                System.Globalization.UnicodeCategory charClass = Char.GetUnicodeCategory(curChar);
                switch (charClass) {
                    // recognized word characters, always unescape.
                    case System.Globalization.UnicodeCategory.ModifierLetter:
                    case System.Globalization.UnicodeCategory.LowercaseLetter:
                    case System.Globalization.UnicodeCategory.UppercaseLetter:
                    case System.Globalization.UnicodeCategory.TitlecaseLetter:
                    case System.Globalization.UnicodeCategory.OtherLetter:
                    case System.Globalization.UnicodeCategory.LetterNumber:
                    case System.Globalization.UnicodeCategory.OtherNumber:
                    case System.Globalization.UnicodeCategory.ConnectorPunctuation:
                        pattern = pattern.Remove(nameIndex - 1, 1);
                        // the text shifted left by one, keep cur on the same character
                        cur--;
                        break;
                    case System.Globalization.UnicodeCategory.DecimalDigitNumber:
                        // actually don't want to unescape '\1', '\2' etc. which are references to groups
                        break;
                }
                break;
        }

        // move past the character just examined; stop at the end of the text
        if (++cur >= pattern.Length) {
            break;
        }
    }

    res.Pattern = pattern;
    return res;
}
/// <summary>
/// Builds a pattern object from a Python-style pattern and flag bits, merging in any
/// options discovered while preparsing the pattern text.
/// </summary>
/// <param name="context">Code context used for preparsing and for obtaining the error type.</param>
/// <param name="pattern">The pattern; it is passed through ValidatePattern before preparsing.</param>
/// <param name="flags">Flag bits; combined with the preparsed options and stored as the compile flags.</param>
/// <param name="compiled">When true (and not building for SILVERLIGHT), RegexOptions.Compiled is added.</param>
internal RE_Pattern(CodeContext/*!*/ context, object pattern, int flags, bool compiled) {
    this._pre = PreParseRegex(context, ValidatePattern(pattern));

    try {
        // Fold the options found in the pattern text back into the caller's flags.
        flags |= OptionToFlags(this._pre.Options);

        RegexOptions regexOptions = FlagsToOption(flags);
#if !SILVERLIGHT
        if (compiled) {
            regexOptions |= RegexOptions.Compiled;
        }
#endif
        this._re = new Regex(this._pre.Pattern, regexOptions);
    } catch (ArgumentException argumentError) {
        // Surface the failure through the module's own error type, carrying the original message.
        throw PythonExceptions.CreateThrowable(error(context), argumentError.Message);
    }

    this._compileFlags = flags;
}