/// <summary>
/// Creates a new <see cref="EcmaRegExp"/> from the constructor returned by
/// <c>GetSpeciesConstructor</c> for this instance, then copies this instance's
/// state into it through <see cref="Init(EcmaRegExp)"/>.
/// </summary>
/// <returns>The newly created copy.</returns>
internal EcmaRegExp Clone() {
    var speciesConstructor = GetSpeciesConstructor(this, WellKnownObject.RegExpConstructor);
    EcmaRegExp copy = new EcmaRegExp(speciesConstructor);
    copy.Init(this);
    return copy;
}
/// <summary>
/// Initializes a match result by storing the supplied values on the instance.
/// </summary>
/// <param name="re">Regular expression object, stored in the <c>re</c> field.</param>
/// <param name="input">Input string, exposed through <c>Input</c>.</param>
/// <param name="result">The .NET <see cref="Match"/>, stored in the <c>result</c> field.</param>
/// <param name="groupNameMap">Group name table, stored as-is in the <c>groupNameMap</c> field.</param>
/// <param name="offset">Value stored in the <c>stickyOffset</c> field.</param>
public MatchResult(EcmaRegExp re, string input, Match result, Hashtable groupNameMap, int offset) {
    // "this." is only needed where a parameter shadows the member name.
    this.re = re;
    Input = input;
    this.result = result;
    this.groupNameMap = groupNameMap;
    stickyOffset = offset;
}
/// <summary>
/// Copies the regular expression state of <paramref name="other"/> onto this instance:
/// the native regex, original flags, capture group list, numeric group count,
/// source text and flags.
/// </summary>
/// <param name="other">Instance whose state is copied.</param>
internal void Init(EcmaRegExp other) {
    // Assignment order is kept as in the original in case any target is a property.
    nativeRegexp = other.nativeRegexp;
    OriginalFlags = other.OriginalFlags;
    captureGroups = other.captureGroups;
    numericGroupCount = other.numericGroupCount;
    Source = other.Source;
    Flags = other.Flags;
}
/// <summary>
/// Clones this instance and makes sure the clone's <c>Global</c> value equals
/// <paramref name="global"/>.
/// </summary>
/// <param name="global">Desired <c>Global</c> value of the returned clone.</param>
/// <returns>
/// The clone; when its <c>Global</c> value differed from <paramref name="global"/>,
/// the <c>EcmaRegExpFlags.Global</c> bit of its <c>OriginalFlags</c> has been toggled.
/// </returns>
internal EcmaRegExp Clone(bool global) {
    EcmaRegExp copy = Clone();
    if (copy.Global == global) {
        return copy;
    }
    copy.OriginalFlags ^= EcmaRegExpFlags.Global;
    return copy;
}
/// <summary>
/// Creates a regular expression from literal text of the form <c>/pattern/flags</c>.
/// The text between the first character and the last <c>/</c> is used as the pattern,
/// and everything after the last <c>/</c> as the flags.
/// </summary>
/// <param name="str">Literal text; checked with <c>Guard.ArgumentNotNull</c>.</param>
/// <returns>The result of <c>EcmaRegExp.Parse</c> for the extracted pattern and flags.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="str"/> is empty, does not start with <c>/</c>, or its last <c>/</c>
/// is at index 0 or 1 (leaving no pattern text).
/// </exception>
public static EcmaValue RegExpLiteral(string str) {
    Guard.ArgumentNotNull(str, "str");
    if (str.Length == 0) {
        throw new ArgumentException("Literal cannot be empty", "str");
    }

    int closingSlash = str.LastIndexOf('/');
    bool wellFormed = str[0] == '/' && closingSlash > 1;
    if (!wellFormed) {
        throw new ArgumentException("Invalid RegExp literal", "str");
    }

    string patternText = str.Substring(1, closingSlash - 1);
    string flagText = str.Substring(closingSlash + 1);
    return EcmaRegExp.Parse(patternText, flagText);
}
/// <summary>
/// Initializes the evaluator by storing the supplied values in the fields of the same names.
/// </summary>
/// <param name="re">Regular expression object.</param>
/// <param name="input">Input string.</param>
/// <param name="replacement">Replacement object.</param>
public MatchEvaluatorClass(EcmaRegExp re, string input, RuntimeObject replacement) {
    // Every parameter shadows its field, so the "this." qualifier is required.
    this.re = re;
    this.input = input;
    this.replacement = replacement;
}
/// <summary>
/// Parses an ECMAScript pattern and flag string into an <see cref="EcmaRegExp"/>.
/// The pattern is rewritten into a form accepted by the .NET <see cref="Regex"/> class
/// (with <see cref="RegexOptions.ECMAScript"/>). The parsed object is kept in
/// <c>cache</c> under the key <c>/pattern/flags</c>, and the value returned is
/// <c>Clone(RuntimeRealm.Current)</c> of the cached object.
/// </summary>
/// <param name="pattern">Pattern source; checked with <c>Guard.ArgumentNotNull</c>.</param>
/// <param name="flags">Flag characters; checked with <c>Guard.ArgumentNotNull</c>.</param>
/// <returns>A clone of the parsed (or cached) regular expression for the current realm.</returns>
/// <exception cref="EcmaSyntaxErrorException">
/// The length of <paramref name="flags"/> differs from the canonical flag string built
/// through <c>AddFlag</c> (presumably unknown or repeated flags - confirm against AddFlag),
/// a capture group name is used twice, or the .NET regex engine rejects the translated pattern.
/// </exception>
public static EcmaRegExp Parse(string pattern, string flags) {
    Guard.ArgumentNotNull(pattern, "pattern");
    Guard.ArgumentNotNull(flags, "flags");
    string key = String.Concat("/", pattern, "/", flags);
    if (!cache.TryGetValue(key, out EcmaRegExp re)) {
        // Build the canonical flag string in the fixed order g, i, m, s, u, y
        EcmaRegExpFlags options = 0;
        string canonFlags = "";
        canonFlags += AddFlag(flags, "g", EcmaRegExpFlags.Global, ref options);
        canonFlags += AddFlag(flags, "i", EcmaRegExpFlags.IgnoreCase, ref options);
        canonFlags += AddFlag(flags, "m", EcmaRegExpFlags.Multiline, ref options);
        canonFlags += AddFlag(flags, "s", EcmaRegExpFlags.DotAll, ref options);
        canonFlags += AddFlag(flags, "u", EcmaRegExpFlags.Unicode, ref options);
        canonFlags += AddFlag(flags, "y", EcmaRegExpFlags.Sticky, ref options);
        if (flags.Length != canonFlags.Length) {
            throw new EcmaSyntaxErrorException(InternalString.Error.InvalidRegexFlags);
        }

        string nPattern = pattern;
        int numericGroupCount = 1;
        List<string> captureGroups = new List<string> { "0" };
        nPattern = reGroups.Replace(nPattern, m => {
            switch (m.Value[0]) {
                case '(':
                    // .NET has different ordering of numeric and named groups
                    // and also detect duplicated group names which are allowed in .NET
                    string name = m.Groups[2].Success ? m.Groups[2].Value : (numericGroupCount++).ToString();
                    if (captureGroups.Contains(name)) {
                        throw new EcmaSyntaxErrorException(InternalString.Error.RegExpDuplicatedNameGroup);
                    }
                    captureGroups.Add(name);
                    break;
                case '\\':
                    // .NET considers an invalid back reference (backref to a capture at the right) a failure,
                    // so such a reference is removed from the pattern.
                    if (m.Groups[1].Success) {
                        // TryParse uses the same culture and number style as Int32.Parse; a digit run that
                        // does not fit in Int32 cannot name an existing group, so it is dropped as well
                        // instead of surfacing an OverflowException.
                        // NOTE(review): assumes group 1 of reGroups captures digits only - confirm.
                        if (!Int32.TryParse(m.Groups[1].Value, out int backref) || backref >= captureGroups.Count) {
                            return String.Empty;
                        }
                    }
                    break;
            }
            return m.Value;
        });

        bool unicode = (options & EcmaRegExpFlags.Unicode) != 0;
        // In unicode mode a "character" is a surrogate pair or a lone surrogate, otherwise a single code unit
        string allChars = unicode ? "(?:[\0-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])" : "[\0-\uFFFF]";
        // Wildcard: everything when DotAll is set, otherwise everything except \n, \r, U+2028 and U+2029
        string wildcardChars = (options & EcmaRegExpFlags.DotAll) != 0 ? allChars : unicode ?
            "(?:[\0-\t\x0B\f\x0E-\u2027\u202A-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])" : "[\0-\t\x0B\f\x0E-\u2027\u202A-\uFFFF]";

        // replace escape sequences that are not supported in ECMAScript but has semantic meaning in .NET
        nPattern = (unicode ? reUnsupportedEscape : reUnsupportedEscapeNonUnicode).Replace(nPattern, "$1$1$2");

        // convert character class \w, \W, \s, \S and wildcard to explicit character set
        // and UnicodeEscape (code point escape with braces) which is not supported in .NET
        nPattern = reCharClass.Replace(nPattern, m => {
            if (m.Value[0] == '\\') {
                switch (m.Value[1]) {
                    case 'w':
                        return "[a-zA-Z0-9_]";
                    case 'W':
                        return "[^a-zA-Z0-9_]";
                    case 's':
                        return "[\f\n\r\t\v\u2028\u2029\\p{Zs}]";
                    case 'S':
                        return "[^\f\n\r\t\v\u2028\u2029\\p{Zs}]";
                    case 'u':
                        return ConvertUnicodeEscape(m.Value);
                }
                return m.Value;
            }
            if (m.Value[0] == '.') {
                return wildcardChars;
            }
            if (m.Groups[2].Captures.Count == 0) {
                // ECMAScript allows empty CharacterClass in pattern
                // a negated empty CharacterClass means all code units or code points
                if (m.Groups[1].Length != 0) {
                    return allChars;
                }
                return "(?!)";
            }
            StringBuilder sb = new StringBuilder();
            sb.Append('[');
            sb.Append(m.Groups[1].Value);
            foreach (Capture c in m.Groups[2].Captures) {
                if (c.Value[0] == '\\') {
                    switch (c.Value[1]) {
                        case 'w':
                            sb.Append("a-zA-Z0-9_");
                            continue;
                        case 'W':
                            sb.Append(unicode ? "\0-/:-@\\[-^`{-\uDBFF\uDFFF" : "\0-/:-@\\[-^`{-\uFFFF");
                            continue;
                        case 's':
                            sb.Append("\f\n\r\t\v\u2028\u2029\\p{Zs}");
                            continue;
                        case 'S':
                            // Complement of the \s set above, spelled as explicit ranges. Each range ends on the
                            // code point just before the next whitespace character: U+001F before U+0020,
                            // U+009F before U+00A0 and U+167F before U+1680 (hexadecimal, not 19 / 99 / 1679).
                            sb.Append(unicode ?
                                "\x00-\x08\x0E-\x1F\x21-\x9F\u00A1-\u167F\u1681-\u1FFF\u200B-\u2027\u202A-\u202E\u2030-\u205E\u2060-\u2FFF\u3001-\uDBFF\uDFFF" :
                                "\x00-\x08\x0E-\x1F\x21-\x9F\u00A1-\u167F\u1681-\u1FFF\u200B-\u2027\u202A-\u202E\u2030-\u205E\u2060-\u2FFF\u3001-\uFFFF");
                            continue;
                        case 'u':
                            sb.Append(ConvertUnicodeEscape(c.Value));
                            continue;
                    }
                }
                sb.Append(c.Value);
            }
            sb.Append(']');
            return sb.ToString();
        });

        // convert surrogate pairs (non-BMP character) and lone surrogates, and character class which contains such characters
        // to appropriate pattern to correctly match code points
        if (unicode && Regex.IsMatch(nPattern, "[\uD800-\uDFFF]")) {
            nPattern = reCodePoints.Replace(nPattern, m => {
                if (m.Groups[1].Success) {
                    return TransformCharacterRange(m.Value, m.Groups[2].Value, m.Groups[1].Length > 0);
                }
                if (m.Groups[3].Success) {
                    string chars = m.Groups[3].Value;
                    if (chars.Length == 1) {
                        // A lone surrogate must not match one half of a valid surrogate pair
                        chars = Char.IsHighSurrogate(chars[0]) ? chars + "(?![\udc00-\udfff])" : "(?<![\ud800-\udbff])" + chars;
                    }
                    // Group the code point when group 4 follows it, so that group 4's text applies to the whole code point
                    return m.Groups[4].Success ? "(?:" + chars + ")" + m.Groups[4].Value : chars;
                }
                return m.Value;
            });
        }

        RegexOptions nOptions = RegexOptions.ECMAScript;
        if ((options & EcmaRegExpFlags.IgnoreCase) != 0) {
            nOptions |= RegexOptions.IgnoreCase;
        }
        if ((options & EcmaRegExpFlags.Multiline) != 0) {
            nOptions |= RegexOptions.Multiline;
        }
        Regex nativeRegexp;
        try {
            nativeRegexp = new Regex(nPattern, nOptions);
        } catch (ArgumentException) {
            // The .NET engine rejected the translated pattern; report it as an ECMAScript syntax error
            throw new EcmaSyntaxErrorException(InternalString.Error.InvalidRegex);
        }
        re = new EcmaRegExp(nativeRegexp, pattern, canonFlags, options, numericGroupCount, captureGroups.ToArray());
        cache.TryAdd(key, re);
    }
    return (EcmaRegExp)re.Clone(RuntimeRealm.Current);
}