Exemple #1
0
        /// <summary>
        /// Initializes a regular expression object around an already-compiled native <see cref="Regex"/>.
        /// </summary>
        /// <param name="nativeRegexp">The compiled .NET regular expression that performs the matching.</param>
        /// <param name="pattern">The pattern text as originally supplied; used to build <c>Source</c>.</param>
        /// <param name="canonFlags">The flag string stored in <c>Flags</c>.</param>
        /// <param name="flags">The flag bits stored in <c>OriginalFlags</c>.</param>
        /// <param name="numericGroupCount">The numeric group count recorded for this expression.</param>
        /// <param name="captureGroups">The capture group names recorded for this expression.</param>
        private EcmaRegExp(Regex nativeRegexp, string pattern, string canonFlags, EcmaRegExpFlags flags, int numericGroupCount, string[] captureGroups)
            : base(WellKnownObject.RegExpPrototype, true)
        {
            Guard.ArgumentNotNull(nativeRegexp, "nativeRegexp");
            Guard.ArgumentNotNull(pattern, "pattern");
            Guard.ArgumentNotNull(canonFlags, "canonFlags");
            Guard.ArgumentNotNull(captureGroups, "captureGroups");

            // Rewrites the characters that cannot appear verbatim in the source text:
            // a '/' not immediately preceded by a backslash, and raw line breaks.
            MatchEvaluator escapeForSource = hit =>
            {
                char first = hit.Value[0];
                if (first == '/')
                {
                    return "\\/";
                }
                if (first == '\n')
                {
                    return "\\n";
                }
                if (first == '\r')
                {
                    return "\\r";
                }
                return hit.Value;
            };

            this.nativeRegexp = nativeRegexp;
            this.OriginalFlags = flags;
            this.Flags = canonFlags;
            this.numericGroupCount = numericGroupCount;
            this.captureGroups = captureGroups;

            if (pattern.Length == 0)
            {
                // An empty pattern is represented by an empty non-capturing group.
                this.Source = "(?:)";
            }
            else
            {
                this.Source = Regex.Replace(pattern, "(?<!\\\\)/|[\n\r]", escapeForSource);
            }

            // lastIndex starts at 0 and is defined with only the Writable attribute.
            EcmaPropertyDescriptor lastIndex = new EcmaPropertyDescriptor(0, EcmaPropertyAttributes.Writable);
            DefineOwnPropertyNoChecked(WellKnownProperty.LastIndex, lastIndex);
        }
Exemple #2
0
 /// <summary>
 /// Checks whether a flag character is present in a flag string and, if so, records its bit.
 /// </summary>
 /// <param name="flags">The flag string to inspect.</param>
 /// <param name="ch">The flag character, as a string, to look for.</param>
 /// <param name="flag">The bit OR-ed into <paramref name="value"/> when the flag is present.</param>
 /// <param name="value">The accumulated flag bits; updated only when the flag is present.</param>
 /// <returns><paramref name="ch"/> when it occurs in <paramref name="flags"/>; otherwise an empty string.</returns>
 private static string AddFlag(string flags, string ch, EcmaRegExpFlags flag, ref EcmaRegExpFlags value)
 {
     bool present = flags.Contains(ch);
     if (!present)
     {
         return String.Empty;
     }

     value = value | flag;
     return ch;
 }
Exemple #3
0
        /// <summary>
        /// Builds an <see cref="EcmaRegExp"/> from an ECMAScript pattern and flag string by translating
        /// the pattern into an equivalent .NET regular expression.
        /// </summary>
        /// <remarks>
        /// Translated expressions are cached under the key <c>/pattern/flags</c>; every call returns a
        /// clone of the cached instance made for <c>RuntimeRealm.Current</c>.
        /// </remarks>
        /// <param name="pattern">The ECMAScript pattern text.</param>
        /// <param name="flags">The flag characters; only <c>g</c>, <c>i</c>, <c>m</c>, <c>s</c>, <c>u</c> and <c>y</c> are recognized.</param>
        /// <returns>A clone of the parsed regular expression object.</returns>
        /// <exception cref="EcmaSyntaxErrorException">
        /// The flag string contains an unrecognized or repeated flag, the pattern declares the same
        /// group name twice, or the translated pattern is rejected by the .NET regex engine.
        /// </exception>
        public static EcmaRegExp Parse(string pattern, string flags)
        {
            Guard.ArgumentNotNull(pattern, "pattern");
            Guard.ArgumentNotNull(flags, "flags");
            string key = String.Concat("/", pattern, "/", flags);

            if (!cache.TryGetValue(key, out EcmaRegExp re))
            {
                // Collect recognized flags in canonical order. Any unknown or duplicated
                // character makes the input longer than the canonical string.
                EcmaRegExpFlags options    = 0;
                string          canonFlags = "";
                canonFlags += AddFlag(flags, "g", EcmaRegExpFlags.Global, ref options);
                canonFlags += AddFlag(flags, "i", EcmaRegExpFlags.IgnoreCase, ref options);
                canonFlags += AddFlag(flags, "m", EcmaRegExpFlags.Multiline, ref options);
                canonFlags += AddFlag(flags, "s", EcmaRegExpFlags.DotAll, ref options);
                canonFlags += AddFlag(flags, "u", EcmaRegExpFlags.Unicode, ref options);
                canonFlags += AddFlag(flags, "y", EcmaRegExpFlags.Sticky, ref options);
                if (flags.Length != canonFlags.Length)
                {
                    throw new EcmaSyntaxErrorException(InternalString.Error.InvalidRegexFlags);
                }

                string        nPattern          = pattern;
                int           numericGroupCount = 1;
                List <string> captureGroups     = new List <string> {
                    "0"
                };
                nPattern = reGroups.Replace(nPattern, m => {
                    switch (m.Value[0])
                    {
                    case '(':
                        // .NET has different ordering of numeric and named groups
                        // and also detect duplicated group names which are allowed in .NET
                        string name = m.Groups[2].Success ? m.Groups[2].Value : (numericGroupCount++).ToString();
                        if (captureGroups.Contains(name))
                        {
                            throw new EcmaSyntaxErrorException(InternalString.Error.RegExpDuplicatedNameGroup);
                        }
                        captureGroups.Add(name);
                        break;

                    case '\\':
                        // .NET consider having an invalid back reference (backref to capture at the right) as failure
                        if (m.Groups[1].Success)
                        {
                            // A digit run too large for Int32 cannot refer to a group seen so far,
                            // so it is dropped like any other out-of-range reference instead of
                            // letting an OverflowException escape.
                            // NOTE(review): assumes group 1 of reGroups captures only the decimal
                            // digits of the back reference - confirm against its definition.
                            if (!Int32.TryParse(m.Groups[1].Value, out int backRef) || backRef >= captureGroups.Count)
                            {
                                return(String.Empty);
                            }
                        }
                        break;
                    }
                    return(m.Value);
                });

                bool   unicode       = (options & EcmaRegExpFlags.Unicode) != 0;
                string allChars      = unicode ? "(?:[\0-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])" : "[\0-\uFFFF]";
                string wildcardChars = (options & EcmaRegExpFlags.DotAll) != 0 ? allChars : unicode ? "(?:[\0-\t\x0B\f\x0E-\u2027\u202A-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])" : "[\0-\t\x0B\f\x0E-\u2027\u202A-\uFFFF]";

                // replace escape sequences that are not supported in ECMAScript but has semantic meaning in .NET
                nPattern = (unicode ? reUnsupportedEscape : reUnsupportedEscapeNonUnicode).Replace(nPattern, "$1$1$2");

                // convert character class \w, \W, \s, \S and wildcard to explicit character set
                // and UnicodeEscape (\u{nnnnnn}) which is not supported in .NET
                nPattern = reCharClass.Replace(nPattern, m => {
                    if (m.Value[0] == '\\')
                    {
                        switch (m.Value[1])
                        {
                        case 'w':
                            return("[a-zA-Z0-9_]");

                        case 'W':
                            return("[^a-zA-Z0-9_]");

                        case 's':
                            return("[\f\n\r\t\v\u2028\u2029\\p{Zs}]");

                        case 'S':
                            return("[^\f\n\r\t\v\u2028\u2029\\p{Zs}]");

                        case 'u':
                            return(ConvertUnicodeEscape(m.Value));
                        }
                        return(m.Value);
                    }
                    if (m.Value[0] == '.')
                    {
                        return(wildcardChars);
                    }
                    if (m.Groups[2].Captures.Count == 0)
                    {
                        // ECMAScript allows empty CharacterClass in pattern
                        // a negated empty CharacterClass means all code units or code points
                        if (m.Groups[1].Length != 0)
                        {
                            return(allChars);
                        }
                        return("(?!)");
                    }
                    StringBuilder sb = new StringBuilder();
                    sb.Append('[');
                    sb.Append(m.Groups[1].Value);
                    foreach (Capture c in m.Groups[2].Captures)
                    {
                        if (c.Value[0] == '\\')
                        {
                            switch (c.Value[1])
                            {
                            case 'w':
                                sb.Append("a-zA-Z0-9_");
                                continue;

                            case 'W':
                                sb.Append(unicode ? "\0-/:-@\\[-^`{-\uDBFF\uDFFF" : "\0-/:-@\\[-^`{-\uFFFF");
                                continue;

                            case 's':
                                sb.Append("\f\n\r\t\v\u2028\u2029\\p{Zs}");
                                continue;

                            case 'S':
                                // Explicit complement of the \s set used above: every range stops one
                                // below a whitespace code point (U+0020, U+00A0, U+1680, ...), so the
                                // upper bounds are 0x1F, 0x9F and 0x167F.
                                sb.Append(unicode ? "\x00-\x08\x0E-\x1F\x21-\x9F\u00A1-\u167F\u1681-\u1FFF\u200B-\u2027\u202A-\u202E\u2030-\u205E\u2060-\u2FFF\u3001-\uDBFF\uDFFF" :
                                          "\x00-\x08\x0E-\x1F\x21-\x9F\u00A1-\u167F\u1681-\u1FFF\u200B-\u2027\u202A-\u202E\u2030-\u205E\u2060-\u2FFF\u3001-\uFFFF");
                                continue;

                            case 'u':
                                sb.Append(ConvertUnicodeEscape(c.Value));
                                continue;
                            }
                        }
                        sb.Append(c.Value);
                    }
                    sb.Append(']');
                    return(sb.ToString());
                });

                // convert surrogate pairs (non-BMP character) and lone surrogates, and character class which contains such characters
                // to appropriate pattern to correctly match code points
                if (unicode && Regex.IsMatch(nPattern, "[\uD800-\uDFFF]"))
                {
                    nPattern = reCodePoints.Replace(nPattern, m => {
                        if (m.Groups[1].Success)
                        {
                            return(TransformCharacterRange(m.Value, m.Groups[2].Value, m.Groups[1].Length > 0));
                        }
                        if (m.Groups[3].Success)
                        {
                            string chars = m.Groups[3].Value;
                            if (chars.Length == 1)
                            {
                                // A lone surrogate must not match one half of a valid surrogate pair.
                                chars = Char.IsHighSurrogate(chars[0]) ? chars + "(?![\udc00-\udfff])" : "(?<![\ud800-\udbff])" + chars;
                            }
                            return(m.Groups[4].Success ? "(?:" + chars + ")" + m.Groups[4].Value : chars);
                        }
                        return(m.Value);
                    });
                }

                RegexOptions nOptions = RegexOptions.ECMAScript;
                if ((options & EcmaRegExpFlags.IgnoreCase) != 0)
                {
                    nOptions |= RegexOptions.IgnoreCase;
                }
                if ((options & EcmaRegExpFlags.Multiline) != 0)
                {
                    nOptions |= RegexOptions.Multiline;
                }
                Regex nativeRegexp;
                try {
                    nativeRegexp = new Regex(nPattern, nOptions);
                } catch (ArgumentException) {
                    // Surface a rejected translated pattern as an ECMAScript syntax error.
                    throw new EcmaSyntaxErrorException(InternalString.Error.InvalidRegex);
                }
                re = new EcmaRegExp(nativeRegexp, pattern, canonFlags, options, numericGroupCount, captureGroups.ToArray());
                cache.TryAdd(key, re);
            }
            return((EcmaRegExp)re.Clone(RuntimeRealm.Current));
        }