Example #1
0
        /// <summary>
        /// Creates a new <see cref="EcmaRegExp"/> from the species constructor resolved for
        /// this instance (falling back on <c>WellKnownObject.RegExpConstructor</c>) and
        /// copies this instance's state into it through <c>Init</c>.
        /// </summary>
        /// <returns>The newly created copy.</returns>
        internal EcmaRegExp Clone()
        {
            var speciesConstructor = GetSpeciesConstructor(this, WellKnownObject.RegExpConstructor);
            EcmaRegExp copy = new EcmaRegExp(speciesConstructor);

            copy.Init(this);
            return copy;
        }
Example #2
0
 /// <summary>
 /// Captures everything needed to describe one match: the owning regular expression,
 /// the input string, the native match, the group name map and the sticky offset.
 /// </summary>
 /// <param name="re">Regular expression object that produced the match.</param>
 /// <param name="input">Input string the match was run against.</param>
 /// <param name="result">Native <see cref="Match"/> returned by the underlying regex.</param>
 /// <param name="groupNameMap">Table of group names; stored as-is for later lookups.</param>
 /// <param name="offset">Offset stored as the sticky offset of this result.</param>
 public MatchResult(EcmaRegExp re, string input, Match result, Hashtable groupNameMap, int offset)
 {
     // Only members shadowed by a parameter need the explicit "this." qualifier.
     this.re = re;
     Input = input;
     this.result = result;
     this.groupNameMap = groupNameMap;
     stickyOffset = offset;
 }
Example #3
0
 /// <summary>
 /// Copies the compiled regex, flags, capture group information and source text
 /// from another <see cref="EcmaRegExp"/> into this instance.
 /// </summary>
 /// <param name="other">Instance whose state is copied; the references are shared, not duplicated.</param>
 internal void Init(EcmaRegExp other)
 {
     nativeRegexp = other.nativeRegexp;
     OriginalFlags = other.OriginalFlags;
     captureGroups = other.captureGroups;
     numericGroupCount = other.numericGroupCount;
     Source = other.Source;
     Flags = other.Flags;
 }
Example #4
0
        /// <summary>
        /// Clones this regular expression and makes sure the copy's <c>Global</c> state
        /// equals <paramref name="global"/>.
        /// </summary>
        /// <param name="global">Desired value of <c>Global</c> on the returned copy.</param>
        /// <returns>The copy, with the global bit of <c>OriginalFlags</c> toggled when it differed.</returns>
        internal EcmaRegExp Clone(bool global)
        {
            EcmaRegExp copy = Clone();

            if (copy.Global == global)
            {
                // Already in the requested state; nothing to toggle.
                return copy;
            }
            copy.OriginalFlags ^= EcmaRegExpFlags.Global;
            return copy;
        }
Example #5
0
        /// <summary>
        /// Builds a regular expression value from literal notation, i.e. <c>/pattern/flags</c>.
        /// </summary>
        /// <param name="str">Literal text; must start with '/' and contain a later closing '/'.</param>
        /// <returns>The result of <c>EcmaRegExp.Parse</c> for the extracted pattern and flags.</returns>
        /// <exception cref="ArgumentException">
        /// The literal is empty, does not start with '/', or has no closing '/' after at
        /// least one pattern character.
        /// </exception>
        public static EcmaValue RegExpLiteral(string str)
        {
            Guard.ArgumentNotNull(str, nameof(str));
            if (str.Length == 0)
            {
                throw new ArgumentException("Literal cannot be empty", nameof(str));
            }

            int closingSlash = str.LastIndexOf('/');
            bool startsWithSlash = str[0] == '/';

            // closingSlash <= 1 rejects both a missing closing slash and an empty pattern ("//").
            if (!startsWithSlash || closingSlash <= 1)
            {
                throw new ArgumentException("Invalid RegExp literal", nameof(str));
            }

            string pattern = str.Substring(1, closingSlash - 1);
            string flags = str.Substring(closingSlash + 1);
            return EcmaRegExp.Parse(pattern, flags);
        }
Example #6
0
 /// <summary>
 /// Stores the regular expression, the input string and the replacement object
 /// for later use by this evaluator.
 /// </summary>
 /// <param name="re">Regular expression object the evaluator works for.</param>
 /// <param name="input">Input string being processed.</param>
 /// <param name="replacement">Runtime object kept as the replacement.</param>
 public MatchEvaluatorClass(EcmaRegExp re, string input, RuntimeObject replacement)
 {
     // Every field is shadowed by its parameter, hence the explicit qualification.
     this.re = re;
     this.input = input;
     this.replacement = replacement;
 }
Example #7
0
        /// <summary>
        /// Translates an ECMAScript pattern and flag string into a .NET <see cref="Regex"/>
        /// and wraps it in an <see cref="EcmaRegExp"/>.
        /// </summary>
        /// <remarks>
        /// Compiled results are cached under the key <c>/pattern/flags</c>. The value returned
        /// to the caller is always a clone of the cached entry made for
        /// <c>RuntimeRealm.Current</c>.
        /// </remarks>
        /// <param name="pattern">ECMAScript regular expression source, without delimiters.</param>
        /// <param name="flags">Flag letters; <c>g</c>, <c>i</c>, <c>m</c>, <c>s</c>, <c>u</c> and <c>y</c> are looked up.</param>
        /// <returns>A clone of the (possibly cached) compiled regular expression.</returns>
        /// <exception cref="EcmaSyntaxErrorException">
        /// The flags are invalid, a group name is duplicated, or the translated pattern is
        /// rejected by the .NET regex engine.
        /// </exception>
        public static EcmaRegExp Parse(string pattern, string flags)
        {
            Guard.ArgumentNotNull(pattern, "pattern");
            Guard.ArgumentNotNull(flags, "flags");
            string key = String.Concat("/", pattern, "/", flags);

            if (!cache.TryGetValue(key, out EcmaRegExp re))
            {
                EcmaRegExpFlags options    = 0;
                string          canonFlags = "";
                // Flags are collected in a fixed order so canonFlags is independent of input order.
                canonFlags += AddFlag(flags, "g", EcmaRegExpFlags.Global, ref options);
                canonFlags += AddFlag(flags, "i", EcmaRegExpFlags.IgnoreCase, ref options);
                canonFlags += AddFlag(flags, "m", EcmaRegExpFlags.Multiline, ref options);
                canonFlags += AddFlag(flags, "s", EcmaRegExpFlags.DotAll, ref options);
                canonFlags += AddFlag(flags, "u", EcmaRegExpFlags.Unicode, ref options);
                canonFlags += AddFlag(flags, "y", EcmaRegExpFlags.Sticky, ref options);
                // NOTE(review): presumably AddFlag contributes at most one letter per known flag,
                // so a length mismatch means an unknown or repeated flag - confirm against AddFlag.
                if (flags.Length != canonFlags.Length)
                {
                    throw new EcmaSyntaxErrorException(InternalString.Error.InvalidRegexFlags);
                }

                string        nPattern          = pattern;
                int           numericGroupCount = 1;
                List <string> captureGroups     = new List <string> {
                    "0"
                };
                nPattern = reGroups.Replace(nPattern, m => {
                    switch (m.Value[0])
                    {
                    case '(':
                        // .NET has different ordering of numeric and named groups
                        // and also detect duplicated group names which are allowed in .NET
                        string name = m.Groups[2].Success ? m.Groups[2].Value : (numericGroupCount++).ToString();
                        if (captureGroups.Contains(name))
                        {
                            throw new EcmaSyntaxErrorException(InternalString.Error.RegExpDuplicatedNameGroup);
                        }
                        captureGroups.Add(name);
                        break;

                    case '\\':
                        // .NET consider having an invalid back reference (backref to capture at the right) as failure.
                        // A number too large for Int32 cannot refer to an existing group either, so it is
                        // dropped the same way instead of letting an OverflowException escape.
                        if (m.Groups[1].Success && (!Int32.TryParse(m.Groups[1].Value, out int backRef) || backRef >= captureGroups.Count))
                        {
                            return(String.Empty);
                        }
                        break;
                    }
                    return(m.Value);
                });

                bool   unicode       = (options & EcmaRegExpFlags.Unicode) != 0;
                string allChars      = unicode ? "(?:[\0-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])" : "[\0-\uFFFF]";
                string wildcardChars = (options & EcmaRegExpFlags.DotAll) != 0 ? allChars : unicode ? "(?:[\0-\t\x0B\f\x0E-\u2027\u202A-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])" : "[\0-\t\x0B\f\x0E-\u2027\u202A-\uFFFF]";

                // replace escape sequences that are not supported in ECMAScript but has semantic meaning in .NET
                nPattern = (unicode ? reUnsupportedEscape : reUnsupportedEscapeNonUnicode).Replace(nPattern, "$1$1$2");

                // convert character class \w, \W, \s, \S and wildcard to explicit character set
                // and UnicodeEscape (\u{nnnnnn}) which is not supported in .NET
                nPattern = reCharClass.Replace(nPattern, m => {
                    if (m.Value[0] == '\\')
                    {
                        switch (m.Value[1])
                        {
                        case 'w':
                            return("[a-zA-Z0-9_]");

                        case 'W':
                            return("[^a-zA-Z0-9_]");

                        case 's':
                            return("[\f\n\r\t\v\u2028\u2029\\p{Zs}]");

                        case 'S':
                            return("[^\f\n\r\t\v\u2028\u2029\\p{Zs}]");

                        case 'u':
                            return(ConvertUnicodeEscape(m.Value));
                        }
                        return(m.Value);
                    }
                    if (m.Value[0] == '.')
                    {
                        return(wildcardChars);
                    }
                    if (m.Groups[2].Captures.Count == 0)
                    {
                        // ECMAScript allows empty CharacterClass in pattern
                        // a negated empty CharacterClass means all code units or code points
                        if (m.Groups[1].Length != 0)
                        {
                            return(allChars);
                        }
                        return("(?!)");
                    }
                    StringBuilder sb = new StringBuilder();
                    sb.Append('[');
                    sb.Append(m.Groups[1].Value);
                    foreach (Capture c in m.Groups[2].Captures)
                    {
                        if (c.Value[0] == '\\')
                        {
                            switch (c.Value[1])
                            {
                            case 'w':
                                sb.Append("a-zA-Z0-9_");
                                continue;

                            case 'W':
                                sb.Append(unicode ? "\0-/:-@\\[-^`{-\uDBFF\uDFFF" : "\0-/:-@\\[-^`{-\uFFFF");
                                continue;

                            case 's':
                                sb.Append("\f\n\r\t\v\u2028\u2029\\p{Zs}");
                                continue;

                            case 'S':
                                // Explicit complement of the \s set above. Range bounds are hexadecimal:
                                // the gaps are TAB..CR, U+0020, U+00A0, U+1680, U+2000-U+200A,
                                // U+2028/U+2029, U+202F, U+205F and U+3000.
                                sb.Append(unicode ? "\x00-\x08\x0E-\x1F\x21-\x9F\u00A1-\u167F\u1681-\u1FFF\u200B-\u2027\u202A-\u202E\u2030-\u205E\u2060-\u2FFF\u3001-\uDBFF\uDFFF" :
                                          "\x00-\x08\x0E-\x1F\x21-\x9F\u00A1-\u167F\u1681-\u1FFF\u200B-\u2027\u202A-\u202E\u2030-\u205E\u2060-\u2FFF\u3001-\uFFFF");
                                continue;

                            case 'u':
                                sb.Append(ConvertUnicodeEscape(c.Value));
                                continue;
                            }
                        }
                        sb.Append(c.Value);
                    }
                    sb.Append(']');
                    return(sb.ToString());
                });

                // convert surrogate pairs (non-BMP character) and lone surrogates, and character class which contains such characters
                // to appropriate pattern to correctly match code points
                if (unicode && Regex.IsMatch(nPattern, "[\uD800-\uDFFF]"))
                {
                    nPattern = reCodePoints.Replace(nPattern, m => {
                        if (m.Groups[1].Success)
                        {
                            return(TransformCharacterRange(m.Value, m.Groups[2].Value, m.Groups[1].Length > 0));
                        }
                        if (m.Groups[3].Success)
                        {
                            string chars = m.Groups[3].Value;
                            if (chars.Length == 1)
                            {
                                // A lone surrogate must not pair up with a neighbouring surrogate in the input.
                                chars = Char.IsHighSurrogate(chars[0]) ? chars + "(?![\udc00-\udfff])" : "(?<![\ud800-\udbff])" + chars;
                            }
                            return(m.Groups[4].Success ? "(?:" + chars + ")" + m.Groups[4].Value : chars);
                        }
                        return(m.Value);
                    });
                }

                RegexOptions nOptions = RegexOptions.ECMAScript;
                if ((options & EcmaRegExpFlags.IgnoreCase) != 0)
                {
                    nOptions |= RegexOptions.IgnoreCase;
                }
                if ((options & EcmaRegExpFlags.Multiline) != 0)
                {
                    nOptions |= RegexOptions.Multiline;
                }
                Regex nativeRegexp;
                try {
                    nativeRegexp = new Regex(nPattern, nOptions);
                } catch (ArgumentException) {
                    // Surface .NET pattern errors as an ECMAScript SyntaxError.
                    throw new EcmaSyntaxErrorException(InternalString.Error.InvalidRegex);
                }
                re = new EcmaRegExp(nativeRegexp, pattern, canonFlags, options, numericGroupCount, captureGroups.ToArray());
                cache.TryAdd(key, re);
            }
            return((EcmaRegExp)re.Clone(RuntimeRealm.Current));
        }