Ejemplo n.º 1
0
        public async Task Matches(RegexEngine engine, string pattern, string input, RegexOptions options, CaptureData[] expected)
        {
            // Builds the regex for the requested engine, then checks the expected captures against
            // both the collection-returning API (Matches) and the single-result API (Match).
            Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

            MatchCollection allMatches = regex.Matches(input);
            VerifyMatches(allMatches, expected);

            Match firstMatch = regex.Match(input);
            VerifyMatches(firstMatch, expected);
        }
Ejemplo n.º 2
0
        public async Task Count_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, RegexOptions options, int expectedCount)
        {
            // Exercises every Count overload (instance/static, string/span) against expectedCount and
            // cross-checks the instance overloads against Matches(input).Count.
            Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

            // Instance overloads.
            Assert.Equal(expectedCount, regex.Count(input));
            Assert.Equal(expectedCount, regex.Count(input.AsSpan()));
            Assert.Equal(regex.Count(input), regex.Matches(input).Count);
            Assert.Equal(regex.Count(input.AsSpan()), regex.Matches(input).Count);

            // The static overloads without an options argument are only checked for the interpreter
            // with no options set.
            bool canUseOptionlessStatics = options == RegexOptions.None && engine == RegexEngine.Interpreter;
            if (canUseOptionlessStatics)
            {
                Assert.Equal(expectedCount, Regex.Count(input, pattern));
                Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern));
            }

            // The remaining static overloads are only checked for these three engines.
            bool engineHasOptions = engine == RegexEngine.Interpreter
                                    || engine == RegexEngine.Compiled
                                    || engine == RegexEngine.NonBacktracking;
            if (!engineHasOptions)
            {
                return;
            }

            RegexOptions engineOptions = RegexHelpers.OptionsFromEngine(engine);
            RegexOptions combinedOptions = options | engineOptions;

            Assert.Equal(expectedCount, Regex.Count(input, pattern, combinedOptions));
            Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern, combinedOptions));
            Assert.Equal(expectedCount, Regex.Count(input, pattern, combinedOptions, Regex.InfiniteMatchTimeout));
            Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern, combinedOptions, Regex.InfiniteMatchTimeout));
        }
Ejemplo n.º 3
0
        public async Task CharactersComparedOneByOne_Invariant(RegexEngine engine, RegexOptions options)
        {
            // Regex is expected to compare text character by character. The two strings below consist of
            // different characters yet are considered equal by an invariant-culture string comparison,
            // so a regex built from one must not match the other.
            const string S1 = "\u00D6\u200D";
            const string S2 = "\u004F\u0308";

            // Sanity-check the premise before involving Regex at all.
            Assert.False(S1[0] == S2[0]);
            Assert.False(S1[1] == S2[1]);
            Assert.StartsWith(S1, S2, StringComparison.InvariantCulture);
            Assert.True(S1.Equals(S2, StringComparison.InvariantCulture));

            // Several repetition counts are used because code generation changes at longer lengths.
            int[] repeatCounts = { 1, 10, 100 };

            foreach (int repeatCount in repeatCounts)
            {
                string repeatedS1 = string.Concat(Enumerable.Repeat(S1, repeatCount));
                string repeatedS2 = string.Concat(Enumerable.Repeat(S2, repeatCount));

                // String at the very start of the pattern: affects prefix matching.
                Regex leading = await RegexHelpers.GetRegexAsync(engine, repeatedS1, options);

                Assert.False(leading.IsMatch(repeatedS2));
                Assert.True(leading.IsMatch(repeatedS1));

                // String preceded by a set: affects "multi" matching.
                Regex afterSet = await RegexHelpers.GetRegexAsync(engine, "[abc]" + repeatedS1, options);

                Assert.False(afterSet.IsMatch("a" + repeatedS2));
                Assert.True(afterSet.IsMatch("a" + repeatedS1));
            }
        }
Ejemplo n.º 4
0
        public async Task TurkishI_Is_Differently_LowerUpperCased_In_Turkish_Culture_NonBacktracking()
        {
            // Compares case-insensitive matching of the four "i" variants (I, U+0131, U+0130, i) between an
            // invariant-culture regex and a tr-TR regex, both built with the NonBacktracking engine.
            CultureInfo turkish = new CultureInfo("tr-TR");
            string input = "I\u0131\u0130i";

            // The input doubles as the pattern.
            // The Compiled option is left out because it is a no-op in combination with NonBacktracking.
            Regex cultInvariantRegex = await RegexHelpers.GetRegexAsync(
                RegexEngine.NonBacktracking,
                input,
                RegexOptions.IgnoreCase | RegexOptions.CultureInvariant,
                CultureInfo.InvariantCulture);

            Regex turkishRegex = await RegexHelpers.GetRegexAsync(
                RegexEngine.NonBacktracking,
                input,
                RegexOptions.IgnoreCase,
                turkish);

            // Both regexes match their own source text.
            Assert.True(cultInvariantRegex.IsMatch(input));
            // NOTE: the original author flags this result as differing from a preceding test (not visible here).
            Assert.True(turkishRegex.IsMatch(input));

            // Each regex accepts only the lowercase form produced by its own culture.
            string loweredInvariant = input.ToLowerInvariant();
            string loweredTurkish = input.ToLower(turkish);

            Assert.True(cultInvariantRegex.IsMatch(loweredInvariant));
            Assert.False(cultInvariantRegex.IsMatch(loweredTurkish));

            Assert.False(turkishRegex.IsMatch(loweredInvariant));
            Assert.True(turkishRegex.IsMatch(loweredTurkish));

            // The same holds symmetrically for the uppercase forms.
            string upperedInvariant = input.ToUpperInvariant();
            string upperedTurkish = input.ToUpper(turkish);

            Assert.True(cultInvariantRegex.IsMatch(upperedInvariant));
            Assert.False(cultInvariantRegex.IsMatch(upperedTurkish));

            Assert.False(turkishRegex.IsMatch(upperedInvariant));
            Assert.True(turkishRegex.IsMatch(upperedTurkish));
        }
Ejemplo n.º 5
0
        public async Task Replace(RegexEngine engine, string pattern, string input, string replacement, RegexOptions options, int count, int start, string expected)
        {
            // Checks Regex.Replace with a string replacement, using the most specific overload always and
            // the shorter overloads whenever count/start are at their defaults.

            // Some rows exceed the 1000 limit (they reach 6003), so the threshold is raised only while
            // the regex is being constructed and restored afterwards even if construction throws.
            RegexHelpers.SetSafeSizeThreshold(6005);
            Regex regex;

            try
            {
                regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);
            }
            finally
            {
                RegexHelpers.RestoreSafeSizeThresholdToDefault();
            }

            bool usesDefaultStart = RegexHelpers.IsDefaultStart(input, options, start);
            bool usesDefaultCount = RegexHelpers.IsDefaultCount(input, options, count);

            // Overloads without count and start.
            if (usesDefaultStart && usesDefaultCount)
            {
                Assert.Equal(expected, regex.Replace(input, replacement));
                Assert.Equal(expected, Regex.Replace(input, pattern, replacement, options));
            }

            // Overload with count only.
            if (usesDefaultStart)
            {
                Assert.Equal(expected, regex.Replace(input, replacement, count));
            }

            // Overload with both count and start is always applicable.
            Assert.Equal(expected, regex.Replace(input, replacement, count, start));
        }
Ejemplo n.º 6
0
        public async Task Count_Timeout_ThrowsAfterTooLongExecution(RegexEngine engine)
        {
            // Verifies that Count observes the match timeout, both for instance and static overloads.
            // The scenario relies on backtracking taking a long time, so it is skipped for the
            // non-backtracking engine.
            if (RegexHelpers.IsNonBacktracking(engine))
            {
                return;
            }

            const string Pattern = @"^(\w+\s?)*$";
            const string Input   = "An input string that takes a very very very very very very very very very very very long time!";

            Regex regex = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.None, TimeSpan.FromMilliseconds(1));

            Stopwatch instanceTimer = Stopwatch.StartNew();

            Assert.Throws<RegexMatchTimeoutException>(() => regex.Count(Input));
            Assert.Throws<RegexMatchTimeoutException>(() => regex.Count(Input.AsSpan()));

            // Arbitrary upper bound that should be well above what is needed with a 1ms timeout.
            Assert.InRange(instanceTimer.Elapsed.TotalSeconds, 0, 10);

            // The static overloads are only exercised for these two engines.
            if (engine == RegexEngine.Interpreter || engine == RegexEngine.Compiled)
            {
                Stopwatch staticTimer = Stopwatch.StartNew();

                Assert.Throws<RegexMatchTimeoutException>(() => Regex.Count(Input, Pattern, RegexHelpers.OptionsFromEngine(engine), TimeSpan.FromMilliseconds(1)));
                Assert.Throws<RegexMatchTimeoutException>(() => Regex.Count(Input.AsSpan(), Pattern, RegexHelpers.OptionsFromEngine(engine), TimeSpan.FromMilliseconds(1)));

                // Same arbitrary upper bound as above.
                Assert.InRange(staticTimer.Elapsed.TotalSeconds, 0, 10);
            }
        }
Ejemplo n.º 7
0
        public async Task Match_In_Different_Cultures_CriticalCases(string pattern, RegexOptions options, RegexEngine engine, CultureInfo culture, string input, string match_expected)
        {
            // Builds the regex under the supplied culture and compares the text of the first match
            // with the expected value.
            Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options, culture);

            string actualValue = regex.Match(input).Value;

            Assert.Equal(match_expected, actualValue);
        }
Ejemplo n.º 8
0
        public async Task Test(RegexEngine engine, RegexOptions options, string pattern, string input, string captures, string nonBacktrackingCaptures = null)
        {
            // Runs one data row against one engine.
            //   input    : the literal "NULL" stands for the empty string.
            //   captures : "BADBR" means the pattern must be rejected with an ArgumentException,
            //              "NOMATCH" means the pattern must not match, otherwise a list of
            //              "(start,end)" pairs describing the expected capture spans.
            //   nonBacktrackingCaptures : when non-null, replaces captures for the NonBacktracking engine;
            //              "NONBACKTRACKINGINCOMPATIBLE" means construction must throw NotSupportedException.
            if (input == "NULL")
            {
                input = "";
            }

            bool   nonBacktracking = engine == RegexEngine.NonBacktracking;
            string expected        = nonBacktracking && nonBacktrackingCaptures != null ?
                                     nonBacktrackingCaptures : // nonBacktrackingCaptures value overrides the expected result in NonBacktracking mode
                                     captures;

            if (expected == "BADBR")
            {
                await Assert.ThrowsAnyAsync<ArgumentException>(async () => await RegexHelpers.GetRegexAsync(engine, pattern, options));

                return;
            }

            if (nonBacktracking && nonBacktrackingCaptures == "NONBACKTRACKINGINCOMPATIBLE")
            {
                // In particular: backreferences are not supported in NonBacktracking mode
                await Assert.ThrowsAnyAsync<NotSupportedException>(() => RegexHelpers.GetRegexAsync(engine, pattern, options));

                return;
            }

            Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);

            if (expected == "NOMATCH")
            {
                Assert.False(r.IsMatch(input));
                return;
            }

            Match match = r.Match(input);

            Assert.True(match.Success);

            // Parse "(s,e)(s,e)..." into a set of (start, end) pairs.
            var expectedSet = new HashSet<(int start, int end)>(
                expected
                .Split(new[] { '(', ')' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(s => s.Split(','))
                .Select(s => (start: int.Parse(s[0]), end: int.Parse(s[1]))));

            // Every group of the match, expressed in the same (start, end) form.
            var actualSet = new HashSet<(int start, int end)>(
                match.Groups
                .Cast<Group>()
                .Select(g => (start: g.Index, end: g.Index + g.Length)));

            // NonBacktracking mode only provides the top-level match.
            // The .NET implementation sometimes has extra captures beyond what the original data specifies, so we assert a subset.
            if (nonBacktracking ? !actualSet.IsSubsetOf(expectedSet) : !expectedSet.IsSubsetOf(actualSet))
            {
                // Print the parsed expected set (not the raw data string) so that both lines of the
                // message use the same format and can be compared at a glance.
                throw new Xunit.Sdk.XunitException($"Actual: {string.Join(", ", actualSet)}{Environment.NewLine}Expected: {string.Join(", ", expectedSet)}");
            }
        }
Ejemplo n.º 9
0
        public async Task TurkishCulture_MatchesWordChar(RegexEngine engine, string input, RegexOptions options, string expectedResult)
        {
            // Constructs and runs a \w* regex while a tr-TR ThreadCultureChange is active,
            // then compares the first match's text with the expected value.
            CultureInfo turkish = new CultureInfo("tr-TR");

            using (new ThreadCultureChange(turkish))
            {
                Regex wordChars = await RegexHelpers.GetRegexAsync(engine, @"\w*", options);
                Match firstMatch = wordChars.Match(input);

                Assert.Equal(expectedResult, firstMatch.Value);
            }
        }
Ejemplo n.º 10
0
 public async Task CharactersLowercasedOneByOne(RegexEngine engine)
 {
     // With IgnoreCase, a surrogate pair and each of its halves must still match input that
     // contains the pair. Everything runs while an en-US ThreadCultureChange is active.
     using (new ThreadCultureChange("en-US"))
     {
         // Whole pair as the pattern, pair alone as the input.
         Regex pairRegex = await RegexHelpers.GetRegexAsync(engine, "\uD801\uDC00", RegexOptions.IgnoreCase);
         Assert.True(pairRegex.IsMatch("\uD801\uDC00"));

         // Whole pair as the pattern, pair preceded by ASCII text as the input.
         Regex pairRegexForPrefixedInput = await RegexHelpers.GetRegexAsync(engine, "\uD801\uDC00", RegexOptions.IgnoreCase);
         Assert.True(pairRegexForPrefixedInput.IsMatch("abcdefg\uD801\uDC00"));

         // First half of the pair only.
         Regex highHalfRegex = await RegexHelpers.GetRegexAsync(engine, "\uD801", RegexOptions.IgnoreCase);
         Assert.True(highHalfRegex.IsMatch("\uD801\uDC00"));

         // Second half of the pair only.
         Regex lowHalfRegex = await RegexHelpers.GetRegexAsync(engine, "\uDC00", RegexOptions.IgnoreCase);
         Assert.True(lowHalfRegex.IsMatch("\uD801\uDC00"));
     }
 }
Ejemplo n.º 11
0
        public void EnumerateMatches_Count(RegexEngine engine, string pattern, string input, int expectedCount)
        {
            // Counts the matches yielded by EnumerateMatches and compares the total with expectedCount.
            // The regex is obtained by blocking on the async helper because this method is synchronous.
            Regex regex = RegexHelpers.GetRegexAsync(engine, pattern).GetAwaiter().GetResult();

            var matches = regex.EnumerateMatches(input);
            int seen = 0;

            // Advance the enumerator by hand; the match values themselves are not needed.
            while (matches.MoveNext())
            {
                seen++;
            }

            Assert.Equal(expectedCount, seen);
        }
Ejemplo n.º 12
0
        public async Task TurkishCulture_Handling_Of_IgnoreCase(RegexEngine engine)
        {
            // Every character of the input falls inside the corresponding range of the pattern, so the
            // pattern matches trivially. Ignoring case can only add more letters -- never remove them --
            // so the match must still succeed under tr-TR with IgnoreCase.
            const string Input = "I\u0131\u0130i";
            const string Pattern = "[H-J][\u0131-\u0140][\u0120-\u0130][h-j]";

            CultureInfo turkish = new CultureInfo("tr-TR");
            Regex turkishIgnoreCase = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase, turkish);

            bool matched = turkishIgnoreCase.IsMatch(Input);

            Assert.True(matched);
        }
Ejemplo n.º 13
0
        public async Task CharactersComparedOneByOne_AnchoredPattern(RegexEngine engine, string pattern, string input, string culture, RegexOptions options, bool expected)
        {
            // Regex is expected to compare text character by character. A change there could alter the
            // outcome for inputs whose characters differ from the pattern's while the strings still
            // compare as equal under the invariant culture (and some other cultures).
            // The regex is built and evaluated while the requested culture is active.
            using (new ThreadCultureChange(culture))
            {
                Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);
                bool matched = regex.IsMatch(input);

                Assert.Equal(expected, matched);
            }
        }
Ejemplo n.º 14
0
        public void EnumerateMatches_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, int startat, RegexOptions options, int expectedCount)
        {
            // Checks the number of matches produced by every EnumerateMatches overload. The overloads
            // without a start position are only checked when startat equals the default position
            // (end of input for RightToLeft, otherwise 0).
            // The regex is obtained by blocking on the async helper because this method is synchronous.
            Regex regex = RegexHelpers.GetRegexAsync(engine, pattern, options).GetAwaiter().GetResult();

            // Instance overload with an explicit start position.
            Assert.Equal(expectedCount, CountAll(regex.EnumerateMatches(input, startat)));

            bool rightToLeft = (options & RegexOptions.RightToLeft) != 0;
            int defaultStartAt = rightToLeft ? input.Length : 0;

            if (startat != defaultStartAt)
            {
                return;
            }

            // Static overload without options: only for the interpreter with no options set.
            if (options == RegexOptions.None && engine == RegexEngine.Interpreter)
            {
                Assert.Equal(expectedCount, CountAll(Regex.EnumerateMatches(input, pattern)));
            }

            // Static overloads taking options: only for these three engines.
            bool engineHasOptions = engine == RegexEngine.Interpreter
                                    || engine == RegexEngine.Compiled
                                    || engine == RegexEngine.NonBacktracking;

            if (engineHasOptions)
            {
                RegexOptions engineOptions = RegexHelpers.OptionsFromEngine(engine);

                Assert.Equal(expectedCount, CountAll(Regex.EnumerateMatches(input, pattern, options | engineOptions)));
                Assert.Equal(expectedCount, CountAll(Regex.EnumerateMatches(input, pattern, options | engineOptions, Regex.InfiniteMatchTimeout)));
            }

            // Drains the enumerator and returns how many matches it produced.
            int CountAll(Regex.ValueMatchEnumerator matches)
            {
                int total = 0;

                while (matches.MoveNext())
                {
                    total++;
                }

                return total;
            }
        }
Ejemplo n.º 15
0
        public async Task Test(string pattern, string input, string captures)
        {
            // Runs one data row against every engine in RegexHelpers.AvailableEngines.
            //   input    : the literal "NULL" stands for the empty string.
            //   captures : "BADBR" means the pattern must be rejected with an ArgumentException,
            //              "NOMATCH" means the pattern must not match, otherwise a list of
            //              "(start,end)" pairs describing the expected capture spans.
            if (input == "NULL")
            {
                input = "";
            }

            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
            {
                if (captures == "BADBR")
                {
                    await Assert.ThrowsAnyAsync<ArgumentException>(async () => (await RegexHelpers.GetRegexAsync(engine, pattern)).IsMatch(input));

                    // Move on to the next engine. Returning here would leave every engine after the
                    // first one unverified for this row.
                    continue;
                }

                Regex r = await RegexHelpers.GetRegexAsync(engine, pattern);

                if (captures == "NOMATCH")
                {
                    Assert.False(r.IsMatch(input));

                    // Same as above: keep checking the remaining engines.
                    continue;
                }

                Match match = r.Match(input);
                Assert.True(match.Success);

                // Parse "(s,e)(s,e)..." into (start, end) pairs. The ordering only influences the order
                // in which items are inserted into the set (and thus, in practice, the failure message).
                var expected = new HashSet<(int start, int end)>(
                    captures
                    .Split(new[] { '(', ')' }, StringSplitOptions.RemoveEmptyEntries)
                    .Select(s => s.Split(','))
                    .Select(s => (start: int.Parse(s[0]), end: int.Parse(s[1])))
                    .Distinct()
                    .OrderBy(c => c.start)
                    .ThenBy(c => c.end));

                // Every group of the match, expressed in the same (start, end) form.
                var actual = new HashSet<(int start, int end)>(
                    match.Groups
                    .Cast<Group>()
                    .Select(g => (start: g.Index, end: g.Index + g.Length))
                    .Distinct()
                    .OrderBy(g => g.start)
                    .ThenBy(g => g.end));

                // The .NET implementation sometimes has extra captures beyond what the original data specifies, so we assert a subset.
                if (!expected.IsSubsetOf(actual))
                {
                    throw new Xunit.Sdk.XunitException($"Actual: {string.Join(", ", actual)}{Environment.NewLine}Expected: {string.Join(", ", expected)}");
                }
            }
        }
Ejemplo n.º 16
0
        public void EnumerateMatches(RegexEngine engine, string pattern, string input, RegexOptions options, CaptureData[] expected)
        {
            // Walks the matches produced by EnumerateMatches and compares index, length and text of each
            // one with the expected entry at the same position; finally checks the total number of matches.
            // The regex is obtained by blocking on the async helper because this method is synchronous.
            Regex regex = RegexHelpers.GetRegexAsync(engine, pattern, options).GetAwaiter().GetResult();
            ReadOnlySpan<char> text = input.AsSpan();
            int seen = 0;

            foreach (ValueMatch current in regex.EnumerateMatches(text))
            {
                CaptureData wanted = expected[seen];
                string actualValue = text.Slice(current.Index, current.Length).ToString();

                Assert.Equal(wanted.Index, current.Index);
                Assert.Equal(wanted.Length, current.Length);
                Assert.Equal(wanted.Value, actualValue);

                seen++;
            }

            Assert.Equal(expected.Length, seen);
        }
Ejemplo n.º 17
0
        public async Task UnicodeCategoriesInclusionsExpected(RegexEngine engine, string generalCategory, UnicodeCategory unicodeCategory)
        {
            // For every UTF-16 code unit, checks that \p{category} matches exactly the characters that
            // char.GetUnicodeCategory places in unicodeCategory, and that \P{category} matches all the others.
            char[] allChars              = Enumerable.Range(0, char.MaxValue + 1).Select(i => (char)i).ToArray();
            int    expectedInCategory    = allChars.Count(c => char.GetUnicodeCategory(c) == unicodeCategory);
            int    expectedNotInCategory = allChars.Length - expectedInCategory;

            // Build the input once; it is identical for both checks.
            string allCharsText = string.Concat(allChars);

            // In the interpolated strings below, "{{" and "}}" are escaped braces,
            // so the resulting pattern is e.g. \p{Lu}.
            Regex r = await RegexHelpers.GetRegexAsync(engine, @$"\p{{{generalCategory}}}");

            Assert.Equal(expectedInCategory, r.Matches(allCharsText).Count);

            r = await RegexHelpers.GetRegexAsync(engine, @$"\P{{{generalCategory}}}");

            Assert.Equal(expectedNotInCategory, r.Matches(allCharsText).Count);
        }
Ejemplo n.º 18
0
        private static async Task ValidateSetAsync(string regex, RegexOptions options, HashSet<char> included, HashSet<char> excluded, bool validateEveryChar = false)
        {
            // Checks, for every available engine, that the given set pattern matches the right single
            // characters. Exactly one of included/excluded must be supplied:
            //   included - characters the set must match,
            //   excluded - characters the set must not match.
            // With validateEveryChar, every UTF-16 code unit is checked for exact agreement; otherwise
            // only the characters listed in the supplied set are checked.
            Assert.True((included != null) ^ (excluded != null));

            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
            {
                Regex r = await RegexHelpers.GetRegexAsync(engine, regex, options);

                if (validateEveryChar)
                {
                    for (int codeUnit = 0; codeUnit <= char.MaxValue; codeUnit++)
                    {
                        char ch = (char)codeUnit;
                        bool matched = r.IsMatch(ch.ToString());
                        bool shouldMatch = included != null ? included.Contains(ch) : !excluded.Contains(ch);

                        if (matched != shouldMatch)
                        {
                            Fail(codeUnit);
                        }
                    }

                    continue;
                }

                if (included != null)
                {
                    // Every listed character must match.
                    foreach (char ch in included)
                    {
                        if (!r.IsMatch(ch.ToString()))
                        {
                            Fail(ch);
                        }
                    }

                    continue;
                }

                // Only the excluded set was supplied: none of its characters may match.
                foreach (char ch in excluded)
                {
                    if (r.IsMatch(ch.ToString()))
                    {
                        Fail(ch);
                    }
                }
            }

            // Reports the offending character together with the set and options being validated.
            void Fail(int c) => throw new XunitException($"Set=\"{regex}\", Options=\"{options}\", {c:X4} => '{(char)c}'");
        }
Ejemplo n.º 19
0
        public void EnumerateMatches_CheckIndex(RegexEngine engine)
        {
            // Checks both the text and the index of each match reported by EnumerateMatches.
            // The regex is obtained by blocking on the async helper because this method is synchronous.
            const string Pattern = @"e{2}\w\b";
            const string Input   = "needing a reed";

            Regex r     = RegexHelpers.GetRegexAsync(engine, Pattern).GetAwaiter().GetResult();
            int   count = 0;

            string[]           expectedMatches = new[] { "eed" };
            int[]              expectedIndex   = new[] { 11 };
            ReadOnlySpan<char> span            = Input.AsSpan();

            foreach (ValueMatch match in r.EnumerateMatches(span))
            {
                Assert.Equal(expectedMatches[count], span.Slice(match.Index, match.Length).ToString());
                Assert.Equal(expectedIndex[count++], match.Index);
            }

            // Without this check the test would pass vacuously if no match were enumerated at all.
            Assert.Equal(expectedMatches.Length, count);
        }
Ejemplo n.º 20
0
        public async Task Split(RegexEngine engine, string pattern, string input, RegexOptions options, int count, int start, string[] expected)
        {
            // Checks Regex.Split, using the most specific overload always and the shorter overloads
            // whenever count/start are at their defaults.
            bool usesDefaultStart = RegexHelpers.IsDefaultStart(input, options, start);
            bool usesDefaultCount = RegexHelpers.IsDefaultCount(input, options, count);

            Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

            if (usesDefaultStart)
            {
                if (usesDefaultCount)
                {
                    // Neither count nor start needs to be passed.
                    Assert.Equal(expected, regex.Split(input));
                }

                // Start can be omitted.
                Assert.Equal(expected, regex.Split(input, count));
            }

            // The full overload is always applicable.
            Assert.Equal(expected, regex.Split(input, count, start));
        }
Ejemplo n.º 21
0
        public async Task Replace_MatchEvaluator_Test(RegexEngine engine, string pattern, string input, MatchEvaluator evaluator, RegexOptions options, int count, int start, string expected)
        {
            // Checks Regex.Replace with a MatchEvaluator, using the most specific overload always and
            // the shorter overloads whenever count/start are at their defaults.
            bool usesDefaultStart = RegexHelpers.IsDefaultStart(input, options, start);
            bool usesDefaultCount = RegexHelpers.IsDefaultCount(input, options, count);

            Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

            if (usesDefaultStart)
            {
                if (usesDefaultCount)
                {
                    // Neither count nor start needs to be passed.
                    Assert.Equal(expected, regex.Replace(input, evaluator));
                }

                // Start can be omitted.
                Assert.Equal(expected, regex.Replace(input, evaluator, count));
            }

            // The full overload is always applicable.
            Assert.Equal(expected, regex.Replace(input, evaluator, count, start));
        }
Ejemplo n.º 22
0
        public async Task EnginesThrowNotImplementedForGoAndFFC(RegexEngine engine)
        {
            // FindFirstChar and Go should not be implemented by the built-in engines, since those are
            // expected to override and use Scan instead. Both are invoked through reflection and must
            // fail with NotImplementedException (wrapped in TargetInvocationException by reflection).
            const BindingFlags NonPublicInstance = BindingFlags.Instance | BindingFlags.NonPublic;

            Regex re = await RegexHelpers.GetRegexAsync(engine, @"abc");

            // Create a runner via the non-public factory method so there is an instance to call into.
            MethodInfo createRunner = typeof(Regex).GetMethod("CreateRunner", NonPublicInstance);
            RegexRunner runner = createRunner.Invoke(re, new object[] { }) as RegexRunner;

            MethodInfo go = typeof(RegexRunner).GetMethod("Go", NonPublicInstance);
            MethodInfo findFirstChar = typeof(RegexRunner).GetMethod("FindFirstChar", NonPublicInstance);

            // Go
            TargetInvocationException goFailure =
                Assert.Throws<TargetInvocationException>(() => go.Invoke(runner, new object[] { }));
            Assert.Equal(typeof(NotImplementedException), goFailure.InnerException.GetType());

            // FindFirstChar
            TargetInvocationException findFirstCharFailure =
                Assert.Throws<TargetInvocationException>(() => findFirstChar.Invoke(runner, new object[] { }));
            Assert.Equal(typeof(NotImplementedException), findFirstCharFailure.InnerException.GetType());
        }
Ejemplo n.º 23
0
        public async Task Replace(RegexEngine engine, string pattern, string input, string replacement, RegexOptions options, int count, int start, string expected)
        {
            // Checks Regex.Replace with a string replacement, using the most specific overload always and
            // the shorter overloads whenever count/start are at their defaults.
            Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

            bool usesDefaultStart = RegexHelpers.IsDefaultStart(input, options, start);
            bool usesDefaultCount = RegexHelpers.IsDefaultCount(input, options, count);

            // Overloads without count and start (instance and static).
            if (usesDefaultStart && usesDefaultCount)
            {
                Assert.Equal(expected, regex.Replace(input, replacement));
                Assert.Equal(expected, Regex.Replace(input, pattern, replacement, options));
            }

            // Overload with count only.
            if (usesDefaultStart)
            {
                Assert.Equal(expected, regex.Replace(input, replacement, count));
            }

            // The full overload is always applicable.
            Assert.Equal(expected, regex.Replace(input, replacement, count, start));
        }
Ejemplo n.º 24
0
        public async Task Matches_MultipleCapturingGroups(RegexEngine engine)
        {
            // Walks all matches of (abra(cad)?)+ in the input via Match/NextMatch. Each match must be
            // "abracadabra"; for engines other than the non-backtracking one, the groups and their
            // captures are verified as well.
            string[] expectedGroupValues        = { "abracadabra", "abra", "cad" };
            string[] expectedGroupCaptureValues = { "abracad", "abra" };

            // Another example - given by Brad Merril in an article on RegularExpressions
            Regex regex = await RegexHelpers.GetRegexAsync(engine, @"(abra(cad)?)+");

            string input      = "abracadabra1abracadabra2abracadabra3";
            int    matchCount = 0;

            for (Match match = regex.Match(input); match.Success; match = match.NextMatch())
            {
                matchCount++;

                RegexAssert.Equal("abracadabra", match);

                // Group and capture details are only checked for the other engines.
                if (RegexHelpers.IsNonBacktracking(engine))
                {
                    continue;
                }

                Assert.Equal(3, match.Groups.Count);
                for (int i = 0; i < match.Groups.Count; i++)
                {
                    RegexAssert.Equal(expectedGroupValues[i], match.Groups[i]);
                    if (i == 1)
                    {
                        // The repeated outer group captured twice: "abracad", then "abra".
                        Assert.Equal(2, match.Groups[i].Captures.Count);
                        for (int j = 0; j < match.Groups[i].Captures.Count; j++)
                        {
                            RegexAssert.Equal(expectedGroupCaptureValues[j], match.Groups[i].Captures[j]);
                        }
                    }
                    else if (i == 2)
                    {
                        Assert.Equal(1, match.Groups[i].Captures.Count);
                        RegexAssert.Equal("cad", match.Groups[i].Captures[0]);
                    }
                }
                Assert.Equal(1, match.Captures.Count);
                RegexAssert.Equal("abracadabra", match.Captures[0]);
            }

            // The input contains "abracadabra" three times (separated by the digits 1, 2 and 3).
            // Without this check the test would pass vacuously if the regex found nothing.
            Assert.Equal(3, matchCount);
        }
Ejemplo n.º 25
0
        public async Task WideLatin(RegexEngine engine)
        {
            // Case-insensitive matching of a pattern that mixes ASCII letters with the same letters
            // shifted into the 0xFF00 ("Wide-Latin") range.
            const string OrigPattern = @"abc";

            // Shift each char of the original pattern by 0xFF00 - 32.
            char[] shifted = Array.ConvertAll(OrigPattern.ToCharArray(), c => (char)((int)c + 0xFF00 - 32));
            string pattern_WL = new string(shifted);

            string pattern = $"({OrigPattern}==={pattern_WL})+";

            Regex re = await RegexHelpers.GetRegexAsync(engine, pattern, RegexOptions.IgnoreCase);

            // Five '=' of padding, two repetitions of the group with mixed casing, then trailing text
            // that does not complete a third repetition.
            string input = $"====={OrigPattern.ToUpper()}==={pattern_WL}{OrigPattern}==={pattern_WL.ToUpper()}==={OrigPattern}==={OrigPattern}";

            Match first = re.Match(input);
            int singleRepetitionLength = OrigPattern.Length + 3 + pattern_WL.Length;

            Assert.True(first.Success);
            Assert.Equal(5, first.Index);
            Assert.Equal(2 * singleRepetitionLength, first.Length);

            // Nothing else in the input matches.
            Match second = first.NextMatch();

            Assert.False(second.Success);
        }
Ejemplo n.º 26
0
        public async Task Count_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, int startat, RegexOptions options, int expectedCount)
        {
            // Exercises every Count overload against expectedCount. The overloads without a start
            // position are only checked when startat equals the default position (end of input for
            // RightToLeft, otherwise 0).
            Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

            // Overload with an explicit start position, cross-checked against Matches.
            Assert.Equal(expectedCount, regex.Count(input.AsSpan(), startat));
            Assert.Equal(regex.Count(input.AsSpan(), startat), regex.Matches(input, startat).Count);

            bool rightToLeft = (options & RegexOptions.RightToLeft) != 0;
            int defaultStartAt = rightToLeft ? input.Length : 0;

            if (startat != defaultStartAt)
            {
                return;
            }

            // Instance overloads without a start position.
            Assert.Equal(expectedCount, regex.Count(input));
            Assert.Equal(expectedCount, regex.Count(input.AsSpan()));
            Assert.Equal(regex.Count(input), regex.Matches(input).Count);
            Assert.Equal(regex.Count(input.AsSpan()), regex.Matches(input).Count);

            // Static overloads without options: only for the interpreter with no options set.
            if (options == RegexOptions.None && engine == RegexEngine.Interpreter)
            {
                Assert.Equal(expectedCount, Regex.Count(input, pattern));
                Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern));
            }

            // Static overloads taking options: only for these three engines.
            bool engineHasOptions = engine == RegexEngine.Interpreter
                                    || engine == RegexEngine.Compiled
                                    || engine == RegexEngine.NonBacktracking;
            if (!engineHasOptions)
            {
                return;
            }

            RegexOptions engineOptions = RegexHelpers.OptionsFromEngine(engine);
            RegexOptions combinedOptions = options | engineOptions;

            Assert.Equal(expectedCount, Regex.Count(input, pattern, combinedOptions));
            Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern, combinedOptions));
            Assert.Equal(expectedCount, Regex.Count(input, pattern, combinedOptions, Regex.InfiniteMatchTimeout));
            Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern, combinedOptions, Regex.InfiniteMatchTimeout));
        }
Ejemplo n.º 27
0
        public void EnumerateMatches_Lookahead(RegexEngine engine)
        {
            // Enumerates the words that do not start with "un" (negative lookahead) and checks them
            // in order. Skipped for the non-backtracking engine, which does not support lookaheads.
            if (RegexHelpers.IsNonBacktracking(engine))
            {
                return;
            }

            const string Pattern = @"\b(?!un)\w+\b";
            const string Input   = "unite one unethical ethics use untie ultimate";

            // The regex is obtained by blocking on the async helper because this method is synchronous.
            Regex regex = RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase).GetAwaiter().GetResult();

            string[] expectedMatches = { "one", "ethics", "use", "ultimate" };
            ReadOnlySpan<char> text = Input.AsSpan();
            int seen = 0;

            foreach (ValueMatch current in regex.EnumerateMatches(text))
            {
                string wanted = expectedMatches[seen++];
                string actual = text.Slice(current.Index, current.Length).ToString();

                Assert.Equal(wanted, actual);
            }

            Assert.Equal(4, seen);
        }
Ejemplo n.º 28
0
        public void EnumerateMatches_Lookbehind(RegexEngine engine)
        {
            // Enumerates the two-digit endings of numbers that start with "20" (positive lookbehind) and
            // checks them in order. Skipped for the non-backtracking engine, which does not support lookbehinds.
            if (RegexHelpers.IsNonBacktracking(engine))
            {
                return;
            }

            const string Pattern = @"(?<=\b20)\d{2}\b";
            const string Input   = "2010 1999 1861 2140 2009";

            // The regex is obtained by blocking on the async helper because this method is synchronous.
            Regex regex = RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase).GetAwaiter().GetResult();

            string[] expectedMatches = { "10", "09" };
            ReadOnlySpan<char> text = Input.AsSpan();
            int seen = 0;

            foreach (ValueMatch current in regex.EnumerateMatches(text))
            {
                string wanted = expectedMatches[seen++];
                string actual = text.Slice(current.Index, current.Length).ToString();

                Assert.Equal(wanted, actual);
            }

            Assert.Equal(2, seen);
        }
Ejemplo n.º 29
0
        public async Task EnsureRunmatchValueIsNulledAfterIsMatch(RegexEngine engine)
        {
            // After IsMatch, the runner is expected to keep its Match object for reuse while dropping
            // the reference to the input text, so the text is not kept alive by a cache.
            // Non-public members are inspected through reflection.
            const BindingFlags NonPublicInstance = BindingFlags.Instance | BindingFlags.NonPublic;

            Regex re = await RegexHelpers.GetRegexAsync(engine, @"abc");

            // Calling IsMatch first should initialize runmatch on the runner.
            Assert.True(re.IsMatch("abcabcabc"));

            // Regex._runner -> RegexRunner.runmatch
            FieldInfo runnerField = typeof(Regex).GetField("_runner", NonPublicInstance);
            FieldInfo runmatchField = typeof(RegexRunner).GetField("runmatch", NonPublicInstance);

            RegexRunner runner = runnerField.GetValue(re) as RegexRunner;
            Match runmatch = runmatchField.GetValue(runner) as Match;

            // The Match object itself must still be there, since it should be reused.
            Assert.NotNull(runmatch);

            // Its text, however, must have been nulled out.
            MethodInfo textGetter = typeof(Match).GetMethod("get_Text", NonPublicInstance);
            object text = textGetter.Invoke(runmatch, new object[] { });

            Assert.Null(text);
            Assert.Equal(string.Empty, runmatch.Value);
#if NET7_0_OR_GREATER
            Assert.True(runmatch.ValueSpan == ReadOnlySpan<char>.Empty);
#endif
        }
Ejemplo n.º 30
0
        public async Task RegexUnicodeChar(RegexEngine engine)
        {
            // Checks, for the given engine, that \w and \d classify characters by Unicode category:
            // a random run of "valid" characters embedded among "invalid" ones must be matched exactly.
            //
            // Categories this test treats as \w: Lu, Ll, Lt, Lm, Lo, Mn, Nd, Pc. For \d: Nd only.
            // (The original note on this test also says \s still only recognizes the ASCII space
            // separators, not Unicode ones; \s is not exercised here.)
            const int ValidCharLength = 10;
            const int InvalidCharLength = 15;

            // Appends `count` characters drawn at random from `source` to `target`, and also to
            // `mirror` when one is supplied. Exactly one Random draw is made per character.
            void AppendRandom(Random rng, List<char> source, int count, StringBuilder target, StringBuilder mirror)
            {
                for (int k = 0; k < count; k++)
                {
                    char picked = source[rng.Next(source.Count)];
                    target.Append(picked);
                    mirror?.Append(picked);
                }
            }

            // Builds "<invalidPrefixLength invalid chars><ValidCharLength valid chars><InvalidCharLength invalid chars>"
            // and reports the valid run on its own through `validRun`. Draw order (prefix, valid run,
            // suffix) is kept fixed so that the seeded Random produces the same inputs on every run.
            string BuildInput(Random rng, List<char> valid, List<char> invalid, int invalidPrefixLength, out string validRun)
            {
                var whole = new StringBuilder();
                var validOnly = new StringBuilder();

                AppendRandom(rng, invalid, invalidPrefixLength, whole, null);
                AppendRandom(rng, valid, ValidCharLength, whole, validOnly);
                AppendRandom(rng, invalid, InvalidCharLength, whole, null);

                validRun = validOnly.ToString();
                return whole.ToString();
            }

            // Asserts that `rx` finds exactly one match in `text`: `expectedValue` starting at `expectedIndex`.
            void AssertSingleMatch(Regex rx, string text, string expectedValue, int expectedIndex)
            {
                Match found = rx.Match(text);
                Assert.True(found.Success);

                Assert.Equal(expectedValue, found.Value);
                Assert.Equal(expectedIndex, found.Index);
                Assert.Equal(ValidCharLength, found.Length);

                Assert.False(found.NextMatch().Success);
            }

            // Partition every char below MaxUnicodeRange into word / non-word characters.
            var validChars = new List<char>();
            var invalidChars = new List<char>();

            for (int i = 0; i < MaxUnicodeRange; i++)
            {
                char c = (char)i;
                switch (CharUnicodeInfo.GetUnicodeCategory(c))
                {
                    case UnicodeCategory.UppercaseLetter:       // Lu
                    case UnicodeCategory.LowercaseLetter:       // Ll
                    case UnicodeCategory.TitlecaseLetter:       // Lt
                    case UnicodeCategory.ModifierLetter:        // Lm
                    case UnicodeCategory.OtherLetter:           // Lo
                    case UnicodeCategory.DecimalDigitNumber:    // Nd
                    case UnicodeCategory.NonSpacingMark:        // Mn
                    case UnicodeCategory.ConnectorPunctuation:  // Pc
                        validChars.Add(c);
                        break;

                    // LetterNumber, OtherNumber and SpacingCombiningMark (Mc) are deliberately left
                    // out of the list above, so this test treats them as non-word characters.
                    default:
                        invalidChars.Add(c);
                        break;
                }
            }

            // \w* : valid run followed by invalid characters.
            // The seed is fixed so the generated inputs are reproducible.
            Random random = new Random(-55);
            Regex regex = await RegexHelpers.GetRegexAsync(engine, @"\w*");

            for (int i = 0; i < 100; i++)
            {
                string input = BuildInput(random, validChars, invalidChars, 0, out string validRun);

                Match match = regex.Match(input);
                Assert.True(match.Success);

                Assert.Equal(validRun, match.Value);
                Assert.Equal(0, match.Index);
                Assert.Equal(ValidCharLength, match.Length);

                match = match.NextMatch();
                do
                {
                    // Because of the * quantifier, every position among the trailing non-word
                    // characters yields an empty match.
                    Assert.Equal(string.Empty, match.Value);
                    Assert.Equal(0, match.Length);
                    match = match.NextMatch();
                } while (match.Success);
            }

            // \w+ : invalid characters, then the valid run, then invalid characters again.
            random = new Random(-55);
            regex = await RegexHelpers.GetRegexAsync(engine, @"\w+");

            for (int i = 0; i < 500; i++)
            {
                string input = BuildInput(random, validChars, invalidChars, InvalidCharLength, out string validRun);
                AssertSingleMatch(regex, input, validRun, InvalidCharLength);
            }

            // Re-partition for \d: only DecimalDigitNumber (Nd) counts as valid.
            validChars = new List<char>();
            invalidChars = new List<char>();
            for (int i = 0; i < MaxUnicodeRange; i++)
            {
                char c = (char)i;
                if (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.DecimalDigitNumber)
                {
                    validChars.Add(c);
                }
                else
                {
                    invalidChars.Add(c);
                }
            }

            // \d+ : valid run followed by invalid characters.
            // `random` is intentionally not re-seeded here; it continues the sequence from the \w+ loop.
            regex = await RegexHelpers.GetRegexAsync(engine, @"\d+");

            for (int i = 0; i < 100; i++)
            {
                string input = BuildInput(random, validChars, invalidChars, 0, out string validRun);

                // Unlike the original, this also asserts match.Success, consistent with the other loops.
                AssertSingleMatch(regex, input, validRun, 0);
            }

            // \d+ : invalid characters, then the valid run, then invalid characters again.
            regex = await RegexHelpers.GetRegexAsync(engine, @"\d+");

            for (int i = 0; i < 100; i++)
            {
                string input = BuildInput(random, validChars, invalidChars, InvalidCharLength, out string validRun);
                AssertSingleMatch(regex, input, validRun, InvalidCharLength);
            }
        }