// Builds the regex for the requested engine and passes both the MatchCollection from
// Matches(input) and the first Match from Match(input) to VerifyMatches with the expected captures.
public async Task Matches(RegexEngine engine, string pattern, string input, RegexOptions options, CaptureData[] expected)
{
    Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

    MatchCollection allMatches = regex.Matches(input);
    VerifyMatches(allMatches, expected);

    Match firstMatch = regex.Match(input);
    VerifyMatches(firstMatch, expected);
}
// Checks that every Regex.Count overload (instance/static, string/span) reports expectedCount
// and agrees with Matches(input).Count.
public async Task Count_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, RegexOptions options, int expectedCount)
{
    Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

    // Instance overloads.
    Assert.Equal(expectedCount, regex.Count(input));
    Assert.Equal(expectedCount, regex.Count(input.AsSpan()));
    Assert.Equal(regex.Count(input), regex.Matches(input).Count);
    Assert.Equal(regex.Count(input.AsSpan()), regex.Matches(input).Count);

    // Static overloads that take no options are only checked for the interpreter with no options.
    if (engine == RegexEngine.Interpreter && options == RegexOptions.None)
    {
        Assert.Equal(expectedCount, Regex.Count(input, pattern));
        Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern));
    }

    // Static overloads that take options are checked only for these three engines.
    bool checkStaticWithOptions =
        engine == RegexEngine.Interpreter ||
        engine == RegexEngine.Compiled ||
        engine == RegexEngine.NonBacktracking;
    if (!checkStaticWithOptions)
    {
        return;
    }

    RegexOptions combinedOptions = options | RegexHelpers.OptionsFromEngine(engine);
    Assert.Equal(expectedCount, Regex.Count(input, pattern, combinedOptions));
    Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern, combinedOptions));
    Assert.Equal(expectedCount, Regex.Count(input, pattern, combinedOptions, Regex.InfiniteMatchTimeout));
    Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern, combinedOptions, Regex.InfiniteMatchTimeout));
}
// Regex is expected to compare characters one at a time. This test uses two strings whose
// individual chars all differ but which compare as equal under the invariant culture, and
// checks that the regex built from one does not match the other.
public async Task CharactersComparedOneByOne_Invariant(RegexEngine engine, RegexOptions options)
{
    const string First = "\u00D6\u200D";
    const string Second = "\u004F\u0308";

    // Sanity-check the chosen strings: different chars, yet culture-equal.
    Assert.False(First[0] == Second[0]);
    Assert.False(First[1] == Second[1]);
    Assert.StartsWith(First, Second, StringComparison.InvariantCulture);
    Assert.True(First.Equals(Second, StringComparison.InvariantCulture));

    // Several lengths are used so that codegen paths that kick in for longer strings are covered.
    int[] repeatCounts = { 1, 10, 100 };
    for (int i = 0; i < repeatCounts.Length; i++)
    {
        int repeat = repeatCounts[i];
        string pattern = string.Concat(Enumerable.Repeat(First, repeat));
        string lookalike = string.Concat(Enumerable.Repeat(Second, repeat));

        // Text at the very start of the pattern affects prefix matching.
        Regex atStart = await RegexHelpers.GetRegexAsync(engine, pattern, options);
        Assert.False(atStart.IsMatch(lookalike));
        Assert.True(atStart.IsMatch(pattern));

        // Text after a leading set affects "multi" matching.
        Regex afterSet = await RegexHelpers.GetRegexAsync(engine, "[abc]" + pattern, options);
        Assert.False(afterSet.IsMatch("a" + lookalike));
        Assert.True(afterSet.IsMatch("a" + pattern));
    }
}
// Compares how a culture-invariant and a Turkish-culture NonBacktracking regex, both built with
// IgnoreCase from the four "i" variants, match lower/upper-cased forms of that same text.
public async Task TurkishI_Is_Differently_LowerUpperCased_In_Turkish_Culture_NonBacktracking()
{
    CultureInfo trCulture = new CultureInfo("tr-TR");
    string text = "I\u0131\u0130i"; // serves as both the pattern and the input

    // RegexOptions.Compiled is not passed: it is a no-op when combined with NonBacktracking.
    Regex invariantRegex = await RegexHelpers.GetRegexAsync(
        RegexEngine.NonBacktracking, text, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, CultureInfo.InvariantCulture);
    Regex trRegex = await RegexHelpers.GetRegexAsync(
        RegexEngine.NonBacktracking, text, RegexOptions.IgnoreCase, trCulture);

    Assert.True(invariantRegex.IsMatch(text));
    // NOTE: the original author flags this result as differing from a sibling test (not visible in this block).
    Assert.True(trRegex.IsMatch(text));

    // Each regex recognises only the lowercase form produced by its own culture...
    string lowerInvariant = text.ToLowerInvariant();
    string lowerTurkish = text.ToLower(trCulture);
    Assert.True(invariantRegex.IsMatch(lowerInvariant));
    Assert.False(invariantRegex.IsMatch(lowerTurkish));
    Assert.False(trRegex.IsMatch(lowerInvariant));
    Assert.True(trRegex.IsMatch(lowerTurkish));

    // ...and the same holds symmetrically for the uppercase forms.
    string upperInvariant = text.ToUpperInvariant();
    string upperTurkish = text.ToUpper(trCulture);
    Assert.True(invariantRegex.IsMatch(upperInvariant));
    Assert.False(invariantRegex.IsMatch(upperTurkish));
    Assert.False(trRegex.IsMatch(upperInvariant));
    Assert.True(trRegex.IsMatch(upperTurkish));
}
// Verifies Regex.Replace with a replacement string across the overloads that apply to the given
// count/start. The regex is constructed under a temporarily raised safe-size threshold.
public async Task Replace(RegexEngine engine, string pattern, string input, string replacement, RegexOptions options, int count, int start, string expected)
{
    Regex regex;

    // A few cases exceed the default limit of 1000 (they reach 6003), so lift it during construction only.
    RegexHelpers.SetSafeSizeThreshold(6005);
    try
    {
        regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);
    }
    finally
    {
        RegexHelpers.RestoreSafeSizeThresholdToDefault();
    }

    bool defaultStart = RegexHelpers.IsDefaultStart(input, options, start);
    bool defaultCount = RegexHelpers.IsDefaultCount(input, options, count);

    // Overloads that omit count and start are only comparable when both are at their defaults.
    if (defaultStart && defaultCount)
    {
        Assert.Equal(expected, regex.Replace(input, replacement));
        Assert.Equal(expected, Regex.Replace(input, pattern, replacement, options));
    }

    // The overload that omits start is only comparable when start is at its default.
    if (defaultStart)
    {
        Assert.Equal(expected, regex.Replace(input, replacement, count));
    }

    Assert.Equal(expected, regex.Replace(input, replacement, count, start));
}
// Checks that Regex.Count throws RegexMatchTimeoutException for a slow backtracking pattern
// with a 1ms timeout, and that it does so within a generous wall-clock bound.
public async Task Count_Timeout_ThrowsAfterTooLongExecution(RegexEngine engine)
{
    // The test relies on backtracking taking a long time, so it does not apply to NonBacktracking.
    if (RegexHelpers.IsNonBacktracking(engine))
    {
        return;
    }

    const string Pattern = @"^(\w+\s?)*$";
    const string Input = "An input string that takes a very very very very very very very very very very very long time!";

    Regex regex = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.None, TimeSpan.FromMilliseconds(1));

    Stopwatch instanceTimer = Stopwatch.StartNew();
    Assert.Throws<RegexMatchTimeoutException>(() => regex.Count(Input));
    Assert.Throws<RegexMatchTimeoutException>(() => regex.Count(Input.AsSpan()));
    // Arbitrary upper bound that should be well above what a 1ms timeout needs.
    Assert.InRange(instanceTimer.Elapsed.TotalSeconds, 0, 10);

    // The static overloads are exercised only for the interpreter and compiled engines.
    if (engine == RegexEngine.Interpreter || engine == RegexEngine.Compiled)
    {
        Stopwatch staticTimer = Stopwatch.StartNew();
        Assert.Throws<RegexMatchTimeoutException>(() => Regex.Count(Input, Pattern, RegexHelpers.OptionsFromEngine(engine), TimeSpan.FromMilliseconds(1)));
        Assert.Throws<RegexMatchTimeoutException>(() => Regex.Count(Input.AsSpan(), Pattern, RegexHelpers.OptionsFromEngine(engine), TimeSpan.FromMilliseconds(1)));
        // Same arbitrary upper bound as above.
        Assert.InRange(staticTimer.Elapsed.TotalSeconds, 0, 10);
    }
}
// Builds the regex under the supplied culture and checks the value of the first match.
public async Task Match_In_Different_Cultures_CriticalCases(string pattern, RegexOptions options, RegexEngine engine, CultureInfo culture, string input, string match_expected)
{
    Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options, culture);

    string actualValue = regex.Match(input).Value;

    Assert.Equal(match_expected, actualValue);
}
// Runs one data-driven case: `captures` (or `nonBacktrackingCaptures` when the engine is
// NonBacktracking and an override is supplied) is either one of the sentinel strings
// "BADBR" / "NOMATCH" / "NONBACKTRACKINGINCOMPATIBLE", or a list of "(start,end)" pairs
// describing the expected group spans.
public async Task Test(RegexEngine engine, RegexOptions options, string pattern, string input, string captures, string nonBacktrackingCaptures = null)
{
    // The data uses the literal "NULL" to denote an empty input.
    if (input == "NULL")
    {
        input = "";
    }

    bool nonBacktracking = engine == RegexEngine.NonBacktracking;

    // A nonBacktrackingCaptures value overrides the expected result in NonBacktracking mode.
    string expected = nonBacktracking && nonBacktrackingCaptures != null
        ? nonBacktrackingCaptures
        : captures;

    if (expected == "BADBR")
    {
        await Assert.ThrowsAnyAsync<ArgumentException>(async () => await RegexHelpers.GetRegexAsync(engine, pattern, options));
        return;
    }

    if (nonBacktracking && nonBacktrackingCaptures == "NONBACKTRACKINGINCOMPATIBLE")
    {
        // In particular: backreferences are not supported in NonBacktracking mode.
        await Assert.ThrowsAnyAsync<NotSupportedException>(() => RegexHelpers.GetRegexAsync(engine, pattern, options));
        return;
    }

    Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);

    if (expected == "NOMATCH")
    {
        Assert.False(r.IsMatch(input));
        return;
    }

    Match match = r.Match(input);
    Assert.True(match.Success);

    // Parse "(s,e)(s,e)..." into a set of (start, end) spans.
    var expectedSet = new HashSet<(int start, int end)>(
        expected
            .Split(new[] { '(', ')' }, StringSplitOptions.RemoveEmptyEntries)
            .Select(s => s.Split(','))
            .Select(s => (start: int.Parse(s[0]), end: int.Parse(s[1]))));

    var actualSet = new HashSet<(int start, int end)>(
        match.Groups
            .Cast<Group>()
            .Select(g => (start: g.Index, end: g.Index + g.Length)));

    // NonBacktracking mode only provides the top-level match, so there the actual spans must be a
    // subset of the expected ones. For the other engines the .NET implementation sometimes has
    // extra captures beyond what the original data specifies, so the expected spans must be a
    // subset of the actual ones.
    bool mismatch = nonBacktracking
        ? !actualSet.IsSubsetOf(expectedSet)
        : !expectedSet.IsSubsetOf(actualSet);
    if (mismatch)
    {
        // Report the parsed expected set (not the raw string) so both lines use the same format.
        throw new Xunit.Sdk.XunitException($"Actual: {string.Join(", ", actualSet)}{Environment.NewLine}Expected: {string.Join(", ", expectedSet)}");
    }
}
// With the thread culture switched to tr-TR, checks what \w* matches at the start of the input.
public async Task TurkishCulture_MatchesWordChar(RegexEngine engine, string input, RegexOptions options, string expectedResult)
{
    CultureInfo turkish = new CultureInfo("tr-TR");

    using (new ThreadCultureChange(turkish))
    {
        Regex wordRun = await RegexHelpers.GetRegexAsync(engine, @"\w*", options);
        Match firstMatch = wordRun.Match(input);

        Assert.Equal(expectedResult, firstMatch.Value);
    }
}
// Under en-US with IgnoreCase, checks that a surrogate pair, and each of its halves on its own,
// is found in input containing that pair.
public async Task CharactersLowercasedOneByOne(RegexEngine engine)
{
    (string Pattern, string Input)[] cases =
    {
        ("\uD801\uDC00", "\uD801\uDC00"),
        ("\uD801\uDC00", "abcdefg\uD801\uDC00"),
        ("\uD801", "\uD801\uDC00"),
        ("\uDC00", "\uD801\uDC00"),
    };

    using (new ThreadCultureChange("en-US"))
    {
        foreach ((string casePattern, string caseInput) in cases)
        {
            Regex regex = await RegexHelpers.GetRegexAsync(engine, casePattern, RegexOptions.IgnoreCase);
            Assert.True(regex.IsMatch(caseInput));
        }
    }
}
// Counts the matches produced by EnumerateMatches and compares the total with expectedCount.
public void EnumerateMatches_Count(RegexEngine engine, string pattern, string input, int expectedCount)
{
    // The method is synchronous and blocks on regex construction
    // (presumably because the match enumerator cannot be used in an async method - confirm).
    Regex regex = RegexHelpers.GetRegexAsync(engine, pattern).GetAwaiter().GetResult();

    int matchesSeen = 0;
    Regex.ValueMatchEnumerator enumerator = regex.EnumerateMatches(input);
    while (enumerator.MoveNext())
    {
        matchesSeen++;
    }

    Assert.Equal(expectedCount, matchesSeen);
}
// Under tr-TR with IgnoreCase, a pattern of four character ranges must still match an input
// whose letters each fall inside the corresponding range.
public async Task TurkishCulture_Handling_Of_IgnoreCase(RegexEngine engine)
{
    const string Input = "I\u0131\u0130i";
    const string Pattern = "[H-J][\u0131-\u0140][\u0120-\u0130][h-j]";
    CultureInfo trCulture = new CultureInfo("tr-TR");

    Regex regex = await RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase, trCulture);

    // Every letter of the input lies in its range, so the match is trivial;
    // ignoring case may only ADD letters to the ranges, never remove them.
    bool matched = regex.IsMatch(Input);
    Assert.True(matched);
}
// Regex is expected to compare characters one at a time; strings whose chars differ but which
// compare as equal under the invariant (and some other) cultures would expose a change in that
// behavior. Runs the supplied case under the given thread culture.
public async Task CharactersComparedOneByOne_AnchoredPattern(RegexEngine engine, string pattern, string input, string culture, RegexOptions options, bool expected)
{
    using (new ThreadCultureChange(culture))
    {
        Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);
        bool actual = regex.IsMatch(input);

        Assert.Equal(expected, actual);
    }
}
// Checks that the instance EnumerateMatches(input, startat) overload yields expectedCount matches
// and, when startat is the default for the match direction, that the static overloads do too.
public void EnumerateMatches_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, int startat, RegexOptions options, int expectedCount)
{
    Regex regex = RegexHelpers.GetRegexAsync(engine, pattern, options).GetAwaiter().GetResult();

    Assert.Equal(expectedCount, Tally(regex.EnumerateMatches(input, startat)));

    // The static overloads take no startat, so they are comparable only at the default position:
    // the end of the input for RightToLeft, otherwise the beginning.
    bool rightToLeft = (options & RegexOptions.RightToLeft) != 0;
    int defaultStartAt = rightToLeft ? input.Length : 0;
    if (startat != defaultStartAt)
    {
        return;
    }

    if (engine == RegexEngine.Interpreter && options == RegexOptions.None)
    {
        Assert.Equal(expectedCount, Tally(Regex.EnumerateMatches(input, pattern)));
    }

    bool checkStaticWithOptions =
        engine == RegexEngine.Interpreter ||
        engine == RegexEngine.Compiled ||
        engine == RegexEngine.NonBacktracking;
    if (checkStaticWithOptions)
    {
        RegexOptions combinedOptions = options | RegexHelpers.OptionsFromEngine(engine);
        Assert.Equal(expectedCount, Tally(Regex.EnumerateMatches(input, pattern, combinedOptions)));
        Assert.Equal(expectedCount, Tally(Regex.EnumerateMatches(input, pattern, combinedOptions, Regex.InfiniteMatchTimeout)));
    }

    // Walks the enumerator to the end and returns how many matches it produced.
    static int Tally(Regex.ValueMatchEnumerator matches)
    {
        int total = 0;
        foreach (ValueMatch _ in matches)
        {
            total++;
        }
        return total;
    }
}
// Runs one data-driven case against every available engine. `captures` is either the sentinel
// "BADBR" (construction/matching must throw an ArgumentException), the sentinel "NOMATCH"
// (the pattern must not match), or a list of "(start,end)" pairs describing expected group spans.
public async Task Test(string pattern, string input, string captures)
{
    // The data uses the literal "NULL" to denote an empty input.
    if (input == "NULL")
    {
        input = "";
    }

    foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
    {
        if (captures == "BADBR")
        {
            await Assert.ThrowsAnyAsync<ArgumentException>(async () => (await RegexHelpers.GetRegexAsync(engine, pattern)).IsMatch(input));
            // Move on to the next engine; returning here would leave the remaining engines untested.
            continue;
        }

        Regex r = await RegexHelpers.GetRegexAsync(engine, pattern);

        if (captures == "NOMATCH")
        {
            Assert.False(r.IsMatch(input));
            // As above: keep going so every engine is exercised.
            continue;
        }

        Match match = r.Match(input);
        Assert.True(match.Success);

        // Parse "(s,e)(s,e)..." into a set of spans. The HashSet removes duplicates itself;
        // the sort is kept so that the failure message lists the spans in a stable order.
        var expected = new HashSet<(int start, int end)>(
            captures
                .Split(new[] { '(', ')' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(s => s.Split(','))
                .Select(s => (start: int.Parse(s[0]), end: int.Parse(s[1])))
                .OrderBy(c => c.start)
                .ThenBy(c => c.end));

        var actual = new HashSet<(int start, int end)>(
            match.Groups
                .Cast<Group>()
                .Select(g => (start: g.Index, end: g.Index + g.Length))
                .OrderBy(g => g.start)
                .ThenBy(g => g.end));

        // The .NET implementation sometimes has extra captures beyond what the original data specifies, so we assert a subset.
        if (!expected.IsSubsetOf(actual))
        {
            throw new Xunit.Sdk.XunitException($"Actual: {string.Join(", ", actual)}{Environment.NewLine}Expected: {string.Join(", ", expected)}");
        }
    }
}
// Enumerates matches over the input span and compares each one's index, length and text with
// the corresponding expected capture, then checks that the number of matches agrees.
public void EnumerateMatches(RegexEngine engine, string pattern, string input, RegexOptions options, CaptureData[] expected)
{
    Regex regex = RegexHelpers.GetRegexAsync(engine, pattern, options).GetAwaiter().GetResult();
    ReadOnlySpan<char> text = input.AsSpan();

    int position = 0;
    foreach (ValueMatch current in regex.EnumerateMatches(text))
    {
        Assert.Equal(expected[position].Index, current.Index);
        Assert.Equal(expected[position].Length, current.Length);

        string matchedText = text.Slice(current.Index, current.Length).ToString();
        Assert.Equal(expected[position].Value, matchedText);

        position++;
    }

    Assert.Equal(expected.Length, position);
}
// For every UTF-16 code unit, checks that \p{category} matches exactly the chars that
// char.GetUnicodeCategory places in the given category, and that \P{category} matches the rest.
public async Task UnicodeCategoriesInclusionsExpected(RegexEngine engine, string generalCategory, UnicodeCategory unicodeCategory)
{
    // Build the full char range and count the category's members in a single pass.
    char[] allChars = new char[char.MaxValue + 1];
    int expectedInCategory = 0;
    for (int i = 0; i < allChars.Length; i++)
    {
        char c = (char)i;
        allChars[i] = c;
        if (char.GetUnicodeCategory(c) == unicodeCategory)
        {
            expectedInCategory++;
        }
    }
    int expectedNotInCategory = allChars.Length - expectedInCategory;

    string haystack = string.Concat(allChars);

    Regex inCategory = await RegexHelpers.GetRegexAsync(engine, @$"\p{{{generalCategory}}}");
    Assert.Equal(expectedInCategory, inCategory.Matches(haystack).Count);

    Regex notInCategory = await RegexHelpers.GetRegexAsync(engine, (@$"\P{{{generalCategory}}}"));
    Assert.Equal(expectedNotInCategory, notInCategory.Matches(haystack).Count);
}
// Validates a set pattern against every available engine. Exactly one of `included` / `excluded`
// must be non-null. With validateEveryChar, every char value is checked against the set;
// otherwise only the listed chars are checked (included must match, excluded must not).
private static async Task ValidateSetAsync(string regex, RegexOptions options, HashSet<char> included, HashSet<char> excluded, bool validateEveryChar = false)
{
    bool hasIncluded = included != null;
    bool hasExcluded = excluded != null;
    Assert.True(hasIncluded ^ hasExcluded);

    foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
    {
        Regex matcher = await RegexHelpers.GetRegexAsync(engine, regex, options);

        if (validateEveryChar)
        {
            for (int code = 0; code <= char.MaxValue; code++)
            {
                char ch = (char)code;
                bool matched = matcher.IsMatch(ch.ToString());
                bool shouldMatch = hasIncluded ? included.Contains(ch) : !excluded.Contains(ch);
                if (matched != shouldMatch)
                {
                    ReportMismatch(code);
                }
            }
            continue;
        }

        if (hasIncluded)
        {
            foreach (char ch in included)
            {
                if (!matcher.IsMatch(ch.ToString()))
                {
                    ReportMismatch(ch);
                }
            }
            continue;
        }

        foreach (char ch in excluded)
        {
            if (matcher.IsMatch(ch.ToString()))
            {
                ReportMismatch(ch);
            }
        }
    }

    // Throws a test failure naming the set, the options and the offending char.
    void ReportMismatch(int c) => throw new XunitException($"Set=\"{regex}\", Options=\"{options}\", {c:X4} => '{(char)c}'");
}
// Checks that EnumerateMatches reports the expected text and index for each match of
// e{2}\w\b in "needing a reed", and that it yields exactly the expected number of matches.
public void EnumerateMatches_CheckIndex(RegexEngine engine)
{
    const string Pattern = @"e{2}\w\b";
    const string Input = "needing a reed";

    Regex r = RegexHelpers.GetRegexAsync(engine, Pattern).GetAwaiter().GetResult();

    int count = 0;
    string[] expectedMatches = new[] { "eed" };
    int[] expectedIndex = new[] { 11 };
    ReadOnlySpan<char> span = Input.AsSpan();

    foreach (ValueMatch match in r.EnumerateMatches(span))
    {
        Assert.Equal(expectedMatches[count], span.Slice(match.Index, match.Length).ToString());
        Assert.Equal(expectedIndex[count++], match.Index);
    }

    // Without this the test would pass vacuously if the engine produced no matches at all.
    Assert.Equal(expectedMatches.Length, count);
}
// Verifies Regex.Split across the overloads that are comparable for the given count and start.
public async Task Split(RegexEngine engine, string pattern, string input, RegexOptions options, int count, int start, string[] expected)
{
    bool defaultStart = RegexHelpers.IsDefaultStart(input, options, start);
    bool defaultCount = RegexHelpers.IsDefaultCount(input, options, count);

    Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

    if (defaultStart)
    {
        // Split(input) is comparable only when both count and start are at their defaults.
        if (defaultCount)
        {
            Assert.Equal(expected, regex.Split(input));
        }

        // Split(input, count) is comparable whenever start is at its default.
        Assert.Equal(expected, regex.Split(input, count));
    }

    Assert.Equal(expected, regex.Split(input, count, start));
}
// Verifies Regex.Replace with a MatchEvaluator across the overloads that are comparable
// for the given count and start.
public async Task Replace_MatchEvaluator_Test(RegexEngine engine, string pattern, string input, MatchEvaluator evaluator, RegexOptions options, int count, int start, string expected)
{
    bool defaultStart = RegexHelpers.IsDefaultStart(input, options, start);
    bool defaultCount = RegexHelpers.IsDefaultCount(input, options, count);

    Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

    if (defaultStart)
    {
        // The two-argument overload is comparable only when count and start are both default.
        if (defaultCount)
        {
            Assert.Equal(expected, regex.Replace(input, evaluator));
        }

        // The overload that omits start is comparable whenever start is default.
        Assert.Equal(expected, regex.Replace(input, evaluator, count));
    }

    Assert.Equal(expected, regex.Replace(input, evaluator, count, start));
}
// Uses reflection to create a runner for the regex and to call its non-public Go and
// FindFirstChar methods, asserting that each throws NotImplementedException.
public async Task EnginesThrowNotImplementedForGoAndFFC(RegexEngine engine)
{
    const BindingFlags NonPublicInstance = BindingFlags.Instance | BindingFlags.NonPublic;

    Regex regex = await RegexHelpers.GetRegexAsync(engine, @"abc");

    // Create the runner through reflection so that it can be inspected.
    MethodInfo createRunner = typeof(Regex).GetMethod("CreateRunner", NonPublicInstance);
    RegexRunner runner = createRunner.Invoke(regex, new object[] { }) as RegexRunner;

    MethodInfo go = typeof(RegexRunner).GetMethod("Go", NonPublicInstance);
    MethodInfo findFirstChar = typeof(RegexRunner).GetMethod("FindFirstChar", NonPublicInstance);

    // Go and FindFirstChar should not be implemented: built-in engines are expected to override Scan instead.
    AssertThrowsNotImplemented(go);
    AssertThrowsNotImplemented(findFirstChar);

    // Invokes the method on the runner and checks the exception wrapped by reflection.
    void AssertThrowsNotImplemented(MethodInfo method)
    {
        TargetInvocationException wrapper = Assert.Throws<TargetInvocationException>(() => method.Invoke(runner, new object[] { }));
        Assert.Equal(typeof(NotImplementedException), wrapper.InnerException.GetType());
    }
}
// Verifies Regex.Replace with a replacement string across the overloads that are comparable
// for the given count and start.
public async Task Replace(RegexEngine engine, string pattern, string input, string replacement, RegexOptions options, int count, int start, string expected)
{
    Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

    bool defaultStart = RegexHelpers.IsDefaultStart(input, options, start);
    bool defaultCount = RegexHelpers.IsDefaultCount(input, options, count);

    // Overloads that omit both count and start need both to be at their defaults.
    if (defaultStart && defaultCount)
    {
        Assert.Equal(expected, regex.Replace(input, replacement));
        Assert.Equal(expected, Regex.Replace(input, pattern, replacement, options));
    }

    // The overload that omits only start needs start to be at its default.
    if (defaultStart)
    {
        Assert.Equal(expected, regex.Replace(input, replacement, count));
    }

    Assert.Equal(expected, regex.Replace(input, replacement, count, start));
}
// Walks every match of (abra(cad)?)+ over an input containing "abracadabra" three times and
// checks the match value and, for engines other than NonBacktracking, the groups and captures.
// Also checks that exactly three matches were found.
public async Task Matches_MultipleCapturingGroups(RegexEngine engine)
{
    string[] expectedGroupValues = { "abracadabra", "abra", "cad" };
    string[] expectedGroupCaptureValues = { "abracad", "abra" };

    // Example attributed by the original author to an article on regular expressions.
    Regex regex = await RegexHelpers.GetRegexAsync(engine, @"(abra(cad)?)+");
    string input = "abracadabra1abracadabra2abracadabra3";

    int matchCount = 0;
    Match match = regex.Match(input);
    while (match.Success)
    {
        matchCount++;

        string expected = "abracadabra";
        RegexAssert.Equal(expected, match);

        // Group and capture details are only checked for engines other than NonBacktracking.
        if (!RegexHelpers.IsNonBacktracking(engine))
        {
            Assert.Equal(3, match.Groups.Count);
            for (int i = 0; i < match.Groups.Count; i++)
            {
                RegexAssert.Equal(expectedGroupValues[i], match.Groups[i]);
                if (i == 1)
                {
                    // The outer group is captured once per repetition of the '+' loop.
                    Assert.Equal(2, match.Groups[i].Captures.Count);
                    for (int j = 0; j < match.Groups[i].Captures.Count; j++)
                    {
                        RegexAssert.Equal(expectedGroupCaptureValues[j], match.Groups[i].Captures[j]);
                    }
                }
                else if (i == 2)
                {
                    Assert.Equal(1, match.Groups[i].Captures.Count);
                    RegexAssert.Equal("cad", match.Groups[i].Captures[0]);
                }
            }

            Assert.Equal(1, match.Captures.Count);
            RegexAssert.Equal("abracadabra", match.Captures[0]);
        }

        match = match.NextMatch();
    }

    // The input contains "abracadabra" three times; without this check the loop above
    // would pass vacuously if the regex produced no matches.
    Assert.Equal(3, matchCount);
}
// Builds a pattern mixing "abc" with the same letters shifted by a fixed offset (0xFF00 - 32)
// and checks that, with IgnoreCase, one match covers two consecutive repetitions in the input
// and no further match follows.
public async Task WideLatin(RegexEngine engine)
{
    const string AsciiPattern = @"abc";
    const int WideLatinOffset = 0xFF00 - 32;

    // Shift each char of the pattern into the Wide-Latin alphabet of Unicode.
    char[] shifted = AsciiPattern.ToCharArray();
    for (int i = 0; i < shifted.Length; i++)
    {
        shifted[i] = (char)(shifted[i] + WideLatinOffset);
    }
    string widePattern = new string(shifted);

    string pattern = $"({AsciiPattern}==={widePattern})+";
    Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, RegexOptions.IgnoreCase);

    string input = $"====={AsciiPattern.ToUpper()}==={widePattern}{AsciiPattern}==={widePattern.ToUpper()}==={AsciiPattern}==={AsciiPattern}";

    Match first = regex.Match(input);
    Assert.True(first.Success);
    Assert.Equal(5, first.Index);

    // One repetition is the ASCII text, the three '=' and the wide text; two repetitions are expected.
    int repetitionLength = AsciiPattern.Length + 3 + widePattern.Length;
    Assert.Equal(2 * repetitionLength, first.Length);

    Match second = first.NextMatch();
    Assert.False(second.Success);
}
// Checks Regex.Count(span, startat) against expectedCount and Matches(input, startat).Count;
// when startat is the default for the match direction, also checks every overload without startat.
public async Task Count_ReturnsExpectedCount(RegexEngine engine, string pattern, string input, int startat, RegexOptions options, int expectedCount)
{
    Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);

    Assert.Equal(expectedCount, regex.Count(input.AsSpan(), startat));
    Assert.Equal(regex.Count(input.AsSpan(), startat), regex.Matches(input, startat).Count);

    // The remaining overloads take no startat, so they are comparable only at the default position:
    // the end of the input for RightToLeft, otherwise the beginning.
    bool rightToLeft = (options & RegexOptions.RightToLeft) != 0;
    int defaultStartAt = rightToLeft ? input.Length : 0;
    if (startat != defaultStartAt)
    {
        return;
    }

    // Instance overloads.
    Assert.Equal(expectedCount, regex.Count(input));
    Assert.Equal(expectedCount, regex.Count(input.AsSpan()));
    Assert.Equal(regex.Count(input), regex.Matches(input).Count);
    Assert.Equal(regex.Count(input.AsSpan()), regex.Matches(input).Count);

    // Static overloads that take no options are only checked for the interpreter with no options.
    if (engine == RegexEngine.Interpreter && options == RegexOptions.None)
    {
        Assert.Equal(expectedCount, Regex.Count(input, pattern));
        Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern));
    }

    // Static overloads that take options are checked only for these three engines.
    bool checkStaticWithOptions =
        engine == RegexEngine.Interpreter ||
        engine == RegexEngine.Compiled ||
        engine == RegexEngine.NonBacktracking;
    if (checkStaticWithOptions)
    {
        RegexOptions combinedOptions = options | RegexHelpers.OptionsFromEngine(engine);
        Assert.Equal(expectedCount, Regex.Count(input, pattern, combinedOptions));
        Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern, combinedOptions));
        Assert.Equal(expectedCount, Regex.Count(input, pattern, combinedOptions, Regex.InfiniteMatchTimeout));
        Assert.Equal(expectedCount, Regex.Count(input.AsSpan(), pattern, combinedOptions, Regex.InfiniteMatchTimeout));
    }
}
// Enumerates words that do not start with "un" (negative lookahead, IgnoreCase) and checks
// each match's text and the total number of matches.
public void EnumerateMatches_Lookahead(RegexEngine engine)
{
    // Lookaheads are not supported by the NonBacktracking engine.
    if (RegexHelpers.IsNonBacktracking(engine))
    {
        return;
    }

    const string Pattern = @"\b(?!un)\w+\b";
    const string Input = "unite one unethical ethics use untie ultimate";

    Regex regex = RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase).GetAwaiter().GetResult();

    string[] wanted = { "one", "ethics", "use", "ultimate" };
    ReadOnlySpan<char> text = Input.AsSpan();

    int seen = 0;
    foreach (ValueMatch current in regex.EnumerateMatches(text))
    {
        string want = wanted[seen];
        seen++;

        string actual = text.Slice(current.Index, current.Length).ToString();
        Assert.Equal(want, actual);
    }

    Assert.Equal(4, seen);
}
// Enumerates two-digit suffixes preceded by a word-initial "20" (lookbehind) and checks
// each match's text and the total number of matches.
public void EnumerateMatches_Lookbehind(RegexEngine engine)
{
    // Lookbehinds are not supported by the NonBacktracking engine.
    if (RegexHelpers.IsNonBacktracking(engine))
    {
        return;
    }

    const string Pattern = @"(?<=\b20)\d{2}\b";
    const string Input = "2010 1999 1861 2140 2009";

    Regex regex = RegexHelpers.GetRegexAsync(engine, Pattern, RegexOptions.IgnoreCase).GetAwaiter().GetResult();

    string[] wanted = { "10", "09" };
    ReadOnlySpan<char> text = Input.AsSpan();

    int seen = 0;
    foreach (ValueMatch current in regex.EnumerateMatches(text))
    {
        string want = wanted[seen];
        seen++;

        string actual = text.Slice(current.Index, current.Length).ToString();
        Assert.Equal(want, actual);
    }

    Assert.Equal(2, seen);
}
// After IsMatch, inspects the regex's cached runner via reflection: its runmatch object must
// still exist (so it can be reused), but must no longer hold the input text.
public async Task EnsureRunmatchValueIsNulledAfterIsMatch(RegexEngine engine)
{
    const BindingFlags NonPublicInstance = BindingFlags.Instance | BindingFlags.NonPublic;

    Regex regex = await RegexHelpers.GetRegexAsync(engine, @"abc");

    // Calling IsMatch first should initialize runmatch on the runner.
    Assert.True(regex.IsMatch("abcabcabc"));

    // runmatch itself must not have been nulled out: after IsMatch it should be reused.
    FieldInfo runnerField = typeof(Regex).GetField("_runner", NonPublicInstance);
    RegexRunner runner = runnerField.GetValue(regex) as RegexRunner;
    FieldInfo runmatchField = typeof(RegexRunner).GetField("runmatch", NonPublicInstance);
    Match cachedMatch = runmatchField.GetValue(runner) as Match;
    Assert.NotNull(cachedMatch);

    // Its text, however, must have been nulled out so the cache keeps no reference to the input.
    MethodInfo textGetter = typeof(Match).GetMethod("get_Text", NonPublicInstance);
    object cachedText = textGetter.Invoke(cachedMatch, new object[] { });
    Assert.Null(cachedText);
    Assert.Equal(string.Empty, cachedMatch.Value);
#if NET7_0_OR_GREATER
    Assert.True(cachedMatch.ValueSpan == ReadOnlySpan<char>.Empty);
#endif
}
// Checks that \w and \d follow Unicode categories. Chars below MaxUnicodeRange are split into
// "valid" and "invalid" by category, pseudo-random strings (fixed seed) are built from runs of
// each kind, and the regex must match exactly the valid run.
public async Task RegexUnicodeChar(RegexEngine engine)
{
    const int ValidRunLength = 10;
    const int InvalidRunLength = 15;

    // \w: categories treated as word chars here are [\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nd}\p{Mn}\p{Pc}].
    List<char> validChars = new List<char>();
    List<char> invalidChars = new List<char>();
    for (int i = 0; i < MaxUnicodeRange; i++)
    {
        char c = (char)i;
        if (IsWordCategory(CharUnicodeInfo.GetUnicodeCategory(c)))
        {
            validChars.Add(c);
        }
        else
        {
            invalidChars.Add(c);
        }
    }

    // \w* over "valid run + invalid run": the first match is the valid run at index 0.
    Random random = new Random(-55);
    Regex regex = await RegexHelpers.GetRegexAsync(engine, @"\w*");
    for (int i = 0; i < 100; i++)
    {
        string validRun = RandomRun(validChars, ValidRunLength);
        string input = validRun + RandomRun(invalidChars, InvalidRunLength);

        Match match = regex.Match(input);
        Assert.True(match.Success);
        Assert.Equal(validRun, match.Value);
        Assert.Equal(0, match.Index);
        Assert.Equal(ValidRunLength, match.Length);

        match = match.NextMatch();
        do
        {
            // Because of the '*' quantifier, each remaining non-matching position yields an empty match.
            Assert.Equal(string.Empty, match.Value);
            Assert.Equal(0, match.Length);
            match = match.NextMatch();
        }
        while (match.Success);
    }

    // \w+ over "invalid + valid + invalid": exactly one match, the valid run in the middle.
    random = new Random(-55);
    regex = await RegexHelpers.GetRegexAsync(engine, @"\w+");
    AssertOnlyMiddleRunMatches(regex, 500);

    // \d: only DecimalDigitNumber chars are valid.
    validChars = new List<char>();
    invalidChars = new List<char>();
    for (int i = 0; i < MaxUnicodeRange; i++)
    {
        char c = (char)i;
        if (CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.DecimalDigitNumber)
        {
            validChars.Add(c);
        }
        else
        {
            invalidChars.Add(c);
        }
    }

    // \d+ over "valid run + invalid run": exactly one match, the valid run at index 0.
    regex = await RegexHelpers.GetRegexAsync(engine, @"\d+");
    for (int i = 0; i < 100; i++)
    {
        string validRun = RandomRun(validChars, ValidRunLength);
        string input = validRun + RandomRun(invalidChars, InvalidRunLength);

        Match match = regex.Match(input);
        Assert.Equal(validRun, match.Value);
        Assert.Equal(0, match.Index);
        Assert.Equal(ValidRunLength, match.Length);

        match = match.NextMatch();
        Assert.False(match.Success);
    }

    // \d+ over "invalid + valid + invalid": exactly one match, the valid run in the middle.
    regex = await RegexHelpers.GetRegexAsync(engine, @"\d+");
    AssertOnlyMiddleRunMatches(regex, 100);

    // Returns true for the Unicode categories this test treats as word characters.
    static bool IsWordCategory(UnicodeCategory category)
    {
        switch (category)
        {
            case UnicodeCategory.UppercaseLetter:      // Lu
            case UnicodeCategory.LowercaseLetter:      // Ll
            case UnicodeCategory.TitlecaseLetter:      // Lt
            case UnicodeCategory.ModifierLetter:       // Lm
            case UnicodeCategory.OtherLetter:          // Lo
            case UnicodeCategory.DecimalDigitNumber:   // Nd
            case UnicodeCategory.NonSpacingMark:       // Mn
            case UnicodeCategory.ConnectorPunctuation: // Pc
                return true;

            // LetterNumber, OtherNumber and SpacingCombiningMark are deliberately not included.
            default:
                return false;
        }
    }

    // Draws `length` chars from `pool` using the current `random` and returns them as a string.
    string RandomRun(List<char> pool, int length)
    {
        var run = new StringBuilder();
        for (int j = 0; j < length; j++)
        {
            run.Append(pool[random.Next(pool.Count)]);
        }
        return run.ToString();
    }

    // Builds "invalid + valid + invalid" inputs from the current char lists and asserts that the
    // regex matches only the valid run in the middle.
    void AssertOnlyMiddleRunMatches(Regex pattern, int iterations)
    {
        for (int i = 0; i < iterations; i++)
        {
            // Draw order matters for the seeded sequence: leading invalid, valid, trailing invalid.
            string leading = RandomRun(invalidChars, InvalidRunLength);
            string validRun = RandomRun(validChars, ValidRunLength);
            string trailing = RandomRun(invalidChars, InvalidRunLength);
            string input = leading + validRun + trailing;

            Match match = pattern.Match(input);
            Assert.True(match.Success);
            Assert.Equal(validRun, match.Value);
            Assert.Equal(InvalidRunLength, match.Index);
            Assert.Equal(ValidRunLength, match.Length);

            match = match.NextMatch();
            Assert.False(match.Success);
        }
    }
}