/// <summary>
/// Benchmarks <c>rr.Regex</c> (reported as "RE2") against <c>nn.Regex</c> (reported as ".NET")
/// over the contents of <c>..\..\mtent12.txt</c> and prints the timing results to the console.
/// </summary>
/// <remarks>
/// Two passes are made over the same 16 patterns, each repeated <c>iterations</c> times:
/// a "First Match" pass and an "All Matches" pass. Every pattern is run three ways: RE2 over
/// the raw bytes (Latin1 option), RE2 over the decoded string, and .NET over the decoded string.
/// Differences in matched values are only reported; differences in index, length or match count
/// go through <c>Assert.AreEqual</c>. Any exception is caught, printed, and the method then
/// waits for a line of input before returning.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    try
    {
        var iterations = 5;
        var haybytes = System.IO.File.ReadAllBytes(@"..\..\mtent12.txt");

        // Time the byte -> string decoding separately from the regex work.
        var watch = new Stopwatch();
        watch.Start();
        var haystring = Encoding.ASCII.GetString(haybytes);
        var encodetime = watch.Elapsed;
        watch.Reset();

        Console.WriteLine("Text length: " + haystring.Length);
        Console.WriteLine("Encoding time: " + encodetime);
        Console.WriteLine();

        var testcases = new TestCase[16]
        {
            new TestCase("Twain"),
            new TestCase("^Twain"),
            new TestCase("Twain$"),
            new TestCase("Huck[a-zA-Z]+|Finn[a-zA-Z]+"),
            new TestCase("a[^x]{20}b"),
            new TestCase("Tom|Sawyer|Huckleberry|Finn"),
            new TestCase(".{0,3}(Tom|Sawyer|Huckleberry|Finn)"),
            new TestCase("[a-zA-Z]+ing"),
            new TestCase("^[a-zA-Z]{0,4}ing[^a-zA-Z]"),
            new TestCase("[a-zA-Z]+ing$"),
            new TestCase("^[a-zA-Z ]{5,}$"),
            new TestCase("^.{16,20}$"),
            new TestCase("([a-f](.[d-m].){0,2}[h-n]){2}"),
            new TestCase("([A-Za-z]awyer|[A-Za-z]inn)[^a-zA-Z]"),
            new TestCase(@"""[^""]{0,30}[?!\.]"""),
            new TestCase("Tom.{10,25}river|river.{10,25}Tom")
        };

        // ---- Pass 1: first match only ----
        Console.Write("Running 'First Match' test...");
        for (int i = 0; i < iterations; i++)
        {
            foreach (var testcase in testcases)
            {
                // Regex construction is deliberately kept outside the timed regions.
                var re2b = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline | rr.RegexOptions.Latin1);
                var re2s = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline);
                var nets = new nn.Regex(testcase.Pattern, nn.RegexOptions.Multiline);

                watch.Start();
                var re2ByteMatch = re2b.Match(haybytes);
                testcase.AddRe2ByteResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                watch.Reset();

                watch.Start();
                var re2StringMatch = re2s.Match(haystring);
                testcase.AddRe2StringResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                watch.Reset();

                watch.Start();
                var netMatch = nets.Match(haystring);
                testcase.AddNETResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                watch.Reset();

                // Value differences are reported but do not fail the run.
                if (re2ByteMatch.Value != re2StringMatch.Value)
                {
                    Console.WriteLine();
                    Console.WriteLine("Match.Value: RE2 bytes failed to match RE2 string for pattern " + re2b.Pattern);
                    Console.WriteLine("This is not necessarily an error and may be due to accent characters.");
                    Console.WriteLine("RE2 bytes value: " + re2ByteMatch.Value);
                    Console.WriteLine("RE2 string value: " + re2StringMatch.Value);
                    Console.WriteLine();
                }

                if (re2StringMatch.Value != netMatch.Value)
                {
                    Console.WriteLine();
                    Console.WriteLine("Match.Value: RE2 string failed to match .NET string for pattern " + re2b.Pattern);
                    Console.WriteLine("This is not necessarily an error and may be due to accent characters.");
                    Console.WriteLine("RE2 string value: " + re2StringMatch.Value);
                    Console.WriteLine(".NET string value: " + netMatch.Value);
                    Console.WriteLine();
                }

                Assert.AreEqual(re2StringMatch.Index, netMatch.Index, "Match.Index: RE2 string, NET : " + re2b.Pattern);
                Assert.AreEqual(re2StringMatch.Length, netMatch.Length, "Match.Length: RE2 string, NET : " + re2b.Pattern);
            }
        }

        Console.WriteLine("\n\nResults:\n\n");
        PrintByteVsStringResults(testcases);
        Console.WriteLine("\n");
        PrintStringVsStringResults(testcases);

        // Clear the accumulated timings before the second pass.
        foreach (var testcase in testcases)
        {
            testcase.Reset();
        }

        // ---- Pass 2: all matches ----
        Console.Write("\n\nRunning 'All Matches' test...");
        for (int i = 0; i < iterations; i++)
        {
            foreach (var testcase in testcases)
            {
                var re2b = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline | rr.RegexOptions.Latin1);
                var re2s = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline);
                var nets = new nn.Regex(testcase.Pattern, nn.RegexOptions.Multiline);

                // Matches() methods are lazily evaluated, so Count is read inside each
                // timed region to make the search actually run before the clock is read.
                watch.Start();
                var re2ByteMatches = re2b.Matches(haybytes);
                testcase.Re2ByteMatchCount = re2ByteMatches.Count;
                testcase.AddRe2ByteResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                watch.Reset();

                watch.Start();
                var re2StringMatches = re2s.Matches(haystring);
                testcase.Re2StringMatchCount = re2StringMatches.Count;
                testcase.AddRe2StringResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                watch.Reset();

                watch.Start();
                var netMatches = nets.Matches(haystring);
                testcase.NETMatchCount = netMatches.Count;
                testcase.AddNETResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                watch.Reset();

                Assert.AreEqual(re2ByteMatches.Count, re2StringMatches.Count, "Match.Count: RE2 bytes, RE2 string : " + re2b.Pattern);
                Assert.AreEqual(re2ByteMatches.Count, netMatches.Count, "Match.Count: RE2 bytes, NET : " + re2b.Pattern);

                // Counts were asserted equal above, so one index is used for all three collections.
                for (int j = 0; j < re2ByteMatches.Count; j++)
                {
                    if (re2ByteMatches[j].Value != re2StringMatches[j].Value)
                    {
                        Console.WriteLine();
                        Console.WriteLine("Match.Value: RE2 bytes failed to match RE2 string for pattern " + re2b.Pattern);
                        Console.WriteLine("This is not necessarily an error and may be due to accent characters.");
                        Console.WriteLine("RE2 bytes value: " + re2ByteMatches[j].Value);
                        Console.WriteLine("RE2 string value: " + re2StringMatches[j].Value);
                        Console.WriteLine();
                    }

                    if (re2StringMatches[j].Value != netMatches[j].Value)
                    {
                        Console.WriteLine();
                        Console.WriteLine("Match.Value: RE2 string failed to match .NET string for pattern " + re2b.Pattern);
                        Console.WriteLine("This is not necessarily an error and may be due to accent characters.");
                        Console.WriteLine("RE2 string value: " + re2StringMatches[j].Value);
                        Console.WriteLine(".NET string value: " + netMatches[j].Value);
                        Console.WriteLine();
                    }
                }
            }
        }

        Console.WriteLine("\n\nResults:\n\n");
        PrintByteVsStringResults(testcases);
        Console.WriteLine("\n");
        PrintStringVsStringResults(testcases);
    }
    catch (Exception ex)
    {
        // Keep the message and the stack trace on separate lines; concatenating them
        // directly runs the message into the first stack frame.
        Console.WriteLine(ex.Message + Environment.NewLine + ex.StackTrace);
    }

    GC.Collect();
    Console.WriteLine("Done.");
    Console.ReadLine();
}
/// <summary>
/// Runs the regression checks for issues #1-#3, a set of simple behavioural checks, and then
/// the performance comparison of <c>rr.Regex</c> (reported as "RE2") against <c>nn.Regex</c>
/// (reported as ".NET") over the contents of <c>..\..\mtent12.txt</c>.
/// </summary>
/// <remarks>
/// All functional checks use <c>Debug.Assert</c>, which is compiled out when the DEBUG symbol
/// is not defined; in such builds the "... Success." lines are printed without the checks
/// having run. Any exception is caught and printed, after which the method waits for a line
/// of input before returning.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    try
    {
        // ---- Regression checks for reported issues ----
        {
            Console.WriteLine("Running issue #1 test ...");
            string source = "red car white car";
            string pattern = @"(\w+)\s+(car)";
            MatchCollection matches = Regex.Matches(source, pattern);
            Debug.Assert(matches.Count == 2);
            Debug.Assert(matches[0].Value == "red car");
            Debug.Assert(matches[1].Value == "white car");
            Debug.Assert(matches[0].Index == 0);
            Debug.Assert(matches[1].Index == 8);
            Console.WriteLine("\t... Success.\n");

            Console.WriteLine("Running issue #2 test ...");
            // Dispose the regex deterministically, as the issue #3 check below does.
            using (var r = new rr.Regex(@"\d*"))
            {
                Debug.Assert(r.Match("123", 1).Index == 1);
                Debug.Assert(r.Match("123", 1).Length == 2);
                Debug.Assert(r.Match("123", 1).Value == "23");
            }
            Console.WriteLine("\t... Success.\n");

            Console.WriteLine("Running issue #3 test ...");
            int success = 0;
            for (uint i = 0; i < 100; ++i)
            {
                using (var re2 = new Regex("a"))
                {
                    Match match = re2.Match("a");
                    if (match.Success)
                    {
                        success++;
                    }
                }
            }
            Debug.Assert(success == 100);
            Console.WriteLine("\t... Success.\n");
        }

        // ---- Simple behavioural checks ----
        {
            Console.WriteLine("Running simple tests ...");

            // Five matches, each with a value of "".
            Debug.Assert(Regex.Matches("xxxx", "").Count == 5);

            // Accessing an array (technically, the default Item property) with a negative index? Yup!
            Debug.Assert(Regex.Matches("xxxx", "")[0].Groups[-1].Value == "");

            // The returned Match contains a Group collection, and the first item in the Group collection is the Match.
            Match match = Regex.Match("abcd", "abcd");
            Debug.Assert((Group)match == match.Groups[0]);

            // .NET allows invalid UTF-16 strings (including unpaired surrogates).
            Debug.Assert(Regex.Match("\xD800", "\xD800").Length == 1);

            // The translation isn't consistent for invalid strings, though.
            Debug.Assert(Regex.Match(Encoding.UTF8.GetBytes("\xD800"), "\xD800") == Match.Empty);

            // 2-byte UTF-16 to 3-byte UTF-8.
            Debug.Assert(Regex.Match("水DŽ", "水DŽ").Length == 2);
            Debug.Assert(Regex.Match(Encoding.UTF8.GetBytes("水DŽ"), "水DŽ").Length == 5);

            // To the BMP ... and beyond!
            Debug.Assert(Regex.Match("xx𠜎𠜱𠝹𠱓", "𠜎𠜱𠝹𠱓").Value == "𠜎𠜱𠝹𠱓");

            using (var r = new rr.Regex("𠝹𠱓"))
            {
                // Disallow beginning searches in the middle of a UTF-16 surrogate pair.
                bool exception = false;
                try
                {
                    r.Match("𠜎𠜱𠝹𠱓", 1);
                }
                catch (ArgumentException)
                {
                    exception = true;
                }

                // Indices and lengths are reported as UTF-16 code units.
                Debug.Assert(r.Match("𠜎水𠜱𠝹𠱓", 5).Index == 5);
                Debug.Assert(r.Match("𠜎水𠜱𠝹𠱓", 5).Length == 4);
                Debug.Assert(exception);
            }

            Console.WriteLine("\t... Success.\n");
        }

        // ---- Performance comparison ----
        {
            Console.WriteLine("Running performance tests ...\n");
            var haybytes = System.IO.File.ReadAllBytes(@"..\..\mtent12.txt");

            // Time the byte -> string decoding separately from the regex work.
            var watch = new Stopwatch();
            watch.Start();
            var haystring = Encoding.ASCII.GetString(haybytes);
            var encodetime = watch.Elapsed;
            watch.Reset();

            Console.WriteLine("\tText length: " + haystring.Length);
            Console.WriteLine("\tEncoding time: " + encodetime);
            Console.WriteLine();

            var testcases = new TestCase[16]
            {
                new TestCase("Twain"),
                new TestCase("^Twain"),
                new TestCase("Twain$"),
                new TestCase("Huck[a-zA-Z]+|Finn[a-zA-Z]+"),
                new TestCase("a[^x]{20}b"),
                new TestCase("Tom|Sawyer|Huckleberry|Finn"),
                new TestCase(".{0,3}(Tom|Sawyer|Huckleberry|Finn)"),
                new TestCase("[a-zA-Z]+ing"),
                new TestCase("^[a-zA-Z]{0,4}ing[^a-zA-Z]"),
                new TestCase("[a-zA-Z]+ing$"),
                new TestCase("^[a-zA-Z ]{5,}$"),
                new TestCase("^.{16,20}$"),
                new TestCase("([a-f](.[d-m].){0,2}[h-n]){2}"),
                new TestCase("([A-Za-z]awyer|[A-Za-z]inn)[^a-zA-Z]"),
                new TestCase(@"""[^""]{0,30}[?!\.]"""),
                new TestCase("Tom.{10,25}river|river.{10,25}Tom")
            };

            Console.Write("\tRunning 'First Match' test...");
            foreach (var testcase in testcases)
            {
                // Construction and disposal of the regexes stay outside the timed regions.
                using (var re2b = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline | rr.RegexOptions.Latin1))
                using (var re2s = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline))
                {
                    var nets = new nn.Regex(testcase.Pattern, nn.RegexOptions.Multiline);

                    watch.Start();
                    var re2ByteMatch = re2b.Match(haybytes);
                    testcase.AddRe2ByteResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                    watch.Reset();

                    watch.Start();
                    var re2StringMatch = re2s.Match(haystring);
                    testcase.AddRe2StringResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                    watch.Reset();

                    watch.Start();
                    var netMatch = nets.Match(haystring);
                    testcase.AddNETResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                    watch.Reset();

                    // Value differences are reported but do not fail the run.
                    if (re2ByteMatch.Value != re2StringMatch.Value)
                    {
                        Console.WriteLine();
                        Console.WriteLine("\tMatch.Value: RE2 bytes failed to match RE2 string for pattern " + re2b.Pattern);
                        Console.WriteLine("\tThis is not necessarily an error and may be due to accent characters.");
                        Console.WriteLine("\tRE2 bytes value: " + re2ByteMatch.Value);
                        Console.WriteLine("\tRE2 string value: " + re2StringMatch.Value);
                        Console.WriteLine();
                    }

                    if (re2StringMatch.Value != netMatch.Value)
                    {
                        Console.WriteLine();
                        Console.WriteLine("\tMatch.Value: RE2 string failed to match .NET string for pattern " + re2b.Pattern);
                        Console.WriteLine("\tThis is not necessarily an error and may be due to accent characters.");
                        Console.WriteLine("\tRE2 string value: " + re2StringMatch.Value);
                        Console.WriteLine("\t.NET string value: " + netMatch.Value);
                        Console.WriteLine();
                    }

                    Debug.Assert(re2StringMatch.Index == netMatch.Index);
                    Debug.Assert(re2StringMatch.Length == netMatch.Length);
                }
            }

            Console.WriteLine("\n\nResults:\n\n");
            PrintByteVsStringResults(testcases);
            Console.WriteLine("\n");
            PrintStringVsStringResults(testcases);
            Console.WriteLine("\n\t... Success.\n");

            // Clear the accumulated timings before the second pass.
            foreach (var testcase in testcases)
            {
                testcase.Reset();
            }

            Console.Write("\n\n\tRunning 'All Matches' test...");
            foreach (var testcase in testcases)
            {
                using (var re2b = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline | rr.RegexOptions.Latin1))
                using (var re2s = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline))
                {
                    var nets = new nn.Regex(testcase.Pattern, nn.RegexOptions.Multiline);

                    // Matches() methods are lazily evaluated, so Count is read inside each
                    // timed region to make the search actually run before the clock is read.
                    watch.Start();
                    var re2ByteMatches = re2b.Matches(haybytes);
                    testcase.Re2ByteMatchCount = re2ByteMatches.Count;
                    testcase.AddRe2ByteResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                    watch.Reset();

                    watch.Start();
                    var re2StringMatches = re2s.Matches(haystring);
                    testcase.Re2StringMatchCount = re2StringMatches.Count;
                    testcase.AddRe2StringResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                    watch.Reset();

                    watch.Start();
                    var netMatches = nets.Matches(haystring);
                    testcase.NETMatchCount = netMatches.Count;
                    testcase.AddNETResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                    watch.Reset();

                    Debug.Assert(re2ByteMatches.Count == re2StringMatches.Count);
                    Debug.Assert(re2ByteMatches.Count == netMatches.Count);

                    // The per-match comparison indexes all three collections by the byte-match count.
                    for (int j = 0; j < re2ByteMatches.Count; j++)
                    {
                        if (re2ByteMatches[j].Value != re2StringMatches[j].Value)
                        {
                            Console.WriteLine();
                            Console.WriteLine("\tMatch.Value: RE2 bytes failed to match RE2 string for pattern " + re2b.Pattern);
                            Console.WriteLine("\tThis is not necessarily an error and may be due to accent characters.");
                            Console.WriteLine("\tRE2 bytes value: " + re2ByteMatches[j].Value);
                            Console.WriteLine("\tRE2 string value: " + re2StringMatches[j].Value);
                            Console.WriteLine();
                        }

                        if (re2StringMatches[j].Value != netMatches[j].Value)
                        {
                            Console.WriteLine();
                            Console.WriteLine("\tMatch.Value: RE2 string failed to match .NET string for pattern " + re2b.Pattern);
                            Console.WriteLine("\tThis is not necessarily an error and may be due to accent characters.");
                            Console.WriteLine("\tRE2 string value: " + re2StringMatches[j].Value);
                            Console.WriteLine("\t.NET string value: " + netMatches[j].Value);
                            Console.WriteLine();
                        }
                    }
                }
            }

            Console.WriteLine("\n\nResults:\n\n");
            PrintByteVsStringResults(testcases);
            Console.WriteLine("\n");
            PrintStringVsStringResults(testcases);
            Console.WriteLine("\n\t... Success.\n");
        }
    }
    catch (Exception ex)
    {
        // Keep the message and the stack trace on separate lines; concatenating them
        // directly runs the message into the first stack frame.
        Console.WriteLine(ex.Message + Environment.NewLine + ex.StackTrace);
    }

    GC.Collect();
    Console.WriteLine();
    Console.WriteLine("Done.");
    Console.ReadLine();
}