Beispiel #1
0
        /// <summary>
        /// Benchmarks RE2 (byte input and string input) against the .NET regex engine.
        /// </summary>
        /// <remarks>
        /// Reads <c>..\..\mtent12.txt</c>, decodes it as ASCII, and runs every pattern of the
        /// test-case table through three regex objects: RE2 with the Latin1 option over the raw
        /// bytes, RE2 over the decoded string, and .NET over the decoded string. A
        /// "First Match" pass is followed by an "All Matches" pass; each pass is repeated
        /// <c>iterations</c> times and its timings are printed afterwards. Differences in the
        /// matched text are only reported on the console, whereas differences in index, length
        /// or match count go through <c>Assert.AreEqual</c>. Any exception is caught and
        /// written to the console. The method waits on <c>Console.ReadLine</c> before returning.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            try
            {
                var iterations = 5;

                var haybytes = System.IO.File.ReadAllBytes(@"..\..\mtent12.txt");
                var watch    = new Stopwatch();

                // Time the byte-to-string decoding on its own so it can be reported separately.
                watch.Start();
                var haystring = Encoding.ASCII.GetString(haybytes);
                watch.Stop();
                var encodetime = watch.Elapsed;
                watch.Reset();

                Console.WriteLine("Text length: " + haystring.Length);
                Console.WriteLine("Encoding time: " + encodetime);
                Console.WriteLine();

                var testcases = new TestCase[16] {
                    new TestCase("Twain"),
                    new TestCase("^Twain"),
                    new TestCase("Twain$"),
                    new TestCase("Huck[a-zA-Z]+|Finn[a-zA-Z]+"),
                    new TestCase("a[^x]{20}b"),
                    new TestCase("Tom|Sawyer|Huckleberry|Finn"),
                    new TestCase(".{0,3}(Tom|Sawyer|Huckleberry|Finn)"),
                    new TestCase("[a-zA-Z]+ing"),
                    new TestCase("^[a-zA-Z]{0,4}ing[^a-zA-Z]"),
                    new TestCase("[a-zA-Z]+ing$"),
                    new TestCase("^[a-zA-Z ]{5,}$"),
                    new TestCase("^.{16,20}$"),
                    new TestCase("([a-f](.[d-m].){0,2}[h-n]){2}"),
                    new TestCase("([A-Za-z]awyer|[A-Za-z]inn)[^a-zA-Z]"),
                    new TestCase(@"""[^""]{0,30}[?!\.]"""),
                    new TestCase("Tom.{10,25}river|river.{10,25}Tom")
                };

                Console.Write("Running 'First Match' test...");

                for(int i = 0; i < iterations; i++)
                {
                    foreach(var testcase in testcases)
                    {
                        // NOTE(review): if rr.Regex is disposable, these two instances should be
                        // wrapped in using statements — confirm against the library.
                        var re2b = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline | rr.RegexOptions.Latin1);
                        var re2s = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline);
                        var nets = new nn.Regex(testcase.Pattern, nn.RegexOptions.Multiline);

                        // Each engine is timed separately; the watch is stopped before it is
                        // read so the recorded ticks cover only the Match call.
                        watch.Start();
                        var re2ByteMatch = re2b.Match(haybytes);
                        watch.Stop();
                        testcase.AddRe2ByteResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                        watch.Reset();

                        watch.Start();
                        var re2StringMatch = re2s.Match(haystring);
                        watch.Stop();
                        testcase.AddRe2StringResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                        watch.Reset();

                        watch.Start();
                        var netMatch = nets.Match(haystring);
                        watch.Stop();
                        testcase.AddNETResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                        watch.Reset();

                        // Differing text is reported but tolerated (see the message printed by the helper).
                        if(re2ByteMatch.Value != re2StringMatch.Value)
                        {
                            ReportValueMismatch(re2b.Pattern, "RE2 bytes", re2ByteMatch.Value, "RE2 string", re2StringMatch.Value);
                        }

                        if(re2StringMatch.Value != netMatch.Value)
                        {
                            ReportValueMismatch(re2b.Pattern, "RE2 string", re2StringMatch.Value, ".NET string", netMatch.Value);
                        }

                        // Position and length of the first match must agree between RE2 and .NET.
                        Assert.AreEqual(re2StringMatch.Index, netMatch.Index, "Match.Index: RE2 string, NET : " + re2b.Pattern);
                        Assert.AreEqual(re2StringMatch.Length, netMatch.Length, "Match.Length: RE2 string, NET : " + re2b.Pattern);
                    }
                }

                Console.WriteLine("\n\nResults:\n\n");

                PrintByteVsStringResults(testcases);
                Console.WriteLine("\n");
                PrintStringVsStringResults(testcases);

                // Clear the recorded results before the second pass.
                foreach(var testcase in testcases)
                {
                    testcase.Reset();
                }

                Console.Write("\n\nRunning 'All Matches' test...");

                for(int i = 0; i < iterations; i++)
                {
                    foreach(var testcase in testcases)
                    {
                        var re2b = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline | rr.RegexOptions.Latin1);
                        var re2s = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline);
                        var nets = new nn.Regex(testcase.Pattern, nn.RegexOptions.Multiline);

                        watch.Start();
                        var re2ByteMatches = re2b.Matches(haybytes);
                        // Matches() methods are lazily evaluated, so Count is read inside the timed section.
                        testcase.Re2ByteMatchCount = re2ByteMatches.Count;
                        watch.Stop();
                        testcase.AddRe2ByteResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                        watch.Reset();

                        watch.Start();
                        var re2StringMatches = re2s.Matches(haystring);
                        // Matches() methods are lazily evaluated, so Count is read inside the timed section.
                        testcase.Re2StringMatchCount = re2StringMatches.Count;
                        watch.Stop();
                        testcase.AddRe2StringResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                        watch.Reset();

                        watch.Start();
                        var netMatches = nets.Matches(haystring);
                        // Matches() methods are lazily evaluated, so Count is read inside the timed section.
                        testcase.NETMatchCount = netMatches.Count;
                        watch.Stop();
                        testcase.AddNETResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                        watch.Reset();

                        // All three engines must find the same number of matches.
                        Assert.AreEqual(re2ByteMatches.Count, re2StringMatches.Count, "Match.Count: RE2 bytes, RE2 string : " + re2b.Pattern);
                        Assert.AreEqual(re2ByteMatches.Count, netMatches.Count, "Match.Count: RE2 bytes, NET : " + re2b.Pattern);

                        for(int j = 0; j < re2ByteMatches.Count; j++)
                        {
                            if(re2ByteMatches[j].Value != re2StringMatches[j].Value)
                            {
                                ReportValueMismatch(re2b.Pattern, "RE2 bytes", re2ByteMatches[j].Value, "RE2 string", re2StringMatches[j].Value);
                            }

                            if(re2StringMatches[j].Value != netMatches[j].Value)
                            {
                                ReportValueMismatch(re2b.Pattern, "RE2 string", re2StringMatches[j].Value, ".NET string", netMatches[j].Value);
                            }
                        }
                    }
                }

                Console.WriteLine("\n\nResults:\n\n");

                PrintByteVsStringResults(testcases);
                Console.WriteLine("\n");
                PrintStringVsStringResults(testcases);
            }
            catch(Exception ex)
            {
                Console.WriteLine(ex.Message + ex.StackTrace);
            }

            // NOTE(review): presumably here to collect the regex objects created above before
            // the program waits for input — confirm it is still needed.
            GC.Collect();
            Console.WriteLine("Done.");
            Console.ReadLine();
        }

        /// <summary>
        /// Prints a console report for two engines whose matched text differs for a pattern.
        /// </summary>
        /// <param name="pattern">The regex pattern that produced the differing matches.</param>
        /// <param name="firstEngine">Display name of the first engine, e.g. "RE2 bytes".</param>
        /// <param name="firstValue">Text matched by the first engine.</param>
        /// <param name="secondEngine">Display name of the second engine, e.g. ".NET string".</param>
        /// <param name="secondValue">Text matched by the second engine.</param>
        private static void ReportValueMismatch(string pattern, string firstEngine, string firstValue, string secondEngine, string secondValue)
        {
            Console.WriteLine();
            Console.WriteLine("Match.Value: " + firstEngine + " failed to match " + secondEngine + " for pattern " + pattern);
            Console.WriteLine("This is not necessarily an error and may be due to accent characters.");
            Console.WriteLine(firstEngine + " value: " + firstValue);
            Console.WriteLine(secondEngine + " value: " + secondValue);
            Console.WriteLine();
        }
Beispiel #2
0
        /// <summary>
        /// Runs the regression tests for issues #1 to #3, a set of simple behaviour tests, and
        /// the RE2 versus .NET performance comparison, writing progress to the console.
        /// </summary>
        /// <remarks>
        /// The performance section reads <c>..\..\mtent12.txt</c>, decodes it as ASCII, and runs
        /// every pattern of the test-case table once through RE2 over bytes (Latin1 option),
        /// RE2 over the string and .NET over the string, first for the first match and then for
        /// all matches. Differences in matched text are only reported on the console. Any
        /// exception is caught and written to the console, and the method waits on
        /// <c>Console.ReadLine</c> before returning.
        /// NOTE(review): every check uses <c>Debug.Assert</c>, which is normally compiled out of
        /// non-DEBUG builds; in such a build the "Success" lines would print without any check
        /// having run — confirm the intended build configuration.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            try
            {
                {
                    Console.WriteLine("Running issue #1 test ...");
                    string          source  = "red car white car";
                    string          pattern = @"(\w+)\s+(car)";
                    MatchCollection matches = Regex.Matches(source, pattern);
                    Debug.Assert(matches.Count == 2);
                    Debug.Assert(matches[0].Value == "red car");
                    Debug.Assert(matches[1].Value == "white car");
                    Debug.Assert(matches[0].Index == 0);
                    Debug.Assert(matches[1].Index == 8);
                    Console.WriteLine("\t... Success.\n");


                    Console.WriteLine("Running issue #2 test ...");
                    // The issue #3 test below disposes its regex with a using statement, so the
                    // instances created here are released the same way.
                    // NOTE(review): assumes rr.Regex is the same disposable type as the
                    // unqualified Regex — confirm against the file's using directives.
                    using (var r = new rr.Regex(@"\d*"))
                    {
                        // Matching starts at index 1, so the leading "1" must be skipped.
                        Debug.Assert(r.Match("123", 1).Index == 1);
                        Debug.Assert(r.Match("123", 1).Length == 2);
                        Debug.Assert(r.Match("123", 1).Value == "23");
                    }
                    Console.WriteLine("\t... Success.\n");

                    Console.WriteLine("Running issue #3 test ...");
                    // Create, use and dispose a regex 100 times; every round must still match.
                    int success = 0;
                    for (uint i = 0; i < 100; ++i)
                    {
                        using (var re2 = new Regex("a"))
                        {
                            Match match = re2.Match("a");
                            if (match.Success)
                            {
                                success++;
                            }
                        }
                    }
                    Debug.Assert(success == 100);
                    Console.WriteLine("\t... Success.\n");
                }

                {
                    Console.WriteLine("Running simple tests ...");
                    // Five matches, each with a value of "".
                    Debug.Assert(Regex.Matches("xxxx", "").Count == 5);
                    // Accessing an array (technically, the default Item property) with a negative index? Yup!
                    Debug.Assert(Regex.Matches("xxxx", "")[0].Groups[-1].Value == "");
                    // The returned Match contains a Group collection, and the first item in the Group collection is the Match.
                    Match match = Regex.Match("abcd", "abcd");
                    Debug.Assert((Group)match == match.Groups[0]);
                    // .NET allows invalid UTF-16 strings (including freestanding codepoints).
                    Debug.Assert(Regex.Match("\xD800", "\xD800").Length == 1);
                    // The translation isn't consistent for invalid strings, though.
                    Debug.Assert(Regex.Match(Encoding.UTF8.GetBytes("\xD800"), "\xD800") == Match.Empty);
                    // 2-byte UTF-16 to 3-byte UTF-8.
                    Debug.Assert(Regex.Match("水DŽ", "水DŽ").Length == 2);
                    Debug.Assert(Regex.Match(Encoding.UTF8.GetBytes("水DŽ"), "水DŽ").Length == 5);
                    // To the BMP ... and beyond!
                    Debug.Assert(Regex.Match("xx𠜎𠜱𠝹𠱓", "𠜎𠜱𠝹𠱓").Value == "𠜎𠜱𠝹𠱓");
                    using (var r = new rr.Regex("𠝹𠱓"))
                    {
                        // Disallow beginning searches in the middle of a UTF-16 surrogate pair.
                        bool exception = false;
                        try
                        {
                            r.Match("𠜎𠜱𠝹𠱓", 1);
                        }
                        catch (ArgumentException)
                        {
                            exception = true;
                        }
                        // Indices and lengths are reported as UTF-16 code units.
                        Debug.Assert(r.Match("𠜎水𠜱𠝹𠱓", 5).Index == 5);
                        Debug.Assert(r.Match("𠜎水𠜱𠝹𠱓", 5).Length == 4);
                        Debug.Assert(exception);
                    }
                    Console.WriteLine("\t... Success.\n");
                }

                {
                    Console.WriteLine("Running performance tests ...\n");

                    var haybytes = System.IO.File.ReadAllBytes(@"..\..\mtent12.txt");
                    var watch    = new Stopwatch();

                    // Time the byte-to-string decoding on its own so it can be reported separately.
                    watch.Start();
                    var haystring = Encoding.ASCII.GetString(haybytes);
                    watch.Stop();
                    var encodetime = watch.Elapsed;
                    watch.Reset();

                    Console.WriteLine("\tText length: " + haystring.Length);
                    Console.WriteLine("\tEncoding time: " + encodetime);
                    Console.WriteLine();

                    var testcases = new TestCase[16] {
                        new TestCase("Twain"),
                        new TestCase("^Twain"),
                        new TestCase("Twain$"),
                        new TestCase("Huck[a-zA-Z]+|Finn[a-zA-Z]+"),
                        new TestCase("a[^x]{20}b"),
                        new TestCase("Tom|Sawyer|Huckleberry|Finn"),
                        new TestCase(".{0,3}(Tom|Sawyer|Huckleberry|Finn)"),
                        new TestCase("[a-zA-Z]+ing"),
                        new TestCase("^[a-zA-Z]{0,4}ing[^a-zA-Z]"),
                        new TestCase("[a-zA-Z]+ing$"),
                        new TestCase("^[a-zA-Z ]{5,}$"),
                        new TestCase("^.{16,20}$"),
                        new TestCase("([a-f](.[d-m].){0,2}[h-n]){2}"),
                        new TestCase("([A-Za-z]awyer|[A-Za-z]inn)[^a-zA-Z]"),
                        new TestCase(@"""[^""]{0,30}[?!\.]"""),
                        new TestCase("Tom.{10,25}river|river.{10,25}Tom")
                    };

                    Console.Write("\tRunning 'First Match' test...");


                    foreach (var testcase in testcases)
                    {
                        // Release both RE2 regexes as soon as this pattern has been measured.
                        using (var re2b = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline | rr.RegexOptions.Latin1))
                        using (var re2s = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline))
                        {
                            var nets = new nn.Regex(testcase.Pattern, nn.RegexOptions.Multiline);

                            // Each engine is timed separately; the watch is stopped before it is
                            // read so the recorded ticks cover only the Match call.
                            watch.Start();
                            var re2ByteMatch = re2b.Match(haybytes);
                            watch.Stop();
                            testcase.AddRe2ByteResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                            watch.Reset();

                            watch.Start();
                            var re2StringMatch = re2s.Match(haystring);
                            watch.Stop();
                            testcase.AddRe2StringResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                            watch.Reset();

                            watch.Start();
                            var netMatch = nets.Match(haystring);
                            watch.Stop();
                            testcase.AddNETResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                            watch.Reset();

                            // Differing text is reported but tolerated (see the message printed by the helper).
                            if (re2ByteMatch.Value != re2StringMatch.Value)
                            {
                                ReportIndentedValueMismatch(re2b.Pattern, "RE2 bytes", re2ByteMatch.Value, "RE2 string", re2StringMatch.Value);
                            }

                            if (re2StringMatch.Value != netMatch.Value)
                            {
                                ReportIndentedValueMismatch(re2b.Pattern, "RE2 string", re2StringMatch.Value, ".NET string", netMatch.Value);
                            }

                            // Position and length of the first match must agree between RE2 and .NET.
                            Debug.Assert(re2StringMatch.Index == netMatch.Index);
                            Debug.Assert(re2StringMatch.Length == netMatch.Length);
                        }
                    }

                    Console.WriteLine("\n\nResults:\n\n");

                    PrintByteVsStringResults(testcases);
                    Console.WriteLine("\n");
                    PrintStringVsStringResults(testcases);

                    Console.WriteLine("\n\t... Success.\n");

                    // Clear the recorded results before the second pass.
                    foreach (var testcase in testcases)
                    {
                        testcase.Reset();
                    }

                    Console.Write("\n\n\tRunning 'All Matches' test...");

                    foreach (var testcase in testcases)
                    {
                        using (var re2b = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline | rr.RegexOptions.Latin1))
                        using (var re2s = new rr.Regex(testcase.Pattern, rr.RegexOptions.Multiline))
                        {
                            var nets = new nn.Regex(testcase.Pattern, nn.RegexOptions.Multiline);

                            watch.Start();
                            var re2ByteMatches = re2b.Matches(haybytes);
                            // Matches() methods are lazily evaluated, so Count is read inside the timed section.
                            testcase.Re2ByteMatchCount = re2ByteMatches.Count;
                            watch.Stop();
                            testcase.AddRe2ByteResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                            watch.Reset();

                            watch.Start();
                            var re2StringMatches = re2s.Matches(haystring);
                            // Matches() methods are lazily evaluated, so Count is read inside the timed section.
                            testcase.Re2StringMatchCount = re2StringMatches.Count;
                            watch.Stop();
                            testcase.AddRe2StringResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                            watch.Reset();

                            watch.Start();
                            var netMatches = nets.Matches(haystring);
                            // Matches() methods are lazily evaluated, so Count is read inside the timed section.
                            testcase.NETMatchCount = netMatches.Count;
                            watch.Stop();
                            testcase.AddNETResult(TimerTicksToMilliseconds(watch.ElapsedTicks));
                            watch.Reset();

                            // All three engines must find the same number of matches.
                            Debug.Assert(re2ByteMatches.Count == re2StringMatches.Count);
                            Debug.Assert(re2ByteMatches.Count == netMatches.Count);

                            for (int j = 0; j < re2ByteMatches.Count; j++)
                            {
                                if (re2ByteMatches[j].Value != re2StringMatches[j].Value)
                                {
                                    ReportIndentedValueMismatch(re2b.Pattern, "RE2 bytes", re2ByteMatches[j].Value, "RE2 string", re2StringMatches[j].Value);
                                }

                                if (re2StringMatches[j].Value != netMatches[j].Value)
                                {
                                    ReportIndentedValueMismatch(re2b.Pattern, "RE2 string", re2StringMatches[j].Value, ".NET string", netMatches[j].Value);
                                }
                            }
                        }
                    }

                    Console.WriteLine("\n\nResults:\n\n");

                    PrintByteVsStringResults(testcases);
                    Console.WriteLine("\n");
                    PrintStringVsStringResults(testcases);

                    Console.WriteLine("\n\t... Success.\n");
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message + ex.StackTrace);
            }

            // NOTE(review): presumably here to collect any remaining regex objects before the
            // program waits for input — confirm it is still needed.
            GC.Collect();
            Console.WriteLine();
            Console.WriteLine("Done.");
            Console.ReadLine();
        }

        /// <summary>
        /// Prints a tab-indented console report for two engines whose matched text differs
        /// for a pattern.
        /// </summary>
        /// <param name="pattern">The regex pattern that produced the differing matches.</param>
        /// <param name="firstEngine">Display name of the first engine, e.g. "RE2 bytes".</param>
        /// <param name="firstValue">Text matched by the first engine.</param>
        /// <param name="secondEngine">Display name of the second engine, e.g. ".NET string".</param>
        /// <param name="secondValue">Text matched by the second engine.</param>
        private static void ReportIndentedValueMismatch(string pattern, string firstEngine, string firstValue, string secondEngine, string secondValue)
        {
            Console.WriteLine();
            Console.WriteLine("\tMatch.Value: " + firstEngine + " failed to match " + secondEngine + " for pattern " + pattern);
            Console.WriteLine("\tThis is not necessarily an error and may be due to accent characters.");
            Console.WriteLine("\t" + firstEngine + " value: " + firstValue);
            Console.WriteLine("\t" + secondEngine + " value: " + secondValue);
            Console.WriteLine();
        }