public void A05_AsyncRegExCrawlerTwoExpressionMultipleResultTest()
        {
            // Crawls the test log with two expressions through AsyncRegExCrawler and
            // compares the returned findings (order-insensitive) with a hand-built expectation.

            // load the log text that is going to be crawled
            string logText = File.ReadAllText("./Testdata/testdata.log");

            // the two search terms handed to the crawler
            var searchTerms = new List<RegExSearchTerm>();
            searchTerms.Add(new RegExSearchTerm()
            {
                Expression = @"(\d{4}-\d{2}-\d{2}).+(CatalogCacheUpdateJob perform)(.+)",
                ExpressionFriendlyName = "SearchTerm 1"
            });
            searchTerms.Add(new RegExSearchTerm()
            {
                Expression = @"2015-06-27 12:03:04,721  INFO   \[EJB default - 5\] RuleChangeCommand executeChange - Setting entry 00.2285 to MODIFY",
                ExpressionFriendlyName = "Ex1"
            });

            // expected group values for the first term: one row per match,
            // column 0 is the whole match, columns 1..3 are the capture groups
            string[][] catalogRows =
            {
                new[]
                {
                    "2015-06-27 13:00:00,005  INFO   [EJB default - 2] CatalogCacheUpdateJob perform - Performing scheduled reload of catalog cache...\r",
                    "2015-06-27",
                    "CatalogCacheUpdateJob perform",
                    " - Performing scheduled reload of catalog cache...\r"
                },
                new[]
                {
                    "2015-06-27 13:01:30,110  INFO   [EJB default - 2] CatalogCacheUpdateJob perform - Scheduled catalog cache reload successfully completed.\r",
                    "2015-06-27",
                    "CatalogCacheUpdateJob perform",
                    " - Scheduled catalog cache reload successfully completed.\r"
                },
                new[]
                {
                    "2015-06-28 01:00:00,006  INFO   [EJB default - 6] CatalogCacheUpdateJob perform - Performing scheduled reload of catalog cache...\r",
                    "2015-06-28",
                    "CatalogCacheUpdateJob perform",
                    " - Performing scheduled reload of catalog cache...\r"
                },
                new[]
                {
                    "2015-06-28 01:01:29,226  INFO   [EJB default - 6] CatalogCacheUpdateJob perform - Scheduled catalog cache reload successfully completed.\r",
                    "2015-06-28",
                    "CatalogCacheUpdateJob perform",
                    " - Scheduled catalog cache reload successfully completed.\r"
                }
            };

            // turn the table into match collections; ids are 1-based positions
            var catalogMatches = new List<RegExTractorMatchCollection>();
            for (int row = 0; row < catalogRows.Length; row++)
            {
                var groups = new List<RegExTractorMatch>();
                for (int col = 0; col < catalogRows[row].Length; col++)
                {
                    groups.Add(new RegExTractorMatch() { Id = col + 1, Match = catalogRows[row][col] });
                }

                catalogMatches.Add(new RegExTractorMatchCollection() { Id = row + 1, MatchCollection = groups });
            }

            // expected finding for the first term
            var catalogFinding = new Finding()
            {
                Expression = @"(\d{4}-\d{2}-\d{2}).+(CatalogCacheUpdateJob perform)(.+)",
                ExpressionFriendlyName = "SearchTerm 1",
                FileFolder = @"D:\WORK\RegExTractor\src\RegExTractor\RegExTractorTests\bin\Debug\Testdata",
                FileName = "testdata.log",
                Match = catalogMatches
            };

            // expected finding for the second term: a single match without capture groups
            var ruleChangeGroups = new List<RegExTractorMatch>();
            ruleChangeGroups.Add(new RegExTractorMatch()
            {
                Id = 1,
                Match = "2015-06-27 12:03:04,721  INFO   [EJB default - 5] RuleChangeCommand executeChange - Setting entry 00.2285 to MODIFY"
            });

            var ruleChangeMatches = new List<RegExTractorMatchCollection>();
            ruleChangeMatches.Add(new RegExTractorMatchCollection() { Id = 1, MatchCollection = ruleChangeGroups });

            var ruleChangeFinding = new Finding()
            {
                Expression = @"2015-06-27 12:03:04,721  INFO   \[EJB default - 5\] RuleChangeCommand executeChange - Setting entry 00.2285 to MODIFY",
                ExpressionFriendlyName = "Ex1",
                FileFolder = @"D:\WORK\RegExTractor\src\RegExTractor\RegExTractorTests\bin\Debug\Testdata",
                FileName = "testdata.log",
                Match = ruleChangeMatches
            };

            var expected = new List<Finding>();
            expected.Add(catalogFinding);
            expected.Add(ruleChangeFinding);

            // run the crawler under test
            IRegExCrawler crawler = new AsyncRegExCrawler();
            var actual = crawler.Crawl(searchTerms, logText, catalogFinding.FileName, catalogFinding.FileFolder);

            // deep comparison that does not care about collection order
            var deepComparer = new CompareLogic();
            deepComparer.Config.IgnoreCollectionOrder = true;
            var outcome = deepComparer.Compare(expected, actual);
            Assert.IsTrue(outcome.AreEqual, outcome.DifferencesString);

            Assert.AreEqual(expected.Count, actual.Count, "Another result set expected.");
        }
        public void A01_SimpleRexExCrawlerTest()
        {
            // Crawls the test log with one grouped expression through SimpleRegExCrawler
            // and deep-compares the returned findings with a hand-built expectation.
            string content = File.ReadAllText("./Testdata/testdata.log");

            var regExSearchTerm1 = new RegExSearchTerm()
            {
                Expression = @"(\d{4}-\d{2}-\d{2}).+(CatalogCacheUpdateJob perform)(.+)",
                ExpressionFriendlyName = "SearchTerm 1"
            };

            var regExSearchList = new List<RegExSearchTerm>() { regExSearchTerm1 };

            // Expected values, one row per match: column 0 is the whole match,
            // columns 1..3 are the three capture groups of the expression.
            string[][] expectedGroups =
            {
                new[]
                {
                    "2015-06-27 13:00:00,005  INFO   [EJB default - 2] CatalogCacheUpdateJob perform - Performing scheduled reload of catalog cache...\r",
                    "2015-06-27",
                    "CatalogCacheUpdateJob perform",
                    " - Performing scheduled reload of catalog cache...\r"
                },
                new[]
                {
                    "2015-06-27 13:01:30,110  INFO   [EJB default - 2] CatalogCacheUpdateJob perform - Scheduled catalog cache reload successfully completed.\r",
                    "2015-06-27",
                    "CatalogCacheUpdateJob perform",
                    " - Scheduled catalog cache reload successfully completed.\r"
                },
                new[]
                {
                    "2015-06-28 01:00:00,006  INFO   [EJB default - 6] CatalogCacheUpdateJob perform - Performing scheduled reload of catalog cache...\r",
                    "2015-06-28",
                    "CatalogCacheUpdateJob perform",
                    " - Performing scheduled reload of catalog cache...\r"
                },
                new[]
                {
                    "2015-06-28 01:01:29,226  INFO   [EJB default - 6] CatalogCacheUpdateJob perform - Scheduled catalog cache reload successfully completed.\r",
                    "2015-06-28",
                    "CatalogCacheUpdateJob perform",
                    " - Scheduled catalog cache reload successfully completed.\r"
                }
            };

            // Build the expected match collections; ids are 1-based positions.
            var expectedMatches = new List<RegExTractorMatchCollection>();
            for (int m = 0; m < expectedGroups.Length; m++)
            {
                var groups = new List<RegExTractorMatch>();
                for (int g = 0; g < expectedGroups[m].Length; g++)
                {
                    groups.Add(new RegExTractorMatch() { Id = g + 1, Match = expectedGroups[m][g] });
                }

                expectedMatches.Add(new RegExTractorMatchCollection() { Id = m + 1, MatchCollection = groups });
            }

            var expectedFinding1 = new Finding()
            {
                Expression = @"(\d{4}-\d{2}-\d{2}).+(CatalogCacheUpdateJob perform)(.+)",
                ExpressionFriendlyName = "SearchTerm 1",
                FileFolder = @"D:\WORK\RegExTractor\src\RegExTractor\RegExTractorTests\bin\Debug\Testdata",
                FileName = "testdata.log",
                Match = expectedMatches
            };

            var expected = new List<Finding>() { expectedFinding1 };

            IRegExCrawler crawler = new SimpleRegExCrawler();
            var actual = crawler.Crawl(regExSearchList, content, expected[0].FileName, expected[0].FileFolder);

            var comparer = new CompareLogic();
            var compareResult = comparer.Compare(expected, actual);

            // Pass the difference report as the failure message so a red test shows what differs.
            Assert.IsTrue(compareResult.AreEqual, compareResult.DifferencesString);
        }
        public void A02_SimpleRegexCrawlerOneExpressionOneResultTest()
        {
            // Crawls the test log with one literal expression through SimpleRegExCrawler
            // and checks that exactly one match with one group comes back.
            string content = File.ReadAllText("./Testdata/testdata.log");

            var regExSearchTerm = new RegExSearchTerm()
            {
                Expression = @"2015-06-27 12:03:04,721  INFO   \[EJB default - 5\] RuleChangeCommand executeChange - Setting entry 00.2285 to MODIFY",
                ExpressionFriendlyName = "Ex1"
            };

            // FileName and FileFolder are intentionally left unset here; the same
            // (unset) values are handed to the crawler below.
            var expectedFinding = new Finding()
            {
                Expression = @"2015-06-27 12:03:04,721  INFO   \[EJB default - 5\] RuleChangeCommand executeChange - Setting entry 00.2285 to MODIFY",
                ExpressionFriendlyName = "Ex1",
                Match = new List<RegExTractorMatchCollection>()
                {
                   new RegExTractorMatchCollection()
                   {
                       Id = 1,
                       MatchCollection = new List<RegExTractorMatch>()
                       {
                           new RegExTractorMatch()
                           {
                               Id = 1,
                               Match = "2015-06-27 12:03:04,721  INFO   [EJB default - 5] RuleChangeCommand executeChange - Setting entry 00.2285 to MODIFY"
                           }
                       }
                   }
                }
            };

            IRegExCrawler crawler = new SimpleRegExCrawler();
            var actual = crawler.Crawl(new List<RegExSearchTerm>() { regExSearchTerm }, content, expectedFinding.FileName, expectedFinding.FileFolder);

            var expected = new List<Finding>() { expectedFinding };

            // Fail with a clear message instead of an index error when nothing came back.
            Assert.AreEqual(expected.Count, actual.Count);

            Assert.AreEqual(expected[0].Expression, actual[0].Expression);
            Assert.AreEqual(expected[0].ExpressionFriendlyName, actual[0].ExpressionFriendlyName);
            Assert.AreEqual(expected[0].FileFolder, actual[0].FileFolder);
            Assert.AreEqual(expected[0].FileName, actual[0].FileName);

            var matchCount = actual[0].Match.Count;
            Assert.AreEqual(1, matchCount);

            // Walk every match by its own index (the bound is exclusive) and
            // compare each group, not just the first one.
            for (int i = 0; i < matchCount; i++)
            {
                var expectedMatch = expected[0].Match[i];
                var actualMatch = actual[0].Match[i];

                Assert.AreEqual(expectedMatch.Id, actualMatch.Id);
                Assert.AreEqual(expectedMatch.MatchCollection.Count, actualMatch.MatchCollection.Count);

                for (int g = 0; g < expectedMatch.MatchCollection.Count; g++)
                {
                    Assert.AreEqual(expectedMatch.MatchCollection[g].Id, actualMatch.MatchCollection[g].Id);
                    Assert.AreEqual(expectedMatch.MatchCollection[g].Match, actualMatch.MatchCollection[g].Match);
                }
            }
        }
        public void A03_SimpleRegExCrawlerTwoExpressionMultipleResultTest()
        {
            // Crawls the test log with two expressions through SimpleRegExCrawler and
            // verifies the findings both by deep comparison and field by field.

            // Read the content
            string content = File.ReadAllText("./Testdata/testdata.log");

            // build search term one
            var regExSearchTerm1 = new RegExSearchTerm()
            {
                Expression = @"(\d{4}-\d{2}-\d{2}).+(CatalogCacheUpdateJob perform)(.+)",
                ExpressionFriendlyName = "SearchTerm 1"
            };

            // build search term two
            var regExSearchTerm2 = new RegExSearchTerm()
            {
                Expression = @"2015-06-27 12:03:04,721  INFO   \[EJB default - 5\] RuleChangeCommand executeChange - Setting entry 00.2285 to MODIFY",
                ExpressionFriendlyName = "Ex1"
            };

            // fill the search term list
            var regExSearchList = new List<RegExSearchTerm>() { regExSearchTerm1, regExSearchTerm2 };

            // build the expected findings
            // finding one: one row per match; column 0 is the whole match,
            // columns 1..3 are the three capture groups of the expression
            string[][] expectedGroups =
            {
                new[]
                {
                    "2015-06-27 13:00:00,005  INFO   [EJB default - 2] CatalogCacheUpdateJob perform - Performing scheduled reload of catalog cache...\r",
                    "2015-06-27",
                    "CatalogCacheUpdateJob perform",
                    " - Performing scheduled reload of catalog cache...\r"
                },
                new[]
                {
                    "2015-06-27 13:01:30,110  INFO   [EJB default - 2] CatalogCacheUpdateJob perform - Scheduled catalog cache reload successfully completed.\r",
                    "2015-06-27",
                    "CatalogCacheUpdateJob perform",
                    " - Scheduled catalog cache reload successfully completed.\r"
                },
                new[]
                {
                    "2015-06-28 01:00:00,006  INFO   [EJB default - 6] CatalogCacheUpdateJob perform - Performing scheduled reload of catalog cache...\r",
                    "2015-06-28",
                    "CatalogCacheUpdateJob perform",
                    " - Performing scheduled reload of catalog cache...\r"
                },
                new[]
                {
                    "2015-06-28 01:01:29,226  INFO   [EJB default - 6] CatalogCacheUpdateJob perform - Scheduled catalog cache reload successfully completed.\r",
                    "2015-06-28",
                    "CatalogCacheUpdateJob perform",
                    " - Scheduled catalog cache reload successfully completed.\r"
                }
            };

            // ids are 1-based positions of the match / the group
            var expectedMatches1 = new List<RegExTractorMatchCollection>();
            for (int m = 0; m < expectedGroups.Length; m++)
            {
                var groups = new List<RegExTractorMatch>();
                for (int g = 0; g < expectedGroups[m].Length; g++)
                {
                    groups.Add(new RegExTractorMatch() { Id = g + 1, Match = expectedGroups[m][g] });
                }

                expectedMatches1.Add(new RegExTractorMatchCollection() { Id = m + 1, MatchCollection = groups });
            }

            var expectedFinding1 = new Finding()
            {
                Expression = @"(\d{4}-\d{2}-\d{2}).+(CatalogCacheUpdateJob perform)(.+)",
                ExpressionFriendlyName = "SearchTerm 1",
                FileFolder = @"D:\WORK\RegExTractor\src\RegExTractor\RegExTractorTests\bin\Debug\Testdata",
                FileName = "testdata.log",
                Match = expectedMatches1
            };

            // build finding two
            var expectedFinding2 = new Finding()
            {
                Expression = @"2015-06-27 12:03:04,721  INFO   \[EJB default - 5\] RuleChangeCommand executeChange - Setting entry 00.2285 to MODIFY",
                ExpressionFriendlyName = "Ex1",
                FileFolder = @"D:\WORK\RegExTractor\src\RegExTractor\RegExTractorTests\bin\Debug\Testdata",
                FileName = "testdata.log",
                Match = new List<RegExTractorMatchCollection>()
                {
                   new RegExTractorMatchCollection()
                   {
                       Id = 1,
                       MatchCollection = new List<RegExTractorMatch>()
                       {
                           new RegExTractorMatch()
                           {
                               Id = 1,
                               Match = "2015-06-27 12:03:04,721  INFO   [EJB default - 5] RuleChangeCommand executeChange - Setting entry 00.2285 to MODIFY"
                           }
                       }
                   }
                }
            };

            // build expected findings list
            var expected = new List<Finding>() { expectedFinding1, expectedFinding2 };

            // do the magic and crawl!
            IRegExCrawler crawler = new SimpleRegExCrawler();
            var actual = crawler.Crawl(regExSearchList, content, expectedFinding1.FileName, expectedFinding1.FileFolder);

            // deep comparison; the difference report becomes the failure message
            KellermanSoftware.CompareNetObjects.CompareLogic l = new KellermanSoftware.CompareNetObjects.CompareLogic();
            var result = l.Compare(expected, actual);
            Assert.IsTrue(result.AreEqual, result.DifferencesString);

            // check the results finding by finding
            // (finding one is expected to carry 4 matches, finding two 1 match)
            Assert.AreEqual(expected.Count, actual.Count);

            for (int f = 0; f < expected.Count; f++)
            {
                Assert.AreEqual(expected[f].Expression, actual[f].Expression);
                Assert.AreEqual(expected[f].ExpressionFriendlyName, actual[f].ExpressionFriendlyName);

                var matchCount = actual[f].Match.Count;
                Assert.AreEqual(expected[f].Match.Count, matchCount);

                // walk every match by its own index (exclusive upper bound)
                for (int i = 0; i < matchCount; i++)
                {
                    var expectedMatch = expected[f].Match[i];
                    var actualMatch = actual[f].Match[i];

                    Assert.AreEqual(expectedMatch.Id, actualMatch.Id);
                    Assert.AreEqual(expectedMatch.MatchCollection.Count, actualMatch.MatchCollection.Count);

                    // compare every group id, including the last one
                    for (int g = 0; g < expectedMatch.MatchCollection.Count; g++)
                    {
                        Assert.AreEqual(expectedMatch.MatchCollection[g].Id, actualMatch.MatchCollection[g].Id);
                    }
                }
            }
        }
        /// <summary>
        /// Parse the given content with the given search terms.
        /// </summary>
        /// <remarks>
        /// One <see cref="Finding"/> is produced per search term, in the order of
        /// <paramref name="SearchTerms"/>, even when the term has no match (its match list is
        /// then empty). After each term, <c>OnSingleFileCrawlFinished</c> is called with a
        /// progress message.
        /// </remarks>
        /// <param name="SearchTerms">The regular expressions to evaluate against <paramref name="Content"/>.</param>
        /// <param name="Content">The text to search.</param>
        /// <param name="FileName">File name copied into every finding and into the progress message.</param>
        /// <param name="FileFolder">Folder copied into every finding and into the progress message.</param>
        /// <returns>
        /// The list of findings; never null. Every match becomes a
        /// <see cref="RegExTractorMatchCollection"/> with a 1-based id whose entries are the
        /// regex groups, group 0 (the whole match) first, also with 1-based ids.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="SearchTerms"/> is null.
        /// </exception>
        public List<Finding> Crawl(List<RegExSearchTerm> SearchTerms, string Content, string FileName, string FileFolder)
        {
            // validate up front: iterating a null list would otherwise fail with a NullReferenceException
            if (SearchTerms == null)
            {
                throw new ArgumentNullException("SearchTerms");
            }

            var findingResultList = new List<Finding>();

            // iterate through search terms
            // each search term results in a finding
            // this finding will be added to finding result list
            foreach (var searchTerm in SearchTerms)
            {
                var finding = new Finding()
                {
                    Expression = searchTerm.Expression,
                    ExpressionFriendlyName = searchTerm.ExpressionFriendlyName,
                    FileName = FileName,
                    FileFolder = FileFolder,
                    Match = new List<RegExTractorMatchCollection>()
                };

                var regEx = new Regex(searchTerm.Expression);
                var regExMatchCollection = regEx.Matches(Content);

                // foreach does not expose the position of the current match,
                // so the 1-based match id is counted by hand
                int matchCount = 0;

                // iterate through match collection
                foreach (Match regExMatch in regExMatchCollection)
                {
                    matchCount++;

                    // one result collection per regex match
                    var resultMatchCollection = new RegExTractorMatchCollection()
                    {
                        Id = matchCount,
                        MatchCollection = new List<RegExTractorMatch>()
                    };

                    // count match groups (group 0 is the whole match)
                    var groupsCount = regExMatch.Groups.Count;

                    // loop through groups
                    for (int g = 0; g < groupsCount; g++)
                    {
                        // group index is 0-based, the reported id is 1-based
                        var resultMatch = new RegExTractorMatch()
                        {
                            Id = g + 1,
                            Match = regExMatch.Groups[g].Value
                        };

                        // add this match to result collection
                        resultMatchCollection.MatchCollection.Add(resultMatch);
                    }

                    // add the result match collection to finding result
                    finding.Match.Add(resultMatchCollection);
                }

                // add finding to finding list
                findingResultList.Add(finding);

                // report progress
                var eventArgs = new ReportProgressEventArgs();
                eventArgs.Message = String.Format(@"Finished search for expression {0} in file {1}\{2}", searchTerm.ExpressionFriendlyName, FileFolder, FileName);
                OnSingleFileCrawlFinished(eventArgs);
            }

            return findingResultList;
        }