/// <summary>
/// Drives a crawl to completion: wires up the scoring pipeline, then handles
/// every <see cref="PageCrawl"/> yielded by <paramref name="crawler"/>.
/// </summary>
/// <remarks>
/// For each crawl result the method, in order: inserts it into every entry of
/// <c>databases</c>, scores it against the content of the page that linked to
/// it, asks the candidate tracker to write itself to disk, and rewrites the
/// frontier file with the crawler's current <c>Frontiers</c> value.
/// </remarks>
/// <param name="crawler">Crawler whose <c>Start()</c> sequence is consumed.</param>
static void Run(CrawlWorker crawler)
{
    var tracker = new CandidateTracker();

    // Scorers applied to each crawl. The search terms are the space-separated
    // tokens of the comparison text file, resolved relative to the working directory.
    // Alternative previously tried here: FixedComparisonJaccardScorer over the same file's text.
    var scorers = new List<IStringComparisonScorer>
    {
        new JaccardScorer(),
        new SearchTermsScorer(File.ReadAllText(@".\comparisonText.txt").Split(' '))
    };

    // The tracker's HandleCandidate is handed to the threshold rule as its callback.
    // NOTE(review): 0.4 / 0.99 are presumably the min / max score bounds - confirm
    // against the MinMaxScoreThresholdRule constructor.
    var thresholdRules = new List<IScoreThresholdRule>
    {
        new MinMaxScoreThresholdRule(0.4, 0.99, tracker.HandleCandidate)
    };

    var scoring = new ScoreManager(scorers, thresholdRules);
    string frontierPath = $"Frontier_{CrawlSettings.CrawlName}";

    foreach (PageCrawl pageCrawl in crawler.Start())
    {
        databases.ForEach(database => database.InsertPageCrawl(pageCrawl));

        // Null when the page has no recorded linking page, or that page has no crawl.
        var linkingPageContent = pageCrawl.Page.LinkedFrom?.Crawl?.Content;
        scoring.Score(pageCrawl, linkingPageContent);

        // Persist progress after every page: candidates first, then the frontier
        // (File.WriteAllText replaces the file's previous contents).
        tracker.WriteToDisk();
        File.WriteAllText(frontierPath, crawler.Frontiers);
    }

    Console.WriteLine("Ran out of links!");
}
/// <summary>
/// Program entry point: processes the command-line arguments, runs the crawl,
/// and waits for a key press before exiting.
/// </summary>
/// <remarks>
/// When <c>HandleArgs</c> returns <c>false</c> the method returns immediately,
/// without waiting for a key press. Any exception thrown while constructing the
/// crawler or running it is passed to <c>CrashManager.Handle</c>.
/// </remarks>
/// <param name="args">Command-line arguments, forwarded to <c>HandleArgs</c>.</param>
static void Main(string[] args)
{
    bool argsAccepted = HandleArgs(args);
    if (!argsAccepted)
    {
        return;
    }

    // Declared outside the try block so the crash handler can receive it;
    // it is still null if the CrawlWorker constructor itself throws.
    CrawlWorker crawler = null;
    try
    {
        crawler = new CrawlWorker(seeds);
        Run(crawler);
    }
    catch (Exception ex)
    {
        object[] crashContext = { crawler };
        CrashManager.Handle(ex, crashContext);
    }

    // Keep the console window open until the user presses a key.
    Console.ReadKey();
}