Example #1
0
        /// <summary>
        /// Drives a crawl to completion: every page crawl yielded by
        /// <paramref name="crawler"/> is stored in each configured database and
        /// scored, after which the candidate tracker and the crawler's frontier
        /// are written to disk.
        /// </summary>
        /// <param name="crawler">The worker whose <c>Start()</c> sequence is consumed.</param>
        static void Run(CrawlWorker crawler)
        {
            var tracker = new CandidateTracker();

            // Scorers applied to each crawl; the search terms are the
            // space-separated tokens of the comparison text file.
            var scorers = new List<IStringComparisonScorer>
            {
                new JaccardScorer(),
                new SearchTermsScorer(File.ReadAllText(@".\comparisonText.txt").Split(' '))
            };

            // Threshold rule configured with 0.4 and 0.99; it hands matching
            // results to the tracker's HandleCandidate callback.
            var thresholdRules = new List<IScoreThresholdRule>
            {
                new MinMaxScoreThresholdRule(0.4, 0.99, tracker.HandleCandidate)
            };

            var scoring = new ScoreManager(scorers, thresholdRules);
            string frontierPath = $"Frontier_{CrawlSettings.CrawlName}";

            foreach (PageCrawl pageCrawl in crawler.Start())
            {
                databases.ForEach(database => database.InsertPageCrawl(pageCrawl));

                // Content of the crawl this page was linked from; null when any
                // link in the LinkedFrom -> Crawl chain is missing.
                var linkingContent = pageCrawl.Page.LinkedFrom?.Crawl?.Content;
                scoring.Score(pageCrawl, linkingContent);
                tracker.WriteToDisk();

                // The frontier file is rewritten after every processed page.
                File.WriteAllText(frontierPath, crawler.Frontiers);
            }

            Console.WriteLine("Ran out of links!");
        }
Example #2
0
        /// <summary>
        /// Program entry point. When <c>HandleArgs</c> accepts the arguments, a
        /// crawl worker is created from the seeds and run; any exception is
        /// passed to <c>CrashManager.Handle</c>. The process then waits for a key
        /// press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments, forwarded to <c>HandleArgs</c>.</param>
        static void Main(string[] args)
        {
            bool argsAccepted = HandleArgs(args);

            if (argsAccepted)
            {
                // Declared outside the try so the crash handler can receive it;
                // it is still null if construction itself threw.
                CrawlWorker worker = null;

                try
                {
                    worker = new CrawlWorker(seeds);
                    Run(worker);
                }
                catch (Exception failure)
                {
                    object[] crashContext = new object[] { worker };
                    CrashManager.Handle(failure, crashContext);
                }

                // Wait for a key press before the process exits.
                Console.ReadKey();
            }
        }