/// <summary>
/// Builds a <c>CrawlerInfo</c> for each URL, wires it to run <see cref="DO"/>,
/// starts its <c>Collect</c> worker, and registers it in the lookup dictionary
/// keyed by the URL's Id.
/// </summary>
/// <param name="urls">URLs to begin crawling; each must carry a unique Id.</param>
public void Start(List<UrlInfo> urls)
{
    dict = new Dictionary<Guid, CrawlerInfo>();

    foreach (UrlInfo info in urls)
    {
        CrawlerInfo crawler = new CrawlerInfo(info);
        // The lambda captures this iteration's crawler instance (loop-local,
        // so each closure sees its own CrawlerInfo).
        crawler.Init(() => DO(crawler));
        crawler.Collect.Start();
        // NOTE(review): Add throws on a duplicate info.Id — presumably Ids are
        // unique upstream; confirm against the caller.
        dict.Add(info.Id, crawler);
    }
}
/// <summary>
/// Runs a full crawl rooted at the given crawler's URL, configuring depth and
/// include/exclude filters from the recognized site type, and pushing parsed
/// documents through a <see cref="MyPipelineStep"/> bound to <paramref name="ci"/>.
/// </summary>
/// <param name="ci">Crawl state whose <c>url.Url</c> is the crawl root.</param>
private void DO(CrawlerInfo ci)
{
    var uri = new Uri(ci.url.Url);
    var siteType = HtmlParse.RecogSite(uri);

    // NCrawler's Crawler implements IDisposable; dispose it after the crawl
    // completes so its internal resources (worker threads, queues) are
    // released instead of leaking once per crawl.
    using (var crawler = new NCrawler.Crawler(uri,
        new HtmlDocumentProcessor(),
        new MyPipelineStep(ci))
    {
        MaximumCrawlDepth = CrawlArgs.CrawlDepth(siteType),
        MaximumThreadCount = 5,
        IncludeFilter = CrawlArgs.IncludeFilter(siteType),
        ExcludeFilter = CrawlArgs.ExcludeFilter(siteType),
    })
    {
        crawler.Crawl();
    }
}
/// <summary>
/// Stores the supplied crawl state for later use by this pipeline step.
/// </summary>
/// <param name="aCrawlerInfo">Crawl state to associate with this step; must not be null.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="aCrawlerInfo"/> is null.</exception>
public MyPipelineStep(CrawlerInfo aCrawlerInfo)
{
    // Fail fast here rather than with a NullReferenceException later when
    // the step is invoked during the crawl.
    if (aCrawlerInfo == null)
    {
        throw new ArgumentNullException("aCrawlerInfo");
    }

    ci = aCrawlerInfo;
}