/// <summary>
/// Configures an NCrawler crawl rooted at <c>ci.url.Url</c> and runs it.
/// </summary>
/// <param name="ci">
/// Crawl descriptor. Its URL is the crawl's start address, and the same instance is
/// handed to <c>MyPipelineStep</c>, which processes every fetched page.
/// </param>
private void DO(CrawlerInfo ci)
{
    var uri = new Uri(ci.url.Url);

    // Crawl depth and include/exclude filters are all looked up per recognised site type.
    var siteType = HtmlParse.RecogSite(uri);

    // The crawler is disposable; the using block releases it even when Crawl() throws.
    using (var c = new NCrawler.Crawler(uri, new HtmlDocumentProcessor(), new MyPipelineStep(ci))
    {
        MaximumCrawlDepth = CrawlArgs.CrawlDepth(siteType),
        MaximumThreadCount = 5,
        IncludeFilter = CrawlArgs.IncludeFilter(siteType),
        ExcludeFilter = CrawlArgs.ExcludeFilter(siteType),
    })
    {
        c.Crawl();
    }
}
/// <summary>
/// Pipeline step for one fetched page: loads the HTML, recognises the site type from the
/// response URI, parses the page into records, stores each record through
/// <c>DAL.Data.Add</c> and increments <c>ci.Count</c> once per stored record.
/// </summary>
/// <param name="crawler">The crawler invoking this step; not used here.</param>
/// <param name="propertyBag">Property bag describing the fetched page.</param>
public void Process(NCrawler.Crawler crawler, PropertyBag propertyBag)
{
    // The result of this call was never used, so it is no longer bound to a local. The call
    // itself stays in place so that anything it does or throws ahead of the try block is
    // unchanged. NOTE(review): if it is confirmed to be side-effect free, delete it.
    propertyBag.GetResponse();

    try
    {
        HtmlDocument htmlDoc = HtmlParse.LoadFromHtml(propertyBag);
        var siteType = HtmlParse.RecogSite(propertyBag.ResponseUri);
        var records = Parse(htmlDoc, siteType);
        if (records == null)
        {
            // Parse produced nothing for this page.
            return;
        }

        foreach (var record in records)
        {
            DAL.Data.Add(record);
            ++ci.Count;
        }
    }
    catch (NullReferenceException ex)
    {
        // Still best-effort: a page that trips a null dereference is skipped instead of
        // aborting the crawl, but the skip is now traced rather than silently dropped.
        // Records stored before the failure remain stored and counted.
        System.Diagnostics.Trace.TraceWarning(
            "Skipped page {0} after NullReferenceException: {1}",
            propertyBag.ResponseUri,
            ex);
    }
}