public static void Run(SavePageForm parentForm, string url)
{
    Form = parentForm;
    Url = url;

    c = new Crawler(new Uri(url),
        new HtmlDocumentProcessor(), // Process html
        new SaveFileStep());

    c.MaximumThreadCount = 1;
    c.MaximumCrawlDepth = 1;
    c.ExcludeFilter = CrawlUtil.ExtensionsToSkip;
    c.AdhereToRobotRules = false;
    c.CrawlFinished += new EventHandler<NCrawler.Events.CrawlFinishedEventArgs>(c_CrawlFinished);

    string ua = CrawlUtil.GetRandomUnblockedUserAgent(UserAgentTracker);

    // If there are no unblocked user agents left, then reset the tracker and retry
    if (ua == null)
    {
        UserAgentTracker = CrawlUtil.InitUserAgentTracker();
        ua = CrawlUtil.GetRandomUnblockedUserAgent(UserAgentTracker);
    }

    c.UserAgent = ua;

    // Begin crawl
    c.Crawl();
}
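// The c_CrawlFinished handler wired up above is not part of this listing. Below is a
// minimal sketch, assuming it only needs to notify the parent SavePageForm that the
// crawl has completed; the real handler may inspect the event args or do more work.
// The form update shown here is purely illustrative.
static void c_CrawlFinished(object sender, NCrawler.Events.CrawlFinishedEventArgs e)
{
    // Marshal back to the UI thread before touching the form (Windows Forms requirement).
    if (Form != null && Form.IsHandleCreated)
    {
        Form.Invoke((Action)(() => Form.Text = "Crawl finished: " + Url));
    }
}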
public static void Run(MainForm parentForm, Book book, bool getRatingStats, bool getReviews)
{
    form = parentForm;
    IsolatedStorageModule.Setup(false);

    GetRatingStats = getRatingStats;
    GetReviews = getReviews;

    currentBook = book;
    baseUri = book.reviewPageURL;

    /*
     * 140185852 We
     * http://www.amazon.com/We-Yevgeny-Zamyatin/product-reviews/0140185852
     * http://www.amazon.com/We-Yevgeny-Zamyatin/product-reviews/0140185852/ref=cm_cr_pr_btm_link_4?pageSize=50&pageNumber=4&sortBy=recent
     *
     * 618260307 The Hobbit http://www.amazon.com/The-Hobbit-J-R-R-Tolkien/product-reviews/0618260307
     */
    baseUri += "/ref=cm_cr_pr_btm_link_1?pageSize=50&pageNumber=1";

    if (!currentBook.reviewPageURL.Contains("/ref=cm_cr_pr_btm_link"))
    {
        currentBook.reviewPageURL = baseUri; // hack to make isFirstPage() work [2016-02-04]
    }

    c = new Crawler(new Uri(baseUri),
        new HtmlDocumentProcessor(), // Process html
        new ReviewPageProcessStep(),
        new SaveFileStep()); // Custom step to visualize crawl

    c.MaximumThreadCount = 1;
    c.MaximumCrawlDepth = 1;
    c.ExcludeFilter = CrawlUtil.ExtensionsToSkip;
    c.AdhereToRobotRules = false;
    c.BeforeDownload += new EventHandler<NCrawler.Events.BeforeDownloadEventArgs>(c_BeforeDownload);

    string ua = CrawlUtil.GetRandomUnblockedUserAgent(UserAgentTracker);

    // If there are no unblocked user agents left, then reset the tracker and retry
    if (ua == null)
    {
        UserAgentTracker = CrawlUtil.InitUserAgentTracker();
        ua = CrawlUtil.GetRandomUnblockedUserAgent(UserAgentTracker);
    }

    c.UserAgent = ua;

    // Begin crawl
    c.Crawl();
}
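// The c_BeforeDownload handler registered above is likewise not shown in this listing.
// A minimal sketch follows, assuming it is only used to space out successive
// review-page requests; the members of BeforeDownloadEventArgs are deliberately not
// accessed here because the real handler's logic is unknown.
static void c_BeforeDownload(object sender, NCrawler.Events.BeforeDownloadEventArgs e)
{
    // Hypothetical politeness delay between page downloads (duration is illustrative).
    System.Threading.Thread.Sleep(1000);
}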
public static void Run(MainForm parentForm, Book book, bool getDetailsAndAuthor, bool getRanks)
{
    form = parentForm;
    IsolatedStorageModule.Setup(false);

    GetDetailsAndAuthor = getDetailsAndAuthor;
    GetRanks = getRanks;

    currentBook = book;
    baseUri = book.detailPageURL;

    Uri u = new Uri(Uri.EscapeUriString(baseUri));

    c = new Crawler(u,
        new HtmlDocumentProcessor(), // Process html
        new DetailPageDumperStep()); // Custom step to visualize crawl

    c.MaximumThreadCount = 1;
    c.MaximumCrawlDepth = 1;
    c.ExcludeFilter = CrawlUtil.ExtensionsToSkip;
    c.AdhereToRobotRules = false;

    string ua = CrawlUtil.GetRandomUnblockedUserAgent(UserAgentTracker);

    // If there are no unblocked user agents left, then reset the tracker and retry
    if (ua == null)
    {
        UserAgentTracker = CrawlUtil.InitUserAgentTracker();
        ua = CrawlUtil.GetRandomUnblockedUserAgent(UserAgentTracker);
    }

    c.UserAgent = ua;

    // Begin crawl
    c.Crawl();
}
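// All three Run overloads share the same user-agent rotation pattern. CrawlUtil's
// implementation is not included in this listing; the sketch below shows one way the
// tracker could work, assuming it is a Dictionary<string, bool> mapping a user-agent
// string to a "blocked" flag. The class name, agent strings, and field names are
// illustrative only, not the project's actual CrawlUtil.
// Requires: using System; using System.Collections.Generic; using System.Linq;
public static class CrawlUtilSketch
{
    static readonly Random rng = new Random();

    // Hypothetical seed list; the real helper presumably carries a much larger one.
    static readonly string[] KnownUserAgents =
    {
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11) AppleWebKit/601.1",
    };

    public static Dictionary<string, bool> InitUserAgentTracker()
    {
        // false = not yet blocked by the target site
        return KnownUserAgents.ToDictionary(ua => ua, ua => false);
    }

    public static string GetRandomUnblockedUserAgent(Dictionary<string, bool> tracker)
    {
        var unblocked = tracker.Where(kv => !kv.Value).Select(kv => kv.Key).ToList();
        if (unblocked.Count == 0)
        {
            return null; // caller resets the tracker and retries, as in the Run methods above
        }
        return unblocked[rng.Next(unblocked.Count)];
    }
}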