/// <summary>
/// Queues crawl steps for review pages 2 through N, where N is the smaller of
/// <paramref name="maxPage"/> and the page number extracted from
/// <paramref name="lastPageUrl"/> by <c>pageNumberFromUrl</c>.
/// </summary>
/// <param name="crawler">Crawler that receives one <c>AddStep</c> call per generated page URL.</param>
/// <param name="lastPageUrl">
/// URL of the last review page. It is used as the template: its
/// <c>&amp;pageNumber=N&amp;</c> segment is rewritten for each page.
/// </param>
/// <param name="maxPage">Upper bound on the page number to queue.</param>
/// <remarks>
/// Nothing is queued when the regex yields no capture group or when the resulting
/// upper bound is below 2. The replacement only matches a <c>pageNumber</c> parameter
/// that is both preceded and followed by <c>&amp;</c>.
/// </remarks>
internal static void AddReviewPagesToCrawl_Old(NCrawler.Crawler crawler, string lastPageUrl, int maxPage)
{
    int pageNum = 0;
    Match m = pageNumberFromUrl.Match(lastPageUrl);
    if (m.Groups.Count > 1)
    {
        // A failed parse leaves pageNum at 0, which clamps maxPage down so the loop is skipped.
        int.TryParse(m.Groups[1].Value, out pageNum);
        if (pageNum < maxPage)
        {
            // If there are fewer than maxPage pages, only add up to the last page.
            maxPage = pageNum;
        }
    }
    else
    {
        maxPage = 0;
    }

    // Example URL shape:
    // http://www.amazon.com/The-Screwtape-Letters-Proposes-Toast/product-reviews/0060652896/ref=cm_cr_pr_top_link_18?ie=UTF8&pageNumber=18&showViewpoints=0&sortBy=bySubmissionDateAscending
    // The segment to replace does not change between iterations, so build it once.
    string lastPageSegment = "&pageNumber=" + pageNum + "&";

    // Only crawl up to maxPage pages; the loop starts at page 2.
    for (int i = 2; i <= maxPage; i++)
    {
        string pageUrl = lastPageUrl.Replace(lastPageSegment, "&pageNumber=" + i + "&");
        // NOTE(review): the second AddStep argument is presumably the crawl depth; confirm against NCrawler.
        crawler.AddStep(new Uri(pageUrl), 0);
    }
}
/// <summary>
/// Queues one crawl step for every review page from 2 up to and including
/// <paramref name="lastPageNum"/>, deriving each page URL from <paramref name="baseUrl"/>.
/// </summary>
/// <param name="crawler">Crawler that receives one <c>AddStep</c> call per generated URL.</param>
/// <param name="baseUrl">
/// Template URL whose <c>&amp;pageNumber=1</c> and <c>ref=cm_cr_pr_btm_link_1</c>
/// substrings are rewritten with the target page number.
/// NOTE(review): presumably always the page-1 URL; the plain substring replace would
/// also match e.g. <c>&amp;pageNumber=10</c>. Confirm against callers.
/// </param>
/// <param name="lastPageNum">Highest page number to queue; values below 2 queue nothing.</param>
internal static void AddReviewPagesToCrawl(NCrawler.Crawler crawler, string baseUrl, int lastPageNum)
{
    // Example URL shape:
    // http://www.amazon.com/We-Yevgeny-Zamyatin/product-reviews/0140185852/ref=cm_cr_pr_btm_link_4?pageSize=50&pageNumber=4&sortBy=recent
    int page = 2;
    while (page <= lastPageNum)
    {
        // Rewrite the query-string page number first, then the matching "ref" path token.
        string pageUrl = baseUrl
            .Replace("&pageNumber=1", "&pageNumber=" + page)
            .Replace("ref=cm_cr_pr_btm_link_1", "ref=cm_cr_pr_btm_link_" + page);

        crawler.AddStep(new Uri(pageUrl), 0);
        page++;
    }
}