示例#1
0
        /// <summary>
        /// Initializes a page from an already-obtained reviews node and prepares
        /// an HTTP client for this page instance.
        /// </summary>
        /// <param name="reviewsNode">HTML node stored as this page's reviews node.</param>
        /// <param name="parentPage">Parent page stored alongside this page.</param>
        /// <param name="pageNumber">Number recorded for this page.</param>
        public Page(HtmlNode reviewsNode, ParentPage parentPage, int pageNumber)
        {
            // Let the handler transparently decompress gzip- and deflate-encoded responses.
            var decompressingHandler = new HttpClientHandler
            {
                AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
            };

            // NOTE(review): a new HttpClient is created for every Page instance;
            // consider sharing a single client if many pages are constructed.
            _httpClient = new HttpClient(decompressingHandler);

            _pageNumber  = pageNumber;
            _parentPage  = parentPage;
            _reviewsNode = reviewsNode;
        }
示例#2
0
        /// <summary>
        /// Walks the review pages starting from <paramref name="startUrl"/>, collects the
        /// reviews found on each page, writes them to a CSV file and then waits for a
        /// line of console input before returning.
        /// </summary>
        /// <param name="startUrl">URL used to construct the <c>ParentPage</c>.</param>
        /// <param name="SavePath">Destination passed to <c>ToCSV</c>.</param>
        /// <param name="maxPages">
        /// Maximum number of pages to scrape. <c>null</c>, zero or a negative value
        /// means no limit.
        /// </param>
        static async Task RunAsync(string startUrl, string SavePath, int? maxPages)
        {
            Console.WriteLine("Starting.");

            var parentPage    = new ParentPage(startUrl);
            var firstPageNode = await parentPage.GetFirstPage();

            var page      = new Page(firstPageNode, parentPage, 1);
            var reviews   = new List<Review>();
            var pageCount = 0; // number of pages actually scraped so far

            while (page != null)
            {
                // get reviews
                var pageReviews = await page.GetReviewsAsync();

                Console.WriteLine($"Found {pageReviews.Count} reviews on page {page.PageNumber}.");

                reviews.AddRange(pageReviews);
                pageCount++;

                // Check the limit only after the page has been scraped, so that
                // maxPages = N yields exactly N pages and the next page is not
                // fetched just to be thrown away. A null maxPages makes the lifted
                // comparison false, i.e. no limit.
                if (maxPages > 0 && pageCount >= maxPages)
                {
                    break;
                }

                // get next page
                page = await page.GetNextPageAsync();
            }

            Console.WriteLine($"Found {reviews.Count} reviews from {pageCount} pages.");

            //save to CSV
            reviews.ToCSV(SavePath);
            Console.WriteLine("Complete");
            Console.ReadLine();
        }