Exemple #1
0
        /// <summary>
        /// Checks that <c>PageListPaginationService.GetLastPageNumber</c> returns 13 when the
        /// settings say the last page is not given, the total number of products is given,
        /// and 40 products are shown per page.
        /// </summary>
        /// <remarks>
        /// The expected value depends on the HTML fixture returned by
        /// <c>MightyAppePageListScrapperTest.LoadHtmlFromText()</c>, which is not visible here;
        /// presumably its total-products element yields ceil(total / 40) == 13 — TODO confirm.
        /// NOTE(review): the scrapper is built with a real <c>SettingRepository</c> while the
        /// pagination service receives the mocked one — confirm this is intentional.
        /// </remarks>
        public void CanGetTheLastPageNumberIfTotalProductIsGiven()
        {
            //Arrange
            _settingRepository
                .Setup(repo => repo.GetSettingValue<bool>("PageListScrapper.Pagination.IsLastPageGiven"))
                .Returns(false);
            _settingRepository
                .Setup(repo => repo.GetSettingValue<bool>("PageListScrapper.Pagination.IsTotalNumberOfProductsGiven"))
                .Returns(true);
            _settingRepository
                .Setup(repo => repo.GetSettingValue<int>("PageListScrapper.Pagination.ProductsPerPage"))
                .Returns(40);
            _settingRepository
                .Setup(repo => repo.GetSettingValue<string>("PageListScrapper.Pagination.TotalNumberOfProductsSelector"))
                .Returns(".products .gallery-header .summary .results .total");

            var pageListScrapper = new PageListScrapper(
                new SettingRepository(),
                _loggingService.Object,
                _webQueryService.Object,
                MightyAppePageListScrapperTest.LoadHtmlFromText())
            {
                PaginationSettings = new PageListPagination
                {
                    PaginationSelector = ".pagination li active span"
                }
            };

            var paginationService = new PageListPaginationService(
                _settingRepository.Object,
                _loggingService.Object,
                pageListScrapper);

            //Act
            var computedLastPage = paginationService.GetLastPageNumber(pageListScrapper.GetLoadedHtmlNode());

            //Assert
            Assert.That(computedLastPage, Is.EqualTo(13));
        }
        /// <summary>
        /// Scrapes the initial page through the scrapper, saves the scraped data, and — when the
        /// loaded page is the first page and the last page number is greater than 1 — passes the
        /// last page number and the scrapper to <c>ScrapeOtherPages</c>.
        /// </summary>
        /// <returns>A task representing the asynchronous scrape of the initial page.</returns>
        public virtual async Task Scrape()
        {
            // Load the initial page and persist what was scraped before inspecting pagination.
            var initialPageData = await _scrapper.Scrape();
            _resultCollectionService.SaveScrappedData(initialPageData);

            var loadedNode = _scrapper.GetLoadedHtmlNode();
            if (!_pageListPaginationService.IsFirstPage(loadedNode))
            {
                // Not on page 1: only the current page is scraped.
                return;
            }

            var lastPageNumber = _pageListPaginationService.GetLastPageNumber(loadedNode);
            if (lastPageNumber > 1)
            {
                // NOTE(review): the result of ScrapeOtherPages is not awaited here; if it
                // returns a Task, confirm that fire-and-forget is intended.
                ScrapeOtherPages(lastPageNumber, _scrapper);
            }

            // Design notes (carried over from the original author; describe intent, not
            // necessarily what is implemented above):
            //
            // Example list URL: https://www.mightyape.co.nz/games/ps4/best-sellers?page=1
            // Open question: scrape the whole list? See setting
            // PageListScrapper.Strategy.AutomaticallyScrapeAllInCategory.
            //
            // This flow only covers pages with normal pagination. Pages that use a "Load more"
            // button (e.g. ComputerLounge or Carousell) should use SmartScrapper instead.
            //
            // The goal is to find the last page number. If it is not given:
            //   - Look for a pagination summary, i.e. how many items are shown per page and how
            //     many products the category has in total.
            //   - Total products example: "1 to 40 of 601" -> total is 601.
            //   - Products per page example: "1 to 40 of 601" -> 40 products in the list.
            // With both values available the computation is straightforward. Without them
            // (e.g. the Maxshop site) it is more complicated; check for a "View all" button —
            // most sites have none, Maxshop does.
            //
            // If the last page is given, none of the above is needed. Sites that expose it:
            // https://www.glassons.com/clothing/tops, https://www.numberoneshoes.co.nz/womens
            //
            // On page 1: determine the last page; when it is not given it can be computed as
            // Math.Ceil(totalproducts/productperpage). On any other page, just scrape the
            // current page.
            //
            // Once the pages are known, decide how to scrape the remaining ones: in parallel or
            // linearly (Scrapping.PageListScrapper.IsParallelScrapping), with a maximum of 4
            // threads running.
        }
Exemple #3
0
        /// <summary>
        /// Checks that <c>PageListPaginationService.IsFirstPage</c> returns <c>false</c> for the
        /// HTML loaded from <c>MightyAppePageListScrapperTest.LoadHtmlFromText()</c> when the
        /// pagination selector is ".pagination li active span".
        /// </summary>
        /// <remarks>
        /// NOTE(review): per the test name this is the "invalid DOM" case; presumably the
        /// selector does not match anything in the fixture (it uses <c>active</c> as an element
        /// name) — the fixture is not visible here, so confirm.
        /// </remarks>
        public void CanDetermineIfFirstPageByLookingAtThePaginationDOM_ShouldNOTPass_InvalidDOMPage()
        {
            //Arrange
            var pageListScrapper = new PageListScrapper(
                new SettingRepository(),
                _loggingService.Object,
                _webQueryService.Object,
                MightyAppePageListScrapperTest.LoadHtmlFromText())
            {
                PaginationSettings = new PageListPagination
                {
                    PaginationSelector = ".pagination li active span"
                }
            };

            var paginationService = new PageListPaginationService(
                _settingRepository.Object,
                _loggingService.Object,
                pageListScrapper);

            //Act
            var loadedNode = pageListScrapper.GetLoadedHtmlNode();
            var isFirstPage = paginationService.IsFirstPage(loadedNode);

            //Assert
            Assert.That(isFirstPage, Is.EqualTo(false));
        }