Exemple #1
0
        public void CanGetTheLastPageNumberIfTotalProductIsGiven()
        {
            // Arrange: the last page is flagged as not given, the total number of
            // products is flagged as given, and the page size is set to 40.
            _settingRepository
                .Setup(repo => repo.GetSettingValue<bool>("PageListScrapper.Pagination.IsLastPageGiven"))
                .Returns(false);
            _settingRepository
                .Setup(repo => repo.GetSettingValue<bool>("PageListScrapper.Pagination.IsTotalNumberOfProductsGiven"))
                .Returns(true);
            _settingRepository
                .Setup(repo => repo.GetSettingValue<int>("PageListScrapper.Pagination.ProductsPerPage"))
                .Returns(40);
            _settingRepository
                .Setup(repo => repo.GetSettingValue<string>("PageListScrapper.Pagination.TotalNumberOfProductsSelector"))
                .Returns(".products .gallery-header .summary .results .total");

            // The scrapper is built on the HTML fixture loaded by MightyAppePageListScrapperTest.
            var pageListScrapper = new PageListScrapper(
                new SettingRepository(),
                _loggingService.Object,
                _webQueryService.Object,
                MightyAppePageListScrapperTest.LoadHtmlFromText());
            pageListScrapper.PaginationSettings = new PageListPagination
            {
                PaginationSelector = ".pagination li active span"
            };

            var paginationService = new PageListPaginationService(
                _settingRepository.Object,
                _loggingService.Object,
                pageListScrapper);

            // Act
            var lastPageNumber = paginationService.GetLastPageNumber(pageListScrapper.GetLoadedHtmlNode());

            // Assert: the fixture is expected to resolve to 13 pages.
            Assert.That(lastPageNumber, Is.EqualTo(13));
        }
        /// <summary>
        /// If the page list has more than 1 page, scrapes all the other pages
        /// (2 through <paramref name="lastPage"/>) of the same category and saves
        /// each page's result through the result collection service.
        /// </summary>
        /// <param name="lastPage">
        /// Number of the last page; pages from 2 up to and including this value are scraped.
        /// </param>
        /// <param name="firstPageScrapper">
        /// Scrapper of the first page. It is cloned onto every newly created page
        /// scrapper, and its Url is the base passed to GetNextUrl.
        /// </param>
        public virtual void ScrapeOtherPages(int lastPage, IPageListScrapper firstPageScrapper)
        {
            var actions = new List<Action>();

            for (var page = 2; page <= lastPage; page++)
            {
                // A separate scrapper per page, declared inside the loop so each
                // lambda below captures its own instance.
                var scrapper = new PageListScrapper(_settingRepository, _loggingService, _webQueryService);
                firstPageScrapper.Clone(scrapper);
                scrapper.Url = GetNextUrl(page, firstPageScrapper.Url);

                actions.Add(() =>
                {
                    var data = scrapper.Scrape();
                    _resultCollectionService.SaveScrappedData(data.Result);
                });
            }

            // Invoke all the queued page scrapes, at most 8 at a time.
            try
            {
                Parallel.Invoke(new ParallelOptions {
                    MaxDegreeOfParallelism = 8
                }, actions.ToArray());
            }
            catch (AggregateException ex)
            {
                // The separator comes first and the values second. The previous call had
                // them swapped, so the logged message was always just ",".
                var exceptions = string.Join(",", ex.InnerExceptions);
                _loggingService.Logger.Error(exceptions);
            }
        }
        public void OneTimeSetup()
        {
            // File logger writing to <LoggingPath>/MyMovies/Log.txt.
            var logDirectory = ConfigurationManager.AppSettings["LoggingPath"];
            var logFile = Path.Combine(logDirectory, "MyMovies", "Log.txt");
            var fileLogger = new LoggerConfiguration()
                .WriteTo.File(logFile)
                .CreateLogger();

            var webQueryMock = new Mock<IWebQueryService>();

            // The scrapper under test gets mocked settings and web-query collaborators
            // and a real logging service wrapping the file logger.
            _settingRepository = new Mock<ISettingRepository>();
            _scrapper = new PageListScrapper(
                _settingRepository.Object,
                new LoggingService(fileLogger),
                webQueryMock.Object);
        }
        public void OneTimeSetup()
        {
            // Fixture source URL: https://www.maxshop.com/shop/tops/fashion-tops
            _htmlNode = LoadHtmlFromText();

            var webQueryMock = new Mock<IWebQueryService>();

            // File logger writing to <LoggingPath>/Maxshop/Log.txt.
            var logFile = Path.Combine(
                ConfigurationManager.AppSettings["LoggingPath"],
                "Maxshop",
                "Log.txt");
            var fileLogger = new LoggerConfiguration()
                .WriteTo.File(logFile)
                .CreateLogger();

            _scrapper = new PageListScrapper(
                new SettingRepository(),
                new LoggingService(fileLogger),
                webQueryMock.Object);
        }
Exemple #5
0
        public void OneTimeSetup()
        {
            // Fixture source URL: https://www.mightyape.co.nz/games/ps4/best-sellers
            _htmlNode = LoadHtmlFromText();

            var webQueryMock = new Mock<IWebQueryService>();

            // File logger writing to <LoggingPath>/MightyApe/Log.txt.
            var logDirectory = ConfigurationManager.AppSettings["LoggingPath"];
            var logFile = Path.Combine(logDirectory, "MightyApe", "Log.txt");
            var fileLogger = new LoggerConfiguration().WriteTo.File(logFile).CreateLogger();

            // The pre-loaded HTML node is handed to the scrapper as its fourth argument.
            _scrapper = new PageListScrapper(
                new SettingRepository(),
                new LoggingService(fileLogger),
                webQueryMock.Object,
                _htmlNode);
        }
 /// <summary>
 /// Creates the service and stores each supplied collaborator in its backing field.
 /// No validation is performed on the arguments.
 /// </summary>
 /// <param name="settingRepository">Stored in <c>_settingRepository</c>.</param>
 /// <param name="scrapper">Stored in <c>_scrapper</c>.</param>
 /// <param name="loggingService">Stored in <c>_loggingService</c>.</param>
 /// <param name="resultCollectionService">Stored in <c>_resultCollectionService</c>.</param>
 /// <param name="webQueryService">Stored in <c>_webQueryService</c>.</param>
 /// <param name="pageListPaginationService">Stored in <c>_pageListPaginationService</c>.</param>
 public PageListScrapperService(
     ISettingRepository settingRepository,
     PageListScrapper scrapper,
     ILoggingService loggingService,
     IResultCollectionService resultCollectionService,
     IWebQueryService webQueryService,
     IPageListPaginationService pageListPaginationService)
 {
     _scrapper = scrapper;
     _pageListPaginationService = pageListPaginationService;

     _settingRepository = settingRepository;
     _resultCollectionService = resultCollectionService;

     _loggingService = loggingService;
     _webQueryService = webQueryService;
 }
Exemple #7
0
        public void CanDetermineIfFirstPageFromUrl(PageListScrapper scrapper, bool isFirstPage)
        {
            // Arrange: the query-string variable carrying the page number is "page".
            _settingRepository
                .Setup(repo => repo.GetSettingValue<string>("PageListScrapper.URL.QueryStringPageVariable"))
                .Returns("page");

            var paginationService = new PageListPaginationService(
                _settingRepository.Object,
                _loggingService.Object,
                scrapper);

            // Act: no HTML node is supplied.
            // NOTE(review): presumably this makes the service rely on the scrapper's URL - confirm.
            var actual = paginationService.IsFirstPage(null);

            // Assert
            Assert.That(actual, Is.EqualTo(isFirstPage));
        }
Exemple #8
0
        public void OneTimeSetup()
        {
            // Mocked collaborators shared by the tests.
            _settingRepository = new Mock<ISettingRepository>();
            _loggingService = new Mock<ILoggingService>();
            _webQueryService = new Mock<IWebQueryService>();

            // File logger targeting the configured "LoggingPath" value as-is.
            var configuredLogPath = ConfigurationManager.AppSettings["LoggingPath"];
            _logger = new LoggerConfiguration()
                .WriteTo.File(configuredLogPath)
                .CreateLogger();

            _scrapper = new PageListScrapper(
                _settingRepository.Object,
                _loggingService.Object,
                _webQueryService.Object);

            // The logging mock hands out the file logger; this is configured after the
            // scrapper has been constructed.
            _loggingService
                .Setup(service => service.Logger)
                .Returns(_logger);
        }
Exemple #9
0
        /// <summary>
        /// Builds the scrapper service matching the configured scrapper type.
        /// </summary>
        /// <returns>
        /// A <see cref="PageListScrapperService"/> when the configured type equals
        /// <c>ScrapperType.PageList</c> (culture-invariant, case-insensitive comparison).
        /// </returns>
        /// <exception cref="NotImplementedException">
        /// Thrown for any other configured scrapper type.
        /// </exception>
        public virtual IScrapperService GetScrapper()
        {
            var isPageList = string.Equals(_scrapper, ScrapperType.PageList, StringComparison.InvariantCultureIgnoreCase);

            // Only the page-list scrapper is wired up; other scrapper kinds
            // (e.g. page-detail, smart) are not implemented yet.
            if (!isPageList)
            {
                throw new NotImplementedException();
            }

            var pageListScrapper = new PageListScrapper(_settingRepository, _loggingService, _webQueryService);
            var paginationService = new PageListPaginationService(_settingRepository, _loggingService, pageListScrapper);

            return new PageListScrapperService(
                _settingRepository,
                pageListScrapper,
                _loggingService,
                _resultCollectionService,
                _webQueryService,
                paginationService);
        }
Exemple #10
0
        public void CanDetermineIfFirstPageByLookingAtThePaginationDOM_ShouldNOTPass_InvalidDOMPage()
        {
            // Arrange: scrapper built on the HTML fixture loaded by MightyAppePageListScrapperTest,
            // using the pagination selector ".pagination li active span".
            var pageListScrapper = new PageListScrapper(
                new SettingRepository(),
                _loggingService.Object,
                _webQueryService.Object,
                MightyAppePageListScrapperTest.LoadHtmlFromText());
            pageListScrapper.PaginationSettings = new PageListPagination
            {
                PaginationSelector = ".pagination li active span"
            };

            var paginationService = new PageListPaginationService(
                _settingRepository.Object,
                _loggingService.Object,
                pageListScrapper);

            // Act
            var isFirstPage = paginationService.IsFirstPage(pageListScrapper.GetLoadedHtmlNode());

            // Assert: the page must not be reported as the first page.
            Assert.That(isFirstPage, Is.False);
        }
Exemple #11
0
        public void CanClonePageListScrapper()
        {
            // Arrange: configure the source scrapper with an item path, pagination settings and two items.
            var nameItem = new ScrapeItemSetting { Key = "Name", Selector = ".productname" };
            var urlItem = new ScrapeItemSetting { Key = "Url", Selector = ".url" };

            _scrapper.ItemXPath = "div.productList";
            _scrapper.PaginationSettings = new PageListPagination
            {
                ShowLastPagination = true,
                PaginationSelector = ".pagination"
            };
            _scrapper.Items = new List<ScrapeItemSetting> { nameItem, urlItem };

            var clone = new PageListScrapper(
                _settingRepository.Object,
                _loggingService.Object,
                _webQueryService.Object);

            // Act: clone onto the new scrapper, then mutate the clone only.
            _scrapper.Clone(clone);
            clone.ItemXPath = "div .list";
            clone.PaginationSettings.ShowLastPagination = false;
            clone.Items[0].Selector = "#productName";

            // Assert: the clone carries both items and its own mutations.
            Assert.That(clone.Items.Count, Is.EqualTo(2));
            Assert.That(clone.ItemXPath, Is.EqualTo("div .list"));
            Assert.That(clone.Items[0].Selector, Is.EqualTo("#productName"));
            Assert.That(clone.PaginationSettings.ShowLastPagination, Is.EqualTo(false));

            // Assert: the source keeps its own ItemXPath, but the item and pagination
            // changes made through the clone are expected to be visible on the source too.
            Assert.That(_scrapper.ItemXPath, Is.EqualTo("div.productList"));
            Assert.That(_scrapper.Items[0].Selector, Is.EqualTo("#productName"));
            Assert.That(_scrapper.PaginationSettings.ShowLastPagination, Is.EqualTo(false));
        }
Exemple #12
0
 /// <summary>
 /// Builds a <see cref="PageListScrapperService"/> around the given scrapper using the
 /// fixture's mocked collaborators. No pagination service is supplied (null is passed).
 /// </summary>
 /// <param name="scrapper">Scrapper handed to the service.</param>
 /// <returns>The newly constructed service.</returns>
 private PageListScrapperService GetPageListScrapperService(PageListScrapper scrapper)
 {
     var settings = _settingRepository.Object;
     var logging = _loggingService.Object;
     var resultCollection = _resultCollectionService.Object;
     var webQuery = _webQueryService.Object;

     return new PageListScrapperService(settings, scrapper, logging, resultCollection, webQuery, null);
 }