/// <summary>
/// Verifies that the last page number is resolved when the site does not expose the last page
/// directly but does expose the total number of products.
/// </summary>
public void CanGetTheLastPageNumberIfTotalProductIsGiven()
{
    // Arrange: the last page is NOT given, the total product count IS given, 40 products per page.
    _settingRepository
        .Setup(x => x.GetSettingValue<bool>("PageListScrapper.Pagination.IsLastPageGiven"))
        .Returns(false);
    _settingRepository
        .Setup(x => x.GetSettingValue<bool>("PageListScrapper.Pagination.IsTotalNumberOfProductsGiven"))
        .Returns(true);
    _settingRepository
        .Setup(x => x.GetSettingValue<int>("PageListScrapper.Pagination.ProductsPerPage"))
        .Returns(40);
    _settingRepository
        .Setup(x => x.GetSettingValue<string>("PageListScrapper.Pagination.TotalNumberOfProductsSelector"))
        .Returns(".products .gallery-header .summary .results .total");

    // The scrapper is built on the MightyApe HTML fixture with a real SettingRepository,
    // while the pagination service below reads from the mocked repository.
    var pageScrapper = new PageListScrapper(
        new SettingRepository(),
        _loggingService.Object,
        _webQueryService.Object,
        MightyAppePageListScrapperTest.LoadHtmlFromText());
    pageScrapper.PaginationSettings = new PageListPagination
    {
        PaginationSelector = ".pagination li active span"
    };

    var paginationService = new PageListPaginationService(
        _settingRepository.Object,
        _loggingService.Object,
        pageScrapper);
    var rootNode = pageScrapper.GetLoadedHtmlNode();

    // Act
    var actualLastPage = paginationService.GetLastPageNumber(rootNode);

    // Assert
    // NOTE(review): 13 presumably is the fixture's product total divided by 40, rounded up - confirm against the fixture.
    Assert.That(actualLastPage, Is.EqualTo(13));
}
/// <summary>
/// If the page list has more than 1 page, we scrape all the other pages (2..lastPage) on the same category too.
/// </summary>
/// <param name="lastPage">Number of the last page to scrape; values below 2 schedule no work.</param>
/// <param name="firstPageScrapper">
/// Scrapper of the first page. It is cloned into every per-page scrapper and its Url is the
/// basis for building each following page's url.
/// </param>
public virtual void ScrapeOtherPages(int lastPage, IPageListScrapper firstPageScrapper)
{
    var actions = new List<Action>();
    for (var i = 2; i <= lastPage; i++)
    {
        // A fresh scrapper per page; declared inside the loop so each lambda captures its own instance.
        var scrapper = new PageListScrapper(_settingRepository, _loggingService, _webQueryService);
        firstPageScrapper.Clone(scrapper);
        scrapper.Url = GetNextUrl(i, firstPageScrapper.Url);
        actions.Add(() =>
        {
            var data = scrapper.Scrape();
            // NOTE(review): data.Result presumably blocks on a Task - confirm.
            _resultCollectionService.SaveScrappedData(data.Result);
        });
    }

    // Invoke all the tasks, at most 8 at a time.
    try
    {
        Parallel.Invoke(new ParallelOptions { MaxDegreeOfParallelism = 8 }, actions.ToArray());
    }
    catch (AggregateException ex)
    {
        // Separator goes first, values second. The previous call had the arguments swapped
        // and therefore always logged just ",". Each inner exception is rendered via ToString().
        var exceptions = string.Join(",", ex.InnerExceptions);
        _loggingService.Logger.Error(exceptions);
    }
}
/// <summary>
/// One-time fixture setup: builds a file logger under the configured "LoggingPath" (MyMovies/Log.txt)
/// and creates the scrapper under test with a mocked setting repository and web query service.
/// </summary>
public void OneTimeSetup()
{
    var logDirectory = ConfigurationManager.AppSettings["LoggingPath"];
    var logFile = Path.Combine(logDirectory, "MyMovies", "Log.txt");
    var fileLogger = new LoggerConfiguration()
        .WriteTo.File(logFile)
        .CreateLogger();

    var webQueryMock = new Mock<IWebQueryService>();
    _settingRepository = new Mock<ISettingRepository>();

    _scrapper = new PageListScrapper(
        _settingRepository.Object,
        new LoggingService(fileLogger),
        webQueryMock.Object);
}
/// <summary>
/// One-time fixture setup for the Maxshop page list tests: loads the HTML fixture, builds a file
/// logger under the configured "LoggingPath" (Maxshop/Log.txt) and creates the scrapper under test.
/// </summary>
public void OneTimeSetup()
{
    // URL: https://www.maxshop.com/shop/tops/fashion-tops
    _htmlNode = LoadHtmlFromText();

    var webQueryMock = new Mock<IWebQueryService>();

    var logDirectory = ConfigurationManager.AppSettings["LoggingPath"];
    var logFile = Path.Combine(logDirectory, "Maxshop", "Log.txt");
    var fileLogger = new LoggerConfiguration()
        .WriteTo.File(logFile)
        .CreateLogger();

    // The scrapper uses a real SettingRepository; the loaded HTML node is kept in _htmlNode only.
    _scrapper = new PageListScrapper(
        new SettingRepository(),
        new LoggingService(fileLogger),
        webQueryMock.Object);
}
/// <summary>
/// One-time fixture setup for the MightyApe page list tests: loads the HTML fixture, builds a file
/// logger under the configured "LoggingPath" (MightyApe/Log.txt) and creates the scrapper under test
/// on top of the loaded HTML node.
/// </summary>
public void OneTimeSetup()
{
    // URL: https://www.mightyape.co.nz/games/ps4/best-sellers
    _htmlNode = LoadHtmlFromText();

    var webQueryMock = new Mock<IWebQueryService>();

    var logDirectory = ConfigurationManager.AppSettings["LoggingPath"];
    var logFile = Path.Combine(logDirectory, "MightyApe", "Log.txt");
    var fileLogger = new LoggerConfiguration()
        .WriteTo.File(logFile)
        .CreateLogger();

    // Real SettingRepository, mocked web query service, pre-loaded HTML node.
    _scrapper = new PageListScrapper(
        new SettingRepository(),
        new LoggingService(fileLogger),
        webQueryMock.Object,
        _htmlNode);
}
/// <summary>
/// Creates the page list scrapper service and stores every collaborator in its backing field.
/// No argument is validated here; values are stored as given.
/// </summary>
/// <param name="settingRepository">Repository used to read scrapper settings.</param>
/// <param name="scrapper">The page list scrapper this service drives.</param>
/// <param name="loggingService">Logging service.</param>
/// <param name="resultCollectionService">Service that receives the scraped data.</param>
/// <param name="webQueryService">Web query service.</param>
/// <param name="pageListPaginationService">Pagination service for the page list.</param>
public PageListScrapperService(
    ISettingRepository settingRepository,
    PageListScrapper scrapper,
    ILoggingService loggingService,
    IResultCollectionService resultCollectionService,
    IWebQueryService webQueryService,
    IPageListPaginationService pageListPaginationService)
{
    // Scraping collaborators
    _scrapper = scrapper;
    _pageListPaginationService = pageListPaginationService;
    _webQueryService = webQueryService;

    // Infrastructure collaborators
    _settingRepository = settingRepository;
    _loggingService = loggingService;
    _resultCollectionService = resultCollectionService;
}
/// <summary>
/// Verifies that <c>IsFirstPage</c> returns the expected value for the given scrapper when no
/// HTML node is supplied (null is passed as the node).
/// </summary>
/// <param name="scrapper">Scrapper handed to the pagination service under test.</param>
/// <param name="isFirstPage">Expected outcome of <c>IsFirstPage</c>.</param>
public void CanDetermineIfFirstPageFromUrl(PageListScrapper scrapper, bool isFirstPage)
{
    // Arrange: the query string variable carrying the page number is "page".
    _settingRepository
        .Setup(x => x.GetSettingValue<string>("PageListScrapper.URL.QueryStringPageVariable"))
        .Returns("page");
    var paginationService = new PageListPaginationService(
        _settingRepository.Object,
        _loggingService.Object,
        scrapper);

    // Act: no DOM node is provided.
    // NOTE(review): presumably the decision then comes from the scrapper's URL - confirm.
    var actual = paginationService.IsFirstPage(null);

    // Assert
    Assert.That(actual, Is.EqualTo(isFirstPage));
}
/// <summary>
/// One-time fixture setup: creates the mocks, a file logger writing to the configured
/// "LoggingPath", the scrapper under test, and finally wires the logger into the logging mock.
/// </summary>
public void OneTimeSetup()
{
    // Mocks shared by the tests in this fixture.
    _settingRepository = new Mock<ISettingRepository>();
    _loggingService = new Mock<ILoggingService>();
    _webQueryService = new Mock<IWebQueryService>();

    // The configured value is used directly as the log file path.
    _logger = new LoggerConfiguration()
        .WriteTo.File(ConfigurationManager.AppSettings["LoggingPath"])
        .CreateLogger();

    _scrapper = new PageListScrapper(
        _settingRepository.Object,
        _loggingService.Object,
        _webQueryService.Object);

    // Registered after the scrapper has been constructed; keep this ordering.
    _loggingService.Setup(x => x.Logger).Returns(_logger);
}
/// <summary>
/// Builds the scrapper service that matches the configured scrapper type.
/// </summary>
/// <returns>
/// A <c>PageListScrapperService</c> wired with a new <c>PageListScrapper</c> and its pagination
/// service when the configured type equals <c>ScrapperType.PageList</c> (case-insensitive).
/// </returns>
/// <exception cref="NotImplementedException">Thrown for every other scrapper type.</exception>
public virtual IScrapperService GetScrapper()
{
    var isPageList = string.Equals(_scrapper, ScrapperType.PageList, StringComparison.InvariantCultureIgnoreCase);
    if (!isPageList)
    {
        // Only the page list scrapper exists so far. Earlier commented-out code sketched
        // PageDetailScrapper / SmartScrapper branches here, but they were never implemented.
        throw new NotImplementedException();
    }

    var pageListScrapper = new PageListScrapper(_settingRepository, _loggingService, _webQueryService);
    var paginationService = new PageListPaginationService(_settingRepository, _loggingService, pageListScrapper);

    return new PageListScrapperService(
        _settingRepository,
        pageListScrapper,
        _loggingService,
        _resultCollectionService,
        _webQueryService,
        paginationService);
}
/// <summary>
/// Verifies that <c>IsFirstPage</c> returns false for the MightyApe HTML fixture when the
/// pagination selector is ".pagination li active span".
/// </summary>
public void CanDetermineIfFirstPageByLookingAtThePaginationDOM_ShouldNOTPass_InvalidDOMPage()
{
    // Arrange: scrapper built on the HTML fixture with a real SettingRepository.
    var pageScrapper = new PageListScrapper(
        new SettingRepository(),
        _loggingService.Object,
        _webQueryService.Object,
        MightyAppePageListScrapperTest.LoadHtmlFromText());
    pageScrapper.PaginationSettings = new PageListPagination
    {
        PaginationSelector = ".pagination li active span"
    };

    var paginationService = new PageListPaginationService(
        _settingRepository.Object,
        _loggingService.Object,
        pageScrapper);

    // Act
    var loadedNode = pageScrapper.GetLoadedHtmlNode();
    var actual = paginationService.IsFirstPage(loadedNode);

    // Assert
    Assert.That(actual, Is.EqualTo(false));
}
/// <summary>
/// Verifies <c>Clone</c>: the target scrapper receives the source's items and settings.
/// The assertions pin the current copy semantics: <c>ItemXPath</c> is independent between the
/// two scrappers, while changes made through the clone to <c>Items[0].Selector</c> and
/// <c>PaginationSettings.ShowLastPagination</c> are also visible on the source.
/// </summary>
public void CanClonePageListScrapper()
{
    // Arrange: configure the source scrapper.
    var nameItem = new ScrapeItemSetting { Key = "Name", Selector = ".productname" };
    var urlItem = new ScrapeItemSetting { Key = "Url", Selector = ".url" };

    _scrapper.ItemXPath = "div.productList";
    _scrapper.PaginationSettings = new PageListPagination
    {
        ShowLastPagination = true,
        PaginationSelector = ".pagination"
    };
    _scrapper.Items = new List<ScrapeItemSetting> { nameItem, urlItem };

    var clone = new PageListScrapper(
        _settingRepository.Object,
        _loggingService.Object,
        _webQueryService.Object);

    // Act: clone, then mutate the clone only.
    _scrapper.Clone(clone);
    clone.ItemXPath = "div .list";
    clone.PaginationSettings.ShowLastPagination = false;
    clone.Items[0].Selector = "#productName";

    // Assert
    Assert.That(clone.Items.Count, Is.EqualTo(2));
    Assert.That(clone.ItemXPath, Is.EqualTo("div .list"));
    Assert.That(_scrapper.Items[0].Selector, Is.EqualTo("#productName"));
    Assert.That(clone.Items[0].Selector, Is.EqualTo("#productName"));
    Assert.That(_scrapper.ItemXPath, Is.EqualTo("div.productList"));
    Assert.That(clone.PaginationSettings.ShowLastPagination, Is.EqualTo(false));
    Assert.That(_scrapper.PaginationSettings.ShowLastPagination, Is.EqualTo(false));
}
/// <summary>
/// Test helper: builds a <c>PageListScrapperService</c> around the given scrapper using the
/// fixture's mocks. No pagination service is supplied (null is passed).
/// </summary>
/// <param name="scrapper">Scrapper the service should drive.</param>
/// <returns>The newly created service.</returns>
private PageListScrapperService GetPageListScrapperService(PageListScrapper scrapper)
{
    var service = new PageListScrapperService(
        _settingRepository.Object,
        scrapper,
        _loggingService.Object,
        _resultCollectionService.Object,
        _webQueryService.Object,
        null);

    return service;
}