/// <summary>
/// If the page list has more than 1 page, scrape all the other pages of the same category too.
/// One scrape action is built for every page from 2 up to <paramref name="lastPage"/>, and the
/// actions are then run with <c>Parallel.Invoke</c> using a maximum degree of parallelism of 8.
/// Each action saves its scraped data through the result collection service.
/// </summary>
/// <param name="lastPage">Number of the last page to scrape. No action is created when it is less than 2.</param>
/// <param name="firstPageScrapper">
/// Scrapper of the first page. Its <c>Clone</c> method is called with every new scrapper, and its
/// <c>Url</c> is passed to <c>GetNextUrl</c> to build the URL of each further page.
/// </param>
public virtual void ScrapeOtherPages(int lastPage, IPageListScrapper firstPageScrapper)
{
    var actions = new List<Action>();

    for (var i = 2; i <= lastPage; i++)
    {
        // Declared inside the loop so every lambda below captures its own scrapper instance.
        var scrapper = new PageListScrapper(_settingRepository, _loggingService, _webQueryService);
        firstPageScrapper.Clone(scrapper);

        // Set after Clone so the page-specific URL is the one the scrapper ends up with.
        scrapper.Url = GetNextUrl(i, firstPageScrapper.Url);

        actions.Add(() =>
        {
            var data = scrapper.Scrape();

            // NOTE(review): Scrape() presumably returns a Task, so .Result blocks this worker
            // until the scrape has finished - confirm against IPageListScrapper.
            _resultCollectionService.SaveScrappedData(data.Result);
        });
    }

    // Invoke all the tasks
    try
    {
        Parallel.Invoke(new ParallelOptions { MaxDegreeOfParallelism = 8 }, actions.ToArray());
    }
    catch (AggregateException ex)
    {
        // string.Join takes the separator first and the values second. The previous call had
        // them swapped, so only "," was ever logged and the exception details were lost.
        var exceptions = string.Join<Exception>(",", ex.InnerExceptions);
        _loggingService.Logger.Error(exceptions);
    }
}
/// <summary>
/// Copies this scrapper's settings onto <paramref name="scrapper"/>.
/// Every value is assigned as-is; nothing is deep-copied here.
/// If you need independent copies of the complex properties (classes), use DeepClone instead.
/// </summary>
/// <param name="scrapper">The scrapper that receives the copied values.</param>
public virtual void Clone(IPageListScrapper scrapper)
{
    // Plain settings.
    scrapper.ItemXPath = this.ItemXPath;
    scrapper.LoadMoreOnSamePage = this.LoadMoreOnSamePage;
    scrapper.WaitForJavascriptToLoad = this.WaitForJavascriptToLoad;
    scrapper.Url = this.Url;

    // Copying these is not important for cloning, so the target is simply pointed
    // at the objects that already exist in memory instead of receiving copies.
    scrapper.Items = this.Items;
    scrapper.PaginationSettings = this.PaginationSettings;
}
/// <summary>
/// Not implemented in this class: every call throws.
/// </summary>
/// <param name="lastPage">Unused.</param>
/// <param name="firstPageScrapper">Unused.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public virtual void ScrapeOtherPages(int lastPage, IPageListScrapper firstPageScrapper)
    => throw new NotImplementedException();
/// <summary>
/// Creates the pagination service and stores the supplied dependencies for later use.
/// The arguments are stored without validation.
/// </summary>
/// <param name="settingRepository">Setting repository kept in <c>_settingRepository</c>.</param>
/// <param name="loggingService">Logging service kept in <c>_loggingService</c>.</param>
/// <param name="scrapper">Page list scrapper kept in <c>_scrapper</c>.</param>
public PageListPaginationService(
    ISettingRepository settingRepository,
    ILoggingService loggingService,
    IPageListScrapper scrapper)
{
    _scrapper = scrapper;
    _loggingService = loggingService;
    _settingRepository = settingRepository;
}