Пример #1
0
        public virtual Pagination ScrapePagination(
            IDocument document,
            PaginatedPagesSettings paginatedSettings)
        {
            var pagination = new Pagination
            {
                TotalPages  = document.QuerySelector(paginatedSettings.TotalPagesSelector),
                CurrentPage = document.QuerySelector(paginatedSettings.CurrentPageSelector)
            };

            if (!string.IsNullOrEmpty(paginatedSettings.FirstPageSelector))
            {
                pagination.FirstPage = document.QuerySelector(paginatedSettings.FirstPageSelector);
            }

            if (!string.IsNullOrEmpty(paginatedSettings.FirstPageSelector))
            {
                pagination.PreviousPage = document.QuerySelector(paginatedSettings.PreviousPageSelector);
            }

            if (!string.IsNullOrEmpty(paginatedSettings.FirstPageSelector))
            {
                pagination.Pages = document.QuerySelectorAll(paginatedSettings.PagesSelector).ToList();
            }

            if (!string.IsNullOrEmpty(paginatedSettings.FirstPageSelector))
            {
                pagination.NextPage = document.QuerySelector(paginatedSettings.NextPageSelector);
            }

            if (!string.IsNullOrEmpty(paginatedSettings.FirstPageSelector))
            {
                pagination.LastPage = document.QuerySelector(paginatedSettings.LastPageSelector);
            }

            return(pagination);
        }
Пример #2
0
        public async Task PaginatedPageScraper_ShouldBeOk()
        {
            //Arrange
            var settings = new PaginatedPagesSettings
            {
                FirstPageSelector    = "a.page_first",
                PreviousPageSelector = "a.page_prev",
                PagesSelector        = "span.pagenrs > a",
                CurrentPageSelector  = "span.page_selected",
                NextPageSelector     = "a.page_next",
                LastPageSelector     = "a.page_last",
                Url = "http://te.tournamentsoftware.com/ranking/category.aspx?id=18031&category=516&ogid=C1DBDB91-4E25-4D36-AD94-B404E369E50F&C516FOG=&p=1&ps=10",
                ListItemsSelector  = "table.ruler > tbody > tr:not(:first-child):not(:last-child)",
                TotalPagesSelector = "span.page_caption",
                Properties         = new[] {
                    new PropertySettings {
                        Name     = "Ranking",
                        Selector = "td.rank:nth-child(1)"
                    },
                    new PropertySettings {
                        Name     = "Player",
                        Selector = "td:nth-child(4) > a"
                    },
                    new PropertySettings {
                        Name     = "YearOfBirth",
                        Selector = "td:nth-child(5)"
                    },
                    new PropertySettings {
                        Name     = "Points",
                        Selector = "td:nth-child(6)"
                    },
                    new PropertySettings {
                        Name     = "TotalPoints",
                        Selector = "td:nth-child(9)"
                    },
                    new PropertySettings {
                        Name     = "Tournaments",
                        Selector = "td:nth-child(10)"
                    },
                    new PropertySettings {
                        Name     = "Country",
                        Selector = "td:nth-child(11) > a"
                    }
                }
            };

            //Act
            var scraper = new PaginatedPagesScraper(
                settings,
                this._loggerMock.Object);

            scraper.BeforeDocumentOpen += (sender, e) =>
            {
                sender.Should().BeOfType <PaginatedPagesScraper>();
                e.Url.Should().NotBeNullOrEmpty();
            };
            scraper.AfterDocumentOpen += (sender, e) =>
            {
                sender.Should().BeOfType <PaginatedPagesScraper>();
                e.Document.Should().NotBeNull();
            };
            scraper.ListPageScraped += (sender, e) =>
            {
                sender.Should().BeOfType <PaginatedPagesScraper>();
                e.Url.Should().NotBeNullOrEmpty();
                e.Settings.Should().NotBeNull();
                e.ListProperties.Should().NotBeNull();
            };
            scraper.PropertyScraped += (sender, e) =>
            {
                sender.Should().BeOfType <PaginatedPagesScraper>();
                e.ScrapedProperty.Should().NotBeNull();
                e.ScrapedProperty.Name.Should().NotBeNullOrEmpty();
                e.ScrapedProperty.Element.Should().NotBeNull();
                e.Settings.Should().NotBeNull();
            };

            await scraper.Scrape();
        }
Пример #3
0
 public PaginatedPagesScraper(
     PaginatedPagesSettings settings,
     ILogger logger)
     : base(settings, logger)
 {
 }