/// <summary>
/// Loads the page at <paramref name="url"/> through a link reader built from
/// <c>Request</c> and passes the resulting document to <c>Processor.ProcessarPartes</c>.
/// </summary>
/// <param name="url">Address handed to <c>GetPageRedirect</c>.</param>
/// <returns>The processor's result, cast to <c>List&lt;ParteProcesso&gt;</c>.</returns>
public async Task<List<ParteProcesso>> CrawlePartesProcessoAL(string url)
{
    var reader = new DotnetCrawlerPageLinkReader(Request);

    var page = await reader.GetPageRedirect(url);
    var partes = await Processor.ProcessarPartes(page);

    // NOTE(review): the explicit cast presumably fails at runtime if the processor
    // ever returns something other than a List<ParteProcesso> - confirm against Processor.
    return (List<ParteProcesso>)partes;
}
/// <summary>
/// Collects links starting at <c>Request.Url</c>, then downloads and processes each
/// linked page one after another.
/// </summary>
/// <returns>A task that completes once every link has been downloaded and processed.</returns>
public async Task Crawle()
{
    var reader = new DotnetCrawlerPageLinkReader(Request);
    var discovered = await reader.GetLinks(Request.Url, 0);

    foreach (var link in discovered)
    {
        var page = await Downloader.Download(link);

        // The processing result is not used by this method; it is awaited only for completion.
        await Processor.Process(page);
    }
}
/// <summary>
/// Runs the crawl: gathers links for <c>Request</c>, then for each link downloads the
/// page, processes it into entities and passes those entities to <c>Pipeline.Run</c>.
/// Links are handled sequentially, one full download/process/run cycle at a time.
/// </summary>
/// <returns>A task that completes when the last link has been run through the pipeline.</returns>
public async Task Crawle()
{
    var reader = new DotnetCrawlerPageLinkReader(new WebClientService());
    IEnumerable<string> discovered = await reader.GetLinksAsync(Request, 0);

    foreach (string link in discovered)
    {
        HtmlDocument page = await Downloader.Download(link);
        IEnumerable<TEntity> entities = await Processor.Process(page);

        await Pipeline.Run(entities);
    }
}
/// <summary>
/// Checks that calling <c>GetLinksAsync</c> with a second argument of -1 raises an
/// <see cref="ArgumentOutOfRangeException"/> whose message starts with the expected text.
/// </summary>
public void GetLinksAsync_ThrowsArgumentException()
{
    // Arrange: a web client mock that answers every URL with an empty document.
    var crawlRequest = new DotnetCrawlerRequest();
    var emptyDocument = new HtmlDocument();
    var webClient = new Mock<IWebClientService>();
    webClient
        .Setup(client => client.FromWebAsync(It.IsAny<string>()))
        .ReturnsAsync(emptyDocument);
    var reader = new DotnetCrawlerPageLinkReader(webClient.Object);

    // Act: defer the call so the assertion library can observe the exception.
    var act = reader.Invoking(r => r.GetLinksAsync(crawlRequest, -1));

    // Assert
    act.Should()
        .Throw<ArgumentOutOfRangeException>()
        .Where(ex => ex.Message.StartsWith("Specified argument was out of the range"));
}
/// <summary>
/// Checks that <c>GetLinksAsync</c> yields an empty sequence when the web client
/// returns a document that was never loaded with any HTML, and that the web client
/// is asked for a page exactly once.
/// </summary>
/// <returns>
/// A task the test runner can await, so failures raised after the first <c>await</c>
/// are reported against this test (the previous <c>async void</c> form could lose them).
/// </returns>
// Task is fully qualified so this method does not depend on a using directive.
public async System.Threading.Tasks.Task GetLinksAsync_ReturnsNoLinks()
{
    // Arrange
    DotnetCrawlerRequest request = new DotnetCrawlerRequest();
    HtmlDocument htmlDocument = new HtmlDocument();
    Mock<IWebClientService> webClientMock = new Mock<IWebClientService>();
    webClientMock
        .Setup(a => a.FromWebAsync(It.IsAny<string>()))
        .ReturnsAsync(htmlDocument);
    DotnetCrawlerPageLinkReader linkReader = new DotnetCrawlerPageLinkReader(webClientMock.Object);

    // Act
    IEnumerable<string> links = await linkReader.GetLinksAsync(request);

    // Assert
    links.Should().BeEmpty();
    webClientMock.Verify(m => m.FromWebAsync(It.IsAny<string>()), Times.Exactly(1));
}
/// <summary>
/// Checks that <c>GetLinksAsync</c> returns three string links when the web client
/// serves the markup produced by <c>RootHtml()</c>, and that the web client is asked
/// for a page exactly once.
/// </summary>
/// <returns>
/// A task the test runner can await, so failures raised after the first <c>await</c>
/// are reported against this test (the previous <c>async void</c> form could lose them).
/// </returns>
// Task is fully qualified so this method does not depend on a using directive.
public async System.Threading.Tasks.Task GetLinksAsync_ReturnsAllLinks()
{
    // Arrange
    string html = RootHtml();
    HtmlDocument htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);
    DotnetCrawlerRequest request = new DotnetCrawlerRequest();
    Mock<IWebClientService> webClientMock = new Mock<IWebClientService>();
    webClientMock
        .Setup(a => a.FromWebAsync(It.IsAny<string>()))
        .ReturnsAsync(htmlDocument);
    DotnetCrawlerPageLinkReader linkReader = new DotnetCrawlerPageLinkReader(webClientMock.Object);

    // Act
    IEnumerable<string> links = await linkReader.GetLinksAsync(request);

    // Assert
    links.Should().NotBeEmpty().And.HaveCount(3).And.ContainItemsAssignableTo<string>();
    webClientMock.Verify(m => m.FromWebAsync(It.IsAny<string>()), Times.Exactly(1));
}