Beispiel #1
0
        /// <summary>
        /// Loads the document behind <paramref name="url"/> through
        /// <c>DotnetCrawlerPageLinkReader.GetPageRedirect</c> and passes it to
        /// <c>Processor.ProcessarPartes</c>.
        /// </summary>
        /// <param name="url">Address handed to <c>GetPageRedirect</c>.</param>
        /// <returns>The processor result, cast to <c>List&lt;ParteProcesso&gt;</c>.</returns>
        public async Task <List <ParteProcesso> > CrawlePartesProcessoAL(string url)
        {
            var reader = new DotnetCrawlerPageLinkReader(Request);
            var page   = await reader.GetPageRedirect(url);
            var partes = await Processor.ProcessarPartes(page);

            // Direct cast on purpose: a result of any other runtime type surfaces
            // as an InvalidCastException instead of a silent null.
            return (List <ParteProcesso>)partes;
        }
Beispiel #2
0
        /// <summary>
        /// Collects links starting from <c>Request.Url</c>, then downloads and
        /// processes each linked document one after another.
        /// </summary>
        /// <returns>A task that completes once every link has been processed.</returns>
        public async Task Crawle()
        {
            var linkReader = new DotnetCrawlerPageLinkReader(Request);
            var links      = await linkReader.GetLinks(Request.Url, 0);

            foreach (var url in links)
            {
                var document = await Downloader.Download(url);

                // The processing result is not consumed by this method; the explicit
                // discard replaces a local that was assigned but never read.
                // NOTE(review): if the entities are meant to be stored or forwarded, that step is missing here — confirm.
                _ = await Processor.Process(document);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Runs one crawl pass: reads the links for <c>Request</c>, then for each link
        /// downloads the document, processes it and sends the result through the pipeline.
        /// Links are handled sequentially, in the order the reader returned them.
        /// </summary>
        /// <returns>A task that completes after the last link has gone through the pipeline.</returns>
        public async Task Crawle()
        {
            var reader = new DotnetCrawlerPageLinkReader(new WebClientService());
            IEnumerable <string> pageLinks = await reader.GetLinksAsync(Request, 0);

            foreach (string link in pageLinks)
            {
                // Download -> process -> pipeline, each step awaited before the next starts.
                HtmlDocument page = await Downloader.Download(link);
                IEnumerable <TEntity> entities = await Processor.Process(page);
                await Pipeline.Run(entities);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Checks that calling <c>GetLinksAsync</c> with <c>-1</c> as its second argument
        /// raises an <see cref="ArgumentOutOfRangeException"/> whose message starts with
        /// the expected out-of-range text.
        /// </summary>
        public void GetLinksAsync_ThrowsArgumentException()
        {
            // Arrange: a web client stub that hands back an empty document for any URL.
            var crawlerRequest = new DotnetCrawlerRequest();
            var emptyDocument  = new HtmlDocument();
            var clientStub     = new Mock <IWebClientService>();
            clientStub
                .Setup(client => client.FromWebAsync(It.IsAny <string>()))
                .ReturnsAsync(emptyDocument);

            var reader = new DotnetCrawlerPageLinkReader(clientStub.Object);

            // Act: wrap the call in a delegate so the assertion library can invoke it.
            var act = reader.Invoking(r => r.GetLinksAsync(crawlerRequest, -1));

            // Assert
            act.Should().Throw <ArgumentOutOfRangeException>()
                .Where(ex => ex.Message.StartsWith("Specified argument was out of the range"));
        }
Beispiel #5
0
        /// <summary>
        /// Checks that <c>GetLinksAsync</c> yields no links when the web client returns
        /// an empty <c>HtmlDocument</c>, and that the web client is queried exactly once.
        /// </summary>
        /// <returns>
        /// A task the test runner can await. The method previously was <c>async void</c>,
        /// which gives the caller nothing to await, so a failing assertion could go
        /// unobserved or surface outside the test.
        /// </returns>
        public async System.Threading.Tasks.Task GetLinksAsync_ReturnsNoLinks()
        {
            // Arrange
            DotnetCrawlerRequest request      = new DotnetCrawlerRequest();
            HtmlDocument         htmlDocument = new HtmlDocument();

            Mock <IWebClientService> webClientMock = new Mock <IWebClientService>();

            webClientMock.Setup(a => a.FromWebAsync(It.IsAny <string>())).ReturnsAsync(htmlDocument);

            DotnetCrawlerPageLinkReader linkReader = new DotnetCrawlerPageLinkReader(webClientMock.Object);

            // Act
            IEnumerable <string> links = await linkReader.GetLinksAsync(request);

            // Assert
            links.Should().BeEmpty();
            webClientMock.Verify(m => m.FromWebAsync(It.IsAny <string>()), Times.Exactly(1));
        }
Beispiel #6
0
        /// <summary>
        /// Checks that <c>GetLinksAsync</c> returns three string links for the markup
        /// produced by <c>RootHtml()</c>, and that the web client is queried exactly once.
        /// </summary>
        /// <returns>
        /// A task the test runner can await. The method previously was <c>async void</c>,
        /// which gives the caller nothing to await, so a failing assertion could go
        /// unobserved or surface outside the test.
        /// </returns>
        public async System.Threading.Tasks.Task GetLinksAsync_ReturnsAllLinks()
        {
            // Arrange: the mocked web client serves the fixture markup for any URL.
            string html = RootHtml();

            HtmlDocument htmlDocument = new HtmlDocument();

            htmlDocument.LoadHtml(html);

            DotnetCrawlerRequest request = new DotnetCrawlerRequest();

            Mock <IWebClientService> webClientMock = new Mock <IWebClientService>();

            webClientMock.Setup(a => a.FromWebAsync(It.IsAny <string>())).ReturnsAsync(htmlDocument);

            DotnetCrawlerPageLinkReader linkReader = new DotnetCrawlerPageLinkReader(webClientMock.Object);

            // Act
            IEnumerable <string> links = await linkReader.GetLinksAsync(request);

            // Assert
            links.Should().NotBeEmpty().And.HaveCount(3).And.ContainItemsAssignableTo <string>();
            webClientMock.Verify(m => m.FromWebAsync(It.IsAny <string>()), Times.Exactly(1));
        }