// Fixture setup: fetches the site simulator's base address and its
// "/HttpResponse/Status404" endpoint, storing the two crawled pages in
// _goodPageResult and _badPageResult for later use.
public async Task TestFixtureSetup()
        {
            var unitTestConfig = new UnitTestConfig();
            var crawlConfig    = new CrawlConfiguration {
                UserAgentString = "aaa"
            };

            // PageRequester is disposable; release it as soon as both fixture pages are fetched
            // instead of leaking it for the lifetime of the test run.
            using (var pageRequester = new PageRequester(crawlConfig))
            {
                _goodPageResult = await pageRequester.MakeRequestAsync(new Uri(unitTestConfig.SiteSimulatorBaseAddress));

                _badPageResult = await pageRequester.MakeRequestAsync(new Uri(string.Concat(unitTestConfig.SiteSimulatorBaseAddress, "/HttpResponse/Status404")));
            }
        }
        // Issues a real request to https://google.com/ and checks that the returned page
        // carries the request/response messages and non-empty text content.
        public async Task MakeRequestAsync_RealCall_ReturnsExpectedCrawledPageObject()
        {
            //Arrange
            var config = new CrawlConfiguration
            {
                IsSslCertificateValidationEnabled = false,
                IsAlwaysLogin = true,
                IsHttpRequestAutomaticDecompressionEnabled = true,
                IsSendingCookiesEnabled = true,
                HttpProtocolVersion     = HttpProtocolVersion.Version10
            };
            var google = new Uri("https://google.com/");

            // The using block guarantees the requester is disposed even when an assertion
            // below throws; a trailing Dispose() call would be skipped in that case.
            using (var unitUnderTest = new PageRequester(config, new WebContentExtractor()))
            {
                //Act
                var result = await unitUnderTest.MakeRequestAsync(google);

                //Assert
                Assert.IsNull(result.HttpRequestException);
                Assert.AreSame(google, result.Uri);
                Assert.IsNotNull(result.HttpRequestMessage);
                Assert.IsNotNull(result.HttpResponseMessage);
                Assert.IsNotNull(result.Content);

                Assert.AreNotEqual("", result.Content.Text);
            }
        }
Exemple #3
0
        /// <summary>
        /// Requests a single hard-coded page and logs its URI together with the numeric
        /// HTTP status code of the response.
        /// </summary>
        private static async Task DemoPageRequester()
        {
            // Dispose the requester once the demo request is done.
            using (var pageRequester = new PageRequester(new CrawlConfiguration(), new WebContentExtractor()))
            {
                //var result = await pageRequester.MakeRequestAsync(new Uri("http://google.com"));
                var result = await pageRequester.MakeRequestAsync(new Uri("http://wvtesting2.com"));

                // Defensive: avoid a NullReferenceException if no response message was recorded
                // (e.g. the request failed before a response arrived); status is then logged as null.
                var status = (int?)result.HttpResponseMessage?.StatusCode;

                Log.Logger.Information("{result}", new { url = result.Uri, status });
            }
        }
Exemple #4
0
        /// <summary>
        /// Requests a single hard-coded page and writes its URI together with the numeric
        /// HTTP status code of the response to the console.
        /// </summary>
        private async Task DemoPageRequester()
        {
            // Dispose the requester once the demo request is done.
            using (var pageRequester = new PageRequester(new CrawlConfiguration(), new WebContentExtractor()))
            {
                //var result = await pageRequester.MakeRequestAsync(new Uri("http://google.com"));
                var result = await pageRequester.MakeRequestAsync(new Uri("https://diksiyonaryo.ph"));

                // Defensive: avoid a NullReferenceException if no response message was recorded
                // (e.g. the request failed before a response arrived); status is then printed empty.
                var status = (int?)result.HttpResponseMessage?.StatusCode;

                // Console.WriteLine does not understand logging templates, so the former
                // "{result}" prefix was printed verbatim; print just the summary object.
                Console.WriteLine(new { url = result.Uri, status });
            }
        }
Exemple #5
0
        /// <summary>
        /// Requests http://msn.com once and logs the page URI together with the numeric
        /// HTTP status code of the response.
        /// </summary>
        private static async Task DemoSinglePageRequest()
        {
            // Dispose the requester once the demo request is done.
            using (var pageRequester = new PageRequester(new CrawlConfiguration(), new WebContentExtractor()))
            {
                var crawledPage = await pageRequester.MakeRequestAsync(new Uri("http://msn.com"));

                // Defensive: avoid a NullReferenceException if no response message was recorded
                // (e.g. the request failed before a response arrived); status is then logged as null.
                var status = (int?)crawledPage.HttpResponseMessage?.StatusCode;

                Log.Logger.Information("{result}", new
                {
                    url = crawledPage.Uri,
                    status
                });
            }
        }
Exemple #6
0
        /// <summary>
        /// Downloads the songs listing page of an artist from songmeanings.com and collects
        /// the artist name and the links found in the songs table.
        /// </summary>
        /// <param name="artistId">Identifier appended to the artist songs URL.</param>
        /// <returns>
        /// A <see cref="SongsPageInfo"/> holding the artist name and one URL per song link, or
        /// <c>SongsPageInfo.Invalid</c> when the URL is rejected by <c>IsValidUrl</c> or the
        /// downloaded page has no parsed document or no artist heading link.
        /// </returns>
        public async Task <SongsPageInfo> GetSongsPageInfo(long artistId)
        {
            var url = "https://songmeanings.com/artist/view/songs/" + artistId;

            if (!IsValidUrl(url))
            {
                return(SongsPageInfo.Invalid);
            }

            var crawledPage = await _pageRequester.MakeRequestAsync(new Uri(url));
            var document    = crawledPage.AngleSharpHtmlDocument;

            // QuerySelector yields null when nothing matches; treat a page without the
            // artist heading as invalid instead of failing with a NullReferenceException.
            var artistLink = document?.QuerySelector("div.heading a:first-of-type");

            if (artistLink == null)
            {
                return(SongsPageInfo.Invalid);
            }

            var result = new SongsPageInfo(artistLink.TextContent);

            foreach (var link in document.QuerySelectorAll("#songslist tr td:first-child a"))
            {
                var href = link.GetAttribute("href");

                // Anchors without an href would otherwise produce a bare "https:" entry.
                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                // The "https:" prefix presumably completes protocol-relative hrefs — TODO confirm.
                result.SongsUrls.Add($"https:{href}");
            }

            return(result);
        }
        // Requests the valid URI and verifies that the resulting page is fully populated:
        // request/response objects, non-blank text, a 200 status, a body size between
        // 900 and 1400 bytes (exclusive), and timestamps from within the last five seconds.
        public async Task MakeRequest_200_ReturnsValidResponse()
        {
            CrawledPage page = await _unitUnderTest.MakeRequestAsync(_validUri);

            Assert.IsNotNull(page);
            Assert.IsNotNull(page.HttpRequestMessage);
            Assert.IsNotNull(page.HttpWebResponse);
            Assert.IsNull(page.HttpRequestException);
            Assert.IsFalse(string.IsNullOrWhiteSpace(page.Content.Text));
            Assert.IsNotNull(page.HtmlDocument);
            Assert.AreEqual(200, (int)page.HttpWebResponse.StatusCode);

            var byteCount = page.Content.Bytes.Length;
            Assert.IsTrue(byteCount > 900 && byteCount < 1400);

            // Every timestamp recorded during the request must be newer than this cutoff.
            DateTime cutoff = DateTime.Now - TimeSpan.FromSeconds(5);

            Assert.IsTrue(page.RequestStarted > cutoff);
            Assert.IsTrue(page.RequestCompleted > cutoff);
            Assert.IsNotNull(page.DownloadContentStarted);
            Assert.IsNotNull(page.DownloadContentCompleted);
            Assert.IsTrue(page.DownloadContentStarted > cutoff);
            Assert.IsTrue(page.DownloadContentCompleted > cutoff);
        }
Exemple #8
0
        /// <summary>
        /// Requests https://www.onet.pl once and logs the page URI, the numeric HTTP status
        /// code, and the <c>article</c> elements found in the parsed document.
        /// </summary>
        private static async Task DemoSinglePageRequest()
        {
            // Dispose the requester once the demo request is done.
            using (var pageRequester = new PageRequester(new CrawlConfiguration(), new WebContentExtractor()))
            {
                var crawledPage = await pageRequester.MakeRequestAsync(new Uri("https://www.onet.pl"));

                // Defensive: avoid a NullReferenceException if no response message was recorded
                // (e.g. the request failed before a response arrived); status is then logged as null.
                var status = (int?)crawledPage.HttpResponseMessage?.StatusCode;

                // The former "articleTitles" query was computed but never used, so it was removed.
                Log.Logger.Information("{result}", new
                {
                    url         = crawledPage.Uri,
                    status,
                    rawResponse = crawledPage.AngleSharpHtmlDocument.QuerySelectorAll("article")
                });
            }
        }
Exemple #9
0
        /// <summary>
        /// Waits on <c>lastCalls</c> for the given URI, then downloads it with a freshly
        /// created requester and logs how long the whole call took.
        /// </summary>
        /// <param name="uri">Address of the page to download.</param>
        /// <returns>The crawled page when no HTTP request exception was recorded.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the crawled page carries an HTTP request exception; that exception
        /// is attached as the inner exception.
        /// </exception>
        private async Task <CrawledPage> MakeOneRequestAsync(Uri uri)
        {
            var timer = Stopwatch.StartNew();

            await lastCalls.WaitForCallAsync(uri).ConfigureAwait(false);

            using (var extractor = new WebContentExtractor())
            using (var requester = new PageRequester(new CrawlConfiguration(), extractor))
            {
                var crawled = await requester.MakeRequestAsync(uri).ConfigureAwait(false);
                var failure = crawled.HttpRequestException;

                if (failure == null)
                {
                    logger.LogDebug("Request to {Url} took {Elapsed}", uri, timer.Elapsed);
                    return crawled;
                }

                throw new InvalidOperationException("HTTP error.", failure);
            }
        }
 // Calls MakeRequestAsync with a null URI. The expected outcome is presumably declared
 // by a test attribute outside this view — TODO confirm.
 public async Task MakeRequestAsync_NullUri()
 {
     Uri missingUri = null;

     await _unitUnderTest.MakeRequestAsync(missingUri);
 }