/// <summary>
/// One-time fixture setup: fetches a known-good page and a known-404 page from the
/// site simulator so individual tests can assert against <c>_goodPageResult</c> and
/// <c>_badPageResult</c>.
/// </summary>
public async Task TestFixtureSetup()
{
    UnitTestConfig unitTestConfig = new UnitTestConfig();

    // PageRequester is IDisposable (it is disposed elsewhere in this suite) but was
    // leaked here; dispose it once both responses are captured.
    // NOTE(review): assumes CrawledPage buffers its content before this method
    // returns, so disposing the requester afterwards is safe — confirm.
    using var pageRequester = new PageRequester(new CrawlConfiguration { UserAgentString = "aaa" });

    _goodPageResult = await pageRequester.MakeRequestAsync(new Uri(unitTestConfig.SiteSimulatorBaseAddress));
    _badPageResult = await pageRequester.MakeRequestAsync(new Uri(string.Concat(unitTestConfig.SiteSimulatorBaseAddress, "/HttpResponse/Status404")));
}
/// <summary>
/// Integration test: performs a real HTTP request against google.com and verifies the
/// resulting CrawledPage is fully populated with request, response, and content data.
/// </summary>
public async Task MakeRequestAsync_RealCall_ReturnsExpectedCrawledPageObject()
{
    // Arrange
    // using declaration guarantees disposal even when an assertion throws;
    // the original called Dispose() manually and only reached it on the success path.
    using var unitUnderTest = new PageRequester(
        new CrawlConfiguration()
        {
            IsSslCertificateValidationEnabled = false,
            IsAlwaysLogin = true,
            IsHttpRequestAutomaticDecompressionEnabled = true,
            IsSendingCookiesEnabled = true,
            HttpProtocolVersion = HttpProtocolVersion.Version10
        },
        new WebContentExtractor());
    var google = new Uri("https://google.com/");

    // Act
    var result = await unitUnderTest.MakeRequestAsync(google);

    // Assert
    Assert.IsNull(result.HttpRequestException);
    Assert.AreSame(google, result.Uri);
    Assert.IsNotNull(result.HttpRequestMessage);
    Assert.IsNotNull(result.HttpResponseMessage);
    Assert.IsNotNull(result.Content);
    Assert.AreNotEqual("", result.Content.Text);
}
/// <summary>
/// Demo: fetches a single page and logs its URL and numeric HTTP status code.
/// </summary>
private static async Task DemoPageRequester()
{
    // Both WebContentExtractor and PageRequester are IDisposable (see the using
    // pattern used by MakeOneRequestAsync); the original leaked both.
    using var contentExtractor = new WebContentExtractor();
    using var pageRequester = new PageRequester(new CrawlConfiguration(), contentExtractor);

    var result = await pageRequester.MakeRequestAsync(new Uri("http://wvtesting2.com"));

    // NOTE(review): throws NullReferenceException if the request failed and
    // HttpResponseMessage is null — acceptable for a demo.
    Log.Logger.Information("{result}", new
    {
        url = result.Uri,
        status = Convert.ToInt32(result.HttpResponseMessage.StatusCode)
    });
}
/// <summary>
/// Demo: fetches a single page and writes its URL and numeric HTTP status code
/// to the console.
/// </summary>
private async Task DemoPageRequester()
{
    // Both WebContentExtractor and PageRequester are IDisposable; the original
    // leaked both — using declarations dispose them when the method exits.
    using var contentExtractor = new WebContentExtractor();
    using var pageRequester = new PageRequester(new CrawlConfiguration(), contentExtractor);

    var result = await pageRequester.MakeRequestAsync(new Uri("https://diksiyonaryo.ph"));

    // NOTE(review): "{result}" here is plain string concatenation, not a Serilog
    // message template, so the output is the literal text "{result}" followed by the
    // anonymous object's ToString(). Preserved as-is; confirm whether the sibling
    // Log.Logger.Information("{result}", ...) form was intended instead.
    Console.WriteLine("{result}" + new
    {
        url = result.Uri,
        status = Convert.ToInt32(result.HttpResponseMessage.StatusCode)
    });
}
/// <summary>
/// Demo: fetches msn.com and logs the URL and numeric HTTP status code.
/// </summary>
private static async Task DemoSinglePageRequest()
{
    // Both WebContentExtractor and PageRequester are IDisposable; the original
    // leaked both.
    using var contentExtractor = new WebContentExtractor();
    using var pageRequester = new PageRequester(new CrawlConfiguration(), contentExtractor);

    var crawledPage = await pageRequester.MakeRequestAsync(new Uri("http://msn.com"));

    Log.Logger.Information("{result}", new
    {
        url = crawledPage.Uri,
        status = Convert.ToInt32(crawledPage.HttpResponseMessage.StatusCode)
    });
}
/// <summary>
/// Crawls an artist's song-list page on songmeanings.com and returns the artist name
/// together with absolute URLs for every song link found on the page.
/// </summary>
/// <param name="artistId">songmeanings.com artist id used to build the page URL.</param>
/// <returns>
/// <see cref="SongsPageInfo.Invalid"/> when the URL fails validation or the page does
/// not contain the expected markup; otherwise the populated page info.
/// </returns>
public async Task <SongsPageInfo> GetSongsPageInfo(long artistId)
{
    var url = "https://songmeanings.com/artist/view/songs/" + artistId;
    if (!IsValidUrl(url))
    {
        return SongsPageInfo.Invalid;
    }

    var crawledPage = await _pageRequester.MakeRequestAsync(new Uri(url));

    // Guard against a failed request or changed markup: the original dereferenced
    // QuerySelector(...).TextContent unconditionally and threw NullReferenceException
    // when the heading (or the whole document) was missing.
    var document = crawledPage.AngleSharpHtmlDocument;
    var artistNameElement = document?.QuerySelector("div.heading a:first-of-type");
    if (artistNameElement == null)
    {
        return SongsPageInfo.Invalid;
    }

    var result = new SongsPageInfo(artistNameElement.TextContent);
    foreach (var link in document.QuerySelectorAll("#songslist tr td:first-child a"))
    {
        // hrefs appear to be protocol-relative ("//...") — prefix the scheme,
        // matching the original behavior.
        result.SongsUrls.Add($"https:{link.GetAttribute("href")}");
    }
    return result;
}
/// <summary>
/// Verifies that a 200 response yields a fully populated CrawledPage: request/response
/// objects present, parsed content of plausible size, and timing fields set recently.
/// </summary>
public async Task MakeRequest_200_ReturnsValidResponse()
{
    CrawledPage crawledPage = await _unitUnderTest.MakeRequestAsync(_validUri);

    // Core request/response objects present, no transport-level error.
    Assert.IsNotNull(crawledPage);
    Assert.IsNotNull(crawledPage.HttpRequestMessage);
    Assert.IsNotNull(crawledPage.HttpWebResponse);
    Assert.IsNull(crawledPage.HttpRequestException);

    // Content was downloaded and parsed.
    Assert.IsFalse(string.IsNullOrWhiteSpace(crawledPage.Content.Text));
    Assert.IsNotNull(crawledPage.HtmlDocument);
    Assert.AreEqual(200, (int)crawledPage.HttpWebResponse.StatusCode);

    // Body size falls inside the window expected for the test page.
    var byteCount = crawledPage.Content.Bytes.Length;
    Assert.IsTrue(byteCount > 900 && byteCount < 1400);

    // Every timing field is populated and stamped within the last five seconds.
    DateTime fiveSecondsAgo = DateTime.Now.AddSeconds(-5);
    Assert.IsTrue(fiveSecondsAgo < crawledPage.RequestStarted);
    Assert.IsTrue(fiveSecondsAgo < crawledPage.RequestCompleted);
    Assert.IsNotNull(crawledPage.DownloadContentStarted);
    Assert.IsNotNull(crawledPage.DownloadContentCompleted);
    Assert.IsTrue(fiveSecondsAgo < crawledPage.DownloadContentStarted);
    Assert.IsTrue(fiveSecondsAgo < crawledPage.DownloadContentCompleted);
}
/// <summary>
/// Demo: fetches onet.pl, collects non-empty "span.title" texts, and logs a summary
/// including all &lt;article&gt; elements.
/// </summary>
private static async Task DemoSinglePageRequest()
{
    // Both WebContentExtractor and PageRequester are IDisposable; the original
    // leaked both.
    using var contentExtractor = new WebContentExtractor();
    using var pageRequester = new PageRequester(new CrawlConfiguration(), contentExtractor);

    var crawledPage = await pageRequester.MakeRequestAsync(new Uri("https://www.onet.pl"));

    // NOTE(review): articleTitles is computed but never used — kept for parity with
    // the original demo; either remove it or include it in the log entry.
    var articleTitles = crawledPage.AngleSharpHtmlDocument.All
        .Where(x => x.LocalName == "span" && x.ClassName == "title" && !string.IsNullOrWhiteSpace(x.TextContent))
        .Select(x => x.TextContent)
        .ToList();

    Log.Logger.Information("{result}", new
    {
        url = crawledPage.Uri,
        status = Convert.ToInt32(crawledPage.HttpResponseMessage.StatusCode),
        rawResponse = crawledPage.AngleSharpHtmlDocument.QuerySelectorAll("article")
    });
}
/// <summary>
/// Performs a single rate-limited page request against <paramref name="uri"/> and
/// returns the crawled page, logging the total elapsed time.
/// </summary>
/// <param name="uri">Address to fetch.</param>
/// <returns>The crawled page on success.</returns>
/// <exception cref="InvalidOperationException">
/// Wraps any HTTP failure reported by the requester.
/// </exception>
private async Task <CrawledPage> MakeOneRequestAsync(Uri uri)
{
    var stopwatch = Stopwatch.StartNew();

    // Throttle: wait until this URI may be requested again.
    await lastCalls.WaitForCallAsync(uri).ConfigureAwait(false);

    // using declarations dispose both resources when the method scope exits,
    // exactly as the original nested using statements did.
    using var extractor = new WebContentExtractor();
    using var requester = new PageRequester(new CrawlConfiguration(), extractor);

    var page = await requester.MakeRequestAsync(uri).ConfigureAwait(false);
    if (page.HttpRequestException != null)
    {
        throw new InvalidOperationException("HTTP error.", page.HttpRequestException);
    }

    logger.LogDebug("Request to {Url} took {Elapsed}", uri, stopwatch.Elapsed);
    return page;
}
// Exercises MakeRequestAsync with a null Uri.
// NOTE(review): no assertion appears here — presumably the expected exception is
// declared via a test attribute outside this view (e.g. [ExpectedException] /
// [TestCase] metadata); confirm, or make the expectation explicit with
// Assert.ThrowsAsync so the test cannot silently pass.
public async Task MakeRequestAsync_NullUri() { await _unitUnderTest.MakeRequestAsync(null); }