/// <summary>
/// Verifies the tag-frequency table that <c>HtmlSummary.CountFrequency</c>
/// fills in when walking the parsed <c>RawHtml</c> fixture.
/// </summary>
public void TestFrequency()
{
    // Arrange: parse the fixture markup and grab an empty frequency table.
    var document = new HtmlDocument();
    document.LoadHtml(RawHtml);
    var tagCounts = new HtmlSummary().Frequency;

    // Act: count tags starting from the document root.
    HtmlSummary.CountFrequency(document.DocumentNode, tagCounts);

    // Assert: expected counts for the fixture page.
    tagCounts["html"].Should().Be(1);
    tagCounts.ContainsKey("div").Should().BeFalse();
    tagCounts["p"].Should().Be(28);
    tagCounts["a"].Should().Be(26);

    // No key may start with '#'. NOTE(review): presumably these would be
    // parser pseudo-nodes such as "#text" / "#comment" - confirm.
    var hasHashPrefixedKey = tagCounts.Keys.Any(key => key.StartsWith("#"));
    hasHashPrefixedKey.Should().BeFalse();
}
/// <summary>
/// Exercises <c>InstanceCache</c>: one summary stored under two keys must be
/// retrievable from both, and an unset key must afterwards yield null.
/// </summary>
public void TestCache()
{
    // Start from an empty cache.
    var store = new InstanceCache();
    store.Clear();

    // Store the same summary instance under two different keys.
    var entry = new HtmlSummary(
        HttpStatusCode.OK,
        "http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4",
        RawHtml,
        "text/html");
    store.Set("first", entry);
    store.Set("second", entry);

    // Both keys resolve, and to entries with the same creation timestamp.
    var fetchedFirst = store.Get<HtmlSummary>("first");
    var fetchedSecond = store.Get<HtmlSummary>("second");
    fetchedFirst.Should().NotBeNull();
    fetchedSecond.Should().NotBeNull();
    fetchedFirst.CreatedAt.Should().Be(fetchedSecond.CreatedAt);

    // After removal the key no longer resolves.
    store.Unset("first");
    store.Get<HtmlSummary>("first").Should().BeNull();
}
/// <summary>
/// Fetch HTML from URI and generate summary.
/// </summary>
/// <remarks>
/// Issues a single GET request, reads the response body as a string and
/// builds an <c>HtmlSummary</c> from the status code, the requested URI,
/// the body and the response media type. <c>PageLoadTime</c> is set to the
/// number of milliseconds elapsed until <c>SendAsync</c> completed.
/// </remarks>
/// <param name="uri">Remote URL</param>
/// <returns>HTML Summary Object</returns>
public static async Task<HtmlSummary> GenerateSummary(Uri uri)
{
    // NOTE(review): a new HttpClient per call can exhaust sockets under load.
    // Kept as-is here because a shared client would also share its cookie
    // container and connection pool between calls; consider a shared
    // instance or IHttpClientFactory once that is acceptable.
    using (var client = new HttpClient())
    {
        var timer = Stopwatch.StartNew();

        // Request and response are both IDisposable; dispose them
        // deterministically instead of leaving them to the finalizer.
        using (var request = new HttpRequestMessage(HttpMethod.Get, uri))
        using (var response = await client.SendAsync(request))
        {
            // Freeze the measurement as soon as the response is available.
            timer.Stop();
            var duration = timer.Elapsed.TotalMilliseconds;

            var body = await response.Content.ReadAsStringAsync();

            // The Content-Type header may be absent; IfNotNull then skips the
            // projection (presumably yielding null - confirm against helper).
            var mediaType = response.Content.Headers.ContentType.IfNotNull(_ => _.MediaType);

            // Everything handed to HtmlSummary is a plain value or string, so
            // disposing the response afterwards does not invalidate it.
            return new HtmlSummary(response.StatusCode, uri.ToString(), body, mediaType)
            {
                PageLoadTime = duration
            };
        }
    }
}