/// <summary>
///  Runs HtmlSummary.CountFrequency over the document parsed from RawHtml
///  and checks the resulting per-key counts.
/// </summary>
public void TestFrequency()
 {
     // Load RawHtml into a fresh HtmlDocument.
     var document = new HtmlDocument();
     document.LoadHtml(RawHtml);

     // Counts are accumulated into the Frequency member of a new summary instance.
     var tagCounts = new HtmlSummary().Frequency;
     HtmlSummary.CountFrequency(document.DocumentNode, tagCounts);

     tagCounts["html"].Should().Be(1);

     // No "div" key is expected after counting.
     var hasDivEntry = tagCounts.ContainsKey("div");
     hasDivEntry.Should().Be(false);

     tagCounts["p"].Should().Be(28);
     tagCounts["a"].Should().Be(26);

     // No key may begin with '#'.
     var hasHashPrefixedKey = tagCounts.Keys.Any(key => key.StartsWith("#"));
     hasHashPrefixedKey.Should().Be(false);
 }
        /// <summary>
        ///  Stores one HtmlSummary under two keys in an InstanceCache, reads both
        ///  back, then removes one key and checks that it no longer resolves.
        /// </summary>
        public void TestCache()
        {
            const string sourceUrl = "http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4";

            // Begin from a cleared cache.
            var store = new InstanceCache();
            store.Clear();

            // The same summary instance is registered under both keys.
            var cached = new HtmlSummary(HttpStatusCode.OK, sourceUrl, RawHtml, "text/html");
            store.Set("first", cached);
            store.Set("second", cached);

            var firstHit = store.Get<HtmlSummary>("first");
            var secondHit = store.Get<HtmlSummary>("second");

            firstHit.Should().NotBeNull();
            secondHit.Should().NotBeNull();
            firstHit.CreatedAt.Should().Be(secondHit.CreatedAt);

            // After Unset, a lookup of the removed key is expected to yield null.
            store.Unset("first");
            var afterUnset = store.Get<HtmlSummary>("first");
            afterUnset.Should().BeNull();
        }
Example #3
0
        /// <summary>
        ///  Fetch HTML from URI and generate summary
        /// </summary>
        /// <remarks>
        ///  Issues a single GET request, reads the response body as a string and
        ///  builds the summary from the status code, the URI text, the body and the
        ///  Content-Type media type. The request and response messages are disposed
        ///  before the method returns.
        /// </remarks>
        /// <param name="uri">Remote URL</param>
        /// <returns>
        ///  HTML Summary Object whose PageLoadTime is the elapsed time, in
        ///  milliseconds, measured around the SendAsync call
        /// </returns>
        public static async Task<HtmlSummary> GenerateSummary(Uri uri)
        {
            // NOTE(review): a new HttpClient is created per call; consider sharing one
            // instance at class level if this method is called frequently.
            using (var client = new HttpClient())
            using (var request = new HttpRequestMessage(HttpMethod.Get, uri))
            {
                var timer = Stopwatch.StartNew();
                using (var response = await client.SendAsync(request))
                {
                    // Stop the clock as soon as SendAsync completes so that reading
                    // the body below is not included in the measurement.
                    timer.Stop();
                    var duration = timer.Elapsed.TotalMilliseconds;

                    var body = await response.Content.ReadAsStringAsync();
                    var mediaType = response.Content.Headers.ContentType.IfNotNull(_ => _.MediaType);

                    // Only plain values (status code, strings) are handed to the summary,
                    // so disposing the response afterwards is safe.
                    var summary = new HtmlSummary(response.StatusCode, uri.ToString(), body, mediaType)
                    {
                        PageLoadTime = duration
                    };

                    return summary;
                }
            }
        }