// Shared across calls: creating and disposing an HttpClient per request can exhaust
// sockets under load, so a single long-lived instance is reused instead.
private static readonly HttpClient SharedHttpClient = new HttpClient();

/// <summary>
/// Downloads the page at <paramref name="url"/> as a string, collects the text nodes
/// found inside its body (skipping text whose parent is a script element), and returns
/// a WordCloudDto built from the <paramref name="wordCount"/> entries with the highest counts.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="wordCount">Maximum number of entries to keep, taken in descending order of count.</param>
/// <returns>
/// A WordCloudDto whose Url is <paramref name="url"/> and whose Words come from
/// GenerateWords applied to the highest-count entries. Words is built from an empty
/// set when the page has no matching text nodes.
/// </returns>
public async Task<WordCloudDto> GetWordsFromSite(string url, int wordCount)
{
    var content = await SharedHttpClient.GetStringAsync(url);

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(content);

    // Get all text nodes inside the body of the page and exclude any scripts.
    var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so fall back to an empty sequence instead of dereferencing null.
    IEnumerable<string> words = textNodes == null
        ? Enumerable.Empty<string>()
        : textNodes.Select(node => node.InnerText);

    // NOTE(review): presumably maps each word to its number of occurrences — confirm against the helper.
    Dictionary<string, int> allWords = GetWordCountDictionary(words);

    var topWords = allWords
        .OrderByDescending(w => w.Value)
        .Take(wordCount)
        .ToDictionary(w => w.Key, w => w.Value);

    return new WordCloudDto
    {
        Url = url,
        Words = GenerateWords(topWords)
    };
}
/// <summary>
/// Sends a ListWords.Query for the URL carried by <paramref name="model"/> (with Count set to 100),
/// then sends a Create.Command containing the words from that result, and finally renders
/// the Home/Index view with the query result as its model.
/// </summary>
/// <param name="model">Incoming model; only its Url is read here.</param>
/// <returns>The Home/Index view bound to the result of the ListWords query.</returns>
public async Task<ActionResult> GetContent(WordCloudDto model)
{
    var listQuery = new ListWords.Query
    {
        Url = model.Url,
        Count = 100
    };
    var wordCloud = await _mediator.Send(listQuery);

    // NOTE(review): presumably stores the generated words — confirm against the Create handler.
    var createCommand = new Create.Command
    {
        Words = wordCloud.Words
    };
    await _mediator.Send(createCommand);

    return View("~/Views/Home/Index.cshtml", wordCloud);
}