예제 #1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> as a string, collects the text nodes
        /// found inside its body (skipping text whose parent is a script element), and returns
        /// a <see cref="WordCloudDto"/> built from the <paramref name="wordCount"/> words with
        /// the highest counts.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="wordCount">Maximum number of words to keep, taken in descending order of count.</param>
        /// <returns>
        /// A <see cref="WordCloudDto"/> whose <c>Url</c> is <paramref name="url"/> and whose
        /// <c>Words</c> are generated from the top word counts. A page with no matching body
        /// text is treated as having no words rather than causing a failure.
        /// </returns>
        public async Task<WordCloudDto> GetWordsFromSite(string url, int wordCount)
        {
            string content;

            // The client is only needed for the download, so release it before parsing.
            using (var client = new HttpClient())
            {
                content = await client.GetStringAsync(url);
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(content);

            // Get all text nodes inside the body of the page and exclude any scripts.
            // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when the
            // XPath matches nothing, so fall back to an empty sequence instead of dereferencing null.
            var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
            IEnumerable<string> words = textNodes == null
                ? Enumerable.Empty<string>()
                : textNodes.Select(node => node.InnerText);

            Dictionary<string, int> allWords = GetWordCountDictionary(words);

            // Keep only the most frequent entries before handing them to the word generator.
            Dictionary<string, int> topWords = allWords
                .OrderByDescending(w => w.Value)
                .Take(wordCount)
                .ToDictionary(w => w.Key, w => w.Value);

            return new WordCloudDto
            {
                Url   = url,
                Words = GenerateWords(topWords)
            };
        }
예제 #2
0
        /// <summary>
        /// Sends a <c>ListWords.Query</c> for the URL carried by <paramref name="model"/>
        /// (requesting 100 words), forwards the returned words in a <c>Create.Command</c>,
        /// and renders the home index view with the query result as its model.
        /// </summary>
        /// <param name="model">Posted model; only its <c>Url</c> is read here.</param>
        /// <returns>The <c>~/Views/Home/Index.cshtml</c> view bound to the query result.</returns>
        public async Task<ActionResult> GetContent(WordCloudDto model)
        {
            var listWordsQuery = new ListWords.Query
            {
                Url   = model.Url,
                Count = 100
            };
            var wordCloud = await _mediator.Send(listWordsQuery);

            // Pass the words from the query result on through the mediator before rendering.
            var createCommand = new Create.Command
            {
                Words = wordCloud.Words
            };
            await _mediator.Send(createCommand);

            return View("~/Views/Home/Index.cshtml", wordCloud);
        }