/// <summary>
/// Downloads the HTML document at <paramref name="url"/> and collects its image
/// sources and word statistics.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>
/// A <see cref="WebsiteStats"/> holding the distinct, non-blank <c>src</c> values of
/// every <c>img</c> element, the total number of whitespace-separated words found in
/// text nodes whose parent is not <c>script</c> or <c>style</c>, and a per-word
/// occurrence count (case-sensitive). Returns <c>null</c> if loading or parsing throws.
/// </returns>
public WebsiteStats GetAllData(string url)
{
    try
    {
        // HtmlWeb fetches the document over HTTP and parses it.
        var web = new HtmlWeb();
        var doc = web.Load(url);

        // SelectNodes yields null (not an empty collection) when nothing matches,
        // so a page without <img> elements must not be treated as a failure.
        var imageNodes = doc.DocumentNode.SelectNodes("//img");
        var imageList = new List<string>();
        if (imageNodes != null)
        {
            imageList = imageNodes
                .Select(node => node.Attributes["src"])
                .Where(attribute => attribute != null)
                .Select(attribute => attribute.Value)
                .Where(value => !string.IsNullOrWhiteSpace(value))
                .Distinct()
                .ToList();
        }

        // Non-blank text nodes whose parent element is neither <script> nor <style>.
        var textNodes = doc.DocumentNode.SelectNodes(
            "//*[not(self::script or self::style)]/text()[normalize-space()]");
        var text = textNodes == null
            ? string.Empty
            : string.Concat(textNodes.Select(node => " " + node.InnerText));

        // \s+ already treats \r and \n as separators. Stripping them before the split
        // would glue together words that are separated only by a line break.
        var words = Regex.Split(text, @"\s+")
            .Where(word => word != string.Empty)
            .ToList();

        var wordFrequency = new Dictionary<string, int>();
        foreach (var word in words)
        {
            int occurrences;
            wordFrequency.TryGetValue(word, out occurrences); // leaves 0 when the word is new
            wordFrequency[word] = occurrences + 1;
        }

        return new WebsiteStats
        {
            Images = imageList,
            TotalWordsCount = words.Count,
            WordFrequency = wordFrequency
        };
    }
    catch (Exception)
    {
        // TODO: log error
        // Best-effort contract: callers treat null as "could not analyse this URL".
        return null;
    }
}
/// <summary>
/// Looks up the statistics for <paramref name="host"/> through a
/// <c>WebsiteStatsFactory</c> and renders them.
/// </summary>
/// <param name="host">Key passed to the factory indexer.</param>
/// <returns>
/// The default view with the statistics as its model, or the "Index" view with
/// <c>ViewBag.Message</c> set when the factory yields no statistics.
/// </returns>
public IActionResult ShowStats(string host)
{
    var statsFactory = new WebsiteStatsFactory();
    var hostStats = (WebsiteStats)statsFactory[host];

    if (hostStats != null)
    {
        return View(hostStats);
    }

    // Nothing could be produced for this host: return to the index page with a message.
    ViewBag.Message = "Invalid Host Name!";
    return View("Index");
}
/// <summary>
/// Posts <paramref name="WebUrl"/> to this site's <c>api/websiteinfo/loadurl</c>
/// endpoint and returns the resulting statistics wrapped in a JSON envelope.
/// </summary>
/// <param name="WebUrl">Address of the page to analyse; sent as <c>UrlModel.urlWeb</c>.</param>
/// <returns>
/// A JSON object with <c>success</c> (true when the API returned data) and
/// <c>ImageData</c>: the serialized <see cref="WebsiteStats"/> whose word frequencies
/// are reduced to the 10 highest counts, or an empty string when no data came back.
/// </returns>
public JsonResult GetImages(string WebUrl)
{
    // Build the API base address from the current request's scheme, authority and app root.
    var apiUrl = string.Format("{0}://{1}{2}", Request.Url.Scheme, Request.Url.Authority, Url.Content("~"));
    var urlModel = new UrlModel { urlWeb = WebUrl };

    // NOTE(review): .Result blocks the request thread on the outgoing call. Making this
    // action asynchronous would avoid that, but it changes the method signature.
    WebsiteStats webPageData = ApiHelper
        .HttpPostResult<WebsiteStats>(apiUrl + "api/websiteinfo/loadurl", urlModel)
        .Result
        .Result;

    if (webPageData == null)
    {
        return Json(new { success = false, ImageData = string.Empty });
    }

    // Keep only the ten most frequent words. Skip this when the API sent no
    // frequencies, because the query would throw on a null source.
    if (webPageData.WordFrequency != null)
    {
        webPageData.WordFrequency = webPageData.WordFrequency
            .OrderByDescending(entry => entry.Value)
            .Take(10)
            .ToDictionary(entry => entry.Key, entry => entry.Value);
    }

    return Json(new { success = true, ImageData = JsonConvert.SerializeObject(webPageData) });
}