public WebContentDetails GetWebsiteContent([FromBody] WebAddress website)
        {
            // Builds the response for one requested address: the URL is echoed back in
            // WebsiteURL and DownloadWebsiteContent fills in the remaining fields.
            WebContentDetails websiteContentCountResult = new WebContentDetails();

            // The bound body may be null (for example when the request carries no usable
            // payload), so read the URL defensively instead of dereferencing it directly.
            string requestedUrl = website == null ? null : website.websiteURL;

            websiteContentCountResult.WebsiteURL = requestedUrl;

            // DownloadWebsiteContent does nothing for a null or empty URL, in which case
            // the result is returned with only WebsiteURL assigned.
            DownloadWebsiteContent(requestedUrl, websiteContentCountResult);

            return websiteContentCountResult;
        }
 /// <summary>
 /// Downloads the page at <paramref name="websiteURL"/> to an .html file on disk and
 /// hands that file to <c>ProcessContent</c> so the result object can be populated.
 /// Does nothing when the URL is null or empty.
 /// </summary>
 /// <param name="websiteURL">Absolute http or https address of the page to fetch.</param>
 /// <param name="websiteContentCountResult">Result object passed on to <c>ProcessContent</c>.</param>
 /// <exception cref="System.ArgumentException">
 /// <paramref name="websiteURL"/> is not an absolute http or https address.
 /// </exception>
 private void DownloadWebsiteContent(string websiteURL, WebContentDetails websiteContentCountResult)
 {
     if (string.IsNullOrEmpty(websiteURL))
     {
         return;
     }

     // WebClient is not limited to web addresses (it also serves file: URIs), so a
     // caller-supplied value is restricted to http/https before anything is fetched.
     System.Uri pageUri;
     bool isWebAddress = System.Uri.TryCreate(websiteURL, System.UriKind.Absolute, out pageUri)
                         && (pageUri.Scheme == System.Uri.UriSchemeHttp
                             || pageUri.Scheme == System.Uri.UriSchemeHttps);
     if (!isWebAddress)
     {
         throw new System.ArgumentException("Only absolute http or https URLs can be downloaded.", "websiteURL");
     }

     // The file name is the URL with every non-alphanumeric character removed.
     string fileName = Regex.Replace(websiteURL, @"[^0-9a-zA-Z]+", "");
     string path     = @"D:\" + fileName + @".html";

     // DownloadFile creates (or overwrites) the target file itself, so no empty
     // placeholder has to be created beforehand.
     using (var webClient = new System.Net.WebClient())
     {
         webClient.DownloadFile(pageUri, path);
     }

     ProcessContent(path, websiteContentCountResult);
 }
        /// <summary>
        /// Reads the downloaded HTML file, extracts the markup inside its body element and
        /// stores the image URL list, the word count and the top-word list on the result.
        /// A document without a body element yields an empty image list and a word count of 0.
        /// </summary>
        /// <param name="filePath">Path of the HTML file to read.</param>
        /// <param name="websiteContentCountResult">
        /// Result object whose ImageUrl, WordCount and TopWordCount members are assigned.
        /// </param>
        private void ProcessContent(string filePath, WebContentDetails websiteContentCountResult)
        {
            string       fileContent = File.ReadAllText(filePath);
            HtmlDocument doc         = new HtmlDocument();

            doc.LoadHtml(fileContent);

            // SelectSingleNode returns null when nothing matches (e.g. the download was
            // not an HTML page with a body), so guard before reading InnerHtml.
            HtmlNode      bodyHtml     = doc.DocumentNode.SelectSingleNode("//body");
            string        content      = bodyHtml == null ? null : bodyHtml.InnerHtml;
            int           wordCount    = 0;
            List <string> ImageUrlList = new List <string>();

            if (!string.IsNullOrEmpty(content))
            {
                ExtractImages(ImageUrlList, content);
                wordCount = CountWords(content);
            }

            websiteContentCountResult.ImageUrl     = ImageUrlList;
            websiteContentCountResult.WordCount    = wordCount;
            // NOTE(review): topWordCountList is declared outside this method; presumably
            // it is filled as a side effect of CountWords - confirm, and check that it
            // cannot carry over values from a previous call when the content is empty.
            websiteContentCountResult.TopWordCount = topWordCountList;
        }