/// <summary>
/// Loads the document for <paramref name="url"/> through <c>GetHtmlContent</c>, runs every
/// adaptation technique that <c>AdaptationService.AppliedTechniques()</c> reports as applied,
/// and returns the resulting markup.
/// </summary>
/// <param name="url">
/// Address of the page to adapt. It is dereferenced without a null check, so it must not be null.
/// </param>
/// <returns>The outer HTML of the document node after the adaptations have run.</returns>
public static string GetHtmlDocument(string url)
{
    var html = GetHtmlContent(url);
    AdaptationService service = new AdaptationService();
    var adaptationTechniques = service.AppliedTechniques();

    // Download all files; skipped when the URL starts with "file:///".
    // The prefix is a machine-readable token, so it is compared ordinally instead of
    // with the current culture's linguistic rules.
    if (!url.StartsWith("file:///", StringComparison.Ordinal))
    {
        DownloadFiles(url, html);
    }

    // Navigation enrichment technique: adds a skip link at the top of the page.
    if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Navigation.ToString()))
    {
        NavigationEnrichmentTechnique.MakeAdaptation(html);
    }

    // Link enrichment technique: links containing "Read More"-style text get an
    // attribute that makes them more understandable.
    if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Link.ToString()))
    {
        string[] readMoreLinks = service.ListTextToRead();
        LinkEnrichmentTechnique.MakeAdaptation(html, url, readMoreLinks);
    }

    // Image enrichment technique.
    if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Image.ToString()))
    {
        ImageEnrichmentTechnique.MakeAdaptation(html, url);
    }

    return html.DocumentNode.OuterHtml;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> with a <c>WebClient</c> (fixed desktop-browser
/// User-Agent, UTF-8 encoding), parses it into an <c>HtmlDocument</c>, runs every adaptation
/// technique that <c>AdaptationService.AppliedTechniques()</c> reports as applied, appends a
/// script element to the document, and returns the resulting markup.
/// </summary>
/// <param name="url">Absolute address of the page to download and adapt.</param>
/// <returns>
/// The outer HTML of the adapted document. If an exception is thrown during the adaptation
/// step, a string starting with "AdaptaitionFailed" followed by the exception's message and
/// source is returned instead.
/// </returns>
/// <remarks>
/// The download and parse happen outside the try block, so exceptions raised there
/// (for example by <c>new Uri(url)</c> or <c>DownloadString</c>) propagate to the caller.
/// </remarks>
public string GetResult(string url)
{
    const string agent = "MOZILLA/5.0 (WINDOWS NT 6.1; WOW64) APPLEWEBKIT/537.1 (KHTML, LIKE GECKO) CHROME/21.0.1180.75 SAFARI/537.1";

    HtmlDocument htmlDoc = new HtmlDocument();

    // Fetch the raw markup as a UTF-8 string and hand it to the parser.
    using (var client = new WebClient())
    {
        client.Headers["User-Agent"] = agent;
        client.Encoding = Encoding.UTF8;
        Uri uri = new Uri(url);
        string content = client.DownloadString(uri);
        htmlDoc.LoadHtml(content);
    }

    try
    {
        AdaptationService service = new AdaptationService();
        var adaptationTechniques = service.AppliedTechniques();

        // Navigation enrichment technique: adds a skip link at the top of the page.
        if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Navigation.ToString()))
        {
            NavigationEnrichmentTechnique.MakeAdaptation(htmlDoc);
        }

        // Link enrichment technique: links containing "Read More"-style text get an
        // attribute that makes them more understandable.
        if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Link.ToString()))
        {
            string[] readMoreLinks = service.ListTextToRead();
            LinkEnrichmentTechnique.MakeAdaptation(htmlDoc, url, readMoreLinks);
        }

        // Image enrichment technique.
        if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Image.ToString()))
        {
            ImageEnrichmentTechnique.MakeAdaptation(htmlDoc, url);
        }

        // Text-image technique; presumably targets images nested inside links,
        // judging by the helper's name - confirm against ImageEnrichmentTechnique.
        if (adaptationTechniques.Contains(AdaptationTechniquesEnum.TextImage.ToString()))
        {
            ImageEnrichmentTechnique.ImagesInsideLinks(htmlDoc, url);
        }

        // Append <script type="text/javascript">clearTimeout();clearInterval();</script>
        // as the last child of the document node.
        if (htmlDoc.DocumentNode != null)
        {
            HtmlNode scripts = htmlDoc.CreateElement("script");
            scripts.Attributes.Add("type", "text/javascript");
            scripts.AppendChild(htmlDoc.CreateTextNode("clearTimeout();"));
            scripts.AppendChild(htmlDoc.CreateTextNode("clearInterval();"));
            htmlDoc.DocumentNode.AppendChild(scripts);
        }

        return htmlDoc.DocumentNode.OuterHtml;
    }
    catch (Exception ex)
    {
        // NOTE(review): the exception text is concatenated into HTML without encoding;
        // if callers render this string in a page, consider HTML-encoding it there.
        return "AdaptaitionFailed <br />" + ex.Message + "<br />" + ex.Source + "<br />";
    }
}