/// <summary>
/// Obtains the document for <paramref name="url"/> through <c>GetHtmlContent</c>,
/// runs every adaptation technique reported by
/// <c>AdaptationService.AppliedTechniques()</c> on it and returns the resulting markup.
/// </summary>
/// <param name="url">
/// Address of the page. When it starts with <c>file:///</c> the
/// <c>DownloadFiles</c> step is skipped.
/// </param>
/// <returns>The <c>OuterHtml</c> of the document node after the adaptation steps ran.</returns>
public static string GetHtmlDocument(string url)
    {
        var html = GetHtmlContent(url);
        AdaptationService service = new AdaptationService();
        var adaptationTechniques  = service.AppliedTechniques();

        // Download all files, except for file:/// URLs.
        // The scheme prefix is an identifier, not linguistic text, so it is compared
        // ordinally instead of with the current culture's rules.
        if (!url.StartsWith("file:///", System.StringComparison.Ordinal))
        {
            DownloadFiles(url, html);
        }

        // Navigation enrichment technique; per the original author's note it adds
        // a skip link at the top of the page.
        if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Navigation.ToString()))
        {
            NavigationEnrichmentTechnique.MakeAdaptation(html);
        }

        // Link enrichment technique for links containing "Read More"-style text;
        // per the original author's note an attribute is added to make the link
        // more understandable.
        if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Link.ToString()))
        {
            string[] readMoreLinks = service.ListTextToRead();
            LinkEnrichmentTechnique.MakeAdaptation(html, url, readMoreLinks);
        }

        // Image enrichment technique.
        if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Image.ToString()))
        {
            ImageEnrichmentTechnique.MakeAdaptation(html, url);
        }

        return(html.DocumentNode.OuterHtml);
    }
Exemplo n.º 2
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> as UTF-8 text, loads it into an
        /// <c>HtmlDocument</c>, runs every adaptation technique reported by
        /// <c>AdaptationService.AppliedTechniques()</c> on it and returns the resulting markup.
        /// </summary>
        /// <param name="url">Address of the page; it is passed to <c>new Uri(url)</c>.</param>
        /// <returns>
        /// The <c>OuterHtml</c> of the adapted document. If an adaptation step throws,
        /// a string starting with <c>AdaptaitionFailed</c> followed by the exception's
        /// <c>Message</c> and <c>Source</c>, separated by <c>&lt;br /&gt;</c> tags.
        /// </returns>
        /// <remarks>
        /// The download and <c>LoadHtml</c> call run outside the <c>try</c> block, so
        /// exceptions raised there propagate to the caller instead of being turned
        /// into the failure string.
        /// </remarks>
        public string GetResult(string url)
        {
            const string agent = "MOZILLA/5.0 (WINDOWS NT 6.1; WOW64) APPLEWEBKIT/537.1 (KHTML, LIKE GECKO) CHROME/21.0.1180.75 SAFARI/537.1";

            HtmlDocument htmlDoc = new HtmlDocument();

            // Fetch the page source as a UTF-8 string with the user-agent header above.
            using (var client = new WebClient())
            {
                client.Headers["User-Agent"] = agent;
                client.Encoding = Encoding.UTF8;
                Uri    uri     = new Uri(url);
                string content = client.DownloadString(uri);
                htmlDoc.LoadHtml(content);
            }

            try
            {
                AdaptationService service = new AdaptationService();
                var adaptationTechniques  = service.AppliedTechniques();

                // Navigation enrichment technique; per the original author's note it
                // adds a skip link at the top of the page.
                if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Navigation.ToString()))
                {
                    NavigationEnrichmentTechnique.MakeAdaptation(htmlDoc);
                }

                // Link enrichment technique for links containing "Read More"-style text;
                // per the original author's note an attribute is added to make the link
                // more understandable.
                if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Link.ToString()))
                {
                    string[] readMoreLinks = service.ListTextToRead();
                    LinkEnrichmentTechnique.MakeAdaptation(htmlDoc, url, readMoreLinks);
                }

                // Image enrichment technique.
                if (adaptationTechniques.Contains(AdaptationTechniquesEnum.Image.ToString()))
                {
                    ImageEnrichmentTechnique.MakeAdaptation(htmlDoc, url);
                }

                // TextImage technique: handled by ImagesInsideLinks.
                if (adaptationTechniques.Contains(AdaptationTechniquesEnum.TextImage.ToString()))
                {
                    ImageEnrichmentTechnique.ImagesInsideLinks(htmlDoc, url);
                }

                if (htmlDoc.DocumentNode != null)
                {
                    // Append a script element whose body is "clearTimeout();clearInterval();".
                    // NOTE(review): both calls are made without a timer id - confirm the intended effect.
                    HtmlNode scripts = htmlDoc.CreateElement("script");
                    scripts.Attributes.Add("type", "text/javascript");
                    scripts.AppendChild(htmlDoc.CreateTextNode("clearTimeout();"));
                    scripts.AppendChild(htmlDoc.CreateTextNode("clearInterval();"));
                    htmlDoc.DocumentNode.AppendChild(scripts);
                }

                return(htmlDoc.DocumentNode.OuterHtml);
            }
            catch (Exception ex)
            {
                // Any failure during adaptation is reported to the caller as text
                // rather than rethrown.
                return("AdaptaitionFailed <br />" + ex.Message + "<br />" + ex.Source + "<br />");
            }
        }