Ejemplo n.º 1
0
        /// <summary>
        /// Parses the given text into an <see cref="HtmlDocument"/> and re-encodes
        /// the contents of every <c>code</c> element found in it.
        /// </summary>
        /// <param name="source">The markup to parse; a null value is parsed as an empty string.</param>
        /// <returns>The parsed document with its <c>code</c> elements sanitized.</returns>
        private static HtmlDocument GetHtml(string source)
        {
            var document = new HtmlDocument
            {
                OptionFixNestedTags = true,
                OptionAutoCloseOnEnd = true,
                OptionDefaultStreamEncoding = Encoding.UTF8
            };
            document.LoadHtml(source ?? "");

            // Code blocks are encoded on their own so that a later cleanup pass
            // does not strip their contents out completely.
            var codeNodes = document.DocumentNode
                .DescendantsAndSelf()
                .Where(node => node.Name == "code");

            foreach (var codeNode in codeNodes)
            {
                // Security measure: only "style" and "class" survive on a code tag.
                // The attributes are copied to an array first because removing
                // them changes the collection being walked.
                foreach (HtmlAttribute attribute in codeNode.Attributes.ToArray())
                {
                    bool isAllowed = attribute.Name == "style" || attribute.Name == "class";
                    if (!isAllowed)
                    {
                        attribute.Remove();
                    }
                }

                // Decode first so that already-encoded content is not encoded twice.
                string decoded = System.Net.WebUtility.HtmlDecode(codeNode.InnerHtml);
                codeNode.InnerHtml = System.Net.WebUtility.HtmlEncode(decoded);
            }

            return document;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Asynchronously downloads an internet resource and parses it into an HTML document.
        /// </summary>
        /// <param name="uri">Url to the html document</param>
        /// <param name="encoding">The encoding used to decode the response body; when null, decoding is left to <c>HttpContent.ReadAsStringAsync</c>.</param>
        /// <param name="credentials">The credentials to use for authenticating the web request; when null, the default credentials are used.</param>
        /// <returns>The document parsed from the downloaded markup.</returns>
        /// <exception cref="Exception">The response status code was anything other than 200 (OK).</exception>
        public async Task<HtmlDocument> LoadFromWebAsync(Uri uri, Encoding encoding, NetworkCredential credentials)
        {
            var clientHandler = new HttpClientHandler();
            if (credentials == null)
            {
                clientHandler.UseDefaultCredentials = true;
            }
            else
            {
                clientHandler.Credentials = credentials;
            }

            // Disposing the client also disposes the handler passed to its constructor;
            // the response is disposed as well so the connection is released promptly.
            using (var client = new HttpClient(clientHandler))
            using (var response = await client.GetAsync(uri))
            {
                if (response.StatusCode != HttpStatusCode.OK)
                {
                    throw new Exception("Error downloading html");
                }

                string html;
                if (encoding != null)
                {
                    // An explicit encoding overrides whatever the response headers declare.
                    using (var reader = new StreamReader(await response.Content.ReadAsStreamAsync(), encoding))
                    {
                        // Read asynchronously so the calling thread is not blocked on network I/O.
                        html = await reader.ReadToEndAsync();
                    }
                }
                else
                {
                    html = await response.Content.ReadAsStringAsync();
                }

                var doc = new HtmlDocument();

                // Give subscribers a chance to configure the document before it is parsed.
                if (PreHandleDocument != null)
                {
                    PreHandleDocument(doc);
                }

                doc.LoadHtml(html);
                return doc;
            }
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Builds an HTML node by parsing a string of literal HTML.
 /// </summary>
 /// <param name="html">The HTML text.</param>
 /// <returns>
 /// The first child of the root of the document parsed from <paramref name="html"/>;
 /// any further top-level nodes in the text are not returned.
 /// </returns>
 public static HtmlNode CreateNode(string html)
 {
     // REVIEW: not optimal - a whole throwaway document is created
     // just to parse one fragment.
     var scratchDocument = new HtmlDocument();
     scratchDocument.LoadHtml(html);

     HtmlNode root = scratchDocument.DocumentNode;
     return root.FirstChild;
 }