/// <summary>
/// Sanitizes the specified HTML.
/// </summary>
/// <remarks>
/// Processing order: disallowed tags are removed, then for every remaining element
/// disallowed attributes are removed, URL attributes are sanitized, the style is
/// sanitized and attribute values are escaped. Afterwards the optional
/// <c>PostProcessNode</c> hook runs and the DOM is rendered to a string.
/// </remarks>
/// <param name="html">The HTML to sanitize.</param>
/// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
/// <param name="outputFormatter">The CsQuery output formatter used to render the DOM. Using the default formatter if null.</param>
/// <returns>The sanitized HTML.</returns>
public string Sanitize(string html, string baseUrl = "", IOutputFormatter outputFormatter = null)
{
    var dom = CQ.Create(html);

    // remove non-whitelisted tags
    // (the selection is materialized first so the loop body may modify the DOM)
    foreach (var tag in dom["*"].Where(t => !IsAllowedTag(t)).ToList())
    {
        RemoveTag(tag);
    }

    // cleanup attributes
    foreach (var tag in dom["*"].ToList())
    {
        // remove non-whitelisted attributes
        foreach (var attribute in tag.Attributes.Where(a => !IsAllowedAttribute(a)).ToList())
        {
            RemoveAttribute(tag, attribute);
        }

        // sanitize URLs in URL-marked attributes
        foreach (var attribute in tag.Attributes.Where(IsUriAttribute).ToList())
        {
            var url = SanitizeUrl(attribute.Value, baseUrl);

            // a null result means the URL was rejected, so the attribute is dropped
            if (url == null)
            {
                RemoveAttribute(tag, attribute);
            }
            else
            {
                tag.SetAttribute(attribute.Key, url);
            }
        }

        // sanitize the style attribute
        SanitizeStyle(tag.Style, baseUrl);

        // sanitize the value of the attributes
        foreach (var attribute in tag.Attributes.ToList())
        {
            // The '& Javascript include' is a possible method to execute Javascript and can lead to XSS.
            // (see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#.26_JavaScript_includes)
            if (attribute.Value.Contains("&{"))
            {
                RemoveAttribute(tag, attribute);
            }
            else
            {
                // escape attribute value: angle brackets are replaced by their
                // character entities so they cannot be read as markup
                var val = attribute.Value.Replace("<", "&lt;").Replace(">", "&gt;");
                tag.SetAttribute(attribute.Key, val);
            }
        }
    }

    // give PostProcessNode a chance to inspect or replace each node
    if (PostProcessNode != null)
    {
        // snapshot the nodes because replacements change the DOM while iterating
        var nodes = GetAllNodes(dom).ToList();

        foreach (var node in nodes)
        {
            var e = new PostProcessNodeEventArgs { Node = node };
            OnPostProcessNode(e);

            if (e.ReplacementNodes.Any())
            {
                dom[node].ReplaceWith(e.ReplacementNodes);
            }
        }
    }

    // fall back to the default formatter: comments removed, all attributes quoted
    if (outputFormatter == null)
    {
        outputFormatter = new FormatDefault(DomRenderingOptions.RemoveComments | DomRenderingOptions.QuoteAllAttributes, HtmlEncoders.Default);
    }

    var output = dom.Render(outputFormatter);

    return output;
}
/// <summary>
/// Produces the full HTML for this element, including its children, using a
/// default formatter built from the supplied rendering options.
/// </summary>
/// <param name="options">The rendering options handed to the default formatter.</param>
/// <returns>
/// a string of HTML
/// </returns>
public virtual string Render(DomRenderingOptions options)
{
    // build the default formatter with the standard HTML encoder and render in one step
    return new FormatDefault(options, HtmlEncoders.Default).Render(this);
}