/// <summary>
/// Sanitizes the specified HTML.
/// </summary>
/// <remarks>
/// Elements that do not match the <c>AllowedTags</c> selector list are removed, as are attributes
/// whose name is not in <c>AllowedAttributesSet</c>. Attributes listed in <c>UriAttributes</c> are
/// passed through <c>SanitizeUrl</c> and dropped when it returns <c>null</c>. Each element's style
/// is handed to <c>SanitizeStyle</c>. Any remaining attribute whose value matches <c>JSInclude</c>
/// is removed, and angle brackets in the surviving attribute values are replaced by character
/// entities. Comments are stripped and all attributes are quoted when the document is rendered.
/// </remarks>
/// <param name="html">The HTML to sanitize.</param>
/// <param name="baseUrl">The base URL relative URLs are resolved against. No resolution if empty.</param>
/// <returns>The sanitized HTML.</returns>
public string Sanitize(string html, string baseUrl = "")
{
    var dom = CQ.Create(html);

    // Remove every element that is not matched by the allowed-tag selector list.
    dom["*"].Not(string.Join(",", AllowedTags.ToArray())).Remove();

    foreach (var tag in dom["*"])
    {
        // ToList() takes a snapshot so attributes can be removed while looping.
        foreach (var attribute in tag.Attributes.Where(a => !AllowedAttributesSet.Contains(a.Key)).ToList())
        {
            tag.RemoveAttribute(attribute.Key);
        }

        foreach (var attribute in tag.Attributes.Where(a => UriAttributes.Contains(a.Key)).ToList())
        {
            var url = SanitizeUrl(attribute.Value, baseUrl);
            if (url == null)
            {
                tag.RemoveAttribute(attribute.Key);
            }
            else
            {
                tag.SetAttribute(attribute.Key, url);
            }
        }

        SanitizeStyle(tag.Style, baseUrl);

        foreach (var attribute in tag.Attributes.ToList())
        {
            if (JSInclude.IsMatch(attribute.Value))
            {
                tag.RemoveAttribute(attribute.Key);

                // Stop here: the SetAttribute call below would otherwise re-add
                // the attribute that was just removed.
                continue;
            }

            var val = attribute.Value;
            if (val.Contains('<') || val.Contains('>'))
            {
                // Neutralise angle brackets so an attribute value cannot smuggle in markup.
                tag.SetAttribute(attribute.Key, val.Replace("<", "&lt;").Replace(">", "&gt;"));
            }
        }
    }

    return dom.Render(DomRenderingOptions.RemoveComments | DomRenderingOptions.QuoteAllAttributes);
}
/// <summary>
/// Recursively sanitizes the given node and its descendants in place.
/// </summary>
/// <remarks>
/// An element whose tag name fails <c>IsValidNode</c> is removed together with its subtree.
/// Attributes failing <c>IsValidAttribute</c> are removed; URI and srcset attributes that
/// <c>UrlSanitizer</c> does not consider safe are blanked rather than removed.
/// </remarks>
/// <param name="node">The node to sanitize.</param>
private void Sanitize(INode node)
{
    if (node is IElement element)
    {
        if (!IsValidNode(element.TagName))
        {
            // The element is gone, so there is nothing left to inspect beneath it.
            element.Remove();
            return;
        }

        // Walk backwards so removing an attribute does not shift the indices still to be visited.
        var attributes = element.Attributes;
        var index = attributes.Length;
        while (--index >= 0)
        {
            var current = attributes[index];
            var name = current.Name;

            if (!IsValidAttribute(name))
            {
                element.RemoveAttribute(current.NamespaceUri, name);
                continue;
            }

            bool mustBlank;
            if (UriAttributes.Contains(name))
            {
                mustBlank = !UrlSanitizer.IsSafeUrl(current.Value);
            }
            else if (SrcsetAttributes.Contains(name))
            {
                mustBlank = !UrlSanitizer.IsSafeSrcset(current.Value);
            }
            else
            {
                mustBlank = false;
            }

            if (mustBlank)
            {
                current.Value = "";
            }
        }
    }

    // Children are also visited last-to-first because a child may remove itself.
    var position = node.ChildNodes.Length;
    while (--position >= 0)
    {
        Sanitize(node.ChildNodes[position]);
    }
}
/// <summary>
/// Checks whether the name of the given attribute is listed in <c>UriAttributes</c>,
/// i.e. whether the attribute can contain a URI.
/// </summary>
/// <param name="attribute">The attribute to check.</param>
/// <returns><c>true</c> if the attribute can contain a URI; otherwise, <c>false</c>.</returns>
private bool IsUriAttribute(IAttr attribute) => UriAttributes.Contains(attribute.Name);
/// <summary>
/// Checks whether the key of the given name/value pair is listed in <c>UriAttributes</c>,
/// i.e. whether the attribute can contain a URI.
/// </summary>
/// <param name="attribute">The attribute as a name/value pair; only the key is examined.</param>
/// <returns><c>true</c> if the attribute can contain a URI; otherwise, <c>false</c>.</returns>
private bool IsUriAttribute(KeyValuePair<string, string> attribute) => UriAttributes.Contains(attribute.Key);