/// <summary>
/// Finds http, https and ftp URLs that appear as plain text (preceded by start of input,
/// whitespace, '&gt;' or ';') so they can be wrapped in an anchor.
/// Built once: a compiled regex is costly to construct on every call.
/// </summary>
private static readonly Regex UnlinkedUrlRegex = new Regex(
    @"(^|\s|>|;)(https?|ftp)(:\/\/[-A-Z0-9+&@#\/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#\/%=~_|\[\]])($|\W)",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Finds complete script elements, including ones that span several lines.
/// </summary>
private static readonly Regex ScriptBlockRegex = new Regex(
    "<script.*?</script>",
    RegexOptions.Singleline | RegexOptions.IgnoreCase);

/// <summary>
/// Renders the given Markdown to HTML and cleans up the result: bare URLs are turned into
/// links, script blocks are shown as encoded text, resized images are linked to their
/// original, links that <c>SpamChecker</c> does not count as valid get a restrictive
/// <c>rel</c> attribute, inline styles are removed, and the final markup is passed through
/// a default-constructed <c>HtmlSanitizer</c>.
/// </summary>
/// <param name="html">The Markdown (possibly containing raw HTML) to render.</param>
/// <returns>The cleaned markup wrapped in an <see cref="HtmlString"/>.</returns>
public static HtmlString Sanitize(this string html)
{
    // Run it through Markdown first
    var md = new Markdown();
    html = md.Transform(html);

    // Add links to URLs that aren't "properly" linked in a markdown way
    var linkedHtml = UnlinkedUrlRegex
        .Replace(html, "$1<a href=\"$2$3\">$2$3</a>$4")
        .Replace("href=\"www", "href=\"http://www");

    // Show script blocks as encoded text inside <pre>. Every match is rewritten in a single
    // pass, so replacing one block can never alter the text of another block before it has
    // been handled. HttpUtility is used instead of HttpContext.Current.Server so this also
    // works when there is no current request.
    html = ScriptBlockRegex.Replace(
        linkedHtml,
        match => "<pre>" + HttpUtility.HtmlEncode(match.Value) + "</pre>");

    // Linkify images if they are shown as resized versions (only relevant for new Markdown comments)
    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    var root = doc.DocumentNode;
    if (root != null)
    {
        var images = root.SelectNodes("//img");
        if (images != null)
        {
            foreach (var image in images)
            {
                var src = image.GetAttributeValue("src", "");

                // NOTE(review): this strips every "rs/" occurrence in the URL, not only a
                // dedicated path segment - confirm that no other part of the path can end in "rs/".
                var orgSrc = src.Replace("rs/", "");

                // Nothing to do for images that are not resized or are already inside a link
                if (src == orgSrc || image.ParentNode.Name == "a")
                {
                    continue;
                }

                var a = doc.CreateElement("a");
                a.SetAttributeValue("href", orgSrc);
                a.SetAttributeValue("target", "_blank");
                a.AppendChild(image.Clone());

                image.ParentNode.ReplaceChild(a, image);
            }
        }

        // Any links not going to an "approved" domain need to be marked as nofollow
        var links = root.SelectNodes("//a");
        if (links != null)
        {
            foreach (var link in links)
            {
                var href = link.Attributes["href"];
                if (href == null || SpamChecker.CountValidLinks(href.Value, 0) != 0)
                {
                    continue;
                }

                // Replace whatever rel value the author supplied
                if (link.Attributes["rel"] != null)
                {
                    link.Attributes.Remove("rel");
                }

                link.Attributes.Add("rel", "nofollow noreferrer noopener");
            }
        }

        // Remove styles from all elements
        var elementsWithStyleAttribute = root.SelectNodes("//@style");
        if (elementsWithStyleAttribute != null)
        {
            foreach (var element in elementsWithStyleAttribute)
            {
                element.Attributes.Remove("style");
            }
        }

        using (var writer = new StringWriter())
        {
            doc.Save(writer);
            html = writer.ToString();
        }
    }

    var sanitizer = new HtmlSanitizer();
    var sanitized = sanitizer.Sanitize(html);

    return new HtmlString(sanitized);
}
/// <summary>
/// Converts the given Markdown to HTML, post-processes the markup (links resized images to
/// their original, marks links that <c>SpamChecker</c> does not count as valid with
/// <c>rel="nofollow"</c>, drops inline styles) and returns the output of
/// <c>Utils.Sanitize</c> wrapped in an <see cref="HtmlString"/>.
/// </summary>
/// <param name="html">The Markdown (possibly containing raw HTML) to render.</param>
/// <returns>The processed markup as an <see cref="HtmlString"/>.</returns>
public static HtmlString Sanitize(this string html)
{
    // Markdown rendering comes first; everything below operates on the produced HTML.
    var rendered = new Markdown().Transform(html);

    var document = new HtmlDocument();
    document.LoadHtml(rendered);

    var documentNode = document.DocumentNode;
    if (documentNode == null)
    {
        // Without a document node there is nothing to post-process.
        return new HtmlString(Utils.Sanitize(rendered));
    }

    // Wrap resized images in a link that opens the original image in a new window.
    var imageNodes = documentNode.SelectNodes("//img");
    if (imageNodes != null)
    {
        foreach (var img in imageNodes)
        {
            var shownSrc = img.GetAttributeValue("src", "");
            var originalSrc = shownSrc.Replace("rs/", "");

            // Only images whose source changed and that are not already inside a link qualify.
            if (shownSrc != originalSrc && img.ParentNode.Name != "a")
            {
                var wrapper = document.CreateElement("a");
                wrapper.SetAttributeValue("href", originalSrc);
                wrapper.SetAttributeValue("target", "_blank");
                wrapper.AppendChild(img.Clone());

                img.ParentNode.ReplaceChild(wrapper, img);
            }
        }
    }

    // Links for which the spam checker counts no valid link are marked as nofollow.
    var anchorNodes = documentNode.SelectNodes("//a");
    if (anchorNodes != null)
    {
        foreach (var anchor in anchorNodes)
        {
            var hrefAttribute = anchor.Attributes["href"];
            if (hrefAttribute == null)
            {
                continue;
            }

            if (SpamChecker.CountValidLinks(hrefAttribute.Value, 0) != 0)
            {
                continue;
            }

            // Any existing rel value is discarded before the new one is added.
            if (anchor.Attributes["rel"] != null)
            {
                anchor.Attributes.Remove("rel");
            }

            anchor.Attributes.Add("rel", "nofollow");
        }
    }

    // Strip inline style attributes everywhere.
    var styledNodes = documentNode.SelectNodes("//@style");
    if (styledNodes != null)
    {
        foreach (var styled in styledNodes)
        {
            styled.Attributes.Remove("style");
        }
    }

    // Serialize the modified document back to a string.
    string processed;
    using (var output = new StringWriter())
    {
        document.Save(output);
        processed = output.ToString();
    }

    return new HtmlString(Utils.Sanitize(processed));
}