Esempio n. 1
0
        // Matches a bare http(s)/ftp URL that follows start-of-input, whitespace, '>' or ';'.
        // Kept in a static field so the compiled regex is built once instead of on every call.
        private static readonly Regex SanitizeBareUrlRegex = new Regex(@"(^|\s|>|;)(https?|ftp)(:\/\/[-A-Z0-9+&@#\/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#\/%=~_|\[\]])($|\W)", RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // Matches a whole <script ...>...</script> run (lazy, spanning newlines, any letter case).
        private static readonly Regex SanitizeScriptBlockRegex = new Regex("<script.*?</script>", RegexOptions.Singleline | RegexOptions.IgnoreCase);

        /// <summary>
        /// Runs the text through Markdown and post-processes the resulting HTML: bare URLs are turned
        /// into links, <c>&lt;script&gt;</c> blocks are replaced by an HTML-encoded <c>&lt;pre&gt;</c>,
        /// images whose <c>src</c> contains "rs/" are wrapped in a link to the same path without "rs/",
        /// links for which <c>SpamChecker.CountValidLinks</c> returns 0 get
        /// <c>rel="nofollow noreferrer noopener"</c>, and <c>style</c> attributes are removed.
        /// The result is finally passed through <c>HtmlSanitizer.Sanitize</c>.
        /// </summary>
        /// <param name="html">The text to run through the Markdown transformer and clean up.</param>
        /// <returns>The processed markup wrapped in an <see cref="HtmlString"/>.</returns>
        public static HtmlString Sanitize(this string html)
        {
            // Run it through Markdown first
            var md = new Markdown();

            html = md.Transform(html);

            // Add links to URLs that aren't "properly" linked in a markdown way.
            // NOTE(review): '>' is an accepted prefix, so a URL that directly follows a tag
            // (e.g. the text of an existing anchor) also matches - confirm this is intended.
            var linkedHtml = SanitizeBareUrlRegex
                .Replace(html, "$1<a href=\"$2$3\">$2$3</a>$4")
                .Replace("href=\"www", "href=\"http://www");

            // Show script blocks as encoded text instead of markup. A single regex pass encodes every
            // match exactly once; HttpUtility.HtmlEncode is used rather than
            // HttpContext.Current.Server.HtmlEncode so this also works when there is no current request.
            html = SanitizeScriptBlockRegex.Replace(linkedHtml, match => $"<pre>{HttpUtility.HtmlEncode(match.Value)}</pre>");

            // Linkify images if they are shown as resized versions (only relevant for new Markdown comments)
            var doc = new HtmlDocument();

            doc.LoadHtml(html);

            var root = doc.DocumentNode;

            if (root != null)
            {
                var images = root.SelectNodes("//img");
                if (images != null)
                {
                    foreach (var image in images)
                    {
                        var src    = image.GetAttributeValue("src", "");
                        var orgSrc = src.Replace("rs/", "");

                        // Nothing to do when the src has no "rs/" segment or the image is already linked
                        if (src == orgSrc || image.ParentNode.Name == "a")
                        {
                            continue;
                        }

                        var a = doc.CreateElement("a");
                        a.SetAttributeValue("href", orgSrc);
                        a.SetAttributeValue("target", "_blank");

                        a.AppendChild(image.Clone());

                        image.ParentNode.ReplaceChild(a, image);
                    }
                }

                // Any links not going to an "approved" domain need to be marked as nofollow
                var links = root.SelectNodes("//a");
                if (links != null)
                {
                    foreach (var link in links)
                    {
                        var href = link.Attributes["href"];
                        if (href != null && SpamChecker.CountValidLinks(href.Value, 0) == 0)
                        {
                            // Drop any existing rel value so ours is the only one
                            if (link.Attributes["rel"] != null)
                            {
                                link.Attributes.Remove("rel");
                            }
                            link.Attributes.Add("rel", "nofollow noreferrer noopener");
                        }
                    }
                }

                // Remove styles from all elements
                var elementsWithStyleAttribute = root.SelectNodes("//@style");
                if (elementsWithStyleAttribute != null)
                {
                    foreach (var element in elementsWithStyleAttribute)
                    {
                        element.Attributes.Remove("style");
                    }
                }

                // Serialize the modified document back to a string
                using (var writer = new StringWriter())
                {
                    doc.Save(writer);
                    html = writer.ToString();
                }
            }

            var sanitizer = new HtmlSanitizer();
            var sanitized = sanitizer.Sanitize(html);

            return(new HtmlString(sanitized));
        }
Esempio n. 2
0
        /// <summary>
        /// Runs the text through Markdown and post-processes the resulting HTML: images whose
        /// <c>src</c> contains "rs/" are wrapped in a link to the same path without "rs/", links for
        /// which <c>SpamChecker.CountValidLinks</c> returns 0 get <c>rel="nofollow"</c>, and
        /// <c>style</c> attributes are removed. The result is passed through <c>Utils.Sanitize</c>.
        /// </summary>
        /// <param name="html">The text to run through the Markdown transformer and clean up.</param>
        /// <returns>The processed markup wrapped in an <see cref="HtmlString"/>.</returns>
        public static HtmlString Sanitize(this string html)
        {
            // Markdown goes first; everything below works on its HTML output
            var rendered = new Markdown().Transform(html);

            var document = new HtmlDocument();
            document.LoadHtml(rendered);

            var documentNode = document.DocumentNode;
            if (documentNode == null)
            {
                // No DOM to post-process: sanitize the Markdown output as it is
                return new HtmlString(Utils.Sanitize(rendered));
            }

            // Resized images (src containing "rs/") get wrapped in a link to the un-resized path
            // (only relevant for new Markdown comments)
            var imageNodes = documentNode.SelectNodes("//img");
            if (imageNodes != null)
            {
                foreach (var img in imageNodes)
                {
                    var resizedSrc = img.GetAttributeValue("src", "");
                    var fullSrc    = resizedSrc.Replace("rs/", "");

                    // Only wrap when there was an "rs/" segment and the image is not already inside a link
                    if (resizedSrc != fullSrc && img.ParentNode.Name != "a")
                    {
                        var wrapper = document.CreateElement("a");
                        wrapper.SetAttributeValue("href", fullSrc);
                        wrapper.SetAttributeValue("target", "_blank");
                        wrapper.AppendChild(img.Clone());

                        img.ParentNode.ReplaceChild(wrapper, img);
                    }
                }
            }

            // Links that do not point at an "approved" domain are marked nofollow
            var anchors = documentNode.SelectNodes("//a");
            if (anchors != null)
            {
                foreach (var anchor in anchors)
                {
                    var href = anchor.Attributes["href"];
                    if (href == null || SpamChecker.CountValidLinks(href.Value, 0) != 0)
                    {
                        continue;
                    }

                    // Replace any rel value already present
                    if (anchor.Attributes["rel"] != null)
                    {
                        anchor.Attributes.Remove("rel");
                    }
                    anchor.Attributes.Add("rel", "nofollow");
                }
            }

            // Strip style attributes everywhere
            var styledNodes = documentNode.SelectNodes("//@style");
            if (styledNodes != null)
            {
                foreach (var styled in styledNodes)
                {
                    styled.Attributes.Remove("style");
                }
            }

            // Serialize the modified document and hand it to the final sanitizer
            string serialized;
            using (var output = new StringWriter())
            {
                document.Save(output);
                serialized = output.ToString();
            }

            return new HtmlString(Utils.Sanitize(serialized));
        }