Exemple #1
0
        /// <summary>
        /// Initializes the sanitizer from the default rule set: the rule set is
        /// created once via the supplied factory and then used to build both the
        /// default sanitizer instance and the default base URL.
        /// </summary>
        /// <param name="defaultHtmlSanitizationRuleSetFactory">Factory that creates the default sanitization rule set.</param>
        public HtmlSanitizer(IDefaultHtmlSanitizationRuleSetFactory defaultHtmlSanitizationRuleSetFactory)
        {
            // Create the rule set a single time so the sanitizer and the base URL
            // are derived from the same instance.
            var ruleSet = defaultHtmlSanitizationRuleSetFactory.Create();

            _defaultSanitizer = CreateSanitizer(ruleSet);
            _defaultBaseUrl = GetBaseUrl(ruleSet);
        }
Exemple #2
0
        /// <summary>
        /// Takes raw HTML input and cleans it against a whitelist of tags and attributes.
        /// </summary>
        /// <param name="source">Html source. Null, empty or whitespace-only input yields <c>null</c>.</param>
        /// <param name="ruleSet">
        /// A custom rule set: the permitted tags, the attributes permitted on each tag and an
        /// optional <c>OnHtmlSanitized</c> callback. When <c>null</c>, a rule set is created
        /// from the default rule set factory.
        /// </param>
        /// <returns>
        /// Clean output as an HtmlNode object (the document node of the parsed HTML), or
        /// <c>null</c> when there is no input or the HTML could not be loaded.
        /// </returns>
        public HtmlNode SanitizeAsHtmlNode(string source, HtmlSanitizationRuleSet ruleSet = null)
        {
            if (string.IsNullOrWhiteSpace(source))
            {
                return null;
            }

            source = FixUnEncodedLt(source);
            HtmlDocument html = GetHtml(source);

            if (html == null)
            {
                return null;
            }

            // All the nodes
            HtmlNode allNodes = html.DocumentNode;

            // Select whitelist tag names
            ruleSet = ruleSet ?? _defaultHtmlSanitizationRuleSetFactory.Create();

            // Scrub tags not in whitelist
            CleanNodes(allNodes, ruleSet.PermittedTags);

            // Filter the attributes of the remaining
            foreach (var tag in ruleSet.PermittedTags)
            {
                // A LINQ query is never null; an empty result simply skips the loop below.
                IEnumerable<HtmlNode> nodes = (from n in allNodes.DescendantsAndSelf()
                                               where n.Name == tag.Tag
                                               select n);

                foreach (var n in nodes)
                {
                    // No attributes? Skip.
                    if (!n.HasAttributes)
                    {
                        continue;
                    }

                    // Snapshot the attributes so removing one does not disturb the enumeration.
                    HtmlAttribute[] attr = n.Attributes.ToArray();
                    foreach (HtmlAttribute a in attr)
                    {
                        if (!tag.PermittedAttributes.Contains(a.Name))
                        {
                            a.Remove(); // Attribute wasn't in the whitelist
                        }
                        else if (a.Name == "href" || a.Name == "src")
                        {
                            // URL-bearing attributes: strip script-injection tokens.
                            a.Value = StripScriptTokensFromUrl(a.Value);
                        }
                        else if (a.Name == "class" || a.Name == "style")
                        {
                            a.Value = AntiXssEncoder.CssEncode(a.Value);
                        }
                        else
                        {
                            // Decode first so already-encoded values are not double encoded.
                            a.Value = HttpUtility.HtmlAttributeEncode(HttpUtility.HtmlDecode(a.Value));
                        }
                    }
                }
            }

            if (ruleSet.OnHtmlSanitized != null)
            {
                ruleSet.OnHtmlSanitized(html);
            }

            // NOTE(review): the previous implementation ran the result through
            // Microsoft.Security.Application.Sanitizer.GetSafeHtmlFragment(allNodes.InnerHtml)
            // as a final pass to strip anything missed above. That pass is no longer applied,
            // so the whitelist filtering in this method is the only protection.
            return allNodes;
        }

        /// <summary>
        /// Removes line breaks, tabs and the tokens "javascript" and "eval" from a URL attribute value.
        /// </summary>
        /// <param name="value">Raw attribute value; may be null or empty.</param>
        /// <returns>The cleaned value, or an empty string when <paramref name="value"/> is null or empty.</returns>
        private static string StripScriptTokensFromUrl(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return "";
            }

            // Browsers drop tabs and newlines inside URLs, so "java\tscript:" must be
            // collapsed before looking for tokens.
            string cleaned = value.Replace("\r", "").Replace("\n", "").Replace("\t", "");

            // The earlier "IndexOf(...) < 10" test treated "not found" (-1) as a match and
            // skipped stripping only when BOTH tokens sat past index 10, which let a padded
            // "javascript:eval(...)" through. Tokens are now stripped wherever they occur.
            // Repeat until stable so removing one token cannot assemble another
            // (e.g. "javajavascriptscript:" or "javaevalscript:").
            bool removed;
            do
            {
                removed = false;

                // URL schemes are case-insensitive, so "JavaScript:" must match too.
                int index = cleaned.IndexOf("javascript", System.StringComparison.OrdinalIgnoreCase);
                if (index >= 0)
                {
                    cleaned = cleaned.Remove(index, "javascript".Length);
                    removed = true;
                }

                // "eval" stays case-sensitive, as in the original filter.
                index = cleaned.IndexOf("eval", System.StringComparison.Ordinal);
                if (index >= 0)
                {
                    cleaned = cleaned.Remove(index, "eval".Length);
                    removed = true;
                }
            }
            while (removed);

            // NOTE(review): this is still a blacklist and also alters harmless URLs that
            // merely contain these words; a scheme allow-list would be more robust.
            return cleaned;
        }