/// <summary>
        /// Begins a block: separates it from any previous output with a blank line, then writes
        /// the block signature (prefix, attributes and the terminating dot or dots).
        /// </summary>
        /// <param name="prefix">
        /// Signature text written before the attributes. An empty prefix is replaced with "p" when
        /// the previous block was multiline, this block is multiline, or the tag carries a
        /// class, id, style or lang attribute.
        /// </param>
        /// <param name="start">Start tag whose attributes are written after the prefix.</param>
        /// <param name="multiline">When <c>true</c>, one extra '.' is written after the signature.</param>
        private void StartBlock(string prefix, HtmlStartTag start, bool multiline = false)
        {
            var followsContent = _minify == MinifyState.Compressed
                || _minify == MinifyState.SpaceNeeded
                || _minify == MinifyState.BlockEnd;

            if (followsContent)
            {
                // Two line terminators in a row leave an empty line before the new block.
                _writer.Write(_settings.NewLineChars);
                _writer.Write(_settings.NewLineChars);
                _minify = MinifyState.LastCharWasSpace;
            }

            if (prefix == "")
            {
                var needsExplicitSignature = _lastWasMultiline
                    || multiline
                    || start.Any(attr => attr.Key == "class"
                        || attr.Key == "id"
                        || attr.Key == "style"
                        || attr.Key == "lang");

                if (needsExplicitSignature)
                {
                    prefix = "p";
                }
            }

            _writer.Write(prefix);
            WriteAttributes(start, _writer);

            if (prefix != "")
            {
                // A signature was written, so terminate it and request a space before the content.
                _writer.Write('.');
                _minify = MinifyState.SpaceNeeded;
            }

            if (multiline)
            {
                _writer.Write('.');
            }

            _outputStarted = true;
        }
Example #2
0
        /// <summary>
        /// Default rule for deciding whether an element is skipped: the element is skipped when
        /// its name is listed in <c>_elementsToSkip</c>, or when its inline style declares
        /// <c>display</c> with the single argument <c>none</c> or <c>visibility</c> with the
        /// single argument <c>hidden</c>.
        /// </summary>
        /// <param name="start">The start tag to inspect.</param>
        /// <returns><c>true</c> when the element should be skipped; otherwise <c>false</c>.</returns>
        private bool SkipElementDefault(HtmlStartTag start)
        {
            if (_elementsToSkip.Contains(start.Value))
            {
                return true;
            }

            var inlineStyle = start["style"];
            if (string.IsNullOrEmpty(inlineStyle))
            {
                return false;
            }

            var properties = new CssTokenizer(inlineStyle).Normalize().OfType<CssPropertyToken>();
            foreach (var property in properties)
            {
                // Pick the argument value that hides the element for this property, if any.
                string hidingValue;
                if (property.Data == "display")
                {
                    hidingValue = "none";
                }
                else if (property.Data == "visibility")
                {
                    hidingValue = "hidden";
                }
                else
                {
                    continue;
                }

                if (property.ArgumentCount == 1 && property.ArgumentTokens.Single().Data == hidingValue)
                {
                    return true;
                }
            }

            return false;
        }
Example #3
0
 /// <summary>
 /// Decides whether the element opened by <paramref name="start"/> should be skipped.
 /// When <c>SkipElement</c> is set it makes the decision and receives
 /// <c>SkipElementDefault</c> as its second argument; otherwise
 /// <c>SkipElementDefault</c> is called directly.
 /// </summary>
 /// <param name="start">The start tag of the element being considered.</param>
 /// <returns><c>true</c> when the element should be skipped.</returns>
 internal bool ShouldSkipElement(HtmlStartTag start)
 {
     return SkipElement == null
         ? SkipElementDefault(start)
         : SkipElement(start, SkipElementDefault);
 }
        /// <summary>
        /// Registers a new start element: calls <c>WriteStartElementEnd</c> with the new name,
        /// then pushes a tag for the lower-cased <paramref name="localName"/> onto the node stack.
        /// </summary>
        /// <param name="prefix">The namespace prefix of the element (not used by this method).</param>
        /// <param name="localName">The local name of the element.</param>
        /// <param name="ns">The namespace URI of the element (not used by this method).</param>
        public override void WriteStartElement(string prefix, string localName, string ns)
        {
            WriteStartElementEnd(localName);

            _nodes.Push(new HtmlStartTag(localName.ToLowerInvariant()));

            _lastWasMultiline = false;
            _state = InternalState.Element;
        }
Example #5
0
        /// <summary>
        /// Adjusts the attributes of a math start tag: every attribute whose key matches
        /// "definitionurl" (as decided by the <c>Is</c> helper) is renamed to "definitionURL",
        /// keeping its value.
        /// </summary>
        /// <param name="tag">The tag token to adjust; its attribute list is modified in place.</param>
        /// <returns>The same <paramref name="tag"/> instance.</returns>
        public static HtmlTagNode MathMlConfig(HtmlStartTag tag)
        {
            var attributes = tag.Attributes;
            var total = attributes.Count;
            var index = 0;

            while (index < total)
            {
                var current = attributes[index];

                if (current.Key.Is("definitionurl"))
                {
                    attributes[index] = new KeyValuePair<string, string>("definitionURL", current.Value);
                }

                index++;
            }

            return tag;
        }
Example #6
0
        /// <summary>
        /// Adjusts an SVG start tag: the tag name is replaced when <c>svgAdjustedTagNames</c>
        /// has an entry for it, and every attribute whose key has an entry in
        /// <c>svgAttributeNames</c> is renamed accordingly, keeping its value.
        /// </summary>
        /// <param name="tag">The tag token to adjust; its name and attribute list are modified in place.</param>
        /// <returns>The same <paramref name="tag"/> instance.</returns>
        public static HtmlTagNode SvgConfig(this HtmlStartTag tag)
        {
            string replacement;

            // The attribute count is captured before the tag name is touched.
            var attributes = tag.Attributes;
            var total = attributes.Count;

            if (svgAdjustedTagNames.TryGetValue(tag.Value, out replacement))
            {
                tag.Value = replacement;
            }

            var index = 0;
            while (index < total)
            {
                var current = attributes[index];

                if (svgAttributeNames.TryGetValue(current.Key, out replacement))
                {
                    attributes[index] = new KeyValuePair<string, string>(replacement, current.Value);
                }

                index++;
            }

            return tag;
        }
        /// <summary>
        /// Writes the class, id, style and lang attributes of <paramref name="start"/> as
        /// attribute modifiers: <c>(class#id)</c>, <c>{style}</c> and <c>[lang]</c>.
        /// </summary>
        /// <remarks>
        /// Each modifier group is written independently, so a tag that carries several of these
        /// attributes (for example both a class and a style) keeps all of them in the output.
        /// NOTE(review): the modifier syntax looks like Textile's block attributes, which may be
        /// combined on one signature; confirm against the writer's target format.
        /// </remarks>
        /// <param name="start">The start tag whose attributes are written.</param>
        /// <param name="writer">The writer that receives the modifiers.</param>
        private void WriteAttributes(HtmlStartTag start, TextWriter writer)
        {
            string buffer;
            var inParen = false;

            // Class and id share one pair of parentheses: "(class#id)", "(class)" or "(#id)".
            if (start.TryGetValue("class", out buffer))
            {
                writer.Write('(');
                writer.Write(buffer);
                inParen = true;
            }
            if (start.TryGetValue("id", out buffer))
            {
                if (!inParen)
                {
                    writer.Write('(');
                }
                writer.Write('#');
                writer.Write(buffer);
                inParen = true;
            }
            if (inParen)
            {
                writer.Write(')');
            }

            // Style and language are independent of class/id and of each other.
            if (start.TryGetValue("style", out buffer))
            {
                writer.Write('{');
                // Surrounding whitespace and trailing semicolons are dropped from the declaration list.
                writer.Write(buffer.Trim().TrimEnd(';'));
                writer.Write('}');
            }
            if (start.TryGetValue("lang", out buffer))
            {
                writer.Write('[');
                writer.Write(buffer);
                writer.Write(']');
            }
        }
        /// <summary>
        /// Starts an element by writing the markup that corresponds to its tag name
        /// (for example "**" for bold, "# " for a first-level heading, "> " for a block quote).
        /// Tag names without a case below produce no output here.
        /// </summary>
        /// <param name="prefix">The namespace prefix of the element (not used by this method).</param>
        /// <param name="localName">The local name of the element; it is lower-cased before being matched.</param>
        /// <param name="ns">The namespace URI of the element (not used by this method).</param>
        public override void WriteStartElement(string prefix, string localName, string ns)
        {
            WriteStartElementEnd();

            var tag = new HtmlStartTag(localName.ToLowerInvariant());
            _nodes.Push(tag);

            switch (tag.Value)
            {
                case "b":
                case "strong":
                    StartInline();
                    // Only write the marker when this element is shallower than the recorded bold depth.
                    if (_boldDepth > _nodes.Count)
                    {
                        _boldDepth = _nodes.Count;
                        _writer.Write("**");
                    }
                    break;

                case "blockquote":
                    StartBlock("> ");
                    break;

                case "br":
                    _minify = MinifyState.LastCharWasSpace;
                    if (_preserveWhitespace == PreserveState.BeforeContent)
                    {
                        _preserveWhitespace = PreserveState.Preserve;
                    }
                    else if (_preserveWhitespace == PreserveState.InternalLineFeed)
                    {
                        _writer.Write(_settings.NewLineChars);
                        WritePrefix();
                        _preserveWhitespace = PreserveState.InternalLineFeed;
                    }
                    else if (_preserveWhitespace == PreserveState.None)
                    {
                        // Outside preserved whitespace a line break is a backslash followed by a new line.
                        _writer.Write('\\');
                        _writer.Write(_settings.NewLineChars);
                        WritePrefix();
                    }
                    else
                    {
                        _preserveWhitespace = PreserveState.InternalLineFeed;
                    }
                    break;

                case "code":
                    if (_preserveWhitespace == PreserveState.None)
                    {
                        StartInline();
                        _writer.Write('`');
                    }
                    break;

                case "em":
                case "i":
                    StartInline();
                    // Only write the marker when this element is shallower than the recorded italic depth.
                    if (_italicDepth > _nodes.Count)
                    {
                        _italicDepth = _nodes.Count;
                        _writer.Write("*");
                    }
                    break;

                case "h1":
                    StartBlock("# ");
                    break;

                case "h2":
                    StartBlock("## ");
                    break;

                case "h3":
                    StartBlock("### ");
                    break;

                case "h4":
                    StartBlock("#### ");
                    break;

                case "h5":
                    StartBlock("##### ");
                    break;

                case "h6":
                    StartBlock("###### ");
                    break;

                case "hr":
                    StartBlock("* * *");
                    EndBlock();
                    _writer.Write(_settings.NewLineChars);
                    _minify = MinifyState.LastCharWasSpace;
                    break;

                case "li":
                {
                    if (_outputStarted)
                    {
                        _writer.Write(_settings.NewLineChars);
                    }

                    // A prefix that starts with a digit is a numbered-list prefix of the form
                    // "<number>. "; advance it to the next number before writing it.
                    var innermost = _linePrefix.Count - 1;
                    if (innermost >= 0 && char.IsDigit(_linePrefix[innermost][0]))
                    {
                        var current = _linePrefix[innermost];
                        var next = int.Parse(current.Substring(0, current.Length - 2)) + 1;
                        _linePrefix[innermost] = string.Format("{0}. ", next);
                    }

                    WritePrefix();
                    _minify = MinifyState.LastCharWasSpace;
                    break;
                }

                case "p":
                    StartBlock("");
                    break;

                case "pre":
                    StartBlock("    ");
                    _preserveWhitespace = PreserveState.BeforeContent;
                    break;

                case "ol":
                    StartList("0. ");
                    break;

                case "ul":
                    StartList("- ");
                    break;
            }

            _state = InternalState.Element;
        }
Example #9
0
        /// <summary>
        /// Compress HTML by removing unnecessary whitespace and comments
        /// </summary>
        /// <param name="reader">Stream of <see cref="HtmlNode"/> to minify</param>
        /// <param name="settings">
        /// Settings to control how the HTML is compressed; when <c>null</c>,
        /// <c>HtmlMinifySettings.ReadOnlyDefault</c> is used
        /// </param>
        /// <returns>Stream of minified <see cref="HtmlNode"/></returns>
        /// <remarks>
        /// Comments are dropped unless they start with "[if" or end with "endif]".
        /// Text inside a script element whose type is listed in <c>ScriptTypesToCompress</c> is
        /// buffered and emitted as a single minified text node when the closing script tag is reached.
        /// Text inside tags listed in <c>PreserveInnerSpaceTags</c> is passed through unchanged.
        /// </remarks>
        public static IEnumerable<HtmlNode> Minify(this IEnumerable<HtmlNode> reader, HtmlMinifySettings settings = null)
        {
            settings = settings ?? HtmlMinifySettings.ReadOnlyDefault;
            var state = MinifyState.LastCharWasSpace;
            var tagState = ContainingTag.None;
            int trimStart;
            int trimEnd;
            // Accumulates script text until the closing script tag is seen.
            StringBuilder builder = null;

            foreach (var node in reader)
            {
                if (node.Type == HtmlTokenType.Comment)
                {
                    // Ignore comments, unless they are conditional.
                    // The markers are fixed tokens, so they are compared ordinally.
                    if (node.Value.StartsWith("[if", System.StringComparison.Ordinal)
                        || node.Value.EndsWith("endif]", System.StringComparison.Ordinal))
                    {
                        yield return node;
                    }
                }
                else if (node.Type == HtmlTokenType.Text)
                {
                    if (node.Value == "" || node.Value == null)
                    {
                        // do nothing
                    }
                    else if (tagState == ContainingTag.WhitespacePreserve)
                    {
                        yield return node;

                        state = MinifyState.LastCharWasSpace;
                    }
                    else if (tagState == ContainingTag.Script)
                    {
                        if (builder == null)
                        {
                            builder = Pool.NewStringBuilder();
                        }
                        builder.Append(node.Value);
                    }
                    else
                    {
                        TrimIndices(node.Value, out trimStart, out trimEnd);
                        if (trimEnd < 0)
                        {
                            // Do nothing for an empty or null string
                        }
                        else if (trimEnd < trimStart)
                        {
                            // Nothing to emit for this node; only remember that a space
                            // may be needed before the next output.
                            if (state == MinifyState.Compressed)
                            {
                                state = MinifyState.SpaceNeeded;
                            }
                        }
                        else
                        {
                            if (state == MinifyState.SpaceNeeded && trimStart == 0)
                            {
                                yield return new HtmlText(node.Position, " ");

                                state = MinifyState.LastCharWasSpace;
                            }

                            if (state == MinifyState.LastCharWasSpace ||
                                state == MinifyState.InlineStartAfterSpace ||
                                trimStart == 0)
                            {
                                yield return new HtmlText(node.Position, GetCompressedString(node.Value, trimStart, trimEnd));
                            }
                            else
                            {
                                // Start one character earlier so one leading character before the trimmed range is kept.
                                yield return new HtmlText(node.Position, GetCompressedString(node.Value, trimStart - 1, trimEnd));
                            }

                            if (trimEnd < node.Value.Length - 1)
                            {
                                // Characters were trimmed from the end of the text.
                                state = MinifyState.SpaceNeeded;
                            }
                            else
                            {
                                state = MinifyState.Compressed;
                            }
                        }
                    }
                }
                else
                {
                    if (state == MinifyState.SpaceNeeded)
                    {
                        if (node.Type == HtmlTokenType.EndTag && !settings.InlineElements.Contains(node.Value))
                        {
                            state = MinifyState.LastCharWasSpace;
                        }
                        else
                        {
                            // Inline elements can render spaces, otherwise spaces shouldn't be rendered between elements
                            if ((node.Type == HtmlTokenType.StartTag || node.Type == HtmlTokenType.EndTag) &&
                                settings.InlineElements.Contains(node.Value))
                            {
                                yield return new HtmlText(node.Position, " ");
                            }
                            if (settings.PreserveSurroundingSpaceTags.Contains(node.Value))
                            {
                                state = MinifyState.Compressed;
                            }
                            else
                            {
                                state = MinifyState.LastCharWasSpace;
                            }
                        }
                    }

                    // Flush the buffered script text, minified, just before its closing tag.
                    if (node.Type == HtmlTokenType.EndTag && node.Value == "script" && builder != null)
                    {
                        yield return new HtmlText(node.Position, Js.Minify(new TextSource(builder)));

                        builder.ToPool();
                        builder = null;
                    }

                    var tag = node as HtmlStartTag;
                    if (tag != null)
                    {
                        if (tag.Attributes.Any(a => a.Key == "style" || a.Key == "class"))
                        {
                            // Rebuild the tag so style and class values can be compressed;
                            // all other attributes are copied unchanged.
                            var newTag = new HtmlStartTag(tag.Position, tag.Value);
                            foreach (var attr in tag.Attributes)
                            {
                                if (attr.Key == "style")
                                {
                                    newTag.Add(attr.Key, TrimStyleString(attr.Value));
                                }
                                else if (attr.Key == "class")
                                {
                                    newTag.Add(attr.Key, GetCompressedString(attr.Value));
                                }
                                else
                                {
                                    newTag.Attributes.Add(attr);
                                }
                            }
                            newTag.IsSelfClosing = tag.IsSelfClosing;
                            yield return newTag;
                        }
                        else
                        {
                            yield return node;
                        }

                        if (state == MinifyState.LastCharWasSpace &&
                            settings.InlineElements.Contains(node.Value))
                        {
                            if (HtmlTextWriter.VoidElements.Contains(node.Value))
                            {
                                state = MinifyState.Compressed;
                            }
                            else
                            {
                                state = MinifyState.InlineStartAfterSpace;
                            }
                        }
                    }
                    else
                    {
                        yield return node;

                        if (state == MinifyState.InlineStartAfterSpace)
                        {
                            state = MinifyState.Compressed;
                        }
                    }

                    // Track which kind of content the following text nodes belong to.
                    if (node.Type == HtmlTokenType.StartTag && settings.PreserveInnerSpaceTags.Contains(node.Value))
                    {
                        tagState = ContainingTag.WhitespacePreserve;
                    }
                    else if (tag?.Value == "script")
                    {
                        // A script without a type attribute is treated as JavaScript.
                        var type = tag["type"];
                        if (string.IsNullOrEmpty(type))
                        {
                            type = "application/javascript";
                        }
                        if (settings.ScriptTypesToCompress.Contains(type))
                        {
                            tagState = ContainingTag.Script;
                        }
                        else
                        {
                            tagState = ContainingTag.WhitespacePreserve;
                        }
                    }
                    else if (node.Type == HtmlTokenType.EndTag &&
                             (settings.PreserveInnerSpaceTags.Contains(node.Value) || node.Value == "script"))
                    {
                        tagState = ContainingTag.None;
                    }
                }
            }
        }
Example #10
0
        /// <summary>
        /// Sanitizes the specified HTML, removing scripts, styles, and tags
        /// which might pose a security concern
        /// </summary>
        /// <param name="reader">A stream of <see cref="HtmlNode"/></param>
        /// <param name="settings">
        /// Settings controlling what CSS and HTML is permitted in the result; when <c>null</c>,
        /// <c>HtmlSanitizeSettings.ReadOnlyDefault</c> is used
        /// </param>
        /// <returns>A stream of sanitized <see cref="HtmlNode"/></returns>
        /// <remarks>
        /// The goal of sanitization is to prevent XSS patterns
        /// described on <a href="https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet">XSS Filter Evasion Cheat Sheet</a>
        /// </remarks>
        public static IEnumerable<HtmlNode> Sanitize(this IEnumerable<HtmlNode> reader, HtmlSanitizeSettings settings)
        {
            // Nesting depth inside an element that is being removed; negative means
            // nothing is currently being removed.
            var removeDepth = -1;
            var inStyle = false;

            settings = settings ?? HtmlSanitizeSettings.ReadOnlyDefault;

            foreach (var origToken in reader)
            {
                var token = origToken;
                // A "tag" whose name matches the e-mail pattern is turned back into literal text.
                if (token.Type == HtmlTokenType.StartTag && _emailRegex.IsMatch(token.Value))
                {
                    token = new HtmlText(token.Position, "<" + token.Value + ">");
                }

                switch (token.Type)
                {
                    case HtmlTokenType.Text:
                        if (removeDepth < 0)
                        {
                            if (inStyle)
                            {
                                yield return new HtmlText(token.Position, SanitizeCss(token.Value, settings, true));
                            }
                            else
                            {
                                yield return token;
                            }
                        }
                        break;

                    case HtmlTokenType.Comment:
                        // No need to risk weird comments that might be interpreted as content (e.g. in IE)
                        break;

                    case HtmlTokenType.Doctype:
                        // Doctypes should not appear in snippets
                        break;

                    case HtmlTokenType.StartTag:
                        var tag = (HtmlStartTag)token;
                        if (removeDepth < 0)
                        {
                            if (settings.AllowedTags.Contains(token.Value))
                            {
                                if (token.Value == "style")
                                {
                                    inStyle = true;
                                }

                                var allowed = AllowedAttributes(tag, settings).ToArray();

                                if (tag.Value == "img" && !allowed.Any(k => k.Key == "src"))
                                {
                                    // An image without a permitted src is dropped.
                                    if (!HtmlTextWriter.VoidElements.Contains(tag.Value) && !tag.IsSelfClosing)
                                    {
                                        removeDepth = 0;
                                    }
                                }
                                else
                                {
                                    var newTag = new HtmlStartTag(tag.Position, tag.Value);
                                    newTag.IsSelfClosing = tag.IsSelfClosing;
                                    foreach (var attr in allowed)
                                    {
                                        newTag.Attributes.Add(attr);
                                    }
                                    yield return newTag;
                                }
                            }
                            else if (!HtmlTextWriter.VoidElements.Contains(tag.Value) && !tag.IsSelfClosing)
                            {
                                // Disallowed element with content: start removing everything up to its end tag.
                                removeDepth = 0;
                            }
                        }
                        else
                        {
                            if (!HtmlTextWriter.VoidElements.Contains(tag.Value) && !tag.IsSelfClosing)
                            {
                                removeDepth++;
                            }
                        }
                        break;

                    case HtmlTokenType.EndTag:
                        if (removeDepth < 0)
                        {
                            if (settings.AllowedTags.Contains(token.Value))
                            {
                                yield return token;
                            }
                        }
                        else if (!HtmlTextWriter.VoidElements.Contains(token.Value))
                        {
                            // Mirror the start-tag handling: void elements never increase the depth,
                            // so a stray end tag for one (e.g. "</br>") must not decrease it and
                            // end the removal of the enclosing disallowed element early.
                            removeDepth--;
                        }

                        if (token.Value == "style")
                        {
                            inStyle = false;
                        }
                        break;
                }
            }
        }
Example #11
0
 /// <summary>
 /// Lazily yields the attributes of <paramref name="tag"/> that survive sanitization.
 /// Attributes not listed in <c>settings.AllowedAttributes</c> are dropped. A style attribute is
 /// passed through <c>SanitizeCss</c> and kept only if the result is not blank, attributes
 /// listed in <c>settings.UriAttributes</c> are passed through <c>SanitizeUrl</c> and kept only
 /// if the result is not <c>null</c>, and any other attribute is kept unless its value starts with "&amp;{".
 /// </summary>
 /// <param name="tag">The start tag whose attributes are filtered.</param>
 /// <param name="settings">The sanitization settings to apply.</param>
 /// <returns>The permitted attributes, with style and URI values replaced by their sanitized form.</returns>
 private static IEnumerable<KeyValuePair<string, string>> AllowedAttributes(HtmlStartTag tag, HtmlSanitizeSettings settings)
 {
     for (var index = 0; index < tag.Attributes.Count; index++)
     {
         var attribute = tag.Attributes[index];

         if (!settings.AllowedAttributes.Contains(attribute.Key))
         {
             continue;
         }

         if (string.Equals(attribute.Key, "style", StringComparison.OrdinalIgnoreCase))
         {
             var css = SanitizeCss(attribute.Value, settings, false);
             if (!css.IsNullOrWhiteSpace())
             {
                 yield return new KeyValuePair<string, string>(attribute.Key, css);
             }
         }
         else if (settings.UriAttributes.Contains(attribute.Key))
         {
             var safeUrl = SanitizeUrl(attribute.Value, settings);
             if (safeUrl != null)
             {
                 yield return new KeyValuePair<string, string>(attribute.Key, safeUrl);
             }
         }
         else if (!attribute.Value.StartsWith("&{"))
         {
             yield return attribute;
         }
     }
 }