/// <summary>
/// Begins a new block in the output: emits a blank-line separator when the current
/// minify state calls for one, then writes the block prefix, the tag's attributes
/// and the terminating dot(s).
/// </summary>
/// <param name="prefix">
/// The block prefix to write. An empty prefix is replaced with <c>"p"</c> when the previous
/// block was multiline, this block is multiline, or the tag carries a
/// <c>class</c>, <c>id</c>, <c>style</c> or <c>lang</c> attribute.
/// </param>
/// <param name="start">The start tag whose attributes are written after the prefix.</param>
/// <param name="multiline">When <c>true</c>, an additional <c>'.'</c> is written after the block signature.</param>
private void StartBlock(string prefix, HtmlStartTag start, bool multiline = false)
{
    var needsSeparator = _minify == MinifyState.Compressed
        || _minify == MinifyState.SpaceNeeded
        || _minify == MinifyState.BlockEnd;
    if (needsSeparator)
    {
        // Two line breaks in a row leave an empty line before the new block.
        var newLine = _settings.NewLineChars;
        _writer.Write(newLine);
        _writer.Write(newLine);
        _minify = MinifyState.LastCharWasSpace;
    }

    if (prefix == "")
    {
        var requiresExplicitPrefix = _lastWasMultiline
            || multiline
            || start.Any(k => k.Key == "class" || k.Key == "id" || k.Key == "style" || k.Key == "lang");
        if (requiresExplicitPrefix)
        {
            prefix = "p";
        }
    }

    _writer.Write(prefix);
    WriteAttributes(start, _writer);

    if (prefix != "")
    {
        _writer.Write('.');
        _minify = MinifyState.SpaceNeeded;
    }

    if (multiline)
    {
        _writer.Write('.');
    }

    _outputStarted = true;
}
/// <summary>
/// Default skip rule: an element is skipped when its name is in <c>_elementsToSkip</c>, or when
/// its inline <c>style</c> attribute declares <c>display</c> with the single argument <c>none</c>
/// or <c>visibility</c> with the single argument <c>hidden</c>.
/// </summary>
/// <param name="start">The start tag to inspect.</param>
/// <returns><c>true</c> if the element should be skipped; otherwise <c>false</c>.</returns>
private bool SkipElementDefault(HtmlStartTag start)
{
    if (_elementsToSkip.Contains(start.Value))
    {
        return true;
    }

    var style = start["style"];
    if (string.IsNullOrEmpty(style))
    {
        return false;
    }

    var properties = new CssTokenizer(style).Normalize().OfType<CssPropertyToken>();
    foreach (var property in properties)
    {
        // Pick the argument value that hides the element for this property, if any.
        string hidingValue;
        if (property.Data == "display")
        {
            hidingValue = "none";
        }
        else if (property.Data == "visibility")
        {
            hidingValue = "hidden";
        }
        else
        {
            continue;
        }

        if (property.ArgumentCount == 1 && property.ArgumentTokens.Single().Data == hidingValue)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Decides whether an element should be skipped. When no <c>SkipElement</c> callback is set the
/// default rule is applied directly; otherwise the callback is invoked with the tag and the
/// default rule so it can delegate to it.
/// </summary>
/// <param name="start">The start tag to evaluate.</param>
/// <returns><c>true</c> if the element should be skipped; otherwise <c>false</c>.</returns>
internal bool ShouldSkipElement(HtmlStartTag start)
{
    return SkipElement == null
        ? SkipElementDefault(start)
        : SkipElement(start, SkipElementDefault);
}
/// <summary>
/// Registers the start of an element: finishes any pending start element, pushes a new
/// start tag (with the lower-cased local name) onto the node stack and resets the writer state.
/// </summary>
/// <param name="prefix">The namespace prefix of the element (not used here).</param>
/// <param name="localName">The local name of the element.</param>
/// <param name="ns">The namespace URI of the element (not used here).</param>
public override void WriteStartElement(string prefix, string localName, string ns)
{
    WriteStartElementEnd(localName);

    var normalizedName = localName.ToLowerInvariant();
    _nodes.Push(new HtmlStartTag(normalizedName));

    _lastWasMultiline = false;
    _state = InternalState.Element;
}
/// <summary>
/// Adjusts the attributes of a math start tag in place: every attribute whose key
/// satisfies <c>Is("definitionurl")</c> is replaced by one named <c>definitionURL</c>
/// carrying the same value.
/// </summary>
/// <param name="tag">The tag token to adjust.</param>
/// <returns>The same tag instance that was passed in.</returns>
public static HtmlTagNode MathMlConfig(HtmlStartTag tag)
{
    var attributes = tag.Attributes;
    for (var index = 0; index < attributes.Count; index++)
    {
        var attribute = attributes[index];
        if (!attribute.Key.Is("definitionurl"))
        {
            continue;
        }

        attributes[index] = new KeyValuePair<string, string>("definitionURL", attribute.Value);
    }

    return tag;
}
/// <summary>
/// Adjusts an SVG start tag in place: the tag name is replaced when it has an entry in
/// <c>svgAdjustedTagNames</c>, and every attribute whose key has an entry in
/// <c>svgAttributeNames</c> is renamed to the mapped name (the value is kept).
/// </summary>
/// <param name="tag">The tag token to adjust.</param>
/// <returns>The same tag instance that was passed in.</returns>
public static HtmlTagNode SvgConfig(this HtmlStartTag tag)
{
    var attributes = tag.Attributes;
    string replacement;

    if (svgAdjustedTagNames.TryGetValue(tag.Value, out replacement))
    {
        tag.Value = replacement;
    }

    for (var index = 0; index < attributes.Count; index++)
    {
        var attribute = attributes[index];
        if (svgAttributeNames.TryGetValue(attribute.Key, out replacement))
        {
            attributes[index] = new KeyValuePair<string, string>(replacement, attribute.Value);
        }
    }

    return tag;
}
/// <summary>
/// Writes the attribute portion of a block/phrase signature for the given tag:
/// <c>(class#id)</c>, then <c>{style}</c>, then <c>[lang]</c>. Each group is written
/// only when the corresponding attribute is present on the tag.
/// </summary>
/// <remarks>
/// The groups are independent of each other: a tag carrying a class or id still has its
/// style and language written (previously these were dropped whenever a class or id was
/// present, and the language was dropped whenever a style was present).
/// </remarks>
/// <param name="start">The start tag whose attributes are rendered.</param>
/// <param name="writer">The writer that receives the rendered attributes.</param>
private void WriteAttributes(HtmlStartTag start, TextWriter writer)
{
    string classValue;
    string idValue;
    var hasClass = start.TryGetValue("class", out classValue);
    var hasId = start.TryGetValue("id", out idValue);

    if (hasClass || hasId)
    {
        writer.Write('(');
        if (hasClass)
        {
            writer.Write(classValue);
        }
        if (hasId)
        {
            writer.Write('#');
            writer.Write(idValue);
        }
        writer.Write(')');
    }

    string buffer;
    if (start.TryGetValue("style", out buffer))
    {
        // Surrounding whitespace and trailing semicolons are stripped from the style text.
        writer.Write('{');
        writer.Write(buffer.Trim().TrimEnd(';'));
        writer.Write('}');
    }

    if (start.TryGetValue("lang", out buffer))
    {
        writer.Write('[');
        writer.Write(buffer);
        writer.Write(']');
    }
}
/// <summary>
/// Writes the specified start tag and associates it with the given namespace and prefix.
/// The element name is lower-cased, pushed onto the node stack and then mapped to the
/// markup that opens the corresponding construct (emphasis, headings, lists, line breaks, ...).
/// </summary>
/// <param name="prefix">The namespace prefix of the element.</param>
/// <param name="localName">The local name of the element.</param>
/// <param name="ns">The namespace URI to associate with the element.</param>
public override void WriteStartElement(string prefix, string localName, string ns)
{
    WriteStartElementEnd();

    var start = new HtmlStartTag(localName.ToLowerInvariant());
    _nodes.Push(start);

    switch (start.Value)
    {
        case "b":
        case "strong":
        {
            StartInline();
            // Only the outermost bold element emits the marker.
            var depth = _nodes.Count;
            if (_boldDepth > depth)
            {
                _boldDepth = depth;
                _writer.Write("**");
            }
            break;
        }

        case "em":
        case "i":
        {
            StartInline();
            // Only the outermost italic element emits the marker.
            var depth = _nodes.Count;
            if (_italicDepth > depth)
            {
                _italicDepth = depth;
                _writer.Write("*");
            }
            break;
        }

        case "code":
            if (_preserveWhitespace == PreserveState.None)
            {
                StartInline();
                _writer.Write('`');
            }
            break;

        case "br":
            _minify = MinifyState.LastCharWasSpace;
            if (_preserveWhitespace == PreserveState.BeforeContent)
            {
                _preserveWhitespace = PreserveState.Preserve;
            }
            else if (_preserveWhitespace == PreserveState.None)
            {
                // Outside preserved whitespace a break is a backslash followed by a new line.
                _writer.Write('\\');
                _writer.Write(_settings.NewLineChars);
                WritePrefix();
            }
            else
            {
                if (_preserveWhitespace == PreserveState.InternalLineFeed)
                {
                    _writer.Write(_settings.NewLineChars);
                    WritePrefix();
                }
                _preserveWhitespace = PreserveState.InternalLineFeed;
            }
            break;

        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
        case "h6":
        {
            // The digit in the tag name is the number of '#' characters in the prefix.
            var level = start.Value[1] - '0';
            StartBlock(new string('#', level) + " ");
            break;
        }

        case "blockquote":
            StartBlock("> ");
            break;

        case "p":
            StartBlock("");
            break;

        case "pre":
            StartBlock(" ");
            _preserveWhitespace = PreserveState.BeforeContent;
            break;

        case "hr":
            StartBlock("* * *");
            EndBlock();
            _writer.Write(_settings.NewLineChars);
            _minify = MinifyState.LastCharWasSpace;
            break;

        case "ol":
            StartList("0. ");
            break;

        case "ul":
            StartList("- ");
            break;

        case "li":
        {
            if (_outputStarted)
            {
                _writer.Write(_settings.NewLineChars);
            }

            var last = _linePrefix.Count - 1;
            if (last >= 0 && char.IsDigit(_linePrefix[last][0]))
            {
                // Numbered list: bump the counter stored in the innermost "<n>. " prefix.
                var current = _linePrefix[last];
                var number = int.Parse(current.Substring(0, current.Length - 2));
                _linePrefix[last] = string.Format("{0}. ", number + 1);
            }

            WritePrefix();
            _minify = MinifyState.LastCharWasSpace;
            break;
        }
    }

    _state = InternalState.Element;
}
/// <summary>
/// Compress HTML by removing unnecessary whitespace and comments
/// </summary>
/// <param name="reader">Stream of <see cref="HtmlNode"/> to minify</param>
/// <param name="settings">Settings to control how the HTML is compressed.
/// When <c>null</c>, <c>HtmlMinifySettings.ReadOnlyDefault</c> is used.</param>
/// <returns>Stream of minified <see cref="HtmlNode"/></returns>
/// <remarks>
/// Text inside tags listed in <c>PreserveInnerSpaceTags</c> is passed through untouched.
/// Text inside a <c>script</c> element whose type is listed in <c>ScriptTypesToCompress</c>
/// is buffered and emitted as a single minified text node when the closing
/// <c>script</c> tag is reached.
/// </remarks>
public static IEnumerable<HtmlNode> Minify(this IEnumerable<HtmlNode> reader, HtmlMinifySettings settings = null)
{
    settings = settings ?? HtmlMinifySettings.ReadOnlyDefault;
    var state = MinifyState.LastCharWasSpace;
    var tagState = ContainingTag.None;
    StringBuilder builder = null;

    foreach (var node in reader)
    {
        if (node.Type == HtmlTokenType.Comment)
        {
            // Ignore comments, unless they are conditional.
            // Ordinal comparison: these are markup tokens, not linguistic text.
            if (node.Value.StartsWith("[if", StringComparison.Ordinal)
                || node.Value.EndsWith("endif]", StringComparison.Ordinal))
            {
                yield return node;
            }
        }
        else if (node.Type == HtmlTokenType.Text)
        {
            if (string.IsNullOrEmpty(node.Value))
            {
                // do nothing
            }
            else if (tagState == ContainingTag.WhitespacePreserve)
            {
                yield return node;
                state = MinifyState.LastCharWasSpace;
            }
            else if (tagState == ContainingTag.Script)
            {
                // Collect script text; it is minified as a whole at the closing tag.
                if (builder == null)
                {
                    builder = Pool.NewStringBuilder();
                }
                builder.Append(node.Value);
            }
            else
            {
                int trimStart;
                int trimEnd;
                TrimIndices(node.Value, out trimStart, out trimEnd);
                if (trimEnd < 0)
                {
                    // Do nothing for an empty or null string
                }
                else if (trimEnd < trimStart)
                {
                    // No content between the trim indices: only remember that a space is pending.
                    if (state == MinifyState.Compressed)
                    {
                        state = MinifyState.SpaceNeeded;
                    }
                }
                else
                {
                    if (state == MinifyState.SpaceNeeded && trimStart == 0)
                    {
                        yield return new HtmlText(node.Position, " ");
                        state = MinifyState.LastCharWasSpace;
                    }

                    if (state == MinifyState.LastCharWasSpace
                        || state == MinifyState.InlineStartAfterSpace
                        || trimStart == 0)
                    {
                        yield return new HtmlText(node.Position, GetCompressedString(node.Value, trimStart, trimEnd));
                    }
                    else
                    {
                        // Start one character earlier so that one leading whitespace character is included.
                        yield return new HtmlText(node.Position, GetCompressedString(node.Value, trimStart - 1, trimEnd));
                    }

                    if (trimEnd < node.Value.Length - 1)
                    {
                        state = MinifyState.SpaceNeeded;
                    }
                    else
                    {
                        state = MinifyState.Compressed;
                    }
                }
            }
        }
        else
        {
            if (state == MinifyState.SpaceNeeded)
            {
                if (node.Type == HtmlTokenType.EndTag && !settings.InlineElements.Contains(node.Value))
                {
                    state = MinifyState.LastCharWasSpace;
                }
                else
                {
                    // Inline elements can render spaces, otherwise spaces shouldn't be rendered between elements
                    if ((node.Type == HtmlTokenType.StartTag || node.Type == HtmlTokenType.EndTag)
                        && settings.InlineElements.Contains(node.Value))
                    {
                        yield return new HtmlText(node.Position, " ");
                    }

                    if (settings.PreserveSurroundingSpaceTags.Contains(node.Value))
                    {
                        state = MinifyState.Compressed;
                    }
                    else
                    {
                        state = MinifyState.LastCharWasSpace;
                    }
                }
            }

            if (node.Type == HtmlTokenType.EndTag && node.Value == "script" && builder != null)
            {
                // Flush the buffered script content as one minified text node.
                yield return new HtmlText(node.Position, Js.Minify(new TextSource(builder)));
                builder.ToPool();
                builder = null;
            }

            var tag = node as HtmlStartTag;
            if (tag != null)
            {
                if (tag.Attributes.Any(a => a.Key == "style" || a.Key == "class"))
                {
                    // Rebuild the tag so that style and class values are compressed.
                    var newTag = new HtmlStartTag(tag.Position, tag.Value);
                    foreach (var attr in tag.Attributes)
                    {
                        if (attr.Key == "style")
                        {
                            newTag.Add(attr.Key, TrimStyleString(attr.Value));
                        }
                        else if (attr.Key == "class")
                        {
                            newTag.Add(attr.Key, GetCompressedString(attr.Value));
                        }
                        else
                        {
                            newTag.Attributes.Add(attr);
                        }
                    }
                    newTag.IsSelfClosing = tag.IsSelfClosing;
                    yield return newTag;
                }
                else
                {
                    yield return node;
                }

                if (state == MinifyState.LastCharWasSpace && settings.InlineElements.Contains(node.Value))
                {
                    if (HtmlTextWriter.VoidElements.Contains(node.Value))
                    {
                        state = MinifyState.Compressed;
                    }
                    else
                    {
                        state = MinifyState.InlineStartAfterSpace;
                    }
                }
            }
            else
            {
                yield return node;
                if (state == MinifyState.InlineStartAfterSpace)
                {
                    state = MinifyState.Compressed;
                }
            }

            // Track which kind of container the following text nodes belong to.
            if (node.Type == HtmlTokenType.StartTag && settings.PreserveInnerSpaceTags.Contains(node.Value))
            {
                tagState = ContainingTag.WhitespacePreserve;
            }
            else if (tag?.Value == "script")
            {
                var type = tag["type"];
                if (string.IsNullOrEmpty(type))
                {
                    type = "application/javascript";
                }

                if (settings.ScriptTypesToCompress.Contains(type))
                {
                    tagState = ContainingTag.Script;
                }
                else
                {
                    tagState = ContainingTag.WhitespacePreserve;
                }
            }
            else if (node.Type == HtmlTokenType.EndTag
                && (settings.PreserveInnerSpaceTags.Contains(node.Value) || node.Value == "script"))
            {
                tagState = ContainingTag.None;
            }
        }
    }
}
/// <summary>
/// Sanitizes the specified HTML, removing scripts, styles, and tags
/// which might pose a security concern
/// </summary>
/// <param name="reader">A stream of <see cref="HtmlNode"/></param>
/// <param name="settings">Settings controlling what CSS and HTML is permitted in the result.
/// When <c>null</c>, <c>HtmlSanitizeSettings.ReadOnlyDefault</c> is used.</param>
/// <returns>A stream of sanitized <see cref="HtmlNode"/></returns>
/// <remarks>
/// The goal of sanitization is to prevent XSS patterns
/// described on <a href="https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet">XSS Filter Evasion Cheat Sheet</a>
/// </remarks>
public static IEnumerable<HtmlNode> Sanitize(this IEnumerable<HtmlNode> reader, HtmlSanitizeSettings settings)
{
    settings = settings ?? HtmlSanitizeSettings.ReadOnlyDefault;

    // removeDepth >= 0 means the stream is currently inside an element that is being dropped;
    // the value counts how many nested non-void elements have been opened inside it.
    var removeDepth = -1;
    var inStyle = false;

    foreach (var origToken in reader)
    {
        var token = origToken;

        // A "start tag" whose name matches the e-mail pattern is emitted as literal text instead.
        if (token.Type == HtmlTokenType.StartTag && _emailRegex.IsMatch(token.Value))
        {
            token = new HtmlText(token.Position, "<" + token.Value + ">");
        }

        switch (token.Type)
        {
            case HtmlTokenType.Text:
            {
                if (removeDepth >= 0)
                {
                    break;
                }

                if (inStyle)
                {
                    yield return new HtmlText(token.Position, SanitizeCss(token.Value, settings, true));
                }
                else
                {
                    yield return token;
                }
                break;
            }

            case HtmlTokenType.Comment:
                // No need to risk weird comments that might be interpreted as content (e.g. in IE)
                break;

            case HtmlTokenType.Doctype:
                // Doctypes should not appear in snippets
                break;

            case HtmlTokenType.StartTag:
            {
                var tag = (HtmlStartTag)token;

                // Only elements that will have a matching end tag affect the removal depth.
                var opensScope = !HtmlTextWriter.VoidElements.Contains(tag.Value) && !tag.IsSelfClosing;

                if (removeDepth >= 0)
                {
                    if (opensScope)
                    {
                        removeDepth++;
                    }
                    break;
                }

                if (!settings.AllowedTags.Contains(token.Value))
                {
                    if (opensScope)
                    {
                        removeDepth = 0;
                    }
                    break;
                }

                if (token.Value == "style")
                {
                    inStyle = true;
                }

                var allowed = AllowedAttributes(tag, settings).ToArray();
                if (tag.Value == "img" && !allowed.Any(k => k.Key == "src"))
                {
                    // An image without an allowed src attribute is dropped.
                    if (opensScope)
                    {
                        removeDepth = 0;
                    }
                    break;
                }

                var newTag = new HtmlStartTag(tag.Position, tag.Value)
                {
                    IsSelfClosing = tag.IsSelfClosing
                };
                foreach (var attr in allowed)
                {
                    newTag.Attributes.Add(attr);
                }
                yield return newTag;
                break;
            }

            case HtmlTokenType.EndTag:
            {
                if (removeDepth < 0 && settings.AllowedTags.Contains(token.Value))
                {
                    yield return token;
                }
                else
                {
                    removeDepth--;
                }

                if (token.Value == "style")
                {
                    inStyle = false;
                }
                break;
            }
        }
    }
}
/// <summary>
/// Lazily yields the attributes of <paramref name="tag"/> that survive sanitization.
/// Attributes not listed in <c>settings.AllowedAttributes</c> are dropped; <c>style</c> values
/// are passed through <c>SanitizeCss</c> and dropped when the result is null or whitespace;
/// attributes listed in <c>settings.UriAttributes</c> are passed through <c>SanitizeUrl</c> and
/// dropped when it returns <c>null</c>; any other attribute is kept unless its value starts
/// with <c>&amp;{</c>.
/// </summary>
/// <param name="tag">The start tag whose attributes are filtered.</param>
/// <param name="settings">The sanitize settings that define what is permitted.</param>
/// <returns>The permitted attributes, with sanitized values where applicable.</returns>
private static IEnumerable<KeyValuePair<string, string>> AllowedAttributes(HtmlStartTag tag, HtmlSanitizeSettings settings)
{
    for (var index = 0; index < tag.Attributes.Count; index++)
    {
        var attribute = tag.Attributes[index];
        var name = attribute.Key;

        if (!settings.AllowedAttributes.Contains(name))
        {
            continue;
        }

        if (string.Equals(name, "style", StringComparison.OrdinalIgnoreCase))
        {
            var css = SanitizeCss(attribute.Value, settings, false);
            if (!css.IsNullOrWhiteSpace())
            {
                yield return new KeyValuePair<string, string>(name, css);
            }
            continue;
        }

        if (settings.UriAttributes.Contains(name))
        {
            var url = SanitizeUrl(attribute.Value, settings);
            if (url != null)
            {
                yield return new KeyValuePair<string, string>(name, url);
            }
            continue;
        }

        if (!attribute.Value.StartsWith("&{"))
        {
            yield return attribute;
        }
    }
}