/// <summary>
/// Initializes a new HTML tag from its name, lowercase name, attributes and flags
/// </summary>
/// <param name="name">Tag name</param>
/// <param name="nameInLowercase">Tag name in lowercase</param>
/// <param name="attributes">Attributes of the tag</param>
/// <param name="flags">Flags describing the tag</param>
public HtmlTag(
    string name,
    string nameInLowercase,
    IList<HtmlAttribute> attributes,
    HtmlTagFlags flags)
{
    // The arguments are stored as supplied; no validation or copying is performed here
    Name = name;
    NameInLowercase = nameInLowercase;
    Attributes = attributes;
    Flags = flags;
}
/// <summary>
/// Determines the type of an HTML attribute
/// </summary>
/// <param name="tagNameInLowercase">Name of the owning tag in lowercase</param>
/// <param name="tagFlags">Flags of the owning tag</param>
/// <param name="attributeNameInLowercase">Attribute name in lowercase</param>
/// <param name="attributes">List of attributes (forwarded to the URI check)</param>
/// <returns>Attribute type; <c>Xml</c> or <c>Text</c> when no more specific type applies</returns>
public HtmlAttributeType GetAttributeType(string tagNameInLowercase, HtmlTagFlags tagFlags,
    string attributeNameInLowercase, List<HtmlAttribute> attributes)
{
    // Types that are recognised regardless of the tag flags
    switch (attributeNameInLowercase)
    {
        case "class":
            return HtmlAttributeType.ClassName;
        case "style":
            return HtmlAttributeType.Style;
    }

    if (IsEventAttribute(attributeNameInLowercase))
    {
        return HtmlAttributeType.Event;
    }

    // Boolean, numeric and URI types are only considered for tags without the Xml flag
    if (!tagFlags.IsSet(HtmlTagFlags.Xml))
    {
        if (IsBooleanAttribute(attributeNameInLowercase))
        {
            return HtmlAttributeType.Boolean;
        }

        if (IsNumericAttribute(tagNameInLowercase, attributeNameInLowercase))
        {
            return HtmlAttributeType.Numeric;
        }

        if (IsUriBasedAttribute(tagNameInLowercase, attributeNameInLowercase, attributes))
        {
            return HtmlAttributeType.Uri;
        }
    }

    // Fallback: either an XML-based attribute or plain text
    return IsXmlBasedAttribute(attributeNameInLowercase)
        ? HtmlAttributeType.Xml
        : HtmlAttributeType.Text;
}
/// <summary>
/// Initializes a new HTML tag that has no attributes
/// </summary>
/// <param name="name">Tag name</param>
/// <param name="flags">Flags describing the tag</param>
public HtmlTag(string name, HtmlTagFlags flags)
    : this(name, new List<HtmlAttribute>(), flags) // delegate with a fresh, empty attribute list
{
}
/// <summary>
/// Checks whether every bit of <paramref name="flag"/> is also present in
/// <paramref name="source"/>
/// </summary>
/// <param name="source">Flags value to inspect</param>
/// <param name="flag">Flag (or combination of flags) to look for</param>
/// <returns>true if masking <paramref name="source"/> with <paramref name="flag"/>
/// yields <paramref name="flag"/> again; otherwise false</returns>
internal static bool IsSet(this HtmlTagFlags source, HtmlTagFlags flag)
{
    HtmlTagFlags sharedBits = source & flag;

    return sharedBits == flag;
}
/// <summary>
/// Initializes a new HTML tag from its name, attributes and flags
/// </summary>
/// <param name="name">Tag name</param>
/// <param name="attributes">Attributes of the tag</param>
/// <param name="flags">Flags describing the tag</param>
public HtmlTag(
    string name,
    IList<HtmlAttribute> attributes,
    HtmlTagFlags flags)
{
    // The arguments are stored as supplied; no validation or copying is performed here
    Name = name;
    Attributes = attributes;
    Flags = flags;
}
/// <summary>
/// Initializes a new HTML tag that has no attributes
/// </summary>
/// <param name="name">Tag name</param>
/// <param name="flags">Flags describing the tag</param>
public HtmlTag(string name, HtmlTagFlags flags)
    : this(name, new List<HtmlAttribute>(), flags) // delegate with a fresh, empty attribute list
{
}
/// <summary>
/// Parses an end tag: closes the matching open tag together with every tag opened
/// after it, notifies the end tag handler (if any) and removes the closed tags from
/// the tag stack
/// </summary>
/// <param name="tagName">Tag name; when null or empty, all open tags are closed</param>
/// <param name="tagNameInLowercase">Tag name in lowercase</param>
private void ParseEndTag(string tagName, string tagNameInLowercase)
{
    int endTagIndex = 0;
    int lastTagIndex = _tagStack.Count - 1;
    bool tagNameNotEmpty = !string.IsNullOrEmpty(tagName);
    HtmlParsingHandlers.EndTagDelegate endTagHandler = _handlers.EndTag;

    if (tagNameNotEmpty)
    {
        // Search from the top of the stack for the closest open tag with the same name;
        // the index becomes -1 when there is no such tag
        for (endTagIndex = lastTagIndex; endTagIndex >= 0; endTagIndex--)
        {
            if (_tagStack[endTagIndex].NameInLowercase == tagNameInLowercase)
            {
                break;
            }
        }
    }

    if (endTagIndex >= 0)
    {
        // Close all the open elements, up the stack
        for (int tagIndex = lastTagIndex; tagIndex >= endTagIndex; tagIndex--)
        {
            HtmlTag startTag = _tagStack[tagIndex];
            string startTagNameInLowercase = startTag.NameInLowercase;
            HtmlTagFlags startTagFlags = startTag.Flags;

            // The XML tag stack is unwound whether or not an end tag handler is
            // registered: start tags are pushed onto it independently of the start tag
            // handler, so skipping the pop here would leave stale entries behind
            if (_xmlTagStack.Count > 0 && !startTagFlags.IsSet(HtmlTagFlags.NonIndependent))
            {
                _xmlTagStack.Pop();
            }

            if (endTagHandler != null)
            {
                // Report the explicitly closed tag with the name used in the end tag and
                // the implicitly closed ones with the name used in their start tag
                string endTagName = tagNameNotEmpty && tagNameInLowercase == startTagNameInLowercase
                    ? tagName
                    : startTag.Name;

                var endTag = new HtmlTag(endTagName, startTagNameInLowercase, startTagFlags);
                endTagHandler(_context, endTag);
            }
        }

        // Remove the open elements from the stack
        // (nothing to remove when the stack is empty, i.e. lastTagIndex is -1)
        if (endTagIndex <= lastTagIndex)
        {
            int tagToRemoveStartIndex = endTagIndex;
            int tagsToRemoveCount = lastTagIndex - endTagIndex + 1;

            _tagStack.RemoveRange(tagToRemoveStartIndex, tagsToRemoveCount);
        }
    }
    else if (tagNameNotEmpty && _conditionalCommentOpened)
    {
        // No matching start tag on the stack, but a conditional comment is open:
        // the end tag is still reported to the handler
        if (_xmlTagStack.Count > 0 && _tagTypeDeterminer.IsXmlBasedTag(tagNameInLowercase))
        {
            _xmlTagStack.Pop();
        }

        var endTag = new HtmlTag(tagName, tagNameInLowercase, GetTagFlagsByName(tagNameInLowercase));
        endTagHandler?.Invoke(_context, endTag);
    }
}
/// <summary>
/// Parses a start tag: implicitly closes a preceding optional tag where needed,
/// assigns a type to every attribute, registers the tag on the tag stack and
/// notifies the start tag handler (if any)
/// </summary>
/// <param name="tagName">Tag name</param>
/// <param name="tagNameInLowercase">Tag name in lowercase</param>
/// <param name="attributes">List of attributes</param>
/// <param name="isEmptyTag">Flag that tag is empty</param>
private void ParseStartTag(string tagName, string tagNameInLowercase,
    List<HtmlAttribute> attributes, bool isEmptyTag)
{
    HtmlTagFlags tagFlags = GetTagFlagsByName(tagNameInLowercase);

    if (tagFlags.IsSet(HtmlTagFlags.Optional))
    {
        HtmlTag topTag = _tagStack.LastOrDefault();

        if (topTag != null && topTag.NameInLowercase == tagNameInLowercase)
        {
            // A tag of the same kind is still open on top of the stack: close it first
            ParseEndTag(topTag.Name, topTag.NameInLowercase);
        }
        else if (tagNameInLowercase == "body")
        {
            // A `body` start tag closes a still-open `head`
            HtmlTag openHeadTag = _tagStack.FirstOrDefault(t => t.NameInLowercase == "head");
            if (openHeadTag != null)
            {
                ParseEndTag(openHeadTag.Name, openHeadTag.NameInLowercase);
            }
        }
    }

    // Reconcile the emptiness known from the flags with the one passed by the caller
    if (tagFlags.IsSet(HtmlTagFlags.Empty))
    {
        isEmptyTag = true;
    }
    else if (isEmptyTag)
    {
        tagFlags |= HtmlTagFlags.Empty;
    }

    foreach (HtmlAttribute attribute in attributes)
    {
        attribute.Type = _attributeTypeDeterminer.GetAttributeType(tagNameInLowercase, tagFlags,
            attribute.NameInLowercase, attributes);
    }

    var tag = new HtmlTag(tagName, tagNameInLowercase, attributes, tagFlags);

    if (!isEmptyTag)
    {
        // Inside a conditional comment only tags with embedded code, or tags within
        // a revealed validating comment, are put on the stack
        bool shouldStackTag = true;

        if (_conditionalCommentOpened)
        {
            HtmlConditionalCommentType commentType = _conditionalCommentStack.Peek().Type;

            shouldStackTag = tagFlags.IsSet(HtmlTagFlags.EmbeddedCode)
                || commentType == HtmlConditionalCommentType.RevealedValidating
                || commentType == HtmlConditionalCommentType.RevealedValidatingSimplified;
        }

        if (shouldStackTag)
        {
            _tagStack.Add(tag);
        }
    }

    _handlers.StartTag?.Invoke(_context, tag);

    bool isIndependentXmlTag = tagFlags.IsSet(HtmlTagFlags.Xml)
        && !tagFlags.IsSet(HtmlTagFlags.NonIndependent);
    if (isIndependentXmlTag)
    {
        _xmlTagStack.Push(tagNameInLowercase);
    }
}
/// <summary>
/// Parses the string representation of an attribute list into attribute objects,
/// computing source coordinates for every attribute name and value
/// </summary>
/// <param name="tagName">Tag name (not used by this method)</param>
/// <param name="tagNameInLowercase">Tag name in lowercase</param>
/// <param name="tagFlags">Tag flags</param>
/// <param name="attributesString">String representation of the attribute list</param>
/// <param name="attributesCoordinates">Coordinates of the beginning of the attribute list</param>
/// <returns>List of attributes; empty when the string is null, empty or whitespace</returns>
private IList<HtmlAttribute> ParseAttributes(string tagName, string tagNameInLowercase,
    HtmlTagFlags tagFlags, string attributesString,
    SourceCodeNodeCoordinates attributesCoordinates)
{
    var parsedAttributes = new List<HtmlAttribute>();
    if (string.IsNullOrWhiteSpace(attributesString))
    {
        return parsedAttributes;
    }

    // Coordinates are computed incrementally: each step starts from the position and
    // coordinates of the previously located name or value
    SourceCodeNodeCoordinates lastCoordinates = attributesCoordinates;
    int lastPosition = 0;

    foreach (Match match in _attributeRegex.Matches(attributesString))
    {
        Group nameGroup = match.Groups["attributeName"];
        Group equalSignGroup = match.Groups["attributeEqualSign"];
        Group valueGroup = match.Groups["attributeValue"];

        string attributeName = nameGroup.Value;
        string attributeNameInLowercase = Utils.ContainsUppercaseCharacters(attributeName)
            ? attributeName.ToLowerInvariant()
            : attributeName;

        // null - attribute without an equal sign; empty string - equal sign without a value
        string attributeValue = null;
        if (equalSignGroup.Success)
        {
            if (!valueGroup.Success)
            {
                attributeValue = string.Empty;
            }
            else
            {
                attributeValue = valueGroup.Value;
                if (!string.IsNullOrWhiteSpace(attributeValue))
                {
                    attributeValue = HtmlAttribute.HtmlAttributeDecode(attributeValue);
                }
            }
        }

        var nameCoordinates = SourceCodeNodeCoordinates.Empty;
        if (nameGroup.Success)
        {
            int namePosition = nameGroup.Index;
            int lineBreakCount;
            int charRemainderCount;

            SourceCodeNavigator.CalculateLineBreakCount(attributesString, lastPosition,
                namePosition - lastPosition, out lineBreakCount, out charRemainderCount);
            nameCoordinates = SourceCodeNavigator.CalculateAbsoluteNodeCoordinates(
                lastCoordinates, lineBreakCount, charRemainderCount);

            lastCoordinates = nameCoordinates;
            lastPosition = namePosition;
        }

        var valueCoordinates = SourceCodeNodeCoordinates.Empty;
        if (valueGroup.Success)
        {
            int valuePosition = valueGroup.Index;
            int lineBreakCount;
            int charRemainderCount;

            SourceCodeNavigator.CalculateLineBreakCount(attributesString, lastPosition,
                valuePosition - lastPosition, out lineBreakCount, out charRemainderCount);
            valueCoordinates = SourceCodeNavigator.CalculateAbsoluteNodeCoordinates(
                lastCoordinates, lineBreakCount, charRemainderCount);

            lastCoordinates = valueCoordinates;
            lastPosition = valuePosition;
        }

        // The type is determined against the attributes collected so far
        HtmlAttributeType attributeType = GetAttributeType(tagNameInLowercase, tagFlags,
            attributeNameInLowercase, parsedAttributes);

        var attribute = new HtmlAttribute(attributeName, attributeNameInLowercase, attributeValue,
            attributeType, nameCoordinates, valueCoordinates);
        parsedAttributes.Add(attribute);
    }

    return parsedAttributes;
}
/// <summary>
/// Determines the type of an HTML attribute
/// </summary>
/// <param name="tagNameInLowercase">Name of the owning tag in lowercase</param>
/// <param name="tagFlags">Flags of the owning tag</param>
/// <param name="attributeNameInLowercase">Attribute name in lowercase</param>
/// <param name="attributes">List of attributes (forwarded to the URI check)</param>
/// <returns>Attribute type; <c>Xml</c> or <c>Text</c> when no more specific type applies</returns>
private HtmlAttributeType GetAttributeType(string tagNameInLowercase, HtmlTagFlags tagFlags,
    string attributeNameInLowercase, IList<HtmlAttribute> attributes)
{
    // Types that are recognised regardless of the tag flags
    switch (attributeNameInLowercase)
    {
        case "class":
            return HtmlAttributeType.ClassName;
        case "style":
            return HtmlAttributeType.Style;
    }

    if (HtmlAttributeTypeHelpers.IsEventAttribute(attributeNameInLowercase))
    {
        return HtmlAttributeType.Event;
    }

    // Boolean, numeric and URI types are only considered for tags without the Xml flag
    if (!tagFlags.HasFlag(HtmlTagFlags.Xml))
    {
        if (HtmlAttributeTypeHelpers.IsBooleanAttribute(attributeNameInLowercase))
        {
            return HtmlAttributeType.Boolean;
        }

        if (HtmlAttributeTypeHelpers.IsNumericAttribute(tagNameInLowercase, attributeNameInLowercase))
        {
            return HtmlAttributeType.Numeric;
        }

        if (HtmlAttributeTypeHelpers.IsUriBasedAttribute(tagNameInLowercase,
            attributeNameInLowercase, attributes))
        {
            return HtmlAttributeType.Uri;
        }
    }

    // Fallback: either an XML-based attribute or plain text
    return HtmlAttributeTypeHelpers.IsXmlBasedAttribute(attributeNameInLowercase)
        ? HtmlAttributeType.Xml
        : HtmlAttributeType.Text;
}
/// <summary>
/// Checks whether it is possible to remove the attribute quotes
/// </summary>
/// <param name="tagFlags">HTML tag flags</param>
/// <param name="attributeValue">Attribute value; a null value is never eligible</param>
/// <param name="attributeQuotesRemovalMode">Removal mode of HTML attribute quotes</param>
/// <returns>Result of check (true - can remove; false - cannot remove)</returns>
private static bool CanRemoveAttributeQuotes(HtmlTagFlags tagFlags, string attributeValue,
    HtmlAttributeQuotesRemovalMode attributeQuotesRemovalMode)
{
    // Quotes are always kept for XML-based tags and when removal is switched off
    if (tagFlags.Xml || attributeQuotesRemovalMode == HtmlAttributeQuotesRemovalMode.KeepQuotes)
    {
        return false;
    }

    // An attribute without a value has no quotes to remove
    if (attributeValue == null)
    {
        return false;
    }

    // Values ending with a slash keep their quotes (presumably so that the slash cannot
    // be read as part of a self-closing tag - TODO confirm). The last character is
    // compared directly, which makes the check ordinal rather than culture-sensitive.
    int valueLength = attributeValue.Length;
    if (valueLength > 0 && attributeValue[valueLength - 1] == '/')
    {
        return false;
    }

    if (attributeQuotesRemovalMode == HtmlAttributeQuotesRemovalMode.Html4)
    {
        return _html4AttributeValueNotRequireQuotesRegex.IsMatch(attributeValue);
    }

    if (attributeQuotesRemovalMode == HtmlAttributeQuotesRemovalMode.Html5)
    {
        return _html5AttributeValueNotRequireQuotesRegex.IsMatch(attributeValue);
    }

    return false;
}
/// <summary>
/// Handles a text node: processes embedded code or minifies whitespace (depending on
/// the node that was handled before the text), marks the text as the current node and
/// appends non-empty text to the buffer
/// </summary>
/// <param name="context">Markup parsing context</param>
/// <param name="text">Text</param>
private void TextHandler(MarkupParsingContext context, string text)
{
    // At this point `_currentNodeType` still holds the type recorded before this text
    HtmlNodeType precedingNodeType = _currentNodeType;
    string tagName;
    HtmlTagFlags tagFlags;
    IList<HtmlAttribute> attributes;

    if (_currentTag == null)
    {
        tagName = string.Empty;
        tagFlags = new HtmlTagFlags();
        attributes = new List<HtmlAttribute>();
    }
    else
    {
        tagName = _currentTag.Name;
        tagFlags = _currentTag.Flags;
        attributes = _currentTag.Attributes;
    }

    WhitespaceMinificationMode whitespaceMode = _settings.WhitespaceMinificationMode;
    bool followsStartTag = precedingNodeType == HtmlNodeType.StartTag;

    if (followsStartTag && tagFlags.EmbeddedCode)
    {
        // Content of a tag with embedded code
        switch (tagName)
        {
            case "script":
            {
                string scriptContentType = attributes
                    .Where(attr => attr.Name == "type")
                    .Select(attr => attr.Value)
                    .FirstOrDefault();

                if (string.IsNullOrWhiteSpace(scriptContentType))
                {
                    // Without a `type` attribute, fall back to the `language` attribute
                    string language = attributes
                        .Where(attr => attr.Name == "language")
                        .Select(attr => attr.Value)
                        .FirstOrDefault();

                    if (!string.IsNullOrWhiteSpace(language)
                        && language.Trim().ToLowerInvariant() == "vbscript")
                    {
                        scriptContentType = VBS_CONTENT_TYPE;
                    }
                }

                text = ProcessEmbeddedScriptContent(context, text, scriptContentType);
                break;
            }
            case "style":
            {
                string styleContentType = attributes
                    .Where(attr => attr.Name == "type")
                    .Select(attr => attr.Value)
                    .FirstOrDefault();

                text = ProcessEmbeddedStyleContent(context, text, styleContentType);
                break;
            }
            case "svg":
                text = ProcessEmbeddedSvgContent(context, text);
                break;
            case "math":
                text = ProcessEmbeddedMathMlContent(context, text);
                break;
        }
    }
    else if (whitespaceMode != WhitespaceMinificationMode.None)
    {
        if (_tagsWithNotRemovableWhitespaceQueue.Count != 0)
        {
            // Inside a tag with non-removable whitespace only a whitespace-only text
            // that directly follows a `textarea` start tag is dropped
            if (followsStartTag && tagName == "textarea" && string.IsNullOrWhiteSpace(text))
            {
                text = string.Empty;
            }
        }
        else
        {
            if (context.Position == 0)
            {
                // Processing of starting whitespace
                text = text.TrimStart();
            }
            else if (context.Position + text.Length == context.Length)
            {
                // Processing of ending whitespace
                text = text.TrimEnd();
            }
            else
            {
                bool isMedium = whitespaceMode == WhitespaceMinificationMode.Medium;
                bool isAggressive = whitespaceMode == WhitespaceMinificationMode.Aggressive;
                bool trimLeadingWhitespace = false;

                if (followsStartTag)
                {
                    // Processing of whitespace, that followed after the start tag
                    trimLeadingWhitespace = tagFlags.Invisible
                        || (tagFlags.NonIndependent && tagFlags.Empty)
                        || (isMedium && tagFlags.Block)
                        || (isAggressive
                            && (tagFlags.Block
                                || ((tagFlags.Inline || tagFlags.InlineBlock) && !tagFlags.Empty)));
                }
                else if (precedingNodeType == HtmlNodeType.EndTag)
                {
                    // Processing of whitespace, that followed after the end tag
                    trimLeadingWhitespace = tagFlags.Invisible
                        || tagFlags.NonIndependent
                        || ((isMedium || isAggressive) && tagFlags.Block);
                }
                else if (precedingNodeType == HtmlNodeType.Doctype
                    || precedingNodeType == HtmlNodeType.XmlDeclaration)
                {
                    // Processing of whitespace, that followed after the document type
                    // declaration or XML declaration
                    trimLeadingWhitespace = true;
                }

                if (trimLeadingWhitespace)
                {
                    text = text.TrimStart();
                }
            }

            if (text.Length > 0)
            {
                text = Utils.CollapseWhitespace(text);
            }
        }
    }

    _currentNodeType = HtmlNodeType.Text;
    _currentText = text;

    // Empty text is recorded as the current text, but not written to the buffer
    if (text.Length > 0)
    {
        _buffer.Add(text);
    }
}
/// <summary>
/// Computes the flags of a tag from its name
/// </summary>
/// <param name="tagNameInLowercase">Tag name in lowercase</param>
/// <returns><c>HtmlTagFlags.Custom</c> when the name is neither an HTML tag nor an
/// XML-based tag; otherwise the combination of all flags that apply to the tag</returns>
private HtmlTagFlags InnerGetTagFlagsByName(string tagNameInLowercase)
{
    bool isHtml = IsHtmlTag(tagNameInLowercase);
    // The XML check is only performed for names that are not HTML tags
    bool isXml = !isHtml && IsXmlBasedTag(tagNameInLowercase);

    if (!isHtml && !isXml)
    {
        return HtmlTagFlags.Custom;
    }

    HtmlTagFlags flags = HtmlTagFlags.None;

    if (IsInvisibleTag(tagNameInLowercase))
    {
        flags |= HtmlTagFlags.Invisible;
    }

    if (IsEmptyTag(tagNameInLowercase))
    {
        flags |= HtmlTagFlags.Empty;
    }

    if (IsBlockTag(tagNameInLowercase))
    {
        flags |= HtmlTagFlags.Block;
    }

    if (IsInlineTag(tagNameInLowercase))
    {
        flags |= HtmlTagFlags.Inline;
    }

    if (IsInlineBlockTag(tagNameInLowercase))
    {
        flags |= HtmlTagFlags.InlineBlock;
    }

    if (IsNonIndependentTag(tagNameInLowercase))
    {
        flags |= HtmlTagFlags.NonIndependent;
    }

    if (IsOptionalTag(tagNameInLowercase))
    {
        flags |= HtmlTagFlags.Optional;
    }

    if (IsTagWithEmbeddedCode(tagNameInLowercase))
    {
        flags |= HtmlTagFlags.EmbeddedCode;
    }

    if (isXml)
    {
        flags |= HtmlTagFlags.Xml;
    }

    return flags;
}
/// <summary>
/// Gets the flags of an HTML tag by its name
/// </summary>
/// <param name="tagNameInLowercase">Tag name in lowercase</param>
/// <returns>Tag flags, as returned by the flags cache</returns>
public HtmlTagFlags GetTagFlagsByName(string tagNameInLowercase)
{
    // `InnerGetTagFlagsByName` is handed to the cache as the value factory for this name
    return _htmlTagFlagsCache.GetOrAdd(tagNameInLowercase, InnerGetTagFlagsByName);
}