Example #1
0
 /// <summary>
 /// Initializes a new instance of HTML tag from its name, lowercase name,
 /// attributes and flags
 /// </summary>
 /// <param name="name">Tag name</param>
 /// <param name="nameInLowercase">Tag name in lowercase</param>
 /// <param name="attributes">List of tag attributes (stored as passed, without copying)</param>
 /// <param name="flags">Tag flags</param>
 public HtmlTag(string name, string nameInLowercase, IList<HtmlAttribute> attributes, HtmlTagFlags flags)
 {
     this.Name = name;
     this.NameInLowercase = nameInLowercase;
     this.Attributes = attributes;
     this.Flags = flags;
 }
        /// <summary>
        /// Determines the type of an HTML attribute
        /// </summary>
        /// <param name="tagNameInLowercase">Name of the owning tag in lowercase</param>
        /// <param name="tagFlags">Flags of the owning tag</param>
        /// <param name="attributeNameInLowercase">Attribute name in lowercase</param>
        /// <param name="attributes">List of attributes (forwarded to the URI-based attribute check)</param>
        /// <returns>Attribute type; never <c>Unknown</c>, because the final fallback is
        /// either <c>Xml</c> or <c>Text</c></returns>
        public HtmlAttributeType GetAttributeType(string tagNameInLowercase, HtmlTagFlags tagFlags,
                                                  string attributeNameInLowercase, List<HtmlAttribute> attributes)
        {
            // These kinds are recognized regardless of the tag flags
            if (attributeNameInLowercase == "class")
            {
                return HtmlAttributeType.ClassName;
            }

            if (attributeNameInLowercase == "style")
            {
                return HtmlAttributeType.Style;
            }

            if (IsEventAttribute(attributeNameInLowercase))
            {
                return HtmlAttributeType.Event;
            }

            // Boolean, numeric and URI checks are skipped for tags flagged as XML
            bool isXmlTag = tagFlags.IsSet(HtmlTagFlags.Xml);
            if (!isXmlTag)
            {
                if (IsBooleanAttribute(attributeNameInLowercase))
                {
                    return HtmlAttributeType.Boolean;
                }

                if (IsNumericAttribute(tagNameInLowercase, attributeNameInLowercase))
                {
                    return HtmlAttributeType.Numeric;
                }

                if (IsUriBasedAttribute(tagNameInLowercase, attributeNameInLowercase, attributes))
                {
                    return HtmlAttributeType.Uri;
                }
            }

            // Fallback: everything else is either an XML-based attribute or plain text
            if (IsXmlBasedAttribute(attributeNameInLowercase))
            {
                return HtmlAttributeType.Xml;
            }

            return HtmlAttributeType.Text;
        }
Example #3
0
 /// <summary>
 /// Initializes a new instance of HTML tag with an empty list of attributes
 /// </summary>
 /// <param name="name">Tag name</param>
 /// <param name="flags">Tag flags</param>
 public HtmlTag(string name, HtmlTagFlags flags)
     : this(name, new List<HtmlAttribute>(), flags)
 { }
 /// <summary>
 /// Checks whether all bits of the specified flag are set in the source value
 /// </summary>
 /// <param name="source">Flags value to inspect</param>
 /// <param name="flag">Flag (or combination of flags) to look for</param>
 /// <returns>Result of check (true - every bit of the flag is present in the source;
 /// false - at least one bit is missing)</returns>
 internal static bool IsSet(this HtmlTagFlags source, HtmlTagFlags flag)
 {
     HtmlTagFlags commonBits = source & flag;

     return commonBits == flag;
 }
Example #5
0
 /// <summary>
 /// Initializes a new instance of HTML tag from its name, attributes and flags
 /// </summary>
 /// <param name="name">Tag name</param>
 /// <param name="attributes">List of tag attributes (stored as passed, without copying)</param>
 /// <param name="flags">Tag flags</param>
 public HtmlTag(string name, IList<HtmlAttribute> attributes, HtmlTagFlags flags)
 {
     this.Name = name;
     this.Attributes = attributes;
     this.Flags = flags;
 }
Example #6
0
 /// <summary>
 /// Initializes a new instance of HTML tag with an empty list of attributes
 /// </summary>
 /// <param name="name">Tag name</param>
 /// <param name="flags">Tag flags</param>
 public HtmlTag(string name, HtmlTagFlags flags)
     : this(name, new List<HtmlAttribute>(), flags)
 { }
Example #7
0
        /// <summary>
        /// Parses an end tag
        /// </summary>
        /// <remarks>
        /// The stack of open tags is searched from the top for a tag with the specified
        /// lowercase name. When one is found, it and every tag above it are closed
        /// (topmost first) and removed from the stack. An empty tag name closes all open tags.
        /// When no matching open tag exists, the end tag is reported only while a conditional
        /// comment is opened.
        /// </remarks>
        /// <param name="tagName">Tag name</param>
        /// <param name="tagNameInLowercase">Tag name in lowercase</param>
        private void ParseEndTag(string tagName, string tagNameInLowercase)
        {
            int lastTagIndex = _tagStack.Count - 1;
            bool tagNameNotEmpty = !string.IsNullOrEmpty(tagName);
            HtmlParsingHandlers.EndTagDelegate endTagHandler = _handlers.EndTag;

            // With an empty name the index stays at 0, so every open tag gets closed
            int endTagIndex = 0;
            if (tagNameNotEmpty)
            {
                // Find the closest open tag with the same name; ends at -1 when there is none
                for (endTagIndex = lastTagIndex; endTagIndex >= 0; endTagIndex--)
                {
                    if (_tagStack[endTagIndex].NameInLowercase == tagNameInLowercase)
                    {
                        break;
                    }
                }
            }

            if (endTagIndex >= 0)
            {
                // Close all the open elements, up the stack
                for (int tagIndex = lastTagIndex; tagIndex >= endTagIndex; tagIndex--)
                {
                    HtmlTag startTag = _tagStack[tagIndex];
                    HtmlTagFlags startTagFlags = startTag.Flags;

                    // The XML tag stack is parser state, so it must be unwound even when
                    // nobody listens for end tags; otherwise it drifts out of sync with
                    // the tag stack
                    if (_xmlTagStack.Count > 0 && !startTagFlags.IsSet(HtmlTagFlags.NonIndependent))
                    {
                        _xmlTagStack.Pop();
                    }

                    if (endTagHandler == null)
                    {
                        continue;
                    }

                    string startTagNameInLowercase = startTag.NameInLowercase;

                    // The explicitly closed tag keeps the spelling used in the end tag;
                    // implicitly closed tags reuse the spelling of their start tag
                    string endTagName = (tagNameNotEmpty && tagNameInLowercase == startTagNameInLowercase)
                        ? tagName
                        : startTag.Name;

                    var endTag = new HtmlTag(endTagName, startTagNameInLowercase, startTagFlags);
                    endTagHandler(_context, endTag);
                }

                // Remove the open elements from the stack
                // (nothing to remove when the stack was already empty)
                if (endTagIndex <= lastTagIndex)
                {
                    _tagStack.RemoveRange(endTagIndex, lastTagIndex - endTagIndex + 1);
                }
            }
            else if (tagNameNotEmpty && _conditionalCommentOpened)
            {
                // End tag without a matching start tag on the stack, inside a conditional comment
                if (_xmlTagStack.Count > 0 && _tagTypeDeterminer.IsXmlBasedTag(tagNameInLowercase))
                {
                    _xmlTagStack.Pop();
                }

                var endTag = new HtmlTag(tagName, tagNameInLowercase, GetTagFlagsByName(tagNameInLowercase));
                endTagHandler?.Invoke(_context, endTag);
            }
        }
Example #8
0
        /// <summary>
        /// Parses a start tag
        /// </summary>
        /// <remarks>
        /// Implicitly closes a preceding optional tag where needed, determines the type of
        /// every attribute, pushes non-empty tags onto the stack of open tags and notifies
        /// the start tag handler.
        /// </remarks>
        /// <param name="tagName">Tag name</param>
        /// <param name="tagNameInLowercase">Tag name in lowercase</param>
        /// <param name="attributes">List of attributes (the <c>Type</c> of each item is assigned here)</param>
        /// <param name="isEmptyTag">Flag that tag is empty</param>
        private void ParseStartTag(string tagName, string tagNameInLowercase, List<HtmlAttribute> attributes,
                                   bool isEmptyTag)
        {
            HtmlTagFlags tagFlags = GetTagFlagsByName(tagNameInLowercase);

            if (tagFlags.IsSet(HtmlTagFlags.Optional))
            {
                HtmlTag lastStackedTag = _tagStack.LastOrDefault();
                if (lastStackedTag != null && lastStackedTag.NameInLowercase == tagNameInLowercase)
                {
                    // Same optional tag opened again: close the previous one first
                    ParseEndTag(lastStackedTag.Name, lastStackedTag.NameInLowercase);
                }
                else if (tagNameInLowercase == "body")
                {
                    // A `body` start tag closes a still-open `head`;
                    // one pass over the stack both detects and retrieves it
                    HtmlTag headTag = _tagStack.FirstOrDefault(t => t.NameInLowercase == "head");
                    if (headTag != null)
                    {
                        ParseEndTag(headTag.Name, headTag.NameInLowercase);
                    }
                }
            }

            // Reconcile the caller-supplied emptiness with the flag derived from the tag name
            if (tagFlags.IsSet(HtmlTagFlags.Empty))
            {
                isEmptyTag = true;
            }
            else if (isEmptyTag)
            {
                tagFlags |= HtmlTagFlags.Empty;
            }

            // Attribute types are determined only now, when the complete list of attributes
            // is available to the type determiner
            int attributeCount = attributes.Count;

            for (int attributeIndex = 0; attributeIndex < attributeCount; attributeIndex++)
            {
                HtmlAttribute attribute = attributes[attributeIndex];
                attribute.Type = _attributeTypeDeterminer.GetAttributeType(tagNameInLowercase, tagFlags,
                                                                           attribute.NameInLowercase, attributes);
            }

            var tag = new HtmlTag(tagName, tagNameInLowercase, attributes, tagFlags);

            if (!isEmptyTag)
            {
                if (_conditionalCommentOpened)
                {
                    HtmlConditionalComment lastConditionalComment = _conditionalCommentStack.Peek();
                    HtmlConditionalCommentType lastConditionalCommentType = lastConditionalComment.Type;

                    // Inside a conditional comment only tags with embedded code, or tags within
                    // a revealed validating comment, are tracked on the stack
                    if (tagFlags.IsSet(HtmlTagFlags.EmbeddedCode) ||
                        lastConditionalCommentType == HtmlConditionalCommentType.RevealedValidating ||
                        lastConditionalCommentType == HtmlConditionalCommentType.RevealedValidatingSimplified)
                    {
                        _tagStack.Add(tag);
                    }
                }
                else
                {
                    _tagStack.Add(tag);
                }
            }

            _handlers.StartTag?.Invoke(_context, tag);

            if (tagFlags.IsSet(HtmlTagFlags.Xml) && !tagFlags.IsSet(HtmlTagFlags.NonIndependent))
            {
                _xmlTagStack.Push(tagNameInLowercase);
            }
        }
Example #9
0
        /// <summary>
        /// Parses the attributes of a tag
        /// </summary>
        /// <remarks>
        /// Every match of the attribute regular expression produces one attribute with its
        /// decoded value and the coordinates of its name and value. Coordinates are computed
        /// incrementally: each one is derived from the previously computed coordinates and
        /// the text between the two positions.
        /// </remarks>
        /// <param name="tagName">Tag name</param>
        /// <param name="tagNameInLowercase">Tag name in lowercase</param>
        /// <param name="tagFlags">Tag flags</param>
        /// <param name="attributesString">String representation of the attribute list</param>
        /// <param name="attributesCoordinates">Attributes coordinates</param>
        /// <returns>List of attributes (empty when the string is null, empty or whitespace)</returns>
        private IList<HtmlAttribute> ParseAttributes(string tagName, string tagNameInLowercase, HtmlTagFlags tagFlags,
            string attributesString, SourceCodeNodeCoordinates attributesCoordinates)
        {
            var result = new List<HtmlAttribute>();
            if (string.IsNullOrWhiteSpace(attributesString))
            {
                return result;
            }

            // Running cursor: last computed coordinates and the string offset they belong to
            SourceCodeNodeCoordinates cursorCoordinates = attributesCoordinates;
            int cursorPosition = 0;

            foreach (Match match in _attributeRegex.Matches(attributesString))
            {
                Group nameGroup = match.Groups["attributeName"];
                Group equalSignGroup = match.Groups["attributeEqualSign"];
                Group valueGroup = match.Groups["attributeValue"];

                string attributeName = nameGroup.Value;
                string attributeNameInLowercase = Utils.ContainsUppercaseCharacters(attributeName)
                    ? attributeName.ToLowerInvariant()
                    : attributeName;

                // null - attribute without an equal sign; empty string - equal sign without a value
                string attributeValue = null;
                if (equalSignGroup.Success)
                {
                    attributeValue = string.Empty;
                    if (valueGroup.Success)
                    {
                        attributeValue = valueGroup.Value;
                        if (!string.IsNullOrWhiteSpace(attributeValue))
                        {
                            attributeValue = HtmlAttribute.HtmlAttributeDecode(attributeValue);
                        }
                    }
                }

                int lineBreakCount;
                int charRemainderCount;

                var attributeNameCoordinates = SourceCodeNodeCoordinates.Empty;
                if (nameGroup.Success)
                {
                    int namePosition = nameGroup.Index;

                    SourceCodeNavigator.CalculateLineBreakCount(attributesString, cursorPosition,
                        namePosition - cursorPosition, out lineBreakCount, out charRemainderCount);
                    attributeNameCoordinates = SourceCodeNavigator.CalculateAbsoluteNodeCoordinates(
                        cursorCoordinates, lineBreakCount, charRemainderCount);

                    cursorCoordinates = attributeNameCoordinates;
                    cursorPosition = namePosition;
                }

                var attributeValueCoordinates = SourceCodeNodeCoordinates.Empty;
                if (valueGroup.Success)
                {
                    int valuePosition = valueGroup.Index;

                    SourceCodeNavigator.CalculateLineBreakCount(attributesString, cursorPosition,
                        valuePosition - cursorPosition, out lineBreakCount, out charRemainderCount);
                    attributeValueCoordinates = SourceCodeNavigator.CalculateAbsoluteNodeCoordinates(
                        cursorCoordinates, lineBreakCount, charRemainderCount);

                    cursorCoordinates = attributeValueCoordinates;
                    cursorPosition = valuePosition;
                }

                // NOTE(review): the type is determined against the attributes parsed so far,
                // not the complete list - confirm that no type check depends on later attributes
                HtmlAttributeType attributeType = GetAttributeType(tagNameInLowercase, tagFlags,
                    attributeNameInLowercase, result);

                var attribute = new HtmlAttribute(attributeName, attributeNameInLowercase, attributeValue,
                    attributeType, attributeNameCoordinates, attributeValueCoordinates);
                result.Add(attribute);
            }

            return result;
        }
Example #10
0
        /// <summary>
        /// Determines the type of an HTML attribute
        /// </summary>
        /// <param name="tagNameInLowercase">Name of the owning tag in lowercase</param>
        /// <param name="tagFlags">Flags of the owning tag</param>
        /// <param name="attributeNameInLowercase">Attribute name in lowercase</param>
        /// <param name="attributes">List of attributes (forwarded to the URI-based attribute check)</param>
        /// <returns>Attribute type; never <c>Unknown</c>, because the final fallback is
        /// either <c>Xml</c> or <c>Text</c></returns>
        private HtmlAttributeType GetAttributeType(string tagNameInLowercase, HtmlTagFlags tagFlags,
            string attributeNameInLowercase, IList<HtmlAttribute> attributes)
        {
            // These kinds are recognized regardless of the tag flags
            if (attributeNameInLowercase == "class")
            {
                return HtmlAttributeType.ClassName;
            }

            if (attributeNameInLowercase == "style")
            {
                return HtmlAttributeType.Style;
            }

            if (HtmlAttributeTypeHelpers.IsEventAttribute(attributeNameInLowercase))
            {
                return HtmlAttributeType.Event;
            }

            // Boolean, numeric and URI checks are skipped for tags flagged as XML
            bool isXmlTag = tagFlags.HasFlag(HtmlTagFlags.Xml);
            if (!isXmlTag)
            {
                if (HtmlAttributeTypeHelpers.IsBooleanAttribute(attributeNameInLowercase))
                {
                    return HtmlAttributeType.Boolean;
                }

                if (HtmlAttributeTypeHelpers.IsNumericAttribute(tagNameInLowercase, attributeNameInLowercase))
                {
                    return HtmlAttributeType.Numeric;
                }

                if (HtmlAttributeTypeHelpers.IsUriBasedAttribute(tagNameInLowercase,
                    attributeNameInLowercase, attributes))
                {
                    return HtmlAttributeType.Uri;
                }
            }

            // Fallback: everything else is either an XML-based attribute or plain text
            if (HtmlAttributeTypeHelpers.IsXmlBasedAttribute(attributeNameInLowercase))
            {
                return HtmlAttributeType.Xml;
            }

            return HtmlAttributeType.Text;
        }
        /// <summary>
        /// Checks whether it is possible to remove the attribute quotes
        /// </summary>
        /// <remarks>
        /// Quotes are never removed for XML tags, in the <c>KeepQuotes</c> mode, or when the
        /// value ends with a slash. Otherwise the decision is made by the regular expression
        /// that corresponds to the removal mode (HTML4 or HTML5).
        /// </remarks>
        /// <param name="tagFlags">HTML tag flags</param>
        /// <param name="attributeValue">Attribute value</param>
        /// <param name="attributeQuotesRemovalMode">Removal mode of HTML attribute quotes</param>
        /// <returns>Result of check (true - can remove; false - cannot remove)</returns>
        private static bool CanRemoveAttributeQuotes(HtmlTagFlags tagFlags, string attributeValue,
            HtmlAttributeQuotesRemovalMode attributeQuotesRemovalMode)
        {
            if (tagFlags.Xml || attributeQuotesRemovalMode == HtmlAttributeQuotesRemovalMode.KeepQuotes)
            {
                return false;
            }

            // Markup characters must be compared ordinally: the single-argument overload is
            // culture-sensitive and may ignore zero-width characters that follow the slash.
            // Presumably a trailing slash is excluded because, unquoted, it would merge with
            // the closing `>` of the tag - TODO confirm
            if (attributeValue.EndsWith("/", System.StringComparison.Ordinal))
            {
                return false;
            }

            if (attributeQuotesRemovalMode == HtmlAttributeQuotesRemovalMode.Html4)
            {
                return _html4AttributeValueNotRequireQuotesRegex.IsMatch(attributeValue);
            }

            if (attributeQuotesRemovalMode == HtmlAttributeQuotesRemovalMode.Html5)
            {
                return _html5AttributeValueNotRequireQuotesRegex.IsMatch(attributeValue);
            }

            return false;
        }
        /// <summary>
        /// Text handler
        /// </summary>
        /// <remarks>
        /// Processes a text node depending on the node that was handled before it
        /// (<c>_currentNodeType</c> and <c>_currentTag</c> at the time of the call):
        /// content of a start tag with embedded code is passed to the corresponding
        /// <c>ProcessEmbedded…Content</c> method, any other text goes through whitespace
        /// minification according to the settings. The resulting text is stored in
        /// <c>_currentText</c> and, when non-empty, appended to the output buffer.
        /// </remarks>
        /// <param name="context">Markup parsing context</param>
        /// <param name="text">Text</param>
        private void TextHandler(MarkupParsingContext context, string text)
        {
            // Snapshot of the previous node; `_currentNodeType` is overwritten at the end
            HtmlNodeType nodeType = _currentNodeType;
            string tagName;
            HtmlTagFlags tagFlags;
            IList<HtmlAttribute> attributes;
            if (_currentTag != null)
            {
                tagName = _currentTag.Name;
                tagFlags = _currentTag.Flags;
                attributes = _currentTag.Attributes;
            }
            else
            {
                // No current tag: fall back to neutral defaults
                tagName = string.Empty;
                tagFlags = new HtmlTagFlags();
                attributes = new List<HtmlAttribute>();
            }

            WhitespaceMinificationMode whitespaceMinificationMode = _settings.WhitespaceMinificationMode;

            if (nodeType == HtmlNodeType.StartTag && tagFlags.EmbeddedCode)
            {
                // Text is the content of a tag with embedded code
                // NOTE(review): the tag and attribute names below are compared case-sensitively,
                // which assumes that `Name` is already stored in lowercase - confirm
                switch (tagName)
                {
                    case "script":
                    case "style":
                        // Value of the first `type` attribute (null when there is none)
                        string contentType = attributes
                            .Where(a => a.Name == "type")
                            .Select(a => a.Value)
                            .FirstOrDefault()
                            ;

                        if (tagName == "script")
                        {
                            if (string.IsNullOrWhiteSpace(contentType))
                            {
                                // Without a `type`, the `language` attribute is consulted;
                                // only the `vbscript` value is recognized
                                string language = attributes
                                    .Where(a => a.Name == "language")
                                    .Select(a => a.Value)
                                    .FirstOrDefault()
                                    ;

                                if (!string.IsNullOrWhiteSpace(language)
                                    && language.Trim().ToLowerInvariant() == "vbscript")
                                {
                                    contentType = VBS_CONTENT_TYPE;
                                }
                            }

                            text = ProcessEmbeddedScriptContent(context, text, contentType);
                        }
                        else if (tagName == "style")
                        {
                            text = ProcessEmbeddedStyleContent(context, text, contentType);
                        }

                        break;
                    case "svg":
                        text = ProcessEmbeddedSvgContent(context, text);
                        break;
                    case "math":
                        text = ProcessEmbeddedMathMlContent(context, text);
                        break;
                }
            }
            else
            {
                if (whitespaceMinificationMode != WhitespaceMinificationMode.None)
                {
                    // An empty queue means that no tag with non-removable whitespace is tracked
                    if (_tagsWithNotRemovableWhitespaceQueue.Count == 0)
                    {
                        // Exactly one of the trimming branches below is applied
                        // (the first whose condition holds)
                        if (context.Position == 0)
                        {
                            // Processing of starting whitespace
                            text = text.TrimStart();
                        }
                        else if ((context.Position + text.Length) == context.Length)
                        {
                            // Processing of ending whitespace
                            text = text.TrimEnd();
                        }
                        else if (nodeType == HtmlNodeType.StartTag)
                        {
                            // Processing of whitespace, that followed after the start tag
                            bool allowTrimStart = false;
                            if (tagFlags.Invisible || (tagFlags.NonIndependent && tagFlags.Empty))
                            {
                                allowTrimStart = true;
                            }
                            else
                            {
                                // Medium mode trims only after block tags; aggressive mode also
                                // trims after non-empty inline and inline-block tags
                                if (whitespaceMinificationMode == WhitespaceMinificationMode.Medium)
                                {
                                    allowTrimStart = tagFlags.Block;
                                }
                                else if (whitespaceMinificationMode == WhitespaceMinificationMode.Aggressive)
                                {
                                    allowTrimStart = (tagFlags.Block
                                        || ((tagFlags.Inline || tagFlags.InlineBlock) && !tagFlags.Empty));
                                }
                            }

                            if (allowTrimStart)
                            {
                                text = text.TrimStart();
                            }
                        }
                        else if (nodeType == HtmlNodeType.EndTag)
                        {
                            // Processing of whitespace, that followed after the end tag
                            bool allowTrimStart = false;
                            if (tagFlags.Invisible || tagFlags.NonIndependent)
                            {
                                allowTrimStart = true;
                            }
                            else
                            {
                                // After an end tag both medium and aggressive modes trim
                                // only when the tag is a block tag
                                if (whitespaceMinificationMode == WhitespaceMinificationMode.Medium
                                    || whitespaceMinificationMode == WhitespaceMinificationMode.Aggressive)
                                {
                                    allowTrimStart = tagFlags.Block;
                                }
                            }

                            if (allowTrimStart)
                            {
                                text = text.TrimStart();
                            }
                        }
                        else if (nodeType == HtmlNodeType.Doctype || nodeType == HtmlNodeType.XmlDeclaration)
                        {
                            // Processing of whitespace, that followed after the document type declaration
                            // or XML declaration
                            text = text.TrimStart();
                        }

                        // Whatever remains after trimming gets its whitespace collapsed
                        if (text.Length > 0)
                        {
                            text = Utils.CollapseWhitespace(text);
                        }
                    }
                    else if (nodeType == HtmlNodeType.StartTag && tagName == "textarea"
                        && string.IsNullOrWhiteSpace(text))
                    {
                        // Whitespace-only content directly after a `textarea` start tag is dropped
                        text = string.Empty;
                    }
                }
            }

            _currentNodeType = HtmlNodeType.Text;
            _currentText = text;

            // Empty text is remembered in `_currentText`, but is not written to the buffer
            if (text.Length > 0)
            {
                _buffer.Add(text);
            }
        }
        /// <summary>
        /// Computes the HTML tag flags for a tag name
        /// </summary>
        /// <remarks>
        /// A name that is neither an HTML tag nor an XML-based tag yields only the
        /// <c>Custom</c> flag. The XML check is performed only for non-HTML names.
        /// </remarks>
        /// <param name="tagNameInLowercase">Tag name in lowercase</param>
        /// <returns>Tag flags</returns>
        private HtmlTagFlags InnerGetTagFlagsByName(string tagNameInLowercase)
        {
            bool isHtml = IsHtmlTag(tagNameInLowercase);
            bool isXml = !isHtml && IsXmlBasedTag(tagNameInLowercase);

            if (!isHtml && !isXml)
            {
                return HtmlTagFlags.Custom;
            }

            // Known tag: accumulate one flag per characteristic that applies
            HtmlTagFlags flags = HtmlTagFlags.None;

            if (IsInvisibleTag(tagNameInLowercase)) { flags |= HtmlTagFlags.Invisible; }
            if (IsEmptyTag(tagNameInLowercase)) { flags |= HtmlTagFlags.Empty; }
            if (IsBlockTag(tagNameInLowercase)) { flags |= HtmlTagFlags.Block; }
            if (IsInlineTag(tagNameInLowercase)) { flags |= HtmlTagFlags.Inline; }
            if (IsInlineBlockTag(tagNameInLowercase)) { flags |= HtmlTagFlags.InlineBlock; }
            if (IsNonIndependentTag(tagNameInLowercase)) { flags |= HtmlTagFlags.NonIndependent; }
            if (IsOptionalTag(tagNameInLowercase)) { flags |= HtmlTagFlags.Optional; }
            if (IsTagWithEmbeddedCode(tagNameInLowercase)) { flags |= HtmlTagFlags.EmbeddedCode; }
            if (isXml) { flags |= HtmlTagFlags.Xml; }

            return flags;
        }
        /// <summary>
        /// Gets the HTML tag flags by tag name
        /// </summary>
        /// <remarks>
        /// The flags are obtained through the <c>GetOrAdd</c> method of the flags cache,
        /// with <c>InnerGetTagFlagsByName</c> as the value factory.
        /// </remarks>
        /// <param name="tagNameInLowercase">Tag name in lowercase</param>
        /// <returns>Tag flags</returns>
        public HtmlTagFlags GetTagFlagsByName(string tagNameInLowercase)
        {
            return _htmlTagFlagsCache.GetOrAdd(tagNameInLowercase, InnerGetTagFlagsByName);
        }