Esempio n. 1
0
        /// <summary>
        /// Creates a node of the given type that belongs to <paramref name="ownerdocument"/>.
        /// </summary>
        /// <param name="type">Kind of node being created.</param>
        /// <param name="ownerdocument">Owning document; its Openednodes collection is consulted here.</param>
        /// <param name="index">Stored as the outer start index; -1 is handled specially below.</param>
        public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
        {
            this._nodetype        = type;
            this._ownerdocument   = ownerdocument;
            this._outerstartindex = index;

            // Document, comment and text nodes get a fixed name and are their own end node.
            if (type == HtmlNodeType.Document)
            {
                this.Name     = HtmlNodeTypeNameDocument;
                this._endnode = this;
            }
            else if (type == HtmlNodeType.Comment)
            {
                this.Name     = HtmlNodeTypeNameComment;
                this._endnode = this;
            }
            else if (type == HtmlNodeType.Text)
            {
                this.Name     = HtmlNodeTypeNameText;
                this._endnode = this;
            }

            // A node that is not closed and has a real index is registered under that index.
            if (this._ownerdocument.Openednodes != null && !this.Closed && index != -1)
            {
                this._ownerdocument.Openednodes.Add(index, this);
            }

            // Nodes created with index -1, other than comments and text, are flagged as changed.
            if (index == -1 && type != HtmlNodeType.Comment && type != HtmlNodeType.Text)
            {
                this._outerchanged = true;
                this._innerchanged = true;
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Looks for a node of <paramref name="nodeType"/> directly before or above <paramref name="node"/>.
        /// </summary>
        /// <remarks>
        /// The previous sibling is checked first, then the parent. If neither matches, a Text
        /// previous sibling (or, failing that, a Text parent) is skipped by repeating the search from it.
        /// </remarks>
        /// <param name="node">Node the search starts from.</param>
        /// <param name="nodeType">Node type being looked for.</param>
        /// <returns>The matching sibling or parent, or null when nothing matches.</returns>
        public HtmlNode FindPreviousElement(HtmlNode node, HtmlNodeType nodeType)
        {
            var previous = node.PreviousSibling;
            if (previous != null && previous.NodeType == nodeType)
            {
                return previous;
            }

            var parent = node.ParentNode;
            if (parent != null && parent.NodeType == nodeType)
            {
                return parent;
            }

            // Ignoring Text elements: continue the search from the text sibling.
            if (previous != null && previous.NodeType == HtmlNodeType.Text)
            {
                return this.FindPreviousElement(previous, nodeType);
            }

            // Ignoring Text elements: continue the search from the text parent.
            if (parent != null && parent.NodeType == HtmlNodeType.Text)
            {
                return this.FindPreviousElement(parent, nodeType);
            }

            // The original repeated the parent/nodeType check here; it could never succeed
            // because the identical check above had already returned, so it was removed.
            return null;
        }
Esempio n. 3
0
        /// <summary>
        /// Finds the closest node of <paramref name="nodeType"/> among the previous sibling and the parent of <paramref name="node"/>.
        /// </summary>
        /// <remarks>
        /// Order of checks: previous sibling, then parent. When neither has the requested type,
        /// a previous sibling of type Text is skipped by recursing from it; otherwise a parent of
        /// type Text is skipped the same way.
        /// </remarks>
        /// <param name="node">Starting node.</param>
        /// <param name="nodeType">Requested node type.</param>
        /// <returns>The node found, or null if there is none.</returns>
        public HtmlNode FindPreviousElement(HtmlNode node, HtmlNodeType nodeType)
        {
            var sibling = node.PreviousSibling;
            if (sibling != null && sibling.NodeType == nodeType)
            {
                return(sibling);
            }

            var owner = node.ParentNode;
            if (owner != null && owner.NodeType == nodeType)
            {
                return(owner);
            }

            // Ignoring Text elements (previous sibling)
            if (sibling != null && sibling.NodeType == HtmlNodeType.Text)
            {
                return(this.FindPreviousElement(sibling, nodeType));
            }

            // Ignoring Text elements (parent)
            if (owner != null && owner.NodeType == HtmlNodeType.Text)
            {
                return(this.FindPreviousElement(owner, nodeType));
            }

            // A second, identical parent check used to follow; it was unreachable and has been dropped.
            return(null);
        }
Esempio n. 4
0
 /// <summary>
 /// Initializes a node of the given type with its parent, previous and next references cleared.
 /// </summary>
 /// <param name="type">Value stored as the node's type.</param>
 protected HtmlNode(HtmlNodeType type)
 {
     this._type = type;

     // The node starts out with no parent and no previous/next reference.
     this._parent   = null;
     this._previous = null;
     this._next     = null;
 }
Esempio n. 5
0
        /// <summary>
        /// Builds a node of the requested type for the given owner document.
        /// </summary>
        /// <param name="type">Kind of node being created.</param>
        /// <param name="ownerdocument">Owning document; its Openednodes collection is read here.</param>
        /// <param name="index">Stored as the outer start index; -1 suppresses registration in Openednodes.</param>
        public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
        {
            _nodetype        = type;
            _ownerdocument   = ownerdocument;
            _outerstartindex = index;

            // Document, comment and text nodes receive a fixed name and end on themselves.
            switch (type)
            {
            case HtmlNodeType.Document:
                Name     = HtmlNodeTypeNameDocument;
                _endnode = this;
                break;

            case HtmlNodeType.Comment:
                Name     = HtmlNodeTypeNameComment;
                _endnode = this;
                break;

            case HtmlNodeType.Text:
                Name     = HtmlNodeTypeNameText;
                _endnode = this;
                break;
            }

            // Register the node under its index when it is not closed and has a real index.
            if (_ownerdocument.Openednodes != null)
            {
                if (!Closed)
                {
                    if (index != -1)
                    {
                        _ownerdocument.Openednodes.Add(index, this);
                    }
                }
            }

            // Only index-less nodes that are neither comment nor text are flagged as changed.
            if (index == -1 && type != HtmlNodeType.Comment && type != HtmlNodeType.Text)
            {
                _outerchanged = true;
                _innerchanged = true;
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Creates a node of the given type for <paramref name="ownerdocument"/>.
        /// </summary>
        /// <param name="type">Kind of node being created.</param>
        /// <param name="ownerdocument">Owning document; its Openednodes collection is read here.</param>
        /// <param name="index">Stored as the outer start index and used as the Openednodes key; -1 skips registration.</param>
        public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
        {
            this._nodetype        = type;
            this._ownerdocument   = ownerdocument;
            this._outerstartindex = index;

            // Document, comment and text nodes get a fixed tag name and are marked closed at once.
            switch (type)
            {
            case HtmlNodeType.Document:
                this.TagName = HtmlNode.HtmlNodeTypeNameDocument;
                this._closed = true;
                break;

            case HtmlNodeType.Comment:
                this.TagName = HtmlNode.HtmlNodeTypeNameComment;
                this._closed = true;
                break;

            case HtmlNodeType.Text:
                this.TagName = HtmlNode.HtmlNodeTypeNameText;
                this._closed = true;
                break;
            }

            // A node that is not closed and has a real index is recorded under that index.
            if (this._ownerdocument.Openednodes != null && !this.Closed && -1 != index)
            {
                this._ownerdocument.Openednodes[index] = this;
            }

            // An empty "if (-1 != index || type != HtmlNodeType.Comment) { }" used to follow here.
            // It had no body and no side effects, so it was removed as dead code.
        }
Esempio n. 7
0
 /// <summary>
 /// Creates a node in its initial state: unknown tag, invalid node type,
 /// index and length of -1, and no attributes.
 /// </summary>
 public HtmlNode()
 {
     // Classification of the node.
     this.TagId    = HtmlTagId.Unknown;
     this.NodeType = HtmlNodeType.Invalid;

     // -1 is the initial value for both position members.
     this.Index  = -1;
     this.Length = -1;

     this.Attributes = null;
 }
Esempio n. 8
0
        /// <summary>
        /// Removes every comment node found among the descendants of the document node.
        /// </summary>
        internal void RemoveNodesOfTypeComment()
        {
            // Materialize the matches first so that removing nodes does not disturb the enumeration.
            var commentNodes = Doc.DocumentNode.Descendants()
                               .Where(candidate => candidate.NodeType.Equals(HtmlNodeType.Comment))
                               .ToList();

            foreach (var commentNode in commentNodes)
            {
                commentNode.Remove();
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Filters the direct children of <paramref name="node"/> down to those of a given node type.
        /// </summary>
        /// <param name="node">Node whose ChildNodes are inspected.</param>
        /// <param name="type">Node type a child must have to be included.</param>
        /// <returns>A lazily evaluated sequence of the children whose NodeType equals <paramref name="type"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
        public static IEnumerable<HtmlNode> GetChildNodesByType(this HtmlNode node, HtmlNodeType type)
        {
            if (node == null)
            {
                throw new ArgumentNullException(nameof(node));
            }

            var matching = from child in node.ChildNodes
                           where child.NodeType == type
                           select child;

            return matching;
        }
Esempio n. 10
0
        /// <summary>
        /// Creates an element and classifies it as a document type node or a regular element.
        /// </summary>
        /// <param name="prefix">Forwarded unchanged to the base constructor.</param>
        /// <param name="localName">Forwarded unchanged to the base constructor.</param>
        /// <param name="namespaceURI">Forwarded unchanged to the base constructor.</param>
        /// <param name="ownerDocument">Forwarded unchanged to the base constructor.</param>
        protected internal HtmlElement(string prefix, string localName, string namespaceURI, HtmlDocument ownerDocument)
            : base(prefix, localName, namespaceURI, ownerDocument)
        {
            if (IsDocumentType)
            {
                _nodeType = HtmlNodeType.DocumentType;
            }
            else
            {
                _nodeType = HtmlNodeType.Element;
            }
#if DEBUG_HTML_ID
            // Debug builds tag each element with the current debug id, then advance the counter.
            SetAttribute(DebugIdAttributeName, _debugId.ToString());
            _debugId++;
#endif
        }
Esempio n. 11
0
 // Creates a node for the requested node type.
 // NOTE(review): this snippet looks truncated and does not compile as written: the return
 // statement has no terminating ';', the 'default:' label has no statement, and 'index' is
 // never used. Restore the missing code from the original source before relying on it.
 internal HtmlNode CreateNode(HtmlNodeType type, int index)
 {
     switch (type)
     {
     case HtmlNodeType.Document:
         // NOTE(review): returning an HtmlCommentNode for the Document type looks unintended; confirm.
         return new HtmlCommentNode(this)
            default:
     }
 }
Esempio n. 12
0
 /// <summary>
 /// Searches the file for the start of the next node (tag, text block, comment, etc.).
 /// </summary>
 /// <param name="reader">Reader over the file being processed.</param>
 /// <param name="nodeType">Receives the type of the node found.</param>
 /// <param name="beginPosition">Receives the file position where the node found begins.</param>
 /// <param name="endPosition">Receives the file position where the node found ends.</param>
 /// <param name="content">Receives the name of the tag found or the content of the text block.</param>
 /// <param name="isOpenTag">Receives true when an opening tag was found.</param>
 /// <returns>True when the search succeeded.</returns>
 private static bool FindNextNode(
     BinaryReader reader,
     out HtmlNodeType nodeType,
     out long beginPosition,
     out long endPosition,
     out string content,
     out bool isOpenTag)
 {
     // Forward to the full overload, passing an empty string as the tag to close.
     return FindNextNode(
         reader,
         out nodeType,
         out beginPosition,
         out endPosition,
         out content,
         out isOpenTag,
         string.Empty);
 }
Esempio n. 13
0
        /// <summary>
        /// Initializes an HtmlNode object.
        /// </summary>
        /// <param name="parentNode">Parent element.</param>
        /// <param name="nodeType">Type of the element.</param>
        /// <param name="tagName">Tag name.</param>
        /// <param name="beginPosition">Position in the file where this element begins.</param>
        /// <param name="endPosition">Position in the file where this element ends.</param>
        public HtmlNode(HtmlNode parentNode, HtmlNodeType nodeType, string tagName, long beginPosition, long endPosition)
        {
            // Tree links: the parent is supplied, the children member starts as null.
            ParentNode = parentNode;
            children   = null;

            // Description of the element itself.
            this.nodeType = nodeType;
            this.tagName  = tagName;

            // Extent of the element in the file.
            this.beginPosition = beginPosition;
            this.endPosition   = endPosition;
        }
Esempio n. 14
0
        /// <summary>
        /// Initializes an HtmlNode object with initial values for everything except the parent.
        /// </summary>
        /// <param name="parentNode">Parent element.</param>
        public HtmlNode(HtmlNode parentNode)
        {
            // Tree links: the parent is supplied, the children member starts as null.
            ParentNode = parentNode;
            children   = null;

            // No type and no tag name yet.
            nodeType = HtmlNodeType.None;
            tagName  = string.Empty;

            // Both positions start at -1.
            beginPosition = -1;
            endPosition   = -1;
        }
Esempio n. 15
0
        /// <summary>
        /// Walks the following siblings of <paramref name="node"/> and returns the first one of the given type.
        /// </summary>
        /// <param name="node">Node whose following siblings are examined.</param>
        /// <param name="nodeType">Node type to look for.</param>
        /// <returns>The first following sibling with a matching NodeType, or null if there is none.</returns>
        public static HtmlNode NextSiblingOfType(this HtmlNode node, HtmlNodeType nodeType)
        {
            for (var candidate = node.NextSibling; candidate != null; candidate = candidate.NextSibling)
            {
                if (candidate.NodeType == nodeType)
                {
                    return candidate;
                }
            }

            return null;
        }
Esempio n. 16
0
        /// <summary>
        /// Writes the end tag only for elements listed in KeepElements.
        /// </summary>
        /// <param name="result">Writer passed through to the base implementation.</param>
        /// <param name="name">Element name; ignored entirely when not contained in KeepElements.</param>
        protected override void WriteEndElement(TextWriter result, string name)
        {
            if (KeepElements.Contains(name))
            {
                // Reset the previous-node marker before delegating to the base writer.
                _prevNodeType = HtmlNodeType.None;
                base.WriteEndElement(result, name);
            }
        }
Esempio n. 17
0
 /// <summary>
 /// Creates the node that becomes the current node and stamps it with the current line,
 /// line position and stream position.
 /// </summary>
 /// <param name="type">Type of node to create.</param>
 /// <param name="index">Passed to CreateNode and stored as the node's stream position.</param>
 private void PushNodeStart(HtmlNodeType type, int index)
 {
     _currentnode = CreateNode(type, index);

     var started = _currentnode;
     started._line         = _line;
     started._lineposition = _lineposition;

     // For elements the recorded line position is moved back by one.
     if (type == HtmlNodeType.Element)
     {
         started._lineposition--;
     }

     started._streamposition = index;
 }
 /// <summary>
 /// Creates an element description from its node type, element type and position.
 /// </summary>
 /// <param name="nodeType">Node type stored on the element.</param>
 /// <param name="type">Element type stored on the element.</param>
 /// <param name="attributesAsString">Raw attribute text; stored only when not null or empty.</param>
 /// <param name="isEmpty">Stored as the empty-element flag.</param>
 /// <param name="characterPosition">Stored as the character position.</param>
 public HtmlElement(HtmlNodeType nodeType, HtmlElementType type, string attributesAsString, bool isEmpty, int characterPosition)
 {
     m_nodeType          = nodeType;
     m_elementType       = type;
     m_isEmptyElement    = isEmpty;
     m_characterPosition = characterPosition;

     // Leave the attribute string member untouched when nothing was supplied.
     if (string.IsNullOrEmpty(attributesAsString))
     {
         return;
     }

     m_attributesAsString = attributesAsString;
 }
Esempio n. 19
0
        /// <summary>
        /// Creates the node object that corresponds to the requested node type.
        /// </summary>
        /// <param name="type">Requested node type.</param>
        /// <param name="index">Index passed on to the created node's constructor.</param>
        /// <returns>
        /// An HtmlCommentNode for comments, an HtmlTextNode for text, and a plain HtmlNode of
        /// the requested type otherwise.
        /// </returns>
        internal HtmlNode CreateNode(HtmlNodeType type, int index)
        {
            if (type == HtmlNodeType.Comment)
            {
                return new HtmlCommentNode(this, index);
            }

            if (type == HtmlNodeType.Text)
            {
                return new HtmlTextNode(this, index);
            }

            return new HtmlNode(type, this, index);
        }
Esempio n. 20
0
        /// <summary>
        /// Writes an element only if it is listed in KeepElements, dropping its attributes
        /// unless the element is also listed in KeepAttributeElements.
        /// </summary>
        /// <param name="result">Writer passed through to the base implementation.</param>
        /// <param name="name">Element name used for both list lookups.</param>
        /// <param name="attributes">Attributes of the element; forwarded only for KeepAttributeElements members.</param>
        /// <param name="empty">Passed through to the base implementation.</param>
        protected override void WriteElement(TextWriter result, string name, Dictionary<string, string> attributes, bool empty)
        {
            if (!KeepElements.Contains(name))
            {
                return;
            }

            // Elements outside KeepAttributeElements are written with a fresh, empty attribute set.
            Dictionary<string, string> forwarded = KeepAttributeElements.Contains(name)
                ? attributes
                : new Dictionary<string, string>();

            _prevNodeType = HtmlNodeType.Element;
            base.WriteElement(result, name, forwarded, empty);
        }
        /// <summary>
        /// Reads forward and concatenates Text and CDATA values until a node with the given
        /// type and name is reached, or until Read() reports no more nodes.
        /// </summary>
        /// <param name="elementName">Name of the node that stops the scan; compared ignoring case.</param>
        /// <param name="nodeType">Type the stopping node must have.</param>
        /// <returns>The collected text with "\r\n" sequences removed and surrounding whitespace trimmed.</returns>
        public string GetInnerTextUpToElement(string elementName, HtmlNodeType nodeType)
        {
            var collected = new StringBuilder();

            while (Read())
            {
                // Ordinal, case-insensitive comparison: node names are identifiers, so the
                // result must not depend on the current culture as ToUpper() does.
                if (NodeType == nodeType &&
                    string.Equals(Name, elementName, System.StringComparison.OrdinalIgnoreCase))
                {
                    break;
                }

                if (NodeType == HtmlNodeType.Text || NodeType == HtmlNodeType.CDATA)
                {
                    // Fix: the line-break-stripped text was computed but the raw Value was appended.
                    var part = Value.Replace("\r\n", "");
                    collected.Append(part);
                }
            }

            return collected.ToString().Trim();
        }
Esempio n. 22
0
		/// <summary>
		/// Creates an HtmlNode from its type, its owner document and its index.
		/// </summary>
		/// <param name="type">Kind of node being created.</param>
		/// <param name="ownerdocument">Owning document; its Openednodes collection is read here.</param>
		/// <param name="index">Stored as the outer start index and used as the Openednodes key; -1 skips registration.</param>
		public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
		{
			_nodetype = type;
			_ownerdocument = ownerdocument;
			_outerstartindex = index;

			// Comment, document and text nodes get a fixed name and are their own end node.
			if (type == HtmlNodeType.Comment)
			{
				Name = HtmlNodeTypeNameComment;
				_endnode = this;
			}
			else if (type == HtmlNodeType.Document)
			{
				Name = HtmlNodeTypeNameDocument;
				_endnode = this;
			}
			else if (type == HtmlNodeType.Text)
			{
				Name = HtmlNodeTypeNameText;
				_endnode = this;
			}

			// The index is the key; nodes created with index -1 are not registered.
			if (_ownerdocument.Openednodes != null && !Closed && index != -1)
			{
				_ownerdocument.Openednodes.Add(index, this);
			}

			// innerhtml and outerhtml must be calculated for index-less nodes
			// that are neither comments nor text.
			if (index == -1 && type != HtmlNodeType.Comment && type != HtmlNodeType.Text)
			{
				SetChanged();
			}
		}
Esempio n. 23
0
        /// <summary>
        /// Writes a text node, trimming leading line breaks right after an element and
        /// handling text inside a "pre" tag separately.
        /// </summary>
        /// <param name="result">Writer that receives the output.</param>
        /// <param name="value">Text to write.</param>
        protected override void WriteText(TextWriter result, string value)
        {
            // Text that directly follows an element loses its leading CR/LF characters.
            bool followsElement = _prevNodeType == HtmlNodeType.Element;
            if (followsElement && !string.IsNullOrEmpty(value))
            {
                value = value.TrimStart('\r', '\n');
            }

            // Inside "pre" the text goes through RewritePreText and is written directly,
            // bypassing the base implementation.
            if (TopTag != null && TopTag.ToLowerInvariant() == "pre")
            {
                string rewritten = RewritePreText(RequiredHtmlEncode ? value.HtmlEncode() : value);
                _prevNodeType = HtmlNodeType.Text;
                result.Write(rewritten);
                return;
            }

            _prevNodeType = HtmlNodeType.Text;
            base.WriteText(result, value);
        }
Esempio n. 24
0
        /// <summary>
        /// Initializes an <see cref="HtmlNode"/> from its type, its owner document and its index.
        /// </summary>
        /// <param name="type">Kind of node being created.</param>
        /// <param name="ownerDocument">Owning document; its OpenedNodes collection is read here.</param>
        /// <param name="index">Stored on the node and used as the OpenedNodes key; -1 skips registration.</param>
        public HtmlNode(HtmlNodeType type, HtmlDocument ownerDocument, int index)
        {
            _nodeType      = type;
            _ownerDocument = ownerDocument;
            _index         = index;

            // Comment, document and text nodes get a fixed name and are their own end node.
            if (type == HtmlNodeType.Comment)
            {
                Name     = HtmlNodeTypeNameComment;
                _endNode = this;
            }
            else if (type == HtmlNodeType.Document)
            {
                Name     = HtmlNodeTypeNameDocument;
                _endNode = this;
            }
            else if (type == HtmlNodeType.Text)
            {
                Name     = HtmlNodeTypeNameText;
                _endNode = this;
            }

            // The index is the key; nodes created with index -1 are not registered.
            if (_ownerDocument.OpenedNodes != null && !Closed && index != -1)
            {
                _ownerDocument.OpenedNodes.Add(index, this);
            }
        }
Esempio n. 25
0
 /// <summary>
 /// Creates a new HtmlNodeParser that uses the provided reader.
 /// </summary>
 /// <param name="reader">StreamReader that reads from the Html stream.</param>
 internal HtmlNodeParser(StreamReader reader)
 {
     // Start with no current node type and an empty attribute list.
     m_NodeType   = HtmlNodeType.None;
     m_Attributes = new List<AttributeNode>();
     m_Reader     = reader;
 }
Esempio n. 26
0
        /// <summary>
        /// Initializes an HtmlNode from its type, its owner document and its index.
        /// </summary>
        /// <param name="type">Kind of node being created.</param>
        /// <param name="ownerdocument">Owning document; its Openednodes collection is read here.</param>
        /// <param name="index">Stored as the outer start index and used as the Openednodes key; -1 skips registration.</param>
        protected HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
        {
            _nodetype = type;
            _ownerdocument = ownerdocument;
            _outerstartindex = index;

            // Comment, document and text nodes get a fixed name and are their own end node.
            switch (type)
            {
                case HtmlNodeType.Comment:
                    Name = HtmlNodeTypeNameComment;
                    _endnode = this;
                    break;

                case HtmlNodeType.Document:
                    Name = HtmlNodeTypeNameDocument;
                    _endnode = this;
                    break;

                case HtmlNodeType.Text:
                    Name = HtmlNodeTypeNameText;
                    _endnode = this;
                    break;
            }

            bool hasIndex = index != -1;

            // The index is the key; nodes created with index -1 are not registered.
            if (_ownerdocument.Openednodes != null && !Closed && hasIndex)
            {
                _ownerdocument.Openednodes.Add(index, this);
            }

            // innerhtml and outerhtml must be calculated for index-less nodes
            // that are neither comments nor text.
            if (!hasIndex && type != HtmlNodeType.Comment && type != HtmlNodeType.Text)
            {
                _outerchanged = true;
                _innerchanged = true;
            }
        }
Esempio n. 27
0
        /// <summary>
        /// Searches the file for the start of the next node (tag, text block, comment, etc.).
        /// </summary>
        /// <param name="reader">Reader over the file being processed.</param>
        /// <param name="nodeType">Receives the type of the node found.</param>
        /// <param name="beginPosition">Receives the file position where the node found begins.</param>
        /// <param name="endPosition">Receives the file position where the node found ends.</param>
        /// <param name="content">Receives the name of the tag found or the content of the text block.</param>
        /// <param name="isOpenTag">Receives true when an opening tag was found.</param>
        /// <param name="tagToClose">When not null or empty, makes the search look for the nearest
        /// closing tag with this name (for tags without HTML content).</param>
        /// <returns>True when the search succeeded.</returns>
        private static bool FindNextNode(BinaryReader reader,
                                         out HtmlNodeType nodeType,
                                         out long beginPosition,
                                         out long endPosition,
                                         out string content,
                                         out bool isOpenTag,
                                         string tagToClose)
        {
            try
            {
                beginPosition = -1;
                endPosition   = -1;
                nodeType      = HtmlNodeType.None;
                isOpenTag     = false;
                content       = string.Empty;
                bool isNotEmpty = false;

                // Decide what counts as the start of a tag:
                // inside a block that holds no HTML markup (script, style) we look for
                // the tag that closes that block, '</...'.
                // In regular HTML markup the start of a tag is '<'.
                string tagBegin;
                if (string.IsNullOrEmpty(tagToClose))
                {
                    tagBegin = "<";
                }
                else
                {
                    tagBegin = "</" + tagToClose;
                }

                while ((reader.PeekChar() >= 0) && !isNotEmpty)
                {
                    // Remember where the node being examined begins
                    beginPosition = reader.BaseStream.Position;

                    // If the start of a tag was found
                    if (PeekString(reader, tagBegin.Length) == tagBegin)
                    {
                        // Handle tags and comments:

                        string tagStr = string.Empty;

                        bool isQuoteOpen  = false;
                        bool isApostrOpen = false;

                        char ch;
                        do
                        {
                            ch      = reader.ReadChar();
                            tagStr += ch;

                            // Track whether a quote or apostrophe is currently open,
                            // so that a '>' inside a quoted value does not end the tag
                            if (ch == '\"')
                            {
                                isQuoteOpen = !isQuoteOpen;
                            }
                            if (ch == '\'')
                            {
                                isApostrOpen = !isApostrOpen;
                            }
                        }while ((reader.PeekChar() >= 0) && (isQuoteOpen || isApostrOpen || (ch != '>')));

                        // Check the text against tag syntax and process it.
                        // NOTE(review): group 3 matches an optional backslash before '>', and that is
                        // what marks a SingleTag below; self-closing tags are normally written with
                        // '/'. Confirm this is intended.
                        Regex tagNamePatt  = new Regex(@"^<(\/?)(\w+)(?:\s+.*?)?(\\?)\s*>$", RegexOptions.Singleline);
                        Match tagNameMatch = tagNamePatt.Match(tagStr);
                        if (tagNameMatch.Success)
                        {
                            // Set the tag kind: a tag with content or a single tag
                            nodeType = (tagNameMatch.Groups[3].Value == "\\") ? HtmlNodeType.SingleTag : HtmlNodeType.ContainerTag;

                            endPosition = reader.BaseStream.Position;
                            content     = tagNameMatch.Groups[2].Value;
                            isOpenTag   = (tagNameMatch.Groups[1].Value != "/");

                            isNotEmpty = true;
                            continue;
                        }

                        // Handle comments
                        if ((tagStr.Length >= 4) && (tagStr.Substring(0, 4) == "<!--"))
                        {
                            // Sliding window over the last three characters read
                            string closeCommentStr = tagStr.Substring(tagStr.Length - 3, 3);

                            // Look for the end of the comment
                            while ((reader.PeekChar() >= 0) && (closeCommentStr != "-->"))
                            {
                                do
                                {
                                    ch = reader.ReadChar();
                                    closeCommentStr = closeCommentStr.Substring(1, 2) + ch;
                                }while ((reader.PeekChar() >= 0) && (ch != '>'));
                            }
                            nodeType    = HtmlNodeType.Comment;
                            endPosition = reader.BaseStream.Position;
                            isNotEmpty  = true;
                            continue;
                        }
                    }
                    else
                    {
                        // Handle text blocks:

                        content = string.Empty;
                        char ch;

                        // Read up to the next tag
                        while ((reader.PeekChar() >= 0) &&
                               ((reader.PeekChar() != tagBegin[0]) || (PeekString(reader, tagBegin.Length) != tagBegin)))
                        {
                            ch         = reader.ReadChar();
                            // Any character other than NUL, CR, LF, tab or space makes the block non-empty
                            // (the original tested '\r' twice; the duplicate was removed)
                            isNotEmpty = isNotEmpty || ((ch != '\0') && (ch != '\r') && (ch != '\n') &&
                                                        (ch != '\t') && (ch != ' '));
                            content += ch;
                        }

                        // If the text block contains any information
                        if (isNotEmpty)
                        {
                            nodeType    = HtmlNodeType.TextBlock;
                            endPosition = reader.BaseStream.Position;
                        }
                    }
                }

                return(isNotEmpty);
            }
            catch (Exception)
            {
                Logger.LogError("Ошибка в методе 'FindNextNode'", string.Empty);
                // Rethrow with 'throw;' so the original stack trace is preserved
                // ('throw e;' would reset it to this frame).
                throw;
            }
        }
 /// <summary>
 /// Forwards all arguments unchanged to the base constructor; adds no initialization of its own.
 /// </summary>
 /// <param name="type">Node type passed to the base constructor.</param>
 /// <param name="ownerdocument">Owner document passed to the base constructor.</param>
 /// <param name="index">Index passed to the base constructor.</param>
 protected HtmlElementNodeBase(HtmlNodeType type, HtmlDocument ownerdocument, int index) : base(type, ownerdocument, index)
 {
     // Intentionally empty.
 }
		/// <summary>
		/// Advances the reader to the next node by feeding characters from the peeker through a
		/// state machine. On success the node type, name, value and (for elements) attributes and
		/// the empty-element flag describe the node that was read.
		/// </summary>
		/// <returns>
		/// True when a node was produced; false when the input ended without producing one
		/// (including input that ends inside an unfinished construct other than text, '&amp;' or '&lt;').
		/// </returns>
		public bool Read()
		{
			// Reset the per-node outputs. _state is NOT reset: it carries over between calls.
			_nodeType = HtmlNodeType.None;
			_name.Length = 0;
			_value.Length = 0;
			_isEmptyElement = false;

			var attrName = new StringBuilder();
			var attrValue = new StringBuilder();

			// Quote character that ends the current attribute value; ' ' marks an unquoted value.
			var quoteStyle = '"';
			// Set once a '[' is seen inside a DOCTYPE; such a DOCTYPE ends on "]>".
			var customDoctype = false;
			StringBuilder entity = null;

			while (_peeker.Read())
			{
				char c = _peeker.Current;

				switch (_state)
				{
					case State.Text:
						if (c == '&')
						{
							entity = new StringBuilder();
							_state = State.Amp;
						}
						else if (c == '<')
						{
							_state = State.Lt;
							// Text collected so far is returned as its own node before the tag is parsed.
							if (_value.Length > 0)
							{
								_nodeType = HtmlNodeType.Text;
								return true;
							}
						}
						else
						{
							_value.Append(c);
						}
						break;

					case State.Amp:
						if (c == ';')
						{
							_state = State.Text;
							if (entity.Length > 0)
							{
								_value.Append(DecodeEntity("&" + entity + ";"));
							}
							else
							{
								// "&;" carries no entity name: keep it literally.
								_value.Append("&");
								_value.Append(";");
							}
						}
						else if (c == '#' && entity.Length == 0)
						{
							// '#' is accepted only as the first entity character.
							entity.Append(c);
						}
						else if (Char.IsLetterOrDigit(c))
						{
							entity.Append(c);
						}
						else
						{
							// Entity ended without ';': decode what was collected as if it had one,
							// and push the terminating character back to be read as text.
							_state = State.Text;
							_peeker.Push(c);
							if (entity.Length > 0)
							{
								_value.Append(DecodeEntity("&" + entity + ";"));
							}
							else
							{
								_value.Append("&");
							}
							entity = null;
						}
						break;

					case State.Lt:
						// The character after '<' decides which construct follows.
						if (c == '/')
						{
							_state = State.ElemClose;
						}
						else if (c == '?' && _peeker.Match("xml"))
						{
							_state = State.XmlDeclaration;
							_peeker.Read(3);
						}
						else if (c == '?')
						{
							_state = State.Pi;
						}
						else if (c == '!' && _peeker.Match("--"))
						{
							_peeker.Read(2);
							_state = State.Comment;
						}
						else if (c == '!' && _peeker.Match("[CDATA["))
						{
							_peeker.Read(7);
							_state = State.CData;
						}
						else if (c == '!' && _peeker.Match("DOCTYPE"))
						{
							_peeker.Read(7);
							_state = State.DocType;
						}
						else if (!Char.IsLetter(c))
						{
							// Not a tag after all: keep '<' and the character as literal text.
							_state = State.Text;
							_value.Append('<');
							_value.Append(c);
						}
						else
						{
							_attributes = new StringDictionary();
							_state = State.ElemName;
							_name.Append(c);
						}
						break;

					case State.ElemName:
						if (Char.IsWhiteSpace(c))
						{
							_state = State.ElemAttributes;
						}
						else if (c == '/')
						{
							_isEmptyElement = true;
							_state = State.ElemSingle;
						}
						else if (c == '>')
						{
							_state = State.Text;
							_nodeType = HtmlNodeType.Element;
							return true;
						}
						else
						{
							_name.Append(c);
						}
						break;

					case State.ElemClose:
						if (c == '>')
						{
							_state = State.Text;
							_nodeType = HtmlNodeType.EndElement;
							return true;
						}
						_name.Append(c);
						break;

					case State.ElemSingle:
						if (c == '>')
						{
							// Fix: return to the text state like every other return site. Without this
							// the next call resumed in ElemSingle and treated the character following
							// "/>" (even '<' or '&') as literal text.
							_state = State.Text;
							_nodeType = HtmlNodeType.Element;
							return true;
						}
						// '/' was not followed by '>': discard the element and restart as text from this character.
						_state = State.Text;
						_nodeType = HtmlNodeType.None;
						_name.Length = 0;
						_value.Length = 0;
						_value.Append(c);
						break;

					case State.ElemAttributes:
						if (c == '>')
						{
							_state = State.Text;
							_nodeType = HtmlNodeType.Element;
							return true;
						}
						else if (c == '/')
						{
							_isEmptyElement = true;
							_state = State.ElemSingle;
						}
						else if (Char.IsWhiteSpace(c)) {}
						else
						{
							_state = State.AttrKey;
							attrName.Append(c);
						}
						break;

					case State.Comment:
						if (c == '-' && _peeker.Match("->"))
						{
							_peeker.Read(2);
							_state = State.Text;
							_nodeType = HtmlNodeType.Comment;
							return true;
						}
						_value.Append(c);
						break;

					case State.CData:
						if (c == ']' && _peeker.Match("]>"))
						{
							_peeker.Read(2);
							_state = State.Text;
							_nodeType = HtmlNodeType.CDATA;
							return true;
						}
						_value.Append(c);
						break;

					case State.XmlDeclaration:
						if (c == '?' && _peeker.Match(">"))
						{
							_peeker.Read(1);
							_state = State.Text;
							_nodeType = HtmlNodeType.XmlDeclaration;
							return true;
						}
						_value.Append(c);
						break;

					case State.DocType:
						if (c == '[')
						{
							customDoctype = true;
						}
						else
						{
							if (customDoctype)
							{
								// After '[' the DOCTYPE ends only on "]>"; content goes to the value.
								if (c == ']' && _peeker.Match(">"))
								{
									_peeker.Read(1);
									_state = State.Text;
									_nodeType = HtmlNodeType.DocumentType;
									return true;
								}
								_value.Append(c);
							}
							else
							{
								// Before any '[' the DOCTYPE ends on '>'; content goes to the name.
								if (c == '>')
								{
									_state = State.Text;
									_nodeType = HtmlNodeType.DocumentType;
									return true;
								}
								_name.Append(c);
							}
						}

						break;

					case State.Pi:
						if (c == '?' && _peeker.Match(">"))
						{
							_peeker.Read(1);
							_state = State.Text;
							_nodeType = HtmlNodeType.ProcessingInstruction;
							return true;
						}
						// Whitespace separates the instruction name from its value.
						if (Char.IsWhiteSpace(c))
						{
							_state = State.PiValue;
						}
						else
						{
							_name.Append(c);
						}
						break;

					case State.PiValue:
						if (c == '?' && _peeker.Match(">"))
						{
							_peeker.Read(1);
							_state = State.Text;
							_nodeType = HtmlNodeType.ProcessingInstruction;
							return true;
						}
						_value.Append(c);
						break;

					case State.AttrKey:
						if (Char.IsWhiteSpace(c))
						{
							_state = State.AttrEq;
						}
						else if (c == '=')
						{
							_state = State.AttrValue;
						}
						else if (c == '>')
						{
							// Attribute without a value: store null and let ElemAttributes handle the '>'.
							_attributes[attrName.ToString()] = null;
							_state = State.ElemAttributes;
							_peeker.Push(c);
							attrName.Length = 0;
							attrValue.Length = 0;
						}
						else
						{
							attrName.Append(c);
						}
						break;

					case State.AttrEq:
						if (Char.IsWhiteSpace(c)) {}
						else if (c == '=')
						{
							_state = State.AttrValue;
						}
						else
						{
							// No '=' followed the name: store the attribute with a null value and
							// re-read this character in the ElemAttributes state.
							_attributes[attrName.ToString()] = null;
							_state = State.ElemAttributes;
							_peeker.Push(c);
							attrName.Length = 0;
							attrValue.Length = 0;
						}
						break;

					case State.AttrValue:
						if (Char.IsWhiteSpace(c)) {}
						else if (c == '"' || c == '\'')
						{
							quoteStyle = c;
							_state = State.AttrQuote;
						}
						else
						{
							// Unquoted value: it ends at the next space or at '>'.
							quoteStyle = ' ';
							_state = State.AttrQuote;
							attrValue.Append(c);
						}
						break;

					case State.AttrQuote:
						if (c == quoteStyle || (' ' == quoteStyle && c == '>'))
						{
							_attributes[attrName.ToString()] = HttpUtility.HtmlDecode(attrValue.ToString());
							_state = State.ElemAttributes;
							// An unquoted value ended by '>' must leave the '>' for ElemAttributes.
							if (' ' == quoteStyle && c == '>')
							{
								_peeker.Push(c);
							}
							attrName.Length = 0;
							attrValue.Length = 0;
						}
						else
						{
							attrValue.Append(c);
						}
						break;
				}
			}

			// Input exhausted: emit whatever can still be reported as text.
			switch (_state)
			{
				case State.Text:
					_state = 0;
					if (_value.Length > 0)
					{
						_nodeType = HtmlNodeType.Text;
						return true;
					}
					return false;

				case State.Amp:
					// Only the '&' is appended; entity characters collected so far are not.
					_state = 0;
					_value.Append('&');
					_nodeType = HtmlNodeType.Text;
					return true;

				case State.Lt:
					_state = 0;
					_value.Append('<');
					_nodeType = HtmlNodeType.Text;
					return true;
			}

			return false;
		}
		/// <summary>
		/// Writes the end tag of an element, but only when the element name is contained in
		/// <c>KeepElements</c>; end tags of any other element are silently dropped.
		/// </summary>
		/// <param name="result">Writer that receives the output.</param>
		/// <param name="name">Name of the element being closed.</param>
		protected override void WriteEndElement(TextWriter result, string name)
		{
			if (KeepElements.Contains(name))
			{
				// After a written end tag the previous node type is recorded as None.
				_prevNodeType = HtmlNodeType.None;
				base.WriteEndElement(result, name);
			}
		}
        /// <summary>
        /// Comments handler. Rewrites recognised special comments (noindex, Knockout
        /// containerless, Angular comment directives) into a compact form and appends them
        /// to the output buffer; ordinary comments are kept only when
        /// <c>RemoveHtmlComments</c> is disabled.
        /// </summary>
        /// <param name="context">Markup parsing context</param>
        /// <param name="commentText">Comment text</param>
        private void CommentHandler(MarkupParsingContext context, string commentText)
        {
            _currentNodeType = HtmlNodeType.Comment;

            // Column offset added to expression coordinates reported by the helpers
            // (presumably the length of the "<!--" opener - confirm).
            const int beginCommentLength = 4;
            string processedCommentText = string.Empty;

            // Evaluate the noindex pattern once and reuse the match, instead of
            // running IsMatch followed by a second Match over the same text.
            Match noindexCommentMatch = _noindexCommentRegex.Match(commentText);

            if (noindexCommentMatch.Success)
            {
                // Processing of noindex comment
                processedCommentText = noindexCommentMatch.Groups["closingSlash"].Length > 0 ? "/noindex" : "noindex";
            }
            else if (KnockoutHelpers.IsEndContainerlessComment(commentText))
            {
                // Processing of end Knockout containerless comment
                processedCommentText = "/ko";
            }
            else if (KnockoutHelpers.IsBeginContainerlessComment(commentText))
            {
                // Processing of start Knockout containerless comment
                string koExpression = string.Empty;

                KnockoutHelpers.ParseBeginContainerlessComment(commentText,
                    (localContext, expression) =>
                    {
                        SourceCodeNodeCoordinates expressionCoordinates = localContext.NodeCoordinates;
                        expressionCoordinates.ColumnNumber += beginCommentLength;

                        koExpression = _settings.MinifyKnockoutBindingExpressions ?
                            MinifyKnockoutBindingExpression(context, expressionCoordinates, expression) : expression;
                    }
                );

                processedCommentText = "ko " + koExpression;
            }
            else if (AngularHelpers.IsCommentDirective(commentText))
            {
                // Processing of Angular comment directive
                string ngOriginalDirectiveName = string.Empty;
                string ngNormalizedDirectiveName = string.Empty;
                string ngExpression = string.Empty;

                AngularHelpers.ParseCommentDirective(commentText,
                    (localContext, originalDirectiveName, normalizedDirectiveName) =>
                    {
                        ngOriginalDirectiveName = originalDirectiveName;
                        ngNormalizedDirectiveName = normalizedDirectiveName;
                    },
                    (localContext, expression) =>
                    {
                        SourceCodeNodeCoordinates expressionCoordinates = localContext.NodeCoordinates;
                        expressionCoordinates.ColumnNumber += beginCommentLength;

                        // The expression is only minified for directives that
                        // ContainsAngularBindingExpression accepts.
                        ngExpression = expression;
                        if (_settings.MinifyAngularBindingExpressions
                            && ContainsAngularBindingExpression(ngNormalizedDirectiveName))
                        {
                            ngExpression = MinifyAngularBindingExpression(context, SourceCodeNodeCoordinates.Empty,
                                expressionCoordinates, expression);
                        }
                    }
                );

                processedCommentText = "directive:" + ngOriginalDirectiveName + " " + ngExpression;
            }
            else if (!_settings.RemoveHtmlComments)
            {
                // Ordinary comment that the settings ask to preserve
                processedCommentText = commentText;
            }

            // An empty result means the comment is removed from the output
            if (processedCommentText.Length > 0)
            {
                _buffer.Add("<!--");
                _buffer.Add(processedCommentText);
                _buffer.Add("-->");
            }
        }
Esempio n. 32
0
 /// <summary>
 /// Creates a node of the given type by delegating to the two-argument overload
 /// with an index of -1.
 /// </summary>
 /// <param name="type">Type of the node to create.</param>
 /// <returns>The node produced by <c>CreateNode(type, -1)</c>.</returns>
 internal HtmlNode CreateNode(HtmlNodeType type)
 {
     HtmlNode node = CreateNode(type, -1);
     return node;
 }
        /// <summary>
        /// Handles a document type declaration: trims trailing whitespace from the last
        /// buffered item when whitespace minification is enabled, then buffers either the
        /// short HTML5 doctype or the original declaration with collapsed whitespace.
        /// </summary>
        /// <param name="context">Markup parsing context</param>
        /// <param name="doctype">Document type declaration</param>
        private void DoctypeHandler(MarkupParsingContext context, string doctype)
        {
            _currentNodeType = HtmlNodeType.Doctype;

            if (_settings.WhitespaceMinificationMode != WhitespaceMinificationMode.None)
            {
                // Whitespace that precedes the declaration is removed
                TrimEndLastBufferItem();
            }

            string outputDoctype;
            if (_settings.UseShortDoctype)
            {
                outputDoctype = "<!DOCTYPE html>";
            }
            else
            {
                outputDoctype = Utils.CollapseWhitespace(doctype);
            }

            _buffer.Add(outputDoctype);
        }
Esempio n. 34
0
 /// <summary>
 /// Creates a node of the given type, forwarding to the two-argument overload.
 /// </summary>
 /// <param name="type">Type of the node to create.</param>
 /// <returns>The node returned by the two-argument overload.</returns>
 internal HtmlNode CreateNode(HtmlNodeType type)
 {
     // -1 is the index passed for nodes created through this overload
     // (presumably "no position in the source" - confirm against the overload).
     const int defaultIndex = -1;
     return this.CreateNode(type, defaultIndex);
 }
Esempio n. 35
0
 /// <summary>
 /// Resets the per-node parsing state so that a new node can be read.
 /// </summary>
 /// <param name="nodeType">Type recorded for the node that is about to be parsed.</param>
 private void StartNode(HtmlNodeType nodeType)
 {
     this.m_NodeType = nodeType;
     this.m_Attributes.Clear();

     // Name and value get brand-new builders rather than having the old ones cleared.
     this.m_Name = new StringBuilder();
     this.m_Value = new StringBuilder();
 }
		/// <summary>
		/// Writes a start tag only for elements listed in <c>KeepElements</c>. Attributes are
		/// passed through for elements that are also listed in <c>KeepAttributeElements</c>;
		/// every other kept element is written with an empty attribute set.
		/// </summary>
		/// <param name="result">Writer that receives the output.</param>
		/// <param name="name">Element name.</param>
		/// <param name="attributes">Attributes parsed for the element.</param>
		/// <param name="empty">Forwarded unchanged to the base implementation.</param>
		protected override void WriteElement(TextWriter result, string name, Dictionary<string, string> attributes, bool empty)
		{
			if (!KeepElements.Contains(name))
			{
				return;
			}

			Dictionary<string, string> attributesToWrite = KeepAttributeElements.Contains(name)
				? attributes
				: new Dictionary<string, string>();

			_prevNodeType = HtmlNodeType.Element;
			base.WriteElement(result, name, attributesToWrite, empty);
		}
		/// <summary>
		/// Records that the most recently written node is a CDATA section and forwards
		/// the call to the base implementation.
		/// </summary>
		/// <param name="result">Writer that receives the output.</param>
		/// <param name="value">CDATA content.</param>
		protected override void WriteCData(TextWriter result, string value)
		{
			this._prevNodeType = HtmlNodeType.CDATA;
			base.WriteCData(result, value);
		}
		/// <summary>
		/// Writes a text node. Leading CR/LF characters are stripped when the text directly
		/// follows an element. Text whose top tag is <c>pre</c> (case-insensitive) is passed
		/// through <c>RewritePreText</c> and written directly; all other text goes to the
		/// base implementation.
		/// </summary>
		/// <param name="result">Writer that receives the output.</param>
		/// <param name="value">Text content.</param>
		protected override void WriteText(TextWriter result, string value)
		{
			bool followsElement = _prevNodeType == HtmlNodeType.Element;
			if (followsElement && !string.IsNullOrEmpty(value))
			{
				value = value.TrimStart('\r', '\n');
			}

			if (TopTag != null && TopTag.ToLowerInvariant() == "pre")
			{
				// Encoding is applied first only when RequiredHtmlEncode is set
				string preSource = RequiredHtmlEncode ? value.HtmlEncode() : value;
				string rewritten = RewritePreText(preSource);

				_prevNodeType = HtmlNodeType.Text;
				result.Write(rewritten);
				return;
			}

			_prevNodeType = HtmlNodeType.Text;
			base.WriteText(result, value);
		}
        /// <summary>
        /// Handles the opening part of a conditional comment by buffering the opening
        /// delimiter, the condition expression and the closing delimiter that belong to
        /// the comment's type.
        /// </summary>
        /// <param name="context">Markup parsing context</param>
        /// <param name="htmlConditionalComment">Conditional comment</param>
        /// <exception cref="NotSupportedException">The comment type is not one of the handled values.</exception>
        private void IfConditionalCommentHandler(MarkupParsingContext context,
            HtmlConditionalComment htmlConditionalComment)
        {
            _currentNodeType = HtmlNodeType.IfConditionalComment;

            // Every type except Revealed opens with the comment-style delimiter
            string startPart = "<!--[if ";
            string endPart;

            switch (htmlConditionalComment.Type)
            {
                case HtmlConditionalCommentType.Hidden:
                    endPart = "]>";
                    break;

                case HtmlConditionalCommentType.RevealedValidating:
                    endPart = "]><!-->";
                    break;

                case HtmlConditionalCommentType.RevealedValidatingSimplified:
                    endPart = "]>-->";
                    break;

                case HtmlConditionalCommentType.Revealed:
                    startPart = "<![if ";
                    endPart = "]>";
                    break;

                default:
                    throw new NotSupportedException();
            }

            _buffer.Add(startPart);
            _buffer.Add(htmlConditionalComment.Expression);
            _buffer.Add(endPart);
        }
Esempio n. 40
0
 /// <summary>
 /// Initializes an <c>HtmlNode</c> object with an empty tag name.
 /// </summary>
 /// <param name="nodeType">Element type</param>
 /// <param name="beginPosition">Position in the file where this element begins</param>
 /// <param name="endPosition">Position in the file where this element ends</param>
 public HtmlNode(HtmlNodeType nodeType, long beginPosition, long endPosition)
     : this(null, nodeType, string.Empty, beginPosition, endPosition)
 {
     // All initialization is performed by the five-argument constructor.
 }
        /// <summary>
        /// End tags handler. Trims whitespace that precedes the end tag (depending on the
        /// whitespace minification mode and tag flags), removes optional end tags and
        /// content-less elements when the settings allow it, and otherwise appends the
        /// end tag to the output buffer.
        /// </summary>
        /// <param name="context">Markup parsing context</param>
        /// <param name="tag">HTML tag</param>
        private void EndTagHandler(MarkupParsingContext context, HtmlTag tag)
        {
            // Snapshot of the node processed immediately before this end tag
            HtmlNodeType previousNodeType = _currentNodeType;
            string previousTagName;
            IList<HtmlAttribute> previousTagAttributes;
            if (_currentTag != null)
            {
                previousTagName = _currentTag.Name;
                previousTagAttributes = _currentTag.Attributes;
            }
            else
            {
                previousTagName = string.Empty;
                previousTagAttributes = new List<HtmlAttribute>();
            }
            string previousText = _currentText;

            _currentNodeType = HtmlNodeType.EndTag;
            _currentTag = tag;
            _currentText = string.Empty;

            string tagName = tag.Name;
            HtmlTagFlags tagFlags = tag.Flags;

            WhitespaceMinificationMode whitespaceMinificationMode = _settings.WhitespaceMinificationMode;
            if (whitespaceMinificationMode != WhitespaceMinificationMode.None)
            {
                if (_tagsWithNotRemovableWhitespaceQueue.Count == 0 && !tagFlags.EmbeddedCode)
                {
                    // Processing of whitespace, that followed before the end tag
                    bool allowTrimEnd = false;
                    if (tagFlags.Invisible)
                    {
                        allowTrimEnd = true;
                    }
                    else
                    {
                        if (whitespaceMinificationMode == WhitespaceMinificationMode.Medium)
                        {
                            allowTrimEnd = tagFlags.Block;
                        }
                        else if (whitespaceMinificationMode == WhitespaceMinificationMode.Aggressive)
                        {
                            allowTrimEnd = (tagFlags.Block || tagFlags.Inline || tagFlags.InlineBlock);
                        }
                    }

                    if (allowTrimEnd)
                    {
                        TrimEndLastBufferItem();
                    }
                }

                // Check if current tag is the most recently added entry of the whitespace queue
                if (_tagsWithNotRemovableWhitespaceQueue.Count > 0 && tagName == _tagsWithNotRemovableWhitespaceQueue.Last())
                {
                    // Remove the entry that was just matched. Dequeue() would drop the OLDEST
                    // entry instead, which leaves a stale item behind for nested
                    // whitespace-preserving tags, so the queue is rebuilt without its last item.
                    string[] openTagNames = _tagsWithNotRemovableWhitespaceQueue.ToArray();
                    _tagsWithNotRemovableWhitespaceQueue.Clear();
                    for (int i = 0; i < openTagNames.Length - 1; i++)
                    {
                        _tagsWithNotRemovableWhitespaceQueue.Enqueue(openTagNames[i]);
                    }
                }
            }

            // The end tag of the previous element may be removable now that its parent closes
            if (_settings.RemoveOptionalEndTags
                && (previousNodeType == HtmlNodeType.EndTag
                    || (previousTagName != tagName && string.IsNullOrWhiteSpace(previousText)))
                && !IsSafeOptionalEndTag(previousTagName)
                && CanRemoveOptionalTagByParentTagName(previousTagName, tagName))
            {
                RemoveLastEndTagFromBuffer(previousTagName);
            }

            // The element is empty when its start tag was the previous node (same name,
            // previous node not an end tag) and only whitespace text came in between
            bool isElementEmpty = (string.IsNullOrWhiteSpace(previousText) && previousTagName == tagName
                && previousNodeType != HtmlNodeType.EndTag);
            if (_settings.RemoveTagsWithoutContent && isElementEmpty
                && CanRemoveTagWithoutContent(previousTagName, previousTagAttributes))
            {
                // Remove last "element" from buffer, return
                if (RemoveLastStartTagFromBuffer(tagName))
                {
                    FlushBuffer();
                    return;
                }
            }

            if (_settings.RemoveOptionalEndTags && tagFlags.OptionalEndTag && IsSafeOptionalEndTag(tagName))
            {
                // Leave only start tag in buffer
                FlushBuffer();
                return;
            }

            // Add end tag to buffer
            _buffer.Add("</");
            _buffer.Add(tagName);
            _buffer.Add(">");
        }
Esempio n. 42
0
        /// <summary>
        /// Initializes a new instance of the <see cref="HtmlNode"/> class from its type,
        /// its owning document and its start index.
        /// </summary>
        /// <param name="type">The node type.</param>
        /// <param name="ownerdocument">The document that owns the node.</param>
        /// <param name="index">Start index of the node; -1 means the node was not created from an indexed position.</param>
        public HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
        {
            this.NodeType = type;
            this.OwnerDocument = ownerdocument;
            this.OuterStartIndex = index;

            // Document, comment and text nodes get a fixed name and act as their own end node
            switch (type)
            {
                case HtmlNodeType.Document:
                    this.NodeName = HtmlNodeTypeNameDocument;
                    this.EndNode = this;
                    break;

                case HtmlNodeType.Text:
                    this.NodeName = HtmlNodeTypeNameText;
                    this.EndNode = this;
                    break;

                case HtmlNodeType.Comment:
                    this.NodeName = HtmlNodeTypeNameComment;
                    this.EndNode = this;
                    break;
            }

            bool hasIndex = index != -1;

            // Nodes that are still open are registered in the document, keyed by their index
            if (this.OwnerDocument.Openednodes != null && !this.Closed && hasIndex)
            {
                this.OwnerDocument.Openednodes.Add(index, this);
            }

            // Index-less nodes other than comments and text are flagged so that their
            // inner and outer HTML get calculated
            if (!hasIndex && type != HtmlNodeType.Comment && type != HtmlNodeType.Text)
            {
                this.OuterChanged = true;
                this.InnerChanged = true;
            }
        }
        /// <summary>
        /// Handles the closing part of a conditional comment by buffering the end marker
        /// that corresponds to the comment type.
        /// </summary>
        /// <param name="context">Markup parsing context</param>
        /// <param name="type">End If conditional comment type</param>
        /// <exception cref="NotSupportedException">The type is not one of the handled values.</exception>
        private void EndIfConditionalCommentHandler(MarkupParsingContext context, HtmlConditionalCommentType type)
        {
            _currentNodeType = HtmlNodeType.EndIfConditionalComment;

            string endIfComment;

            if (type == HtmlConditionalCommentType.Hidden)
            {
                endIfComment = "<![endif]-->";
            }
            else if (type == HtmlConditionalCommentType.RevealedValidating
                || type == HtmlConditionalCommentType.RevealedValidatingSimplified)
            {
                // Both validating variants share the same end marker
                endIfComment = "<!--<![endif]-->";
            }
            else if (type == HtmlConditionalCommentType.Revealed)
            {
                endIfComment = "<![endif]>";
            }
            else
            {
                throw new NotSupportedException();
            }

            _buffer.Add(endIfComment);
        }
        /// <summary>
        /// Minify HTML content. The whole operation runs under a lock because the parser
        /// callbacks communicate through instance fields that are reset for every run.
        /// </summary>
        /// <param name="content">HTML content</param>
        /// <param name="fileContext">File context</param>
        /// <param name="encoding">Text encoding</param>
        /// <param name="generateStatistics">Flag for whether to allow generate minification statistics</param>
        /// <returns>Minification result; the minified content is empty when errors were recorded</returns>
        public MarkupMinificationResult Minify(string content, string fileContext, Encoding encoding,
            bool generateStatistics)
        {
            MinificationStatistics statistics = null;
            string minifiedContent = string.Empty;
            var errors = new List<MinificationErrorInfo>();
            var warnings = new List<MinificationErrorInfo>();

            lock (_minificationSynchronizer)
            {
                _fileContext = fileContext;
                _encoding = encoding;

                try
                {
                    // Reset the per-run state before anything that may throw, so that the
                    // finally block below never dereferences a field that is still null
                    // (which would mask the original exception).
                    _buffer = new List<string>();
                    _errors = new List<MinificationErrorInfo>();
                    _warnings = new List<MinificationErrorInfo>();
                    _tagsWithNotRemovableWhitespaceQueue = new Queue<string>();
                    _currentNodeType = HtmlNodeType.Unknown;
                    _currentTag = null;
                    _currentText = string.Empty;
                    _result = new StringBuilder(content.Length);

                    if (generateStatistics)
                    {
                        statistics = new MinificationStatistics(_encoding);
                        statistics.Init(content);
                    }

                    _htmlParser.Parse(content);

                    FlushBuffer();

                    // The output is only published when no errors were recorded
                    if (_errors.Count == 0)
                    {
                        minifiedContent = _result.ToString();

                        if (generateStatistics)
                        {
                            statistics.End(minifiedContent);
                        }
                    }
                }
                catch (HtmlParsingException e)
                {
                    WriteError(LogCategoryConstants.HtmlParsingError, e.Message, _fileContext,
                        e.LineNumber, e.ColumnNumber, e.SourceFragment);
                }
                finally
                {
                    // _result is the only field whose initialization can fail inside try
                    if (_result != null)
                    {
                        _result.Clear();
                    }
                    _buffer.Clear();
                    _tagsWithNotRemovableWhitespaceQueue.Clear();
                    _currentTag = null;

                    if (_errors.Count == 0)
                    {
                        _logger.Info(LogCategoryConstants.HtmlMinificationSuccess,
                            string.Format(Strings.SuccesMessage_MarkupMinificationComplete, "HTML"),
                            _fileContext, statistics);
                    }

                    // Copy diagnostics to the locals before the shared lists are cleared
                    errors.AddRange(_errors);
                    warnings.AddRange(_warnings);

                    _errors.Clear();
                    _warnings.Clear();
                    _fileContext = null;
                    _encoding = null;
                }
            }

            return new MarkupMinificationResult(minifiedContent, errors, warnings, statistics);
        }
Esempio n. 45
0
 /// <summary>
 /// Stores the given node type in the <c>nodeType</c> field of this instance.
 /// </summary>
 /// <param name="nodeType">Node type to store.</param>
 protected void SetNodeType(HtmlNodeType nodeType)
 {
     this.nodeType = nodeType;
 }
        /// <summary>
        /// Handles an XML declaration. When XHTML syntax is enabled the declaration is
        /// minified with the inner XML minifier and buffered, or its errors are reported
        /// with coordinates translated into the enclosing document. When XHTML syntax is
        /// disabled only a warning is written and nothing is buffered.
        /// </summary>
        /// <param name="context">Markup parsing context</param>
        /// <param name="xmlDeclaration">XML declaration</param>
        private void XmlDeclarationHandler(MarkupParsingContext context, string xmlDeclaration)
        {
            _currentNodeType = HtmlNodeType.XmlDeclaration;

            if (_settings.WhitespaceMinificationMode != WhitespaceMinificationMode.None)
            {
                // Whitespace that precedes the XML declaration is removed
                TrimEndLastBufferItem();
            }

            if (!_settings.UseXhtmlSyntax)
            {
                string documentSource = context.SourceCode;
                SourceCodeNodeCoordinates xmlDeclarationCoordinates = context.NodeCoordinates;

                WriteWarning(LogCategoryConstants.HtmlMinificationWarning,
                    Strings.WarningMessage_XmlDeclarationNotAllowed, _fileContext,
                    xmlDeclarationCoordinates.LineNumber, xmlDeclarationCoordinates.ColumnNumber,
                    SourceCodeNavigator.GetSourceFragment(documentSource, xmlDeclarationCoordinates));
                return;
            }

            XmlMinifier innerXmlMinifier = GetInnerXmlMinifierInstance();
            MarkupMinificationResult minificationResult = innerXmlMinifier.Minify(xmlDeclaration);

            if (minificationResult.Errors.Count == 0)
            {
                _buffer.Add(minificationResult.MinifiedContent);
                return;
            }

            string sourceCode = context.SourceCode;
            SourceCodeNodeCoordinates documentCoordinates = context.NodeCoordinates;

            foreach (MinificationErrorInfo error in minificationResult.Errors)
            {
                // Error positions are relative to the declaration; convert them to
                // positions within the whole document before reporting
                var relativeCoordinates = new SourceCodeNodeCoordinates(error.LineNumber, error.ColumnNumber);
                var absoluteNodeCoordinates = SourceCodeNavigator.CalculateAbsoluteNodeCoordinates(
                    documentCoordinates, relativeCoordinates);

                string sourceFragment = SourceCodeNavigator.GetSourceFragment(
                    sourceCode, absoluteNodeCoordinates);
                string message = Strings.ErrorMessage_XmlDeclarationMinificationFailed;

                WriteError(LogCategoryConstants.HtmlMinificationError, message, _fileContext,
                    absoluteNodeCoordinates.LineNumber, absoluteNodeCoordinates.ColumnNumber, sourceFragment);
            }
        }
Esempio n. 47
0
 /// <summary>
 /// Initializes an <c>HtmlNode</c> object with the given tag name.
 /// </summary>
 /// <param name="nodeType">Element type</param>
 /// <param name="tagName">Tag name</param>
 /// <param name="beginPosition">Position in the file where this element begins</param>
 /// <param name="endPosition">Position in the file where this element ends</param>
 public HtmlNode(HtmlNodeType nodeType, string tagName, long beginPosition, long endPosition)
     : this(null, nodeType, tagName, beginPosition, endPosition)
 {
     // All initialization is performed by the five-argument constructor.
 }
        /// <summary>
        /// Text handler. Text that directly follows the start tag of an embedded-code
        /// element (script, style, svg, math) is passed to the matching content processor.
        /// Any other text is whitespace-minified according to the configured mode and the
        /// kind of node that preceded it. The resulting text is stored as the current text
        /// and, when not empty, appended to the output buffer.
        /// </summary>
        /// <param name="context">Markup parsing context</param>
        /// <param name="text">Text</param>
        private void TextHandler(MarkupParsingContext context, string text)
        {
            // Snapshot of the node processed immediately before this text
            HtmlNodeType nodeType = _currentNodeType;
            string tagName;
            HtmlTagFlags tagFlags;
            IList<HtmlAttribute> attributes;
            if (_currentTag != null)
            {
                tagName = _currentTag.Name;
                tagFlags = _currentTag.Flags;
                attributes = _currentTag.Attributes;
            }
            else
            {
                // No tag has been recorded: fall back to empty defaults
                tagName = string.Empty;
                tagFlags = new HtmlTagFlags();
                attributes = new List<HtmlAttribute>();
            }

            WhitespaceMinificationMode whitespaceMinificationMode = _settings.WhitespaceMinificationMode;

            if (nodeType == HtmlNodeType.StartTag && tagFlags.EmbeddedCode)
            {
                // Content of an embedded-code element: dispatch on the tag name
                switch (tagName)
                {
                    case "script":
                    case "style":
                        // Value of the first "type" attribute, or null when there is none
                        string contentType = attributes
                            .Where(a => a.Name == "type")
                            .Select(a => a.Value)
                            .FirstOrDefault()
                            ;

                        if (tagName == "script")
                        {
                            if (string.IsNullOrWhiteSpace(contentType))
                            {
                                // No usable type attribute: the "language" attribute is
                                // consulted, but only to recognise VBScript
                                string language = attributes
                                    .Where(a => a.Name == "language")
                                    .Select(a => a.Value)
                                    .FirstOrDefault()
                                    ;

                                if (!string.IsNullOrWhiteSpace(language)
                                    && language.Trim().ToLowerInvariant() == "vbscript")
                                {
                                    contentType = VBS_CONTENT_TYPE;
                                }
                            }

                            text = ProcessEmbeddedScriptContent(context, text, contentType);
                        }
                        else if (tagName == "style")
                        {
                            text = ProcessEmbeddedStyleContent(context, text, contentType);
                        }

                        break;
                    case "svg":
                        text = ProcessEmbeddedSvgContent(context, text);
                        break;
                    case "math":
                        text = ProcessEmbeddedMathMlContent(context, text);
                        break;
                }
            }
            else
            {
                if (whitespaceMinificationMode != WhitespaceMinificationMode.None)
                {
                    // An empty queue means no tag for which CanMinifyWhitespace returned
                    // false (enqueued by StartTagHandler) is currently pending
                    if (_tagsWithNotRemovableWhitespaceQueue.Count == 0)
                    {
                        // NOTE(review): this is an else-if chain, so a text node that is both
                        // the first and the last content of the document only gets TrimStart
                        if (context.Position == 0)
                        {
                            // Processing of starting whitespace
                            text = text.TrimStart();
                        }
                        else if ((context.Position + text.Length) == context.Length)
                        {
                            // Processing of ending whitespace
                            text = text.TrimEnd();
                        }
                        else if (nodeType == HtmlNodeType.StartTag)
                        {
                            // Processing of whitespace, that followed after the start tag
                            bool allowTrimStart = false;
                            if (tagFlags.Invisible || (tagFlags.NonIndependent && tagFlags.Empty))
                            {
                                allowTrimStart = true;
                            }
                            else
                            {
                                if (whitespaceMinificationMode == WhitespaceMinificationMode.Medium)
                                {
                                    allowTrimStart = tagFlags.Block;
                                }
                                else if (whitespaceMinificationMode == WhitespaceMinificationMode.Aggressive)
                                {
                                    allowTrimStart = (tagFlags.Block
                                        || ((tagFlags.Inline || tagFlags.InlineBlock) && !tagFlags.Empty));
                                }
                            }

                            if (allowTrimStart)
                            {
                                text = text.TrimStart();
                            }
                        }
                        else if (nodeType == HtmlNodeType.EndTag)
                        {
                            // Processing of whitespace, that followed after the end tag
                            bool allowTrimStart = false;
                            if (tagFlags.Invisible || tagFlags.NonIndependent)
                            {
                                allowTrimStart = true;
                            }
                            else
                            {
                                if (whitespaceMinificationMode == WhitespaceMinificationMode.Medium
                                    || whitespaceMinificationMode == WhitespaceMinificationMode.Aggressive)
                                {
                                    allowTrimStart = tagFlags.Block;
                                }
                            }

                            if (allowTrimStart)
                            {
                                text = text.TrimStart();
                            }
                        }
                        else if (nodeType == HtmlNodeType.Doctype || nodeType == HtmlNodeType.XmlDeclaration)
                        {
                            // Processing of whitespace, that followed after the document type declaration
                            // or XML declaration
                            text = text.TrimStart();
                        }

                        // Whatever survived trimming has its whitespace collapsed
                        if (text.Length > 0)
                        {
                            text = Utils.CollapseWhitespace(text);
                        }
                    }
                    else if (nodeType == HtmlNodeType.StartTag && tagName == "textarea"
                        && string.IsNullOrWhiteSpace(text))
                    {
                        // Whitespace-only text directly after a textarea start tag is dropped
                        text = string.Empty;
                    }
                }
            }

            // Record this text node as the current one for the next handler call
            _currentNodeType = HtmlNodeType.Text;
            _currentText = text;

            if (text.Length > 0)
            {
                _buffer.Add(text);
            }
        }
        /// <summary>
        /// Handles a template tag by buffering its start delimiter, expression and end
        /// delimiter. The expression is minified as an Angular binding expression when
        /// that setting is enabled and the delimiters are "{{" and "}}".
        /// </summary>
        /// <param name="context">Markup parsing context</param>
        /// <param name="expression">Expression</param>
        /// <param name="startDelimiter">Start delimiter</param>
        /// <param name="endDelimiter">End delimiter</param>
        private void TemplateTagHandler(MarkupParsingContext context, string expression, string startDelimiter,
            string endDelimiter)
        {
            _currentNodeType = HtmlNodeType.TemplateTag;

            string processedExpression;
            if (_settings.MinifyAngularBindingExpressions && startDelimiter == "{{" && endDelimiter == "}}")
            {
                processedExpression = MinifyAngularBindingExpression(context, expression);
            }
            else
            {
                // Any other delimiter pair is passed through untouched
                processedExpression = expression;
            }

            _buffer.Add(startDelimiter);
            _buffer.Add(processedExpression);
            _buffer.Add(endDelimiter);
        }
        /// <summary>
        /// Handles a start tag: updates the current-node state, applies the whitespace and
        /// optional-end-tag settings to what is already buffered, then writes the tag
        /// together with its attributes to the output buffer
        /// </summary>
        /// <param name="context">Markup parsing context</param>
        /// <param name="tag">HTML tag</param>
        private void StartTagHandler(MarkupParsingContext context, HtmlTag tag)
        {
            // Capture what preceded this tag before the current-node state is overwritten below
            HtmlNodeType precedingNodeType = _currentNodeType;
            string precedingTagName = (_currentTag != null) ? _currentTag.Name : string.Empty;

            if (_settings.UseMetaCharsetTag && IsMetaContentTypeTag(tag.Name, tag.Attributes))
            {
                tag = UpgradeToMetaCharsetTag(tag);
            }

            _currentNodeType = HtmlNodeType.StartTag;
            _currentTag = tag;
            _currentText = string.Empty;

            string tagName = tag.Name;
            IList<HtmlAttribute> attributes = tag.Attributes;
            HtmlTagFlags tagFlags = tag.Flags;

            // Set whitespace flags for nested tags (for example <span> within a <pre>)
            WhitespaceMinificationMode whitespaceMode = _settings.WhitespaceMinificationMode;
            if (whitespaceMode != WhitespaceMinificationMode.None)
            {
                if (_tagsWithNotRemovableWhitespaceQueue.Count == 0)
                {
                    // Whitespace that precedes the start tag is trimmed for invisible and
                    // non-independent tags in every mode, and for block tags in the
                    // medium and aggressive modes
                    bool trimPrecedingWhitespace = tagFlags.Invisible
                        || tagFlags.NonIndependent
                        || ((whitespaceMode == WhitespaceMinificationMode.Medium
                                || whitespaceMode == WhitespaceMinificationMode.Aggressive)
                            && tagFlags.Block);

                    if (trimPrecedingWhitespace)
                    {
                        TrimEndLastBufferItem();
                    }
                }

                if (!CanMinifyWhitespace(tagName))
                {
                    _tagsWithNotRemovableWhitespaceQueue.Enqueue(tagName);
                }
            }

            bool processOptionalEndTag = _settings.RemoveOptionalEndTags
                && precedingNodeType != HtmlNodeType.StartTag
                && !IsSafeOptionalEndTag(precedingTagName);
            if (processOptionalEndTag)
            {
                if (CanRemoveOptionalEndTagByNextTagName(precedingTagName, tagName))
                {
                    RemoveLastEndTagFromBuffer(precedingTagName);
                }

                FlushBuffer();
            }

            _buffer.Add("<");
            _buffer.Add(tagName);

            // The attribute count is read once, before any attribute string is built
            for (int i = 0, total = attributes.Count; i < total; i++)
            {
                HtmlAttribute attribute = attributes[i];
                _buffer.Add(BuildAttributeString(context, tag, attribute));
            }

            if (tagFlags.Empty)
            {
                // Any other render mode adds nothing before the closing bracket
                switch (_settings.EmptyTagRenderMode)
                {
                    case HtmlEmptyTagRenderMode.Slash:
                        _buffer.Add("/");
                        break;

                    case HtmlEmptyTagRenderMode.SpaceAndSlash:
                        _buffer.Add(" /");
                        break;
                }
            }

            _buffer.Add(">");
        }
Esempio n. 51
0
 /// <summary>
 /// Stores the supplied node type in this instance's <c>nodeType</c> member.
 /// </summary>
 /// <param name="nodeType">Node type to assign to this instance.</param>
 protected void SetNodeType(HtmlNodeType nodeType)
 {
     this.nodeType = nodeType;
 }
Esempio n. 52
0
 /// <summary>
 /// Creates a node of the given type, makes it the current node and stamps it with the
 /// parser's current line, line position and the supplied stream index.
 /// </summary>
 /// <param name="type">Type of the node to create.</param>
 /// <param name="index">Index passed to <c>CreateNode</c> and recorded as the node's stream position.</param>
 private void PushNodeStart(HtmlNodeType type, int index)
 {
   var startedNode = this.CreateNode(type, index);
   this._currentnode = startedNode;

   startedNode._line = this._line;
   // Element nodes are recorded one position to the left of the parser's line position.
   startedNode._lineposition = type == HtmlNodeType.Element
     ? this._lineposition - 1
     : this._lineposition;
   startedNode._streamposition = index;
 }
		/// <summary>
		/// Reads forward and concatenates the values of all Text and CDATA nodes until a node
		/// with the given type and name is reached, or until <c>Read()</c> returns false.
		/// </summary>
		/// <param name="elementName">Name of the node at which reading stops (compared case-insensitively).</param>
		/// <param name="nodeType">Node type that the stopping node must have.</param>
		/// <returns>
		/// The collected text with every "\r\n" sequence removed and leading/trailing
		/// whitespace trimmed.
		/// </returns>
		public string GetInnerTextUpToElement(string elementName, HtmlNodeType nodeType)
		{
			var collected = new StringBuilder();
			while (Read())
			{
				// Ordinal comparison avoids the culture-dependent casing rules of ToUpper()
				// and does not throw when either name is null.
				if (NodeType == nodeType
					&& string.Equals(Name, elementName, System.StringComparison.OrdinalIgnoreCase))
				{
					break;
				}

				if (NodeType == HtmlNodeType.Text || NodeType == HtmlNodeType.CDATA)
				{
					// Append the text with line breaks stripped; the stripped text used to be
					// computed and then discarded in favour of the raw value.
					collected.Append(Value.Replace("\r\n", ""));
				}
			}
			return collected.ToString().Trim();
		}
 /// <summary>
 /// Creates an element by delegating to the five-argument constructor, passing
 /// <c>null</c> and <c>true</c> as its third and fourth arguments.
 /// </summary>
 /// <param name="nodeType">Node type forwarded to the five-argument constructor.</param>
 /// <param name="elemntType">Element type forwarded to the five-argument constructor.</param>
 /// <param name="characterPosition">Character position forwarded to the five-argument constructor.</param>
 // NOTE(review): the meaning of the null/true arguments is defined by the target
 // constructor, which is not visible here - confirm before relying on it.
 public HtmlElement(HtmlNodeType nodeType, HtmlElementType elemntType, int characterPosition)
     : this(nodeType, elemntType, null, true, characterPosition)
 {
 }
Esempio n. 55
0
 /// <summary>
 /// Creates a node by forwarding all three arguments unchanged to the base-class constructor.
 /// </summary>
 /// <param name="type">Node type passed to the base constructor.</param>
 /// <param name="ownerdocument">Owning <c>HtmlAgilityPack.HtmlDocument</c> passed to the base constructor.</param>
 /// <param name="index">Index passed to the base constructor.</param>
 public HtmlNode(HtmlNodeType type, HtmlAgilityPack.HtmlDocument ownerdocument, int index)
     : base(type, ownerdocument, index)
 {
 }
 /// <summary>
 /// Creates an element by delegating to the five-argument constructor, passing
 /// <c>null</c> and <c>false</c> as its third and fourth arguments, and then stores
 /// <paramref name="value"/> in <c>m_value</c>.
 /// </summary>
 /// <param name="nodeType">Node type forwarded to the five-argument constructor.</param>
 /// <param name="elemntType">Element type forwarded to the five-argument constructor.</param>
 /// <param name="value">Value assigned to <c>m_value</c> after the delegated constructor has run.</param>
 /// <param name="characterPosition">Character position forwarded to the five-argument constructor.</param>
 // NOTE(review): the meaning of the null/false arguments is defined by the target
 // constructor, which is not visible here - confirm before relying on it.
 public HtmlElement(HtmlNodeType nodeType, HtmlElementType elemntType, string value, int characterPosition)
     : this(nodeType, elemntType, null, false, characterPosition)
 {
     // The value is set here rather than through the delegated constructor call.
     this.m_value = value;
 }
Esempio n. 57
0
 /// <summary>
 /// Starts a new node: creates it, makes it the current node and records where in the
 /// input it begins (line, line position and stream position).
 /// </summary>
 /// <param name="type">Type of the node to create.</param>
 /// <param name="index">Index handed to <c>CreateNode</c> and stored as the node's stream position.</param>
 private void PushNodeStart(HtmlNodeType type, int index)
 {
     var created = CreateNode(type, index);
     _currentnode = created;

     // For element nodes the stored line position is one less than the parser's.
     var column = _lineposition;
     if (type == HtmlNodeType.Element)
     {
         column -= 1;
     }

     created._line = _line;
     created._lineposition = column;
     created._streamposition = index;
 }
Esempio n. 58
0
            /// <summary>
            /// Returns the Html/Xml of the current Element up to its corresponding EndElement, unless the
            /// current Element is an empty element (e.g. ends with a /> and not just a >).  If it is an
            /// empty element, returns only the current Element.
            /// </summary>
            /// <returns>
            /// Returns a <see cref="TextReader"/> that gives access to the HTML (or XML as the case may be) from
            /// the current node (which must be an Element node) to the corresponding EndElement
            /// node (or the end of the file if the EndElement doesn't exist.)
            /// </returns>
            /// <remarks>
            /// After calling this method, the state of the parser will be that the current node type is "none."
            /// </remarks>
            /// <exception cref="InvalidOperationException">If the node type isn't an Element node,
            /// or the node's name is blank.</exception>
            internal TextReader GetOuterHtml()
            {
                // Capture the name up front; it is compared against later nodes while scanning.
                PlainTextString name = Name;
                // the current node type must be an Element node and have a name.
                if (m_NodeType != HtmlNodeType.Element || String.IsNullOrEmpty(name)) throw new InvalidOperationException();

                // Move m_ParseWriter over to m_GetOuterHtmlWriter so everything gets copied into it, and it never
                // gets replaced (see ReadNextChar() - if m_GetOuterHtmlWriter is non-null, characters get
                // copied into it instead of m_ParseWriter.)
                m_GetOuterHtmlWriter = m_ParseWriter;
                m_ParseWriter = null;

                // Capture the rest of the current Element.  Set m_ParseMode to Skip to avoid saving
                // attribute values.
                m_ParseMode = ParseMode.Skip;
                try
                {
                    while (m_ParseState != HtmlParseState.None) Parse();
                }
                catch (EndOfStreamException)
                {
                    // Deliberately ignored: reaching the end of the stream just ends the capture.
                }

                // If this isn't an empty element, find the corresponding EndElement
                if (!IsEmptyElement)
                {
                    // keep a count of Element node names equivalent to the current node name, to
                    // account for Elements of the same name that are inside the current Element,
                    // in order to stop parsing at the correct EndElement.
                    int count = 1; // count the current Element
                    while (count > 0 && GetNextNode())
                    {
                        if (m_NodeType == HtmlNodeType.Element && 0 == String.Compare(Name, name, StringComparison.OrdinalIgnoreCase)) count++;
                        else if (m_NodeType == HtmlNodeType.EndElement && 0 == String.Compare(Name, name, StringComparison.OrdinalIgnoreCase)) count--;
                    }
                    // count reaches 0 only when the matching EndElement was found. A non-zero count
                    // means GetNextNode returned false (end of stream), so there is nothing left to parse.
                    if (count == 0)
                    {
                        // make sure to finish parsing the current node
                        try
                        {
                            while (m_ParseState != HtmlParseState.None)
                            {
                                Parse();
                            }
                        }
                        catch (EndOfStreamException)
                        {
                            // Deliberately ignored: the stream ended while finishing the EndElement.
                        }
                    }
                }
                // transfer the stream writer's stream into a text reader and return it.
                m_GetOuterHtmlWriter.Flush();
                // the stream is a DetachableStream from the ReadNextChar() method
                DetachableStream detachableStream = (DetachableStream)m_GetOuterHtmlWriter.BaseStream;
                Stream stream = detachableStream.Stream; // the underlying stream
                // detach the stream from the m_GetOuterHtmlWriter; this is done before the writer
                // is disposed below
                detachableStream.Detach();
                // position the underlying stream at position 0 and hand it off to the StreamReader
                stream.Position = 0;
                StreamReader reader = new StreamReader(stream, m_GetOuterHtmlWriter.Encoding);
                m_GetOuterHtmlWriter.Dispose();
                m_GetOuterHtmlWriter = null;

                // set the current node type to "none"
                m_NodeType = HtmlNodeType.None;

                // return the reader
                return reader;
            }
Esempio n. 59
0
        /// <summary>
        /// Creates a node owned by this document, choosing the concrete class from the node type:
        /// comment and text types get their dedicated node classes, every other type a plain
        /// <c>HtmlNode</c>.
        /// </summary>
        /// <param name="type">Type of the node to create.</param>
        /// <param name="index">Index passed through to the node constructor.</param>
        /// <returns>The newly created node.</returns>
        internal HtmlNode CreateNode(HtmlNodeType type, int index)
        {
            if (type == HtmlNodeType.Comment)
            {
                return new HtmlCommentNode(this, index);
            }

            if (type == HtmlNodeType.Text)
            {
                return new HtmlTextNode(this, index);
            }

            return new HtmlNode(type, this, index);
        }
Esempio n. 60
0
 /// <summary>
 /// Creates a node and assigns its <c>NodeType</c>, <c>Name</c> and <c>Value</c> from the arguments.
 /// </summary>
 /// <param name="nodeType">Value assigned to <c>NodeType</c>.</param>
 /// <param name="name">Value assigned to <c>Name</c>.</param>
 /// <param name="value">Value assigned to <c>Value</c>; defaults to <c>null</c> when omitted.</param>
 protected internal HtmlNode(HtmlNodeType nodeType, string name, string value = null)
 {
     NodeType = nodeType;
     Name = name;
     Value = value;
 }