Beispiel #1
0
        /// <summary>
        /// Searches this collection for elements that have an attribute with the
        /// given name whose value equals the given value. Both the name and the
        /// value are compared case-insensitively using ordinal rules, so the result
        /// does not depend on the current culture. On each element only the first
        /// attribute whose name matches is examined.
        /// </summary>
        /// <param name="attributeName">The name of the attribute to look for</param>
        /// <param name="attributeValue">The value the attribute must have. A null
        /// value only matches an attribute whose Value is also null.</param>
        /// <param name="searchChildren">True if you want to search sub-nodes
        /// recursively, False to only search this collection.</param>
        /// <returns>A new collection of all matching elements; each element appears
        /// before the matches found among its own sub-nodes.</returns>
        public HtmlNodeCollection FindByAttributeNameValue(string attributeName, string attributeValue, bool searchChildren)
        {
            HtmlNodeCollection results = new HtmlNodeCollection(null);

            foreach (HtmlNode node in base.List)
            {
                // Only elements carry attributes and child nodes.
                HtmlElement element = node as HtmlElement;
                if (element == null)
                {
                    continue;
                }

                foreach (HtmlAttribute attribute in element.Attributes)
                {
                    if (string.Equals(attribute.Name, attributeName, System.StringComparison.OrdinalIgnoreCase))
                    {
                        // The static Equals is null-safe: an attribute without a
                        // value no longer causes a NullReferenceException here.
                        if (string.Equals(attribute.Value, attributeValue, System.StringComparison.OrdinalIgnoreCase))
                        {
                            results.Add(node);
                        }
                        // Stop at the first attribute with this name, whether or
                        // not its value matched.
                        break;
                    }
                }

                if (searchChildren)
                {
                    foreach (HtmlNode matchedChild in element.Nodes.FindByAttributeNameValue(attributeName, attributeValue, searchChildren))
                    {
                        results.Add(matchedChild);
                    }
                }
            }
            return results;
        }
Beispiel #2
0
        /// <summary>
        /// Searches this collection of nodes for all elements with the specified
        /// name. The name is compared case-insensitively using ordinal rules, so the
        /// result does not depend on the current culture. Pass True in
        /// searchChildren to search the sub-nodes recursively. Matches are returned
        /// in traversal order: each element comes before the matches found among
        /// its own sub-nodes.
        /// </summary>
        /// <param name="name">The name of the element to find</param>
        /// <param name="searchChildren">True if you want to search sub-nodes, False to
        /// only search this collection.</param>
        /// <returns>A new collection of all the nodes that match.</returns>
        public HtmlNodeCollection FindByName(string name, bool searchChildren)
        {
            HtmlNodeCollection results = new HtmlNodeCollection(null);

            foreach (HtmlNode node in base.List)
            {
                // Text nodes and other non-element nodes have no name or children.
                HtmlElement element = node as HtmlElement;
                if (element == null)
                {
                    continue;
                }

                // Ordinal, case-insensitive comparison: avoids the culture-dependent
                // results of ToLower() (e.g. 'I' under Turkish rules) and does not
                // throw when either name is null.
                if (string.Equals(element.Name, name, System.StringComparison.OrdinalIgnoreCase))
                {
                    results.Add(node);
                }

                if (searchChildren)
                {
                    foreach (HtmlNode matchedChild in element.Nodes.FindByName(name, searchChildren))
                    {
                        results.Add(matchedChild);
                    }
                }
            }
            return results;
        }
Beispiel #3
0
        /// <summary>
        /// Parses a string of HTML into a collection of nodes. The input is first
        /// run through the script/style preprocessing and comment-removal helpers,
        /// then split into tokens; the tokens are consumed left to right to build
        /// elements (with their attributes) and text nodes.
        /// </summary>
        /// <param name="html">The HTML to be parsed</param>
        /// <returns>The collection of nodes built from the token stream</returns>
        public HtmlNodeCollection Parse(string html)
        {
            HtmlNodeCollection tree = new HtmlNodeCollection(null);

            // Preparation passes, applied in this exact order.
            html = PreprocessScript(html, "script");
            html = PreprocessScript(html, "style");
            html = RemoveSGMLComments(RemoveComments(html));

            StringCollection tokens = GetTokens(html);

            // The most recently opened element that has not been closed yet,
            // or null when the last tag seen was self-terminated or a close tag.
            HtmlElement current = null;
            int pos = 0;

            while (pos < tokens.Count)
            {
                string token = tokens[pos];

                if (token == "<")
                {
                    // Opening tag: the next token is the tag name.
                    pos++;
                    if (pos >= tokens.Count)
                    {
                        break;
                    }
                    string tagName = tokens[pos];
                    pos++;
                    current = new HtmlElement(tagName);

                    // Attributes run until the tag is closed by ">" or "/>".
                    while (pos < tokens.Count && tokens[pos] != ">" && tokens[pos] != "/>")
                    {
                        string attributeName = tokens[pos];
                        pos++;

                        bool hasValue = pos < tokens.Count && tokens[pos] == "=";
                        if (hasValue)
                        {
                            pos++;
                            // The value may be missing if the input ends after "=".
                            string rawValue = pos < tokens.Count ? tokens[pos] : null;
                            pos++;
                            current.Attributes.Add(new HtmlAttribute(attributeName, HtmlEncoder.DecodeValue(rawValue)));
                        }
                        else if (pos < tokens.Count)
                        {
                            // Attribute given without a value.
                            current.Attributes.Add(new HtmlAttribute(attributeName, null));
                        }
                    }

                    tree.Add(current);

                    if (pos < tokens.Count)
                    {
                        string closer = tokens[pos];
                        if (closer == "/>")
                        {
                            // Self-terminated tag: nothing can follow inside it.
                            current.IsTerminated = true;
                            pos++;
                            current = null;
                        }
                        else if (closer == ">")
                        {
                            pos++;
                        }
                    }
                }
                else if (token == ">")
                {
                    // Stray tag terminator; skip it.
                    pos++;
                }
                else if (token == "</")
                {
                    // Closing tag: the next token is the tag name.
                    pos++;
                    if (pos >= tokens.Count)
                    {
                        break;
                    }
                    string tagName = tokens[pos];
                    pos++;

                    // A close tag without a matching open tag (-1) is ignored.
                    int openIndex = FindTagOpenNodeIndex(tree, tagName);
                    if (openIndex != -1)
                    {
                        // Presumably re-parents the nodes after the open tag under
                        // that element; see MoveNodesDown to confirm.
                        MoveNodesDown(ref tree, openIndex + 1, (HtmlElement)tree[openIndex]);
                    }

                    // Skip whatever remains of this tag, including the ">".
                    while (pos < tokens.Count && tokens[pos] != ">")
                    {
                        pos++;
                    }
                    if (pos < tokens.Count)
                    {
                        pos++;
                    }

                    current = null;
                }
                else
                {
                    // Anything else is text content.
                    string text = token;
                    if (mRemoveEmptyElementText)
                    {
                        text = RemoveWhitespace(text);
                    }
                    text = DecodeScript(text);

                    bool discard = mRemoveEmptyElementText && text.Length == 0;
                    if (!discard)
                    {
                        // Text inside an element flagged NoEscaping is kept as-is.
                        if (current == null || !current.NoEscaping)
                        {
                            text = HtmlEncoder.DecodeValue(text);
                        }
                        tree.Add(new HtmlText(text));
                    }
                    pos++;
                }
            }

            return tree;
        }
Beispiel #4
0
		/// <summary>
		/// Parses a string of HTML into a collection of nodes. The input is first
		/// run through the script/style preprocessing and comment-removal helpers,
		/// then split into tokens; the tokens are consumed left to right to build
		/// elements (with their attributes) and text nodes.
		/// </summary>
		/// <param name="html">The HTML to be parsed</param>
		/// <returns>The collection of nodes built from the token stream</returns>
		public HtmlNodeCollection Parse(string html)
		{
			HtmlNodeCollection tree = new HtmlNodeCollection(null);

			// Preparation passes, applied in this exact order.
			html = PreprocessScript(html, "script");
			html = PreprocessScript(html, "style");
			html = RemoveSGMLComments(RemoveComments(html));

			StringCollection tokens = GetTokens(html);

			// The most recently opened element that has not been closed yet,
			// or null when the last tag seen was self-terminated or a close tag.
			HtmlElement current = null;
			int pos = 0;

			while (pos < tokens.Count)
			{
				string token = tokens[pos];

				if (token == "<")
				{
					// Opening tag: the next token is the tag name.
					pos++;
					if (pos >= tokens.Count)
					{
						break;
					}
					string tagName = tokens[pos];
					pos++;
					current = new HtmlElement(tagName);

					// Attributes run until the tag is closed by ">" or "/>".
					while (pos < tokens.Count && tokens[pos] != ">" && tokens[pos] != "/>")
					{
						string attributeName = tokens[pos];
						pos++;

						bool hasValue = pos < tokens.Count && tokens[pos] == "=";
						if (hasValue)
						{
							pos++;
							// The value may be missing if the input ends after "=".
							string rawValue = pos < tokens.Count ? tokens[pos] : null;
							pos++;
							current.Attributes.Add(new HtmlAttribute(attributeName, HtmlEncoder.DecodeValue(rawValue)));
						}
						else if (pos < tokens.Count)
						{
							// Attribute given without a value.
							current.Attributes.Add(new HtmlAttribute(attributeName, null));
						}
					}

					tree.Add(current);

					if (pos < tokens.Count)
					{
						string closer = tokens[pos];
						if (closer == "/>")
						{
							// Self-terminated tag: nothing can follow inside it.
							current.IsTerminated = true;
							pos++;
							current = null;
						}
						else if (closer == ">")
						{
							pos++;
						}
					}
				}
				else if (token == ">")
				{
					// Stray tag terminator; skip it.
					pos++;
				}
				else if (token == "</")
				{
					// Closing tag: the next token is the tag name.
					pos++;
					if (pos >= tokens.Count)
					{
						break;
					}
					string tagName = tokens[pos];
					pos++;

					// A close tag without a matching open tag (-1) is ignored.
					int openIndex = FindTagOpenNodeIndex(tree, tagName);
					if (openIndex != -1)
					{
						// Presumably re-parents the nodes after the open tag under
						// that element; see MoveNodesDown to confirm.
						MoveNodesDown(ref tree, openIndex + 1, (HtmlElement)tree[openIndex]);
					}

					// Skip whatever remains of this tag, including the ">".
					while (pos < tokens.Count && tokens[pos] != ">")
					{
						pos++;
					}
					if (pos < tokens.Count)
					{
						pos++;
					}

					current = null;
				}
				else
				{
					// Anything else is text content.
					string text = token;
					if (mRemoveEmptyElementText)
					{
						text = RemoveWhitespace(text);
					}
					text = DecodeScript(text);

					bool discard = mRemoveEmptyElementText && text.Length == 0;
					if (!discard)
					{
						// Text inside an element flagged NoEscaping is kept as-is.
						if (current == null || !current.NoEscaping)
						{
							text = HtmlEncoder.DecodeValue(text);
						}
						tree.Add(new HtmlText(text));
					}
					pos++;
				}
			}

			return tree;
		}
Beispiel #5
0
		/// <summary>
		/// Searches this collection for elements that have an attribute with the
		/// given name whose value equals the given value. Both the name and the
		/// value are compared case-insensitively using ordinal rules, so the result
		/// does not depend on the current culture. On each element only the first
		/// attribute whose name matches is examined.
		/// </summary>
		/// <param name="attributeName">The name of the attribute to look for</param>
		/// <param name="attributeValue">The value the attribute must have. A null
		/// value only matches an attribute whose Value is also null.</param>
		/// <param name="searchChildren">True if you want to search sub-nodes
		/// recursively, False to only search this collection.</param>
		/// <returns>A new collection of all matching elements; each element appears
		/// before the matches found among its own sub-nodes.</returns>
		public HtmlNodeCollection FindByAttributeNameValue(string attributeName,string attributeValue,bool searchChildren)
		{
			HtmlNodeCollection results = new HtmlNodeCollection(null);
			foreach( HtmlNode node in base.List )
			{
				// Only elements carry attributes and child nodes.
				HtmlElement element = node as HtmlElement;
				if( element == null )
				{
					continue;
				}

				foreach( HtmlAttribute attribute in element.Attributes )
				{
					if( string.Equals( attribute.Name , attributeName , System.StringComparison.OrdinalIgnoreCase ) )
					{
						// The static Equals is null-safe: an attribute without a
						// value no longer causes a NullReferenceException here.
						if( string.Equals( attribute.Value , attributeValue , System.StringComparison.OrdinalIgnoreCase ) )
						{
							results.Add( node );
						}
						// Stop at the first attribute with this name, whether or
						// not its value matched.
						break;
					}
				}

				if( searchChildren )
				{
					foreach( HtmlNode matchedChild in element.Nodes.FindByAttributeNameValue( attributeName , attributeValue , searchChildren ) )
					{
						results.Add( matchedChild );
					}
				}
			}
			return results;
		}
Beispiel #6
0
		/// <summary>
		/// Searches this collection of nodes for all elements with the specified
		/// name. The name is compared case-insensitively using ordinal rules, so the
		/// result does not depend on the current culture. Pass True in
		/// searchChildren to search the sub-nodes recursively. Matches are returned
		/// in traversal order: each element comes before the matches found among
		/// its own sub-nodes.
		/// </summary>
		/// <param name="name">The name of the element to find</param>
		/// <param name="searchChildren">True if you want to search sub-nodes, False to
		/// only search this collection.</param>
		/// <returns>A new collection of all the nodes that match.</returns>
		public HtmlNodeCollection FindByName(string name,bool searchChildren)
		{
			HtmlNodeCollection results = new HtmlNodeCollection(null);
			foreach( HtmlNode node in base.List )
			{
				// Text nodes and other non-element nodes have no name or children.
				HtmlElement element = node as HtmlElement;
				if( element == null )
				{
					continue;
				}

				// Ordinal, case-insensitive comparison: avoids the culture-dependent
				// results of ToLower() (e.g. 'I' under Turkish rules) and does not
				// throw when either name is null.
				if( string.Equals( element.Name , name , System.StringComparison.OrdinalIgnoreCase ) )
				{
					results.Add( node );
				}

				if( searchChildren )
				{
					foreach( HtmlNode matchedChild in element.Nodes.FindByName( name , searchChildren ) )
					{
						results.Add( matchedChild );
					}
				}
			}
			return results;
		}