Example #1
0
 /// <summary>
 /// Creates an HTML element carrying the given tag name. The element is given
 /// a new child-node collection and a new attribute collection (each constructed
 /// with this element passed in) and starts out flagged as not terminated.
 /// </summary>
 /// <param name="name">The tag name to store for this element</param>
 public HtmlElement(string name)
 {
     // The collections receive this element in their constructors, so they are
     // created first, in the same order as before, ahead of the simple fields.
     this.mNodes = new HtmlNodeCollection( this );
     this.mAttributes = new HtmlAttributeCollection( this );
     this.mName = name;
     this.mIsTerminated = false;
 }
Example #2
0
 /// <summary>
 /// Builds a document by running the supplied HTML through a newly created
 /// <see cref="HtmlParser"/> and keeping the node collection it returns.
 /// </summary>
 /// <param name="html">The HTML source text to parse.</param>
 /// <param name="wantSpaces">When false, the parser's RemoveEmptyElementText
 /// option is switched on before parsing; when true it is switched off.</param>
 internal HtmlDocument(string html,bool wantSpaces)
 {
     // The parser option is the logical inverse of the caller's flag.
     bool stripEmptyText = !wantSpaces;

     HtmlParser htmlParser = new HtmlParser();
     htmlParser.RemoveEmptyElementText = stripEmptyText;

     mNodes = htmlParser.Parse( html );
 }
Example #3
0
 /// <summary>
 /// Searches through this collection of nodes for all elements with the
 /// specified name. The comparison ignores case and is ordinal, so the result
 /// does not depend on the current thread culture. If you want to search the
 /// sub-nodes recursively, pass True for searchChildren. Matches are returned
 /// in the order in which they are encountered: an element comes before any
 /// matches found among its descendants.
 /// </summary>
 /// <param name="name">The name of the element to find. A null name matches
 /// nothing (unless an element itself reports a null name).</param>
 /// <param name="searchChildren">True if you want to search sub-nodes, False to
 /// only search this collection.</param>
 /// <returns>A new collection (created with a null parent) holding all the
 /// nodes that match; empty when nothing matches.</returns>
 public HtmlNodeCollection FindByName(string name,bool searchChildren)
 {
     HtmlNodeCollection results = new HtmlNodeCollection(null);
     foreach( HtmlNode node in base.List )
     {
         // Only elements carry a tag name; every other node kind is skipped.
         HtmlElement element = node as HtmlElement;
         if( element == null )
         {
             continue;
         }

         // Ordinal, case-insensitive and null-safe. Lower-casing both sides with
         // the current culture mis-compares names such as "TITLE"/"title" under
         // cultures with special casing rules (e.g. Turkish dotted/dotless i).
         if( string.Equals( element.Name , name , System.StringComparison.OrdinalIgnoreCase ) )
         {
             results.Add( node );
         }

         if( searchChildren )
         {
             // Descend even when this element matched: nested elements of the
             // same name must be reported as well.
             foreach( HtmlNode matchedChild in element.Nodes.FindByName( name , searchChildren ) )
             {
                 results.Add( matchedChild );
             }
         }
     }
     return results;
 }
Example #4
0
 /// <summary>
 /// Searches this collection of nodes for all elements that have an attribute
 /// with the specified name whose value equals the specified value. Both the
 /// attribute name and the attribute value are compared ignoring case, using
 /// ordinal rules so the result does not depend on the current thread culture.
 /// Only the first attribute with a matching name is examined on each element.
 /// </summary>
 /// <param name="attributeName">The name of the attribute to look for</param>
 /// <param name="attributeValue">The value the attribute must have. An attribute
 /// whose value is null matches only when this argument is also null.</param>
 /// <param name="searchChildren">True if you want to search sub-nodes, False to
 /// only search this collection.</param>
 /// <returns>A new collection (created with a null parent) holding all matching
 /// elements in the order they are encountered; empty when nothing matches.</returns>
 public HtmlNodeCollection FindByAttributeNameValue(string attributeName,string attributeValue,bool searchChildren)
 {
     HtmlNodeCollection results = new HtmlNodeCollection(null);
     foreach( HtmlNode node in base.List )
     {
         // Only elements have attributes; every other node kind is skipped.
         HtmlElement element = node as HtmlElement;
         if( element == null )
         {
             continue;
         }

         foreach( HtmlAttribute attribute in element.Attributes )
         {
             if( ! string.Equals( attribute.Name , attributeName , System.StringComparison.OrdinalIgnoreCase ) )
             {
                 continue;
             }

             // The static Equals is null-safe: valueless attributes may carry a
             // null Value, which must not raise a NullReferenceException here.
             if( string.Equals( attribute.Value , attributeValue , System.StringComparison.OrdinalIgnoreCase ) )
             {
                 results.Add( node );
             }

             // The first attribute with this name decides; later duplicates are ignored.
             break;
         }

         if( searchChildren )
         {
             foreach( HtmlNode matchedChild in element.Nodes.FindByAttributeNameValue( attributeName , attributeValue , searchChildren ) )
             {
                 results.Add( matchedChild );
             }
         }
     }
     return results;
 }
Example #5
0
        /// <summary>
        /// This will parse a string containing HTML and will produce a tree of nodes.
        /// </summary>
        /// <remarks>
        /// The token stream is first read into one flat list of nodes. An element only
        /// acquires children when its closing tag is encountered: at that point every
        /// node that follows the matching open tag in the list is moved beneath it
        /// (see the MoveNodesDown call). Elements that are never closed therefore stay
        /// childless, and a close tag with no matching open tag is ignored.
        /// </remarks>
        /// <param name="html">The HTML to be parsed</param>
        /// <returns>The collection of top-level nodes (created with a null parent)</returns>
        public HtmlNodeCollection Parse(string html)
        {
            HtmlNodeCollection nodes = new HtmlNodeCollection(null);

            // Pre-pass over the "script" and "style" tags.
            // NOTE(review): presumably this shields their raw content from the tokenizer
            // and DecodeScript (used on text below) reverses it - confirm against PreprocessScript.
            html = PreprocessScript( html ,"script" );
            html = PreprocessScript( html ,"style" );

            html = RemoveComments( html );
            html = RemoveSGMLComments( html );
            StringCollection tokens = GetTokens( html );

            int index = 0;
            // The element whose open tag was read most recently and has not since been
            // followed by "/>" or by any close tag. It is only consulted when text is
            // read, to decide whether that text should be entity-decoded.
            HtmlElement element = null;
            while( index < tokens.Count )
            {
                if( "<".Equals( tokens[index] ) )
                {
                    // Read open tag

                    index++;
                    // "<" was the final token: there is no tag name to read.
                    if( index >= tokens.Count ) break;
                    string tag_name = tokens[index];
                    index++;
                    element = new HtmlElement( tag_name );
                    // read the attributes and values

                    // Every token up to the closing ">" or "/>" is treated as an attribute
                    // name, optionally followed by "=" and a value token.
                    while( index < tokens.Count && ! ">".Equals( tokens[index] ) && ! "/>".Equals( tokens[index] ) )
                    {
                        string attribute_name = tokens[ index ];
                        index++;
                        if( index < tokens.Count && "=".Equals( tokens[ index ] ) )
                        {
                            index++;
                            string attribute_value;
                            if( index < tokens.Count )
                            {
                                attribute_value = tokens[ index ];
                            }
                            else
                            {
                                // "=" was the final token; the null is still handed to
                                // HtmlEncoder.DecodeValue below.
                                attribute_value = null;
                            }
                            index++;
                            HtmlAttribute attribute = new HtmlAttribute( attribute_name , HtmlEncoder.DecodeValue( attribute_value ) );
                            element.Attributes.Add( attribute );
                        }
                        else if( index < tokens.Count )
                        {
                            // Null-value attribute
                            // (a name that is the very last token is not recorded at all)
                            HtmlAttribute attribute = new HtmlAttribute( attribute_name , null );
                            element.Attributes.Add( attribute );
                        }
                    }
                    // The element joins the flat list now; nesting is resolved only when
                    // its close tag is read.
                    nodes.Add( element );
                    if( index < tokens.Count && "/>".Equals( tokens[ index ] ) )
                    {
                        // Self-closing tag: mark it terminated and stop tracking it.
                        element.IsTerminated = true;
                        index++;
                        element = null;
                    }
                    else if( index < tokens.Count && ">".Equals( tokens[ index ] ) )
                    {
                        index++;
                    }
                }
                else if( ">".Equals( tokens[index] ) )
                {
                    // A ">" outside of any tag is skipped.
                    index++;
                }
                else if( "</".Equals( tokens[index] ) )
                {
                    // Read close tag
                    index++;
                    // "</" was the final token: there is no tag name to read.
                    if( index >= tokens.Count ) break;
                    string tag_name = tokens[index];
                    index++;

                    int open_index = FindTagOpenNodeIndex( nodes , tag_name );
                    if( open_index != -1 )
                    {
                        // Everything after the open tag in the flat list becomes its child.
                        MoveNodesDown( ref nodes , open_index + 1 , (HtmlElement)nodes[open_index] );
                    }
                    else
                    {
                        // A close tag without a matching open tag: deliberately ignored.
                    }

                    // Skip to the end of this tag
                    while( index < tokens.Count && ! ">".Equals( tokens[ index ] ) )
                    {
                        index++;
                    }
                    if( index < tokens.Count && ">".Equals( tokens[ index ] ) )
                    {
                        index++;
                    }

                    element = null;
                }
                else
                {
                    // Read text
                    string value = tokens[ index ];
                    if( mRemoveEmptyElementText )
                    {
                        value = RemoveWhitespace( value );
                    }
                    value = DecodeScript( value );

                    if( mRemoveEmptyElementText && value.Length == 0 )
                    {
                        // We do nothing
                    }
                    else
                    {
                        // Text is entity-decoded unless the element being tracked
                        // reports NoEscaping.
                        if( ! ( element != null && element.NoEscaping ) )
                        {
                            value = HtmlEncoder.DecodeValue( value );
                        }
                        HtmlText node = new HtmlText( value );
                        nodes.Add( node );
                    }
                    index++;
                }
            }
            return nodes;
        }
Example #6
0
 /// <summary>
 /// Re-homes every node from <paramref name="node_index"/> to the end of
 /// <paramref name="nodes"/> beneath <paramref name="new_parent"/>: each node is
 /// appended to the parent's child collection and told about its new parent, then
 /// the moved nodes are removed from <paramref name="nodes"/>. Finally the parent
 /// is flagged as explicitly terminated.
 /// </summary>
 /// <param name="nodes">The collection of nodes</param>
 /// <param name="node_index">The index of the first node (in the above collection) to move</param>
 /// <param name="new_parent">The node which will become the parent of the moved nodes</param>
 private void MoveNodesDown(ref HtmlNodeCollection nodes,int node_index,HtmlElement new_parent)
 {
     // Pass 1: attach each trailing node to the new parent.
     int position = node_index;
     while( position < nodes.Count )
     {
         new_parent.Nodes.Add( nodes[position] );
         nodes[position].SetParent( new_parent );
         position++;
     }

     // Pass 2: drop the moved nodes from the source collection. The slot at
     // node_index is removed repeatedly because the tail shifts down each time.
     int remaining = nodes.Count - node_index;
     while( remaining > 0 )
     {
         nodes.RemoveAt( node_index );
         remaining--;
     }

     new_parent.IsExplicitlyTerminated = true;
 }
Example #7
0
 /// <summary>
 /// This will find the corresponding opening tag for the named one. This is identified as
 /// the most recently read element with the same name (compared ignoring case, using
 /// ordinal rules), that has no child nodes and is not marked as terminated.
 /// </summary>
 /// <param name="nodes">The collection of nodes</param>
 /// <param name="name">The name of the tag</param>
 /// <returns>The index of the opening tag, or -1 if it was not found</returns>
 private int FindTagOpenNodeIndex(HtmlNodeCollection nodes,string name)
 {
     // Walk backwards so the most recently read candidate wins.
     for( int index = nodes.Count - 1 ; index >= 0 ; index-- )
     {
         HtmlElement candidate = nodes[index] as HtmlElement;
         if( candidate == null )
         {
             // Not an element, so it cannot be an open tag.
             continue;
         }

         // Ordinal, case-insensitive and null-safe. Lower-casing both sides with the
         // current culture mis-compares tag names under cultures with special casing
         // rules (e.g. Turkish dotted/dotless i).
         if( string.Equals( candidate.Name , name , System.StringComparison.OrdinalIgnoreCase )
             && candidate.Nodes.Count == 0
             && ! candidate.IsTerminated )
         {
             return index;
         }
     }
     return -1;
 }