/// <summary>
 /// Appends the given attribute to this collection.
 /// </summary>
 /// <param name="attribute">The attribute to append.</param>
 /// <returns>The index reported by the underlying list for the newly added entry.</returns>
 public int Add(HtmlAttribute attribute)
 {
     // Delegate storage to the base class's list and hand its result straight back.
     int position = base.List.Add(attribute);
     return position;
 }
Example #2
0
        /// <summary>
        /// Parses a string containing HTML and produces a collection of nodes.
        /// </summary>
        /// <remarks>
        /// The input is passed through <c>PreprocessScript</c> (once for "script", once for "style"),
        /// <c>RemoveComments</c> and <c>RemoveSGMLComments</c>, and is then split by <c>GetTokens</c>.
        /// The token list is walked once: a "&lt;" token starts an open tag, a "&lt;/" token starts a
        /// close tag, a stray "&gt;" token is skipped, and any other token is handled as text.
        /// </remarks>
        /// <param name="html">The HTML to be parsed</param>
        /// <returns>The node collection built from the tokens</returns>
        public HtmlNodeCollection Parse(string html)
        {
            HtmlNodeCollection result = new HtmlNodeCollection(null);

            // Pre-processing pipeline; each stage consumes the previous stage's output.
            string markup = PreprocessScript(html, "script");
            markup = PreprocessScript(markup, "style");
            markup = RemoveComments(markup);
            markup = RemoveSGMLComments(markup);

            StringCollection tokenList = GetTokens(markup);
            int total = tokenList.Count; // the token list is never modified below

            int pos = 0;

            // Element created by the most recent open tag; reset to null after "/>" or a close tag.
            HtmlElement current = null;

            while (pos < total)
            {
                string token = tokenList[pos];

                if (token == "<")
                {
                    // Open tag: the token following "<" is taken as the tag name.
                    pos++;
                    if (pos >= total)
                    {
                        break;
                    }
                    string openName = tokenList[pos];
                    pos++;
                    current = new HtmlElement(openName);

                    // Attributes run until ">" or "/>" is reached, or the tokens run out.
                    while (pos < total && tokenList[pos] != ">" && tokenList[pos] != "/>")
                    {
                        string attrName = tokenList[pos];
                        pos++;

                        bool hasValue = pos < total && tokenList[pos] == "=";
                        if (hasValue)
                        {
                            // Step over "=", then take the next token as the value (null if none is left).
                            pos++;
                            string rawValue = pos < total ? tokenList[pos] : null;
                            pos++;
                            HtmlAttribute valued = new HtmlAttribute(attrName, HtmlEncoder.DecodeValue(rawValue));
                            current.Attributes.Add(valued);
                        }
                        else if (pos < total)
                        {
                            // Attribute without a value. A name that is the very last token is not added.
                            HtmlAttribute bare = new HtmlAttribute(attrName, null);
                            current.Attributes.Add(bare);
                        }
                    }

                    result.Add(current);

                    if (pos < total)
                    {
                        if (tokenList[pos] == "/>")
                        {
                            // Self-closed tag: mark it terminated and stop treating it as current.
                            current.IsTerminated = true;
                            pos++;
                            current = null;
                        }
                        else if (tokenList[pos] == ">")
                        {
                            pos++;
                        }
                    }
                }
                else if (token == ">")
                {
                    // Stray ">" outside of a tag: skip it.
                    pos++;
                }
                else if (token == "</")
                {
                    // Close tag: the token following "</" is taken as the tag name.
                    pos++;
                    if (pos >= total)
                    {
                        break;
                    }
                    string closeName = tokenList[pos];
                    pos++;

                    // A close tag for which no open node is found (-1) is ignored.
                    int openIndex = FindTagOpenNodeIndex(result, closeName);
                    if (openIndex != -1)
                    {
                        MoveNodesDown(ref result, openIndex + 1, (HtmlElement)result[openIndex]);
                    }

                    // Skip the remainder of the close tag, including its ">" when present.
                    while (pos < total && tokenList[pos] != ">")
                    {
                        pos++;
                    }
                    if (pos < total)
                    {
                        // The loop above can only stop early on ">", so this steps past it.
                        pos++;
                    }

                    current = null;
                }
                else
                {
                    // Text token.
                    string text = token;
                    if (mRemoveEmptyElementText)
                    {
                        text = RemoveWhitespace(text);
                    }
                    text = DecodeScript(text);

                    // With mRemoveEmptyElementText set, text that ends up empty produces no node.
                    bool discard = mRemoveEmptyElementText && text.Length == 0;
                    if (!discard)
                    {
                        // Text is decoded unless the current element has NoEscaping set.
                        bool keepRaw = current != null && current.NoEscaping;
                        if (!keepRaw)
                        {
                            text = HtmlEncoder.DecodeValue(text);
                        }
                        HtmlText textNode = new HtmlText(text);
                        result.Add(textNode);
                    }
                    pos++;
                }
            }

            return result;
        }