Example #1
0
 /// <summary>
 /// Creates a node collection that belongs to the given element.
 /// </summary>
 /// <remarks>
 /// Supplying <c>null</c> yields a stand-alone collection that has no owning element.
 /// </remarks>
 /// <param name="parent">The owning element, or <c>null</c> when there is none</param>
 internal HtmlNodeCollection(HtmlElement parent)
 {
     this.mParent = parent;
 }
Example #2
0
 /// <summary>
 /// Records the given element as the parent of this node.
 /// Internal bookkeeping only: it simply overwrites the stored parent reference.
 /// </summary>
 /// <param name="parentNode">The element that now owns this node</param>
 internal void SetParent(HtmlElement parentNode)
 {
     this.mParent = parentNode;
 }
Example #3
0
 /// <summary>
 /// Creates an empty node collection that is not attached to any parent element.
 /// </summary>
 public HtmlNodeCollection()
 {
     this.mParent = null;
 }
Example #4
0
 /// <summary>
 /// Base constructor intended for derived node types.
 /// A freshly constructed node starts out with no parent.
 /// </summary>
 protected HtmlNode()
 {
     this.mParent = null;
 }
Example #5
0
 /// <summary>
 /// Creates an empty attribute collection associated with the given element.
 /// </summary>
 /// <param name="element">The element this attribute collection is stored against</param>
 internal HtmlAttributeCollection(HtmlElement element)
 {
     this.mElement = element;
 }
Example #6
0
 /// <summary>
 /// Creates an attribute collection that is not associated with any element.
 /// </summary>
 public HtmlAttributeCollection()
 {
     this.mElement = null;
 }
Example #7
0
        /// <summary>
        /// Parses a string containing HTML and produces a tree of nodes from it.
        /// </summary>
        /// <remarks>
        /// The input is first pre-processed (script and style handling, comment removal),
        /// then split into tokens. Tokens are consumed left to right: every open tag becomes
        /// an <c>HtmlElement</c> and every text run an <c>HtmlText</c>, all appended to one
        /// flat list. When a close tag is met and a matching open element is found, the nodes
        /// that follow that element in the list are moved underneath it, which is how the
        /// nesting is built. Close tags without a matching open element are ignored.
        /// </remarks>
        /// <param name="html">The HTML to be parsed</param>
        /// <returns>The collection of top-level nodes; nested content hangs off the elements in it</returns>
        public HtmlNodeCollection Parse(string html)
        {
            // Top-level result list; it has no parent element.
            HtmlNodeCollection nodes = new HtmlNodeCollection(null);

            // Pre-process "script" and "style" content before tokenising.
            // NOTE(review): presumably this protects their bodies from being read as markup
            // (DecodeScript below looks like the inverse) - confirm against PreprocessScript.
            html = PreprocessScript( html ,"script" );
            html = PreprocessScript( html ,"style" );

            html = RemoveComments( html );
            html = RemoveSGMLComments( html );
            StringCollection tokens = GetTokens( html );

            int index = 0;
            // The most recently opened element. It is reset to null after a self-closed tag
            // or a close tag, and is consulted when deciding whether text should be decoded.
            HtmlElement element = null;
            while( index < tokens.Count )
            {
                if( "<".Equals( tokens[index] ) )
                {
                    // Read open tag

                    index++;
                    // Input ended straight after "<": nothing more to parse.
                    if( index >= tokens.Count ) break;
                    string tag_name = tokens[index];
                    index++;
                    element = new HtmlElement( tag_name );
                    // read the attributes and values, up to the closing ">" or "/>"

                    while( index < tokens.Count && ! ">".Equals( tokens[index] ) && ! "/>".Equals( tokens[index] ) )
                    {
                        string attribute_name = tokens[ index ];
                        index++;
                        if( index < tokens.Count && "=".Equals( tokens[ index ] ) )
                        {
                            // name = value form
                            index++;
                            string attribute_value;
                            if( index < tokens.Count )
                            {
                                attribute_value = tokens[ index ];
                            }
                            else
                            {
                                // Input ended straight after "=": there is no value token.
                                attribute_value = null;
                            }
                            // May step past the end of the tokens; every later access is bounds-checked.
                            index++;
                            // NOTE(review): attribute_value can be null here - confirm that
                            // HtmlEncoder.DecodeValue tolerates a null argument.
                            HtmlAttribute attribute = new HtmlAttribute( attribute_name , HtmlEncoder.DecodeValue( attribute_value ) );
                            element.Attributes.Add( attribute );
                        }
                        // NOTE(review): when the attribute name was the very last token, neither
                        // branch runs and the attribute is silently dropped - confirm this is intended.
                        else if( index < tokens.Count )
                        {
                            // Null-value attribute
                            HtmlAttribute attribute = new HtmlAttribute( attribute_name , null );
                            element.Attributes.Add( attribute );
                        }
                    }
                    // The element joins the flat list; any nesting is established later,
                    // when (and if) its close tag is encountered.
                    nodes.Add( element );
                    if( index < tokens.Count && "/>".Equals( tokens[ index ] ) )
                    {
                        // Self-closed tag: mark it terminated and stop tracking it as the open element.
                        element.IsTerminated = true;
                        index++;
                        element = null;
                    }
                    else if( index < tokens.Count && ">".Equals( tokens[ index ] ) )
                    {
                        index++;
                    }
                }
                else if( ">".Equals( tokens[index] ) )
                {
                    // A ">" outside of any tag is skipped.
                    index++;
                }
                else if( "</".Equals( tokens[index] ) )
                {
                    // Read close tag
                    index++;
                    // Input ended straight after "</": nothing more to parse.
                    if( index >= tokens.Count ) break;
                    string tag_name = tokens[index];
                    index++;

                    // Look for the open element this close tag belongs to; -1 means none was found.
                    int open_index = FindTagOpenNodeIndex( nodes , tag_name );
                    if( open_index != -1 )
                    {
                        // Everything after the open element in the flat list becomes its content.
                        MoveNodesDown( ref nodes , open_index + 1 , (HtmlElement)nodes[open_index] );
                    }
                    else
                    {
                        // Close tag without a matching open tag: it is ignored.
                    }

                    // Skip to the end of this tag
                    while( index < tokens.Count && ! ">".Equals( tokens[ index ] ) )
                    {
                        index++;
                    }
                    if( index < tokens.Count && ">".Equals( tokens[ index ] ) )
                    {
                        index++;
                    }

                    element = null;
                }
                else
                {
                    // Read text
                    string value = tokens[ index ];
                    if( mRemoveEmptyElementText )
                    {
                        value = RemoveWhitespace( value );
                    }
                    // NOTE(review): presumably reverses the encoding applied by PreprocessScript - confirm.
                    value = DecodeScript( value );

                    if( mRemoveEmptyElementText && value.Length == 0 )
                    {
                        // Empty text is discarded when mRemoveEmptyElementText is set.
                    }
                    else
                    {
                        // Text is decoded unless the currently open element is flagged NoEscaping.
                        if( ! ( element != null && element.NoEscaping ) )
                        {
                            value = HtmlEncoder.DecodeValue( value );
                        }
                        HtmlText node = new HtmlText( value );
                        nodes.Add( node );
                    }
                    index++;
                }
            }
            return nodes;
        }
Example #8
0
 /// <summary>
 /// Re-parents the tail of a node collection. Every node from <paramref name="node_index"/>
 /// to the end of <paramref name="nodes"/> is appended to the children of
 /// <paramref name="new_parent"/> and has its parent set to it; those nodes are then
 /// removed from <paramref name="nodes"/>. Finally the new parent is flagged as
 /// explicitly terminated.
 /// </summary>
 /// <param name="nodes">The collection the nodes are taken from</param>
 /// <param name="node_index">Index (within <paramref name="nodes"/>) of the first node to move</param>
 /// <param name="new_parent">The element that receives the moved nodes as children</param>
 private void MoveNodesDown(ref HtmlNodeCollection nodes,int node_index,HtmlElement new_parent)
 {
     // Phase 1: hand each trailing node over to the new parent, preserving order.
     int cursor = node_index;
     while( cursor < nodes.Count )
     {
         new_parent.Nodes.Add( nodes[cursor] );
         nodes[cursor].SetParent( new_parent );
         cursor++;
     }

     // Phase 2: drop the moved nodes from the source collection. Removing repeatedly at
     // node_index works because each removal shifts the following nodes down by one.
     int remaining = nodes.Count - node_index;
     while( remaining > 0 )
     {
         nodes.RemoveAt( node_index );
         remaining--;
     }

     new_parent.IsExplicitlyTerminated = true;
 }