/// <summary>
/// Creates a node collection bound to the given parent element.
/// </summary>
/// <remarks>
/// The argument is stored unchanged in <c>mParent</c>; passing null produces a
/// collection that is not attached to any element.
/// </remarks>
/// <param name="parent">The owning element, or null for a free-standing collection</param>
internal HtmlNodeCollection(HtmlElement parent)
{
    this.mParent = parent;
}
/// <summary>
/// Records which element is the parent of this node.
/// </summary>
/// <remarks>
/// The previous value of <c>mParent</c> is simply overwritten.
/// </remarks>
/// <param name="parentNode">The element to store as this node's parent</param>
internal void SetParent(HtmlElement parentNode)
{
    this.mParent = parentNode;
}
/// <summary>
/// Creates a node collection that has no parent element.
/// </summary>
public HtmlNodeCollection()
{
    this.mParent = null;
}
/// <summary>
/// Base constructor, reachable only from derived classes.
/// A newly constructed node starts out with no parent.
/// </summary>
protected HtmlNode()
{
    this.mParent = null;
}
/// <summary>
/// Creates an attribute collection associated with the given element.
/// </summary>
/// <param name="element">The element stored in <c>mElement</c> for this collection</param>
internal HtmlAttributeCollection(HtmlElement element)
{
    this.mElement = element;
}
/// <summary>
/// Creates an attribute collection that is not associated with any element.
/// </summary>
public HtmlAttributeCollection()
{
    this.mElement = null;
}
/// <summary>
/// Parses a string of HTML into a collection of nodes.
/// </summary>
/// <remarks>
/// The text is passed through <c>PreprocessScript</c> (once for "script", once for
/// "style"), <c>RemoveComments</c> and <c>RemoveSGMLComments</c>, and is then split
/// into tokens by <c>GetTokens</c>. The tokens are consumed left to right:
/// "&lt;" starts an element and its attributes; "&lt;/" looks up the matching open
/// element and moves every node that follows it into that element; a stray "&gt;"
/// is skipped; anything else becomes a text node.
/// </remarks>
/// <param name="html">The HTML to be parsed</param>
/// <returns>The top-level nodes; nested content hangs off the elements that were closed</returns>
public HtmlNodeCollection Parse(string html)
{
    HtmlNodeCollection rootNodes = new HtmlNodeCollection(null);

    html = PreprocessScript(html, "script");
    html = PreprocessScript(html, "style");
    html = RemoveComments(html);
    html = RemoveSGMLComments(html);
    StringCollection tokens = GetTokens(html);

    // Most recently opened element. It is reset to null after a self-closing
    // tag and after any close tag, and is consulted when deciding whether the
    // text that follows should be entity-decoded.
    HtmlElement currentElement = null;
    int pos = 0;

    while (pos < tokens.Count)
    {
        string token = tokens[pos];

        if (token == "<")
        {
            // ---- Open tag ----
            pos++;
            if (pos >= tokens.Count)
            {
                // "<" was the final token; there is no tag name to read.
                break;
            }

            string openTagName = tokens[pos];
            pos++;
            currentElement = new HtmlElement(openTagName);

            // Attributes run until the tag is closed by ">" or "/>".
            while (pos < tokens.Count && tokens[pos] != ">" && tokens[pos] != "/>")
            {
                string attrName = tokens[pos];
                pos++;
                if (pos >= tokens.Count)
                {
                    // The name was the final token: no attribute is recorded for it.
                    break;
                }

                // Stays null for an attribute that has no "=value" part.
                string attrValue = null;
                if (tokens[pos] == "=")
                {
                    pos++;
                    // When "=" was the final token the decoder is handed null.
                    string rawValue = (pos < tokens.Count) ? tokens[pos] : null;
                    pos++;
                    attrValue = HtmlEncoder.DecodeValue(rawValue);
                }

                currentElement.Attributes.Add(new HtmlAttribute(attrName, attrValue));
            }

            rootNodes.Add(currentElement);

            if (pos < tokens.Count)
            {
                if (tokens[pos] == "/>")
                {
                    // Self-closing tag: it cannot own the text that follows.
                    currentElement.IsTerminated = true;
                    pos++;
                    currentElement = null;
                }
                else if (tokens[pos] == ">")
                {
                    pos++;
                }
            }
        }
        else if (token == ">")
        {
            // Stray ">" outside of any tag.
            pos++;
        }
        else if (token == "</")
        {
            // ---- Close tag ----
            pos++;
            if (pos >= tokens.Count)
            {
                // "</" was the final token; there is no tag name to read.
                break;
            }

            string closeTagName = tokens[pos];
            pos++;

            // A close tag that has no matching open tag (-1) is ignored.
            int openIndex = FindTagOpenNodeIndex(rootNodes, closeTagName);
            if (openIndex != -1)
            {
                MoveNodesDown(ref rootNodes, openIndex + 1, (HtmlElement)rootNodes[openIndex]);
            }

            // Skip whatever remains of the tag, up to and including its ">".
            while (pos < tokens.Count && tokens[pos] != ">")
            {
                pos++;
            }
            if (pos < tokens.Count)
            {
                // The loop above can only stop inside the list on a ">" token.
                pos++;
            }

            currentElement = null;
        }
        else
        {
            // ---- Text ----
            string text = token;
            if (mRemoveEmptyElementText)
            {
                text = RemoveWhitespace(text);
            }
            text = DecodeScript(text);

            // Text that ended up empty is dropped when empty-text removal is on.
            if (!mRemoveEmptyElementText || text.Length != 0)
            {
                // Entity decoding is skipped only inside a NoEscaping element.
                if (currentElement == null || !currentElement.NoEscaping)
                {
                    text = HtmlEncoder.DecodeValue(text);
                }
                rootNodes.Add(new HtmlText(text));
            }

            pos++;
        }
    }

    return rootNodes;
}
/// <summary>
/// Re-parents the tail of a node collection: every node from <paramref name="node_index"/>
/// to the end is appended to <paramref name="new_parent"/>'s child nodes, told about its
/// new parent, and then removed from <paramref name="nodes"/>.
/// </summary>
/// <remarks>
/// Once the nodes have been moved, <paramref name="new_parent"/> is flagged as
/// explicitly terminated.
/// </remarks>
/// <param name="nodes">The collection the nodes are taken from</param>
/// <param name="node_index">Index (in <paramref name="nodes"/>) of the first node to move</param>
/// <param name="new_parent">The element that becomes the parent of the moved nodes</param>
private void MoveNodesDown(ref HtmlNodeCollection nodes, int node_index, HtmlElement new_parent)
{
    // Pass 1: attach each trailing node to its new parent.
    for (int source = node_index; source < nodes.Count; source++)
    {
        HtmlNode child = nodes[source];
        new_parent.Nodes.Add(child);
        child.SetParent(new_parent);
    }

    // Pass 2: drop the moved nodes from the source collection. Removing at the
    // same position each time works because later entries shift down by one.
    for (int remaining = nodes.Count - node_index; remaining > 0; remaining--)
    {
        nodes.RemoveAt(node_index);
    }

    new_parent.IsExplicitlyTerminated = true;
}