public HtmlNodeCollection FindByAttributeNameValue(string attributeName, string attributeValue, bool searchChildren)
{
    // Collects every element in this collection that carries an attribute
    // named attributeName whose value equals attributeValue. Both comparisons
    // ignore case. When searchChildren is true, each element's child nodes
    // are searched recursively and their matches are appended right after
    // the element itself.
    //
    // Only the first attribute with a matching name is examined on each element.
    // A null attributeValue matches an attribute whose value is null.
    HtmlNodeCollection results = new HtmlNodeCollection(null);

    foreach (HtmlNode node in base.List)
    {
        HtmlElement element = node as HtmlElement;
        if (element == null)
        {
            // Non-element nodes have neither attributes nor children to search.
            continue;
        }

        foreach (HtmlAttribute attribute in element.Attributes)
        {
            // Ordinal, case-insensitive comparison: independent of the current
            // culture (unlike ToLower()) and safe when either side is null.
            if (!string.Equals(attribute.Name, attributeName, System.StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // The static Equals tolerates attributes that have no value.
            if (string.Equals(attribute.Value, attributeValue, System.StringComparison.OrdinalIgnoreCase))
            {
                results.Add(node);
            }

            // Stop at the first attribute with the requested name.
            break;
        }

        if (searchChildren)
        {
            foreach (HtmlNode matchedChild in element.Nodes.FindByAttributeNameValue(attributeName, attributeValue, searchChildren))
            {
                results.Add(matchedChild);
            }
        }
    }

    return results;
}
/// <summary>
/// Searches through this collection of nodes for all elements with the
/// specified name. The name comparison ignores case and does not depend on
/// the current culture. Pass true for searchChildren to also search the
/// child nodes of every element recursively. An element is added to the
/// result before any matches found among its children.
/// </summary>
/// <param name="name">The name of the element to find</param>
/// <param name="searchChildren">True to search sub-nodes as well, false to
/// search only this collection.</param>
/// <returns>A collection of all the nodes that match.</returns>
public HtmlNodeCollection FindByName(string name, bool searchChildren)
{
    HtmlNodeCollection results = new HtmlNodeCollection(null);

    foreach (HtmlNode node in base.List)
    {
        HtmlElement element = node as HtmlElement;
        if (element == null)
        {
            // Only elements have a name and child nodes.
            continue;
        }

        // Ordinal, case-insensitive comparison: avoids lower-casing both
        // strings on every iteration and is null-safe on either side.
        if (string.Equals(element.Name, name, System.StringComparison.OrdinalIgnoreCase))
        {
            results.Add(node);
        }

        if (searchChildren)
        {
            foreach (HtmlNode matchedChild in element.Nodes.FindByName(name, searchChildren))
            {
                results.Add(matchedChild);
            }
        }
    }

    return results;
}
/// <summary>
/// Parses a string of HTML markup and returns the nodes built from it.
/// </summary>
/// <param name="html">The HTML markup to parse</param>
/// <returns>The collection of nodes produced from the markup</returns>
public HtmlNodeCollection Parse(string html)
{
    HtmlNodeCollection nodes = new HtmlNodeCollection(null);

    // Pre-process the markup (script blocks, style blocks, comments) before
    // splitting it into tokens.
    html = PreprocessScript(html, "script");
    html = PreprocessScript(html, "style");
    html = RemoveComments(html);
    html = RemoveSGMLComments(html);

    StringCollection tokens = GetTokens(html);
    int pos = 0;

    // The most recently opened element; reset to null after a self-closing
    // tag or a close tag. Consulted when deciding whether to decode text.
    HtmlElement current = null;

    while (pos < tokens.Count)
    {
        switch (tokens[pos])
        {
            case "<":
            {
                // Open tag: the next token is the tag name.
                pos++;
                if (pos >= tokens.Count)
                {
                    return nodes;
                }

                string tagName = tokens[pos];
                pos++;
                current = new HtmlElement(tagName);

                // Consume attributes until the tag is closed by ">" or "/>".
                while (pos < tokens.Count && tokens[pos] != ">" && tokens[pos] != "/>")
                {
                    string attrName = tokens[pos];
                    pos++;

                    if (pos < tokens.Count && tokens[pos] == "=")
                    {
                        // name = value; the value is null if the input ends here.
                        pos++;
                        string rawValue = (pos < tokens.Count) ? tokens[pos] : null;
                        pos++;
                        HtmlAttribute withValue = new HtmlAttribute(attrName, HtmlEncoder.DecodeValue(rawValue));
                        current.Attributes.Add(withValue);
                    }
                    else if (pos < tokens.Count)
                    {
                        // Attribute that has a name but no value.
                        HtmlAttribute withoutValue = new HtmlAttribute(attrName, null);
                        current.Attributes.Add(withoutValue);
                    }
                }

                nodes.Add(current);

                if (pos < tokens.Count && tokens[pos] == "/>")
                {
                    // Self-closing tag: mark it terminated and forget it.
                    current.IsTerminated = true;
                    pos++;
                    current = null;
                }
                else if (pos < tokens.Count && tokens[pos] == ">")
                {
                    pos++;
                }
                break;
            }

            case ">":
                // Stray tag terminator: skip it.
                pos++;
                break;

            case "</":
            {
                // Close tag: the next token is the tag name.
                pos++;
                if (pos >= tokens.Count)
                {
                    return nodes;
                }

                string closingName = tokens[pos];
                pos++;

                int openerIndex = FindTagOpenNodeIndex(nodes, closingName);
                if (openerIndex != -1)
                {
                    // Hand the nodes that follow the opening element over to it.
                    MoveNodesDown(ref nodes, openerIndex + 1, (HtmlElement)nodes[openerIndex]);
                }
                // A close tag with no matching open tag is silently ignored.

                // Skip whatever remains of the close tag, including its ">".
                while (pos < tokens.Count && tokens[pos] != ">")
                {
                    pos++;
                }
                if (pos < tokens.Count && tokens[pos] == ">")
                {
                    pos++;
                }

                current = null;
                break;
            }

            default:
            {
                // Anything else is text content.
                string text = tokens[pos];
                if (mRemoveEmptyElementText)
                {
                    text = RemoveWhitespace(text);
                }
                text = DecodeScript(text);

                bool discard = mRemoveEmptyElementText && text.Length == 0;
                if (!discard)
                {
                    // Text following an element flagged NoEscaping is kept undecoded.
                    bool keepRaw = current != null && current.NoEscaping;
                    if (!keepRaw)
                    {
                        text = HtmlEncoder.DecodeValue(text);
                    }
                    HtmlText textNode = new HtmlText(text);
                    nodes.Add(textNode);
                }

                pos++;
                break;
            }
        }
    }

    return nodes;
}
/// <summary>
/// Turns a string of HTML markup into a collection of nodes.
/// </summary>
/// <param name="html">The HTML markup to be parsed</param>
/// <returns>The collection of nodes built from the markup</returns>
public HtmlNodeCollection Parse(string html)
{
    HtmlNodeCollection nodes = new HtmlNodeCollection(null);

    // Script blocks, style blocks and comments are handled up front, then
    // the remaining markup is tokenized.
    html = PreprocessScript(html, "script");
    html = PreprocessScript(html, "style");
    html = RemoveComments(html);
    html = RemoveSGMLComments(html);
    StringCollection tokens = GetTokens(html);

    int cursor = 0;
    HtmlElement lastOpened = null; // element whose NoEscaping flag governs following text

    while (cursor < tokens.Count)
    {
        string token = tokens[cursor];

        if (token == "<")
        {
            // ---- open tag ----
            cursor++;
            if (cursor >= tokens.Count)
            {
                break;
            }

            string elementName = tokens[cursor];
            cursor++;
            lastOpened = new HtmlElement(elementName);

            // Read attributes up to the closing ">" or "/>".
            while (cursor < tokens.Count && tokens[cursor] != ">" && tokens[cursor] != "/>")
            {
                string attributeName = tokens[cursor];
                cursor++;

                bool followedByEquals = cursor < tokens.Count && tokens[cursor] == "=";
                if (followedByEquals)
                {
                    cursor++;
                    string attributeValue = null;
                    if (cursor < tokens.Count)
                    {
                        attributeValue = tokens[cursor];
                    }
                    cursor++;
                    HtmlAttribute valued = new HtmlAttribute(attributeName, HtmlEncoder.DecodeValue(attributeValue));
                    lastOpened.Attributes.Add(valued);
                }
                else if (cursor < tokens.Count)
                {
                    // Attribute present without a value.
                    HtmlAttribute valueless = new HtmlAttribute(attributeName, null);
                    lastOpened.Attributes.Add(valueless);
                }
            }

            nodes.Add(lastOpened);

            if (cursor < tokens.Count && tokens[cursor] == "/>")
            {
                // Self-closing element.
                lastOpened.IsTerminated = true;
                cursor++;
                lastOpened = null;
            }
            else if (cursor < tokens.Count && tokens[cursor] == ">")
            {
                cursor++;
            }
            continue;
        }

        if (token == ">")
        {
            // Stray terminator outside any tag.
            cursor++;
            continue;
        }

        if (token == "</")
        {
            // ---- close tag ----
            cursor++;
            if (cursor >= tokens.Count)
            {
                break;
            }

            string closedName = tokens[cursor];
            cursor++;

            int openIndex = FindTagOpenNodeIndex(nodes, closedName);
            if (openIndex != -1)
            {
                // Pass the nodes after the opening element down to that element.
                MoveNodesDown(ref nodes, openIndex + 1, (HtmlElement)nodes[openIndex]);
            }
            // Otherwise the close tag has no opening tag and is ignored.

            // Advance past the rest of the close tag and its ">".
            while (cursor < tokens.Count && tokens[cursor] != ">")
            {
                cursor++;
            }
            if (cursor < tokens.Count && tokens[cursor] == ">")
            {
                cursor++;
            }

            lastOpened = null;
            continue;
        }

        // ---- text ----
        string content = token;
        if (mRemoveEmptyElementText)
        {
            content = RemoveWhitespace(content);
        }
        content = DecodeScript(content);

        if (!mRemoveEmptyElementText || content.Length != 0)
        {
            // Decode entities unless the last opened element is flagged NoEscaping.
            if (lastOpened == null || !lastOpened.NoEscaping)
            {
                content = HtmlEncoder.DecodeValue(content);
            }
            HtmlText textNode = new HtmlText(content);
            nodes.Add(textNode);
        }
        cursor++;
    }

    return nodes;
}
/// <summary>
/// Searches this collection of nodes for all elements that carry an
/// attribute with the given name and value. Both comparisons ignore case
/// and do not depend on the current culture. Only the first attribute with
/// a matching name is examined on each element. An element is added to the
/// result before any matches found among its children.
/// </summary>
/// <param name="attributeName">The name of the attribute to look for</param>
/// <param name="attributeValue">The value the attribute must have; null
/// matches an attribute whose value is null.</param>
/// <param name="searchChildren">True to search sub-nodes as well, false to
/// search only this collection.</param>
/// <returns>A collection of all the nodes that match.</returns>
public HtmlNodeCollection FindByAttributeNameValue(string attributeName,string attributeValue,bool searchChildren)
{
    HtmlNodeCollection results = new HtmlNodeCollection(null);

    foreach( HtmlNode node in base.List )
    {
        HtmlElement element = node as HtmlElement;
        if( element == null )
        {
            // Non-element nodes have neither attributes nor children to search.
            continue;
        }

        foreach( HtmlAttribute attribute in element.Attributes )
        {
            // Ordinal, case-insensitive comparison: culture-independent
            // (unlike ToLower()) and safe when either side is null.
            if( ! string.Equals( attribute.Name , attributeName , System.StringComparison.OrdinalIgnoreCase ) )
            {
                continue;
            }

            // The static Equals tolerates attributes that have no value.
            if( string.Equals( attribute.Value , attributeValue , System.StringComparison.OrdinalIgnoreCase ) )
            {
                results.Add( node );
            }

            // Stop at the first attribute with the requested name.
            break;
        }

        if( searchChildren )
        {
            foreach( HtmlNode matchedChild in element.Nodes.FindByAttributeNameValue( attributeName , attributeValue , searchChildren ) )
            {
                results.Add( matchedChild );
            }
        }
    }

    return results;
}
/// <summary>
/// Searches through this collection of nodes for all elements with the
/// specified name. The name comparison ignores case and does not depend on
/// the current culture. Pass true for searchChildren to also search the
/// child nodes of every element recursively. An element is added to the
/// result before any matches found among its children.
/// </summary>
/// <param name="name">The name of the element to find</param>
/// <param name="searchChildren">True to search sub-nodes as well, false to
/// search only this collection.</param>
/// <returns>A collection of all the nodes that match.</returns>
public HtmlNodeCollection FindByName(string name,bool searchChildren)
{
    HtmlNodeCollection results = new HtmlNodeCollection(null);

    foreach( HtmlNode node in base.List )
    {
        HtmlElement element = node as HtmlElement;
        if( element == null )
        {
            // Only elements have a name and child nodes.
            continue;
        }

        // Ordinal, case-insensitive comparison: avoids lower-casing both
        // strings on every iteration and is null-safe on either side.
        if( string.Equals( element.Name , name , System.StringComparison.OrdinalIgnoreCase ) )
        {
            results.Add( node );
        }

        if( searchChildren )
        {
            foreach( HtmlNode matchedChild in element.Nodes.FindByName( name , searchChildren ) )
            {
                results.Add( matchedChild );
            }
        }
    }

    return results;
}