public void ParentNodeOfNotRootElementIsNotNull()
{
    // Arrange: attach a single <a> element directly to a fresh document node
    var document = new HtmlDocumentNode();
    var anchor = new HtmlElementNode("a");
    document.AppendChild(anchor);

    // Assert: the appended element reports the document as its parent
    Assert.Equal(document, anchor.ParentNode);
}
/// <summary>
/// Fills <paramref name="listView"/> with one row per attribute of
/// <paramref name="node"/>: the attribute key in the first column and its
/// value in a sub-item.
/// </summary>
/// <param name="node">Element whose attributes are listed.</param>
/// <param name="listView">List view that is initialized with the attribute columns and then populated.</param>
private static void PopulatePropertiesElement(HtmlElementNode node, ListView listView)
{
    InitializeListView(AttributeColumns, listView);

    foreach (var attribute in node.Attributes)
    {
        var row = listView.Items.Add(attribute.Key);

        if (attribute.Value != null)
        {
            row.SubItems.Add(attribute.Value.Value);
        }
        else
        {
            // Placeholder shown when the entry carries no attribute object
            row.SubItems.Add("(null)");
        }
    }
}
public void PreviousSiblingIsNull()
{
    // Arrange: the <a> element is the only child of the document
    var document = new HtmlDocumentNode();
    var anchor = new HtmlElementNode("a");
    document.AppendChild(anchor);

    // Assert: nothing precedes an only child
    Assert.Null(anchor.PreviousSibling);
}
public void NextSiblingIsNull()
{
    // Arrange: the <a> element is the only child of the document
    var document = new HtmlDocumentNode();
    var anchor = new HtmlElementNode("a");
    document.AppendChild(anchor);

    // Assert: nothing follows an only child
    Assert.Null(anchor.NextSibling);
}
public void PreviousSiblingIsBr()
{
    // Arrange: a <br> element is appended first, then the <a> element
    var document = new HtmlDocumentNode();
    var anchor = new HtmlElementNode("a");
    document.AppendChild(new HtmlElementNode("br"));
    document.AppendChild(anchor);

    // Assert: the node before <a> is the <br>
    Assert.Equal("br", anchor.PreviousSibling.Name);
}
public void NextSiblingIsBr()
{
    // Arrange: the <a> element is appended first, then a <br> element
    var document = new HtmlDocumentNode();
    var anchor = new HtmlElementNode("a");
    document.AppendChild(anchor);
    document.AppendChild(new HtmlElementNode("br"));

    // Assert: the node after <a> is the <br>
    Assert.Equal("br", anchor.NextSibling.Name);
}
/// <summary>
/// Tests whether the given <see cref="HtmlElementNode"/> satisfies this selector.
/// </summary>
/// <param name="node">The element to test.</param>
/// <returns>
/// <c>false</c> if <c>Tag</c> is non-blank and differs from the node's tag name
/// (compared using <c>HtmlRules.TagStringComparison</c>), or if any attribute
/// selector rejects the node; otherwise <c>true</c>.
/// </returns>
public bool IsMatch(HtmlElementNode node)
{
    // A blank Tag places no constraint on the element's tag name
    bool tagSpecified = !string.IsNullOrWhiteSpace(Tag);
    if (tagSpecified && !string.Equals(Tag, node.TagName, HtmlRules.TagStringComparison))
    {
        return false;
    }

    // Every attribute selector must accept the node
    foreach (AttributeSelector attributeSelector in Attributes)
    {
        bool accepted = attributeSelector.IsMatch(node);
        if (!accepted)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Finds the field elements under the given node by delegating to
/// <c>Selectors.Find</c>.
/// </summary>
/// <param name="node">Root node to search.</param>
/// <returns>
/// The result of <c>Selectors.Find(node)</c>, or an empty sequence when
/// <c>Selectors</c> is <c>null</c>.
/// </returns>
internal IEnumerable<HtmlElementNode> FindValue(HtmlElementNode node)
{
    // Without selectors there is nothing to match against
    if (Selectors == null)
    {
        return Enumerable.Empty<HtmlElementNode>();
    }

    return Selectors.Find(node);
}
/// <summary>
/// Reads the value of the attribute named by <c>AttributeName</c> from the given node.
/// </summary>
/// <param name="node">The element whose attribute collection is queried.</param>
/// <returns>
/// The attribute's value, or <see cref="string.Empty"/> when the attribute
/// lookup or its value yields <c>null</c>.
/// </returns>
internal override string GetValueFromNode(HtmlElementNode node)
{
    var attribute = node.Attributes[AttributeName];
    return attribute?.Value ?? string.Empty;
}
/// <summary>
/// Builds an in-memory document modelling a small XML plant catalog: an XML
/// header, a <c>catalog</c> element and five <c>plant</c> entries, with
/// whitespace text nodes between the elements.
/// </summary>
/// <returns>The newly constructed document.</returns>
public static HtmlDocument BuildXmlDocument()
{
    // Wraps a single text node in an element, e.g. <zone>4</zone>
    static HtmlElementNode Field(string name, string text) =>
        new HtmlElementNode(name, null, new HtmlNodeCollection { new HtmlTextNode(text) });

    // Builds one detached <plant> element whose five fields are each
    // preceded by, and finally followed by, a whitespace text node
    static HtmlElementNode Plant(string common, string botanical, string zone, string light, string price)
    {
        HtmlElementNode plant = new("plant");
        plant.Children.Add(new HtmlTextNode("\r\n "));
        plant.Children.Add(Field("common", common));
        plant.Children.Add(new HtmlTextNode("\r\n "));
        plant.Children.Add(Field("botanical", botanical));
        plant.Children.Add(new HtmlTextNode("\r\n "));
        plant.Children.Add(Field("zone", zone));
        plant.Children.Add(new HtmlTextNode("\r\n "));
        plant.Children.Add(Field("light", light));
        plant.Children.Add(new HtmlTextNode("\r\n "));
        plant.Children.Add(Field("price", price));
        plant.Children.Add(new HtmlTextNode("\r\n "));
        return plant;
    }

    HtmlDocument result = new();

    // XML header
    result.RootNodes.Add(new XmlHeaderNode(new HtmlAttributeCollection
    {
        new HtmlAttribute("xml"),
        new HtmlAttribute("version", "1.0"),
        new HtmlAttribute("encoding", "UTF-8")
    }));
    result.RootNodes.Add(new HtmlTextNode("\r\n"));

    // Catalog element
    HtmlElementNode catalog = result.RootNodes.Add(new HtmlElementNode("catalog"));
    catalog.Children.Add(new HtmlTextNode("\r\n "));

    // Plant entries, each followed by a whitespace text node
    catalog.Children.Add(Plant("Bloodroot", "Sanguinaria canadensis", "4", "Mostly Shady", "$2.44"));
    catalog.Children.Add(new HtmlTextNode("\r\n "));

    catalog.Children.Add(Plant("Columbine", "Aquilegia canadensis", "3", "Mostly Shady", "$9.37"));
    catalog.Children.Add(new HtmlTextNode("\r\n "));

    catalog.Children.Add(Plant("Marsh Marigold", "Caltha palustris", "4", "Mostly Sunny", "$6.81"));
    catalog.Children.Add(new HtmlTextNode("\r\n "));

    catalog.Children.Add(Plant("Dutchman's-Breeches", "Dicentra cucullaria", "3", "Mostly Shady", "$6.44"));
    catalog.Children.Add(new HtmlTextNode("\r\n "));

    // The final entry is followed by a bare line break (no trailing space)
    catalog.Children.Add(Plant("Ginger, Wild", "Asarum canadense", "3", "Mostly Shady", "$9.03"));
    catalog.Children.Add(new HtmlTextNode("\r\n"));

    result.RootNodes.Add(new HtmlTextNode("\r\n"));

    return result;
}
/// <summary>
/// Parses an HTML string and returns the nodes found at its top level.
/// </summary>
/// <param name="html">The HTML text to parse.</param>
/// <returns>
/// The children gathered under a temporary <c>[Root]</c> element while parsing.
/// </returns>
public IEnumerable<HtmlNode> ParseChildren(string html)
{
    TextParser parser = new TextParser(html);
    HtmlElementNode root = new HtmlElementNode("[Root]");

    // Element that currently receives newly parsed nodes
    HtmlElementNode current = root;

    // Loop until end of input
    while (!parser.EndOfText)
    {
        if (parser.Peek() == HtmlRules.TagStart)
        {
            // CDATA segments (comments included) are stored as-is, not parsed
            CDataDefinition cdata = HtmlRules.CDataDefinitions.FirstOrDefault(
                d => parser.MatchesCurrentPosition(d.StartText, d.IgnoreCase));
            if (cdata != null)
            {
                current.Children.Add(ParseCDataNode(parser, cdata));
                continue;
            }

            // Closing tag
            if (parser.Peek(1) == HtmlRules.ForwardSlash)
            {
                parser.MoveAhead(2);
                string closingTag = parser.ParseWhile(ch => HtmlRules.IsTagCharacter(ch));
                if (closingTag.Length > 0)
                {
                    if (!current.TagName.Equals(closingTag, HtmlRules.TagStringComparison))
                    {
                        // Mismatched closing tag: climb while the closing tag's
                        // priority exceeds that of the current element
                        int closingPriority = HtmlRules.GetTagPriority(closingTag);
                        while (!current.IsTopLevelNode &&
                               closingPriority > HtmlRules.GetTagPriority(current.TagName))
                        {
                            current = current.ParentNode;
                        }
                    }

                    if (current.TagName.Equals(closingTag, HtmlRules.TagStringComparison))
                    {
                        // Should never have matched parent if the top-level node
                        Debug.Assert(!current.IsTopLevelNode);
                        current = current.ParentNode;
                    }
                }

                // Skip whatever remains of the closing tag
                parser.MoveTo(HtmlRules.TagEnd);
                parser.MoveAhead();
                continue;
            }

            // Open tag
            if (ParseTag(parser, out string tag))
            {
                HtmlTagFlag flags = HtmlRules.GetTagFlags(tag);

                if (flags.HasFlag(HtmlTagFlag.HtmlHeader))
                {
                    current.Children.Add(ParseHtmlHeader(parser));
                    continue;
                }

                if (flags.HasFlag(HtmlTagFlag.XmlHeader))
                {
                    current.Children.Add(ParseXmlHeader(parser));
                    continue;
                }

                // Parse attributes
                HtmlAttributeCollection attributes = ParseAttributes(parser);

                // Parse rest of tag
                bool selfClosing = parser.Peek() == HtmlRules.ForwardSlash;
                if (selfClosing)
                {
                    parser.MoveAhead();
                    parser.MovePastWhitespace();
                }
                parser.MoveTo(HtmlRules.TagEnd);
                parser.MoveAhead();

                // Climb until an element that may contain this tag (or the top level) is reached
                HtmlElementNode element = new HtmlElementNode(tag, attributes);
                while (!HtmlRules.TagMayContain(current.TagName, tag) && !current.IsTopLevelNode)
                {
                    Debug.Assert(current.ParentNode != null);
                    current = current.ParentNode;
                }
                current.Children.Add(element);

                if (flags.HasFlag(HtmlTagFlag.CData))
                {
                    // CDATA tags are treated as elements but we store and do not parse the inner content
                    if (!selfClosing &&
                        ParseToClosingTag(parser, tag, out string content) &&
                        content.Length > 0)
                    {
                        element.Children.Add(new HtmlCDataNode(string.Empty, string.Empty, content));
                    }
                }
                else
                {
                    // A self-closing marker is ignored for tags flagged NoSelfClosing
                    bool staysOpen = !selfClosing || flags.HasFlag(HtmlTagFlag.NoSelfClosing);
                    if (staysOpen && !flags.HasFlag(HtmlTagFlag.NoChildren))
                    {
                        current = element; // Element becomes the new parent
                    }
                }

                continue;
            }
        }

        // Text node: must be at least 1 character (handles '<' that is not part of a tag)
        int start = parser.Index;
        parser.MoveAhead();
        parser.MoveTo(HtmlRules.TagStart);
        Debug.Assert(parser.Index > start);
        current.Children.Add(new HtmlTextNode(parser.Extract(start, parser.Index)));
    }

    return root.Children;
}
/// <summary>
/// Builds a short description of an element: its tag name wrapped in angle brackets.
/// </summary>
/// <param name="node">The element to describe.</param>
/// <returns>The text <c>&lt;TagName&gt;</c> for the given node.</returns>
private static string ShortDescriptionElement(HtmlElementNode node)
{
    return "<" + node.TagName + ">";
}
/// <summary>
/// Returns the long description for an element node, which is always empty.
/// </summary>
/// <param name="node">The element to describe (not inspected).</param>
/// <returns><see cref="string.Empty"/>.</returns>
private static string LongDescriptionElement(HtmlElementNode node)
{
    return string.Empty;
}
/// <summary>
/// Creates a node collection and records <paramref name="parentNode"/> in <c>ParentNode</c>.
/// </summary>
/// <param name="parentNode">The element stored as this collection's parent.</param>
public HtmlNodeCollection(HtmlElementNode parentNode) => ParentNode = parentNode;
/// <summary>
/// Parses an HTML string into a collection of top-level nodes together with
/// their children.
/// </summary>
/// <param name="html">The HTML text to parse.</param>
/// <param name="ignoreHtmlRules">
/// When <c>true</c>, every open tag is processed with <c>HtmlTagFlag.None</c>
/// instead of the flags returned by <c>HtmlRules.GetTagFlags</c>.
/// </param>
/// <returns>
/// The children gathered under a temporary container element while parsing.
/// </returns>
public IEnumerable<HtmlNode> ParseChildren(string? html, bool ignoreHtmlRules = false)
{
    HtmlElementNode container = new("[TempContainer]");

    // Element that currently receives newly parsed nodes
    HtmlElementNode current = container;

    Parser.Reset(html);

    // Loop until end of input
    while (!Parser.EndOfText)
    {
        if (Parser.Peek() == HtmlRules.TagStart)
        {
            // CDATA segments (blocks we store but don't parse--includes comments)
            CDataDefinition? cdata = HtmlRules.CDataDefinitions.FirstOrDefault(
                d => Parser.MatchesCurrentPosition(d.StartText, d.StartComparison));
            if (cdata != null)
            {
                current.Children.Add(ParseCDataNode(cdata));
                continue;
            }

            // Closing tag
            if (Parser.Peek(1) == HtmlRules.ForwardSlash)
            {
                Parser.Index += 2;
                string closingTag = Parser.ParseWhile(HtmlRules.IsTagCharacter);
                if (closingTag.Length > 0)
                {
                    if (!current.TagName.Equals(closingTag, HtmlRules.TagStringComparison))
                    {
                        // Mismatched closing tag: climb while the closing tag's
                        // nest level exceeds that of the current element
                        int closingLevel = HtmlRules.GetTagNestLevel(closingTag);
                        while (!current.IsTopLevelNode &&
                               closingLevel > HtmlRules.GetTagNestLevel(current.TagName))
                        {
                            current = current.ParentNode;
                        }
                    }

                    // Close the matching element, but never step above the top-level node
                    if (current.TagName.Equals(closingTag, HtmlRules.TagStringComparison) &&
                        !current.IsTopLevelNode)
                    {
                        current = current.ParentNode;
                    }
                }

                // Skip whatever remains of the closing tag
                Parser.SkipTo(HtmlRules.TagEnd);
                Parser.Next();
                continue;
            }

            // Open tag
            if (ParseTag(out string? tag))
            {
                HtmlTagFlag flags;
                if (ignoreHtmlRules)
                {
                    flags = HtmlTagFlag.None;
                }
                else
                {
                    flags = HtmlRules.GetTagFlags(tag);
                }

                if (flags.HasFlag(HtmlTagFlag.HtmlHeader))
                {
                    current.Children.Add(ParseHtmlHeader());
                    continue;
                }

                if (flags.HasFlag(HtmlTagFlag.XmlHeader))
                {
                    current.Children.Add(ParseXmlHeader());
                    continue;
                }

                // Parse attributes
                HtmlAttributeCollection attributes = ParseAttributes();

                // Parse rest of tag
                bool selfClosing = Parser.Peek() == HtmlRules.ForwardSlash;
                if (selfClosing)
                {
                    Parser.Next();
                    Parser.SkipWhiteSpace();
                }
                Parser.SkipTo(HtmlRules.TagEnd);
                Parser.Next();

                // Climb until an element that may contain this tag (or the top level) is reached
                HtmlElementNode element = new(tag, attributes);
                while (!HtmlRules.TagMayContain(current.TagName, tag) && !current.IsTopLevelNode)
                {
                    current = current.ParentNode;
                }
                current.Children.Add(element);

                if (flags.HasFlag(HtmlTagFlag.CData))
                {
                    // CDATA tags are treated as elements but we store and do not parse the inner content
                    if (!selfClosing &&
                        ParseToClosingTag(tag, out string? content) &&
                        content.Length > 0)
                    {
                        element.Children.Add(new HtmlCDataNode(string.Empty, string.Empty, content));
                    }
                }
                else
                {
                    // A self-closing marker is ignored for tags flagged NoSelfClosing
                    bool staysOpen = !selfClosing || flags.HasFlag(HtmlTagFlag.NoSelfClosing);
                    if (staysOpen && !flags.HasFlag(HtmlTagFlag.NoChildren))
                    {
                        current = element; // Element becomes the new parent
                    }
                }

                continue;
            }
        }

        // Text node: must be at least 1 character (handles '<' that was not a tag)
        string text = Parser.ParseCharacter();
        text += Parser.ParseTo(HtmlRules.TagStart);
        current.Children.Add(new HtmlTextNode(text));
    }

    // Return top-level nodes from nodes just parsed
    return container.Children;
}
/// <summary>
/// Extracts this field's value from the given node. The member is abstract,
/// so how the value is obtained is defined by each derived type.
/// </summary>
/// <param name="node">The element node to read the value from.</param>
/// <returns>The extracted value as a string.</returns>
internal abstract string GetValueFromNode(HtmlElementNode node);
/// <summary>
/// Uses the node's <c>Text</c> property as the field value.
/// </summary>
/// <param name="node">The element node to read from.</param>
/// <returns>The value of <c>node.Text</c>.</returns>
internal override string GetValueFromNode(HtmlElementNode node)
{
    return node.Text;
}
/// <summary>
/// Builds an in-memory HTML document: an HTML header, then an <c>html</c>
/// element holding a <c>head</c> (title and meta description) and a
/// <c>body</c> (one comment and two paragraphs), with whitespace text nodes
/// between the elements.
/// </summary>
/// <returns>The newly constructed document.</returns>
public static HtmlDocument BuildHtmlDocument()
{
    // Appends a <p id="..."> element to the body; its text is surrounded by whitespace nodes
    static void AppendParagraph(HtmlElementNode body, string id, string text)
    {
        HtmlElementNode paragraph = body.Children.Add(
            new HtmlElementNode("p", new HtmlAttributeCollection { new HtmlAttribute("id", id) }));
        paragraph.Children.Add(new HtmlTextNode("\r\n "));
        paragraph.Children.Add(new HtmlTextNode(text));
        paragraph.Children.Add(new HtmlTextNode("\r\n "));
    }

    HtmlDocument result = new();

    // HTML header
    result.RootNodes.Add(new HtmlHeaderNode(new HtmlAttributeCollection { new HtmlAttribute("html") }));
    result.RootNodes.Add(new HtmlTextNode("\r\n"));

    // HTML element
    HtmlElementNode html = result.RootNodes.Add(new HtmlElementNode("html"));
    html.Children.Add(new HtmlTextNode("\r\n "));

    // Head element
    HtmlElementNode head = html.Children.Add(new HtmlElementNode("head"));
    head.Children.Add(new HtmlTextNode("\r\n "));

    // Title element
    HtmlElementNode title = head.Children.Add(new HtmlElementNode("title"));
    title.Children.Add(new HtmlTextNode("Title"));

    // Meta element
    head.Children.Add(new HtmlTextNode("\r\n "));
    head.Children.Add(new HtmlElementNode("meta", new HtmlAttributeCollection
    {
        new HtmlAttribute("name", "description"),
        new HtmlAttribute("content", "This is my test meta description node!")
    }));
    head.Children.Add(new HtmlTextNode("\r\n "));

    // Body element
    html.Children.Add(new HtmlTextNode("\r\n "));
    HtmlElementNode bodyElement = html.Children.Add(new HtmlElementNode("body"));

    // Comment
    bodyElement.Children.Add(new HtmlTextNode("\r\n "));
    bodyElement.Children.Add(new HtmlCDataNode("<!--", "-->", " Here's a comment! "));

    // First paragraph
    bodyElement.Children.Add(new HtmlTextNode("\r\n "));
    AppendParagraph(bodyElement, "par1", "This is my first paragraph");

    // Second paragraph
    bodyElement.Children.Add(new HtmlTextNode("\r\n "));
    AppendParagraph(bodyElement, "par2", "This is my second paragraph");

    bodyElement.Children.Add(new HtmlTextNode("\r\n "));
    html.Children.Add(new HtmlTextNode("\r\n"));

    return result;
}