/// <summary>
/// Parses <c>&lt;Custom&gt;&lt;Another/&gt;&lt;/Custom&gt;</c> with an
/// <c>AnotherScanner</c> registered and checks that the custom tag has exactly
/// one <c>AnotherTag</c> child, together with the element positions of the
/// start tag, the child, the end tag and the composite as a whole.
/// </summary>
public void CompositeTagWithAnotherTagChild()
{
    CreateParser("<Custom>" + "<Another/>" + "</Custom>");
    parser.AddScanner(new AnotherScanner());
    CustomTag customTag = ParseCustomTag(1);

    Assert.AreEqual(1, customTag.ChildCount, "child count");
    Assert.IsFalse(customTag.EmptyXmlTag, "custom tag should not be xml end tag");

    // Positions of the opening tag and of the composite as a whole.
    Assert.AreEqual(0, customTag.StartTag.ElementBegin, "starting loc");
    Assert.AreEqual(7, customTag.StartTag.ElementEnd, "ending loc");
    Assert.AreEqual(0, customTag.ElementBegin, "custom tag starting loc");
    Assert.AreEqual(26, customTag.ElementEnd, "custom tag ending loc");

    // The only child must be the self-closing Another tag.
    Node child = customTag[0];
    AssertType("child", typeof(AnotherTag), child);
    AnotherTag tag = (AnotherTag)child;
    Assert.AreEqual(8, tag.ElementBegin, "another tag start pos");
    Assert.AreEqual(17, tag.ElementEnd, "another tag ending pos");

    Assert.AreEqual(18, customTag.EndTag.ElementBegin, "custom end tag start pos");
    AssertStringEquals("child html", "<ANOTHER/>", child.ToHtml());
}
/// <summary>
/// Parses nested custom tags with a <c>CustomScanner(this, false)</c> and an
/// <c>AnotherScanner</c> registered, expects three top-level nodes, and checks
/// the <c>Parent</c> links: top-level nodes have none, while the text inside the
/// second custom tag points back at that tag.
/// </summary>
public void ParentConnections()
{
    CreateParser("<custom>" + "<custom>something</custom>" + "</custom>");
    parser.AddScanner(new CustomScanner(this, false));
    parser.AddScanner(new AnotherScanner());
    ParseAndAssertNodeCount(3);

    // Node 0: the outer opening tag, closed immediately.
    CustomTag outerTag = (CustomTag)node[0];
    AssertStringEquals("first custom tag html", "<CUSTOM></CUSTOM>", outerTag.ToHtml());
    Assert.IsNull(outerTag.Parent, "first custom tag should have no parent");

    // Node 1: the custom tag that wraps the text.
    CustomTag textHolder = (CustomTag)node[1];
    AssertStringEquals("first custom tag html", "<CUSTOM>something</CUSTOM>", textHolder.ToHtml());
    Assert.IsNull(textHolder.Parent, "second custom tag should have no parent");

    // The wrapped text must know its enclosing tag.
    Node textChild = textHolder[0];
    AssertType("firstChild", typeof(StringNode), textChild);
    CompositeTag owner = textChild.Parent;
    Assert.IsNotNull(owner, "first child parent should not be null");
    Assert.AreSame(textHolder, owner, "parent and custom tag should be the same");

    // Node 2: the left-over closing tag.
    EndTag trailingEndTag = (EndTag)node[2];
    AssertStringEquals("first custom tag html", "</CUSTOM>", trailingEndTag.ToHtml());
    Assert.IsNull(trailingEndTag.Parent, "end tag should have no parent");
}
/// <summary>
/// Asks <paramref name="node"/> to collect every object of the given
/// <paramref name="type"/> into a fresh list and returns the result as an array.
/// </summary>
/// <param name="node">The node to search; its <c>CollectInto</c> method does the work.</param>
/// <param name="type">The type of object to collect.</param>
/// <returns>The collected nodes as a <c>Node[]</c>.</returns>
public static Node[] FindTypeInNode(Node node, Type type)
{
    NodeList matches = new NodeList();
    node.CollectInto(matches, type);
    return matches.ToNodeArray();
}
/// <summary>
/// Appends <paramref name="node"/> after the current last element, calling
/// <c>AdjustVectorCapacity</c> first when the backing array is full.
/// </summary>
/// <param name="node">The node to append.</param>
public virtual void Add(Node node)
{
    bool isFull = size == capacity;
    if (isFull)
    {
        AdjustVectorCapacity();
    }

    // Claim the next slot before storing, exactly as a post-increment index does.
    int slot = size++;
    nodeData[slot] = node;
}
/// <summary>
/// Parses a single OPTION element and checks that
/// <c>TagScanner.IsXMLTagFound</c> recognises the first node as an OPTION tag.
/// </summary>
public void IsXMLTag()
{
    CreateParser("<OPTION value=\"#\">Select a destination</OPTION>");

    NodeIterator nodes = parser.GetEnumerator();
    nodes.MoveNext();
    Node firstNode = (Node)nodes.Current;

    bool recognised = TagScanner.IsXMLTagFound(firstNode, "OPTION");
    Assert.IsTrue(recognised, "OPTION tag could not be identified");
}
/// <summary>
/// Inserts <paramref name="node"/> at the head of the list. The existing
/// elements are shifted one slot towards the end, after
/// <c>AdjustVectorCapacity</c> has been called if the backing array is full.
/// </summary>
/// <param name="node">The new first element.</param>
public virtual void Prepend(Node node)
{
    bool isFull = size == capacity;
    if (isFull)
    {
        AdjustVectorCapacity();
    }

    // Move elements [0, size) to [1, size + 1) to free slot 0.
    Array.Copy(nodeData, 0, nodeData, 1, size);
    size = size + 1;
    nodeData[0] = node;
}
/// <summary>
/// When <paramref name="currentNode"/> is a <c>Tag</c> and <c>IsXmlEndTag(tag)</c>
/// holds, records <paramref name="currentNode"/> as the end tag and sets
/// <c>endTagFound</c>.
/// </summary>
/// <param name="currentNode">The node being examined; ignored unless it is a <c>Tag</c>.</param>
private void DoEmptyXmlTagCheckOn(Node currentNode)
{
    if (!(currentNode is Tag))
    {
        return;
    }

    // NOTE(review): the empty-XML test is made on the member `tag`, not on
    // currentNode. Presumably intentional (testing currentNode would let any
    // self-closing child terminate its parent) - confirm before changing.
    if (!IsXmlEndTag(tag))
    {
        return;
    }

    endTag = (Tag)currentNode;
    endTagFound = true;
}
/// <summary>
/// Determines whether <paramref name="node"/> is a <c>Tag</c> whose text,
/// converted to upper case, begins with <paramref name="tagName"/>.
/// </summary>
/// <remarks>
/// Only the tag text is upper-cased; <paramref name="tagName"/> is compared as
/// given, so it must already be upper case to match. Casing uses the invariant
/// culture and the prefix test is ordinal, so the result does not depend on the
/// current thread culture.
/// </remarks>
/// <param name="node">The node to inspect; any node that is not a <c>Tag</c> yields <c>false</c>.</param>
/// <param name="tagName">The (upper-case) tag name expected at the start of the tag text.</param>
/// <returns><c>true</c> when the node is a tag starting with <paramref name="tagName"/>; otherwise <c>false</c>.</returns>
public static bool IsXMLTagFound(Node node, string tagName)
{
    if (!(node is Tag))
    {
        return false;
    }

    Tag tag = (Tag)node;
    string upperCasedText = tag.Text.ToUpperInvariant();

    // Ordinal prefix test: same intent as IndexOf(tagName) == 0, without
    // culture-sensitive matching or scanning the whole string.
    return upperCasedText.StartsWith(tagName, System.StringComparison.Ordinal);
}
/// <summary>
/// Checks that a <c>&gt;</c> character inside a quoted HREF value does not end
/// the tag: the tag found at offset 0 must expose the complete HREF value.
/// </summary>
public void IgnoreState()
{
    string markup = "<A \n" + "HREF=\"/a?b=c>d&e=f&g=h&i=http://localhost/Testing/Report1.html\">20020702 Report 1</A>";
    CreateParser(markup);

    Node found = Tag.Find(parser.Reader, markup, 0);
    Assert.IsTrue(found is Tag, "Node should be a tag");

    Tag anchor = (Tag)found;
    string resolvedHref = anchor["HREF"];
    AssertStringEquals(
        "Resolved Link",
        "/a?b=c>d&e=f&g=h&i=http://localhost/Testing/Report1.html",
        resolvedHref);
}
/// <summary>
/// Classifies <paramref name="currentNode"/>: an <c>EndTag</c> accepted by
/// <c>IsExpectedEndTag</c> is recorded as the end tag; anything else is added
/// to the child list and reported to the scanner.
/// </summary>
/// <param name="currentNode">The node just read.</param>
private void DoChildAndEndTagCheckOn(Node currentNode)
{
    EndTag candidate = currentNode as EndTag;
    if (candidate != null && IsExpectedEndTag(candidate))
    {
        endTagFound = true;
        endTag = candidate;
        return;
    }

    // Not the closing tag: keep it as a child and notify the scanner.
    nodeList.Add(currentNode);
    scanner.ChildNodeEncountered(currentNode);
}
/// <summary>
/// Asks the scanner to build the composite tag from the collected start tag,
/// end tag and children, then sets each child's <c>Parent</c> to the new tag.
/// </summary>
/// <returns>The composite tag created by the scanner.</returns>
private Tag CreateTag()
{
    TagData tagData = new TagData(
        tag.ElementBegin,
        endTag.ElementEnd,
        startingLineNumber,
        endingLineNumber,
        tag.Text,
        currLine,
        url,
        tag.EmptyXmlTag);
    CompositeTagData compositeData = new CompositeTagData(tag, endTag, nodeList);

    CompositeTag composite = (CompositeTag)scanner.CreateTag(tagData, compositeData);

    // Link every child back to its new parent.
    int index = 0;
    while (index < composite.ChildCount)
    {
        Node child = composite[index];
        child.Parent = composite;
        index++;
    }

    return composite;
}
/// <summary>
/// Parses a custom tag containing two self-closing children, followed by a
/// self-closing custom tag. Expects two top-level <c>CustomTag</c> nodes, the
/// first holding an <c>AnotherTag</c> and a <c>CustomTag</c> child, in that order.
/// </summary>
public void XmlTypeCompositeTags()
{
    CreateParser(
        "<Custom>" +
        "<Another name=\"subtag\"/>" +
        "<Custom />" +
        "</Custom>" +
        "<Custom/>");
    parser.AddScanner(new CustomScanner(this));
    parser.AddScanner(new AnotherScanner());
    ParseAndAssertNodeCount(2);

    AssertType("first node", typeof(CustomTag), this.node[0]);
    AssertType("second node", typeof(CustomTag), this.node[1]);

    CustomTag outer = (CustomTag)this.node[0];
    Node firstChild = outer[0];
    AssertType("first child", typeof(AnotherTag), firstChild);
    Node secondChild = outer[1];
    AssertType("second child", typeof(CustomTag), secondChild);
}
/// <summary>
/// Checks that an attribute written with spaces around <c>=</c>
/// (<c>yourParameter = "Kaarle"</c>) is parsed into the tag's attribute table,
/// and - when the first node is not a <c>LinkTag</c> - that the link text and
/// the closing <c>A</c> tag follow as separate nodes.
/// </summary>
public void ParseParameterSpace()
{
    Tag tag;
    EndTag etag;
    StringNode snode;
    Node node = null;
    string lin1 = "<A yourParameter = \"Kaarle\">Kaarle's homepage</A>";
    CreateParser(lin1);
    NodeIterator en = parser.GetEnumerator();
    System.Collections.Hashtable h;
    string a, nice;

    try
    {
        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        tag = (Tag)node;
        h = tag.Attributes;
        a = (string)h[Tag.TAGNAME];
        nice = (string)h["YOURPARAMETER"];

        // Expected value first, actual second, so failure output is labelled correctly.
        Assert.AreEqual("A", a, "Link tag (A)");
        Assert.AreEqual("Kaarle", nice, "yourParameter value");

        if (!(node is LinkTag))
        {
            // Presumably a link scanner, when active, absorbs the text and the
            // end tag into the LinkTag; otherwise they arrive as separate nodes.
            Assert.IsTrue(en.MoveNext());
            node = (Node)en.Current;
            snode = (StringNode)node;
            Assert.AreEqual("Kaarle's homepage", snode.Text, "Value of element");

            Assert.IsTrue(en.MoveNext());
            node = (Node)en.Current;
            etag = (EndTag)node;
            Assert.AreEqual("A", etag.Text, "endtag of link");
        }
    }
    catch (System.InvalidCastException)
    {
        // node always holds the element whose cast just failed.
        Assert.Fail("Bad class element = " + node.GetType().FullName);
    }
}
/// <summary>
/// Parses a small HTML document with a <c>TitleScanner</c> and an
/// <c>HtmlScanner</c> registered, expects a single <c>Html</c> node, and checks
/// that collecting <c>TitleTag</c> objects from it yields exactly one tag whose
/// title is "Some Title".
/// </summary>
public void Scan()
{
    CreateParser(
        "<html>" +
        " <head>" +
        " <title>Some Title</title>" +
        " </head>" +
        " <body>" +
        " Some data" +
        " </body>" +
        "</html>");
    parser.AddScanner(new TitleScanner(""));
    parser.AddScanner(new HtmlScanner());
    ParseAndAssertNodeCount(1);

    AssertType("html tag", typeof(Html), this.node[0]);
    Html root = (Html)this.node[0];

    // Gather every TitleTag below the root.
    NodeList titles = new NodeList();
    root.CollectInto(titles, typeof(TitleTag));
    Assert.AreEqual(1, titles.Size, "nodelist size");

    Node onlyMatch = titles[0];
    AssertType("expected title tag", typeof(TitleTag), onlyMatch);
    TitleTag titleTag = (TitleTag)onlyMatch;
    AssertStringEquals("title", "Some Title", titleTag.Title);
}
/// <summary>
/// Parses <c>&lt;Custom&gt;&lt;Hello&gt;&lt;/Custom&gt;</c> and checks that the
/// custom tag has a single plain <c>Tag</c> child rendered as
/// <c>&lt;HELLO&gt;</c>, along with the element positions of the start tag and
/// of the composite as a whole.
/// </summary>
public void CompositeTagWithTagChild()
{
    CreateParser("<Custom>" + "<Hello>" + "</Custom>");
    CustomTag customTag = ParseCustomTag(1);

    Assert.AreEqual(1, customTag.ChildCount, "child count");
    Assert.IsFalse(customTag.EmptyXmlTag, "custom tag should not be xml end tag");

    Assert.AreEqual(0, customTag.StartTag.ElementBegin, "starting loc");
    Assert.AreEqual(7, customTag.StartTag.ElementEnd, "ending loc");
    Assert.AreEqual(0, customTag.ElementBegin, "custom tag starting loc");
    Assert.AreEqual(23, customTag.ElementEnd, "custom tag ending loc");

    Node child = customTag[0];
    AssertType("child", typeof(Tag), child);
    // The cast is kept for its runtime type check; the result itself is not used.
    Tag tag = (Tag)child;
    AssertStringEquals("child html", "<HELLO>", child.ToHtml());
}
/// <summary>
/// Parses <c>&lt;Custom&gt;Hello&lt;/Custom&gt;</c> and checks that the custom
/// tag has a single <c>StringNode</c> child with the text "Hello", along with
/// the start tag positions and the start/end line numbers in the tag data.
/// </summary>
public void CompositeTagWithOneTextChild()
{
    CreateParser("<Custom>" + "Hello" + "</Custom>");
    CustomTag customTag = ParseCustomTag(1);

    Assert.AreEqual(1, customTag.ChildCount, "child count");
    Assert.IsFalse(customTag.EmptyXmlTag, "custom tag should not be xml end tag");

    Assert.AreEqual(0, customTag.StartTag.ElementBegin, "starting loc");
    Assert.AreEqual(7, customTag.StartTag.ElementEnd, "ending loc");
    Assert.AreEqual(1, customTag.tagData.StartLine, "starting line position");
    Assert.AreEqual(1, customTag.tagData.EndLine, "ending line position");

    Node child = customTag[0];
    AssertType("child", typeof(StringNode), child);
    // The cast is kept for its runtime type check; the result itself is not used.
    StringNode text = (StringNode)child;
    AssertStringEquals("child text", "Hello", child.ToPlainTextString());
}
/// <summary>
/// Reads elements from the reader until the end tag of the composite is found
/// or the reader returns no more elements, then builds the composite tag.
/// </summary>
/// <remarks>
/// Exits early with <c>CreateEndTagAndRepositionReader()</c> when
/// <c>ShouldCreateEndTagAndExit()</c> is true. If no end tag was recorded by
/// the time reading stops, <c>CreateCorrectionEndTagBefore</c> is called with
/// the position after the reader's last read position.
/// </remarks>
/// <returns>The tag produced by <c>CreateTag()</c>, or the result of the early exit.</returns>
public virtual Tag Scan()
{
    this.startingLineNumber = reader.LastLineNumber;

    if (ShouldCreateEndTagAndExit())
    {
        return CreateEndTagAndRepositionReader();
    }

    scanner.BeforeScanningStarts();

    // The starting tag itself may already terminate the composite.
    Node currentNode = tag;
    DoEmptyXmlTagCheckOn(currentNode);

    while (!endTagFound)
    {
        currentNode = reader.ReadElement(balance_quotes);
        if (currentNode == null)
        {
            // Nothing more to read.
            break;
        }

        currLine = reader.CurrentLine;

        if (currentNode is Tag)
        {
            DoForceCorrectionCheckOn((Tag)currentNode);
        }

        DoEmptyXmlTagCheckOn(currentNode);
        if (!endTagFound)
        {
            DoChildAndEndTagCheckOn(currentNode);
        }
    }

    if (endTag == null)
    {
        CreateCorrectionEndTagBefore(reader.LastReadPosition + 1);
    }

    this.endingLineNumber = reader.LastLineNumber;
    return CreateTag();
}
/// <summary>
/// Parses a custom tag that contains an <c>Another</c> tag wrapping the text
/// "Hello". Expects two top-level <c>CustomTag</c> nodes and checks that the
/// first one's first child is an <c>AnotherTag</c> with a single
/// <c>StringNode</c> child whose plain text is "Hello".
/// </summary>
public void CompositeTagWithNestedTag()
{
    CreateParser(
        "<Custom>" +
        "<Another>" +
        "Hello" +
        "</Another>" +
        "<Custom/>" +
        "</Custom>" +
        "<Custom/>");
    parser.AddScanner(new CustomScanner(this));
    parser.AddScanner(new AnotherScanner());
    ParseAndAssertNodeCount(2);

    AssertType("first node", typeof(CustomTag), this.node[0]);
    AssertType("second node", typeof(CustomTag), this.node[1]);

    CustomTag outer = (CustomTag)this.node[0];
    Node firstChild = outer[0];
    AssertType("first child", typeof(AnotherTag), firstChild);

    AnotherTag nested = (AnotherTag)firstChild;
    Assert.AreEqual(1, nested.ChildCount, "another tag children count");

    Node innermost = nested[0];
    AssertType("nested child", typeof(StringNode), innermost);
    StringNode hello = (StringNode)innermost;
    Assert.AreEqual("Hello", hello.ToPlainTextString(), "text");
}
/// <summary>
/// Parses a custom tag holding two custom tags, each with an unclosed
/// <c>another</c> tag inside. Expects a single root <c>CustomTag</c> with two
/// children, the first of which is a <c>CustomTag</c> with one
/// <c>AnotherTag</c> child.
/// </summary>
public void ComplexNesting()
{
    CreateParser(
        "<custom>" +
        "<custom>" +
        "<another>" +
        "</custom>" +
        "<custom>" +
        "<another>" +
        "</custom>" +
        "</custom>");
    parser.AddScanner(new CustomScanner(this));
    parser.AddScanner(new AnotherScanner(false));
    ParseAndAssertNodeCount(1);

    AssertType("root node", typeof(CustomTag), node[0]);
    CustomTag rootTag = (CustomTag)node[0];
    AssertNodeCount("child count", 2, rootTag.ChildrenAsNodeArray);

    Node firstChild = rootTag[0];
    AssertType("child", typeof(CustomTag), firstChild);
    CustomTag firstCustom = (CustomTag)firstChild;
    AssertNodeCount("grand child count", 1, firstCustom.ChildrenAsNodeArray);

    Node firstGrandchild = firstCustom[0];
    AssertType("grandchild", typeof(AnotherTag), firstGrandchild);
}
/// <summary>
/// Parses an <c>A</c> tag carrying an href, a valueless attribute and a quoted
/// attribute, followed by a paragraph. Checks the attribute table of the first
/// tag and then the sequence of nodes that follow it.
/// </summary>
public void ParseParameterA()
{
    Tag tag;
    EndTag etag;
    StringNode snode;
    Node node = null;
    string lin1 = "<A href=\"http://www.iki.fi/kaila\" myParameter yourParameter=\"Kaarle Kaaila\">Kaarle's homepage</A><p>Paragraph</p>";
    CreateParser(lin1);
    NodeIterator en = parser.GetEnumerator();
    System.Collections.Hashtable h;
    string a, href, myValue, nice;

    try
    {
        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        tag = (Tag)node;
        h = tag.Attributes;
        a = (string)h[Tag.TAGNAME];
        href = (string)h["HREF"];
        myValue = (string)h["MYPARAMETER"];
        nice = (string)h["YOURPARAMETER"];
        Assert.AreEqual("A", a, "Link tag (A)");
        Assert.AreEqual("http://www.iki.fi/kaila", href, "href value");
        Assert.AreEqual("", myValue, "myparameter value");
        Assert.AreEqual("Kaarle Kaaila", nice, "yourparameter value");

        if (!(node is LinkTag))
        {
            // Presumably a link scanner, when active, absorbs the text and the
            // end tag into the LinkTag; otherwise they arrive as separate nodes.
            Assert.IsTrue(en.MoveNext());
            node = (Node)en.Current;
            snode = (StringNode)node;
            Assert.AreEqual("Kaarle's homepage", snode.Text, "Value of element");

            Assert.IsTrue(en.MoveNext());
            node = (Node)en.Current;
            etag = (EndTag)node;
            Assert.AreEqual("A", etag.Text, "endtag of link");
        }

        // The paragraph that follows the link: <p>, text, </p>.
        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        tag = (Tag)node;
        Assert.AreEqual("p", tag.Text, "following paragraph begins");

        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        snode = (StringNode)node;
        Assert.AreEqual("Paragraph", snode.Text, "paragraph contents");

        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        etag = (EndTag)node;
        Assert.AreEqual("p", etag.Text, "paragraph endtag");
    }
    catch (System.InvalidCastException)
    {
        // node always holds the element whose cast just failed.
        Assert.Fail("Bad class element = " + node.GetType().FullName);
    }
}
/// <summary>
/// Called every time a child of the composite is found. Override it when
/// special children need to be stored separately; all children are collected
/// into a node list anyway. This implementation does nothing.
/// </summary>
/// <param name="node">The child node that was encountered.</param>
public virtual void ChildNodeEncountered(Node node) { }
/// <summary>
/// Parses a <c>G</c> tag carrying an href, a valueless attribute and a quoted
/// attribute, followed by a paragraph. Checks the attribute table of the first
/// tag and then that text, end tag and the paragraph's three nodes follow.
/// </summary>
public void ParseParameterG()
{
    Tag tag;
    EndTag etag;
    StringNode snode;
    Node node = null;
    string lin1 = "<G href=\"http://www.iki.fi/kaila\" myParameter yourParameter=\"Kaila\">Kaarle's homepage</G><p>Paragraph</p>";
    CreateParser(lin1);
    NodeIterator en = parser.GetEnumerator();
    System.Collections.Hashtable h;
    string a, href, myValue, nice;

    try
    {
        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        tag = (Tag)node;
        h = tag.Attributes;
        a = (string)h[Tag.TAGNAME];
        href = (string)h["HREF"];
        myValue = (string)h["MYPARAMETER"];
        nice = (string)h["YOURPARAMETER"];

        // Expected value first, actual second, so failure output is labelled correctly.
        Assert.AreEqual("G", a, "The tagname should be G");
        Assert.AreEqual("http://www.iki.fi/kaila", href, "Check the http address");
        Assert.AreEqual("", myValue, "myValue is empty");
        Assert.AreEqual("Kaila", nice, "The second parameter value");

        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        snode = (StringNode)node;
        // Previously a four-argument call that passed snode.Text as the message.
        Assert.AreEqual("Kaarle's homepage", snode.Text, "Value of element");

        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        etag = (EndTag)node;
        Assert.AreEqual("G", etag.Text, "Endtag is G");

        // The paragraph that follows: <p>, text, </p>.
        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        tag = (Tag)node;
        Assert.AreEqual("p", tag.Text, "Follow up by p-tag");

        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        snode = (StringNode)node;
        Assert.AreEqual("Paragraph", snode.Text, "Verify the paragraph text");

        Assert.IsTrue(en.MoveNext());
        node = (Node)en.Current;
        etag = (EndTag)node;
        Assert.AreEqual("p", etag.Text, "Still patragraph endtag");
    }
    catch (System.InvalidCastException)
    {
        // node always holds the element whose cast just failed.
        Assert.Fail("Bad class element = " + node.GetType().FullName);
    }
}
/// <summary>
/// When <paramref name="node"/> is the opening tag named
/// <paramref name="tagName"/>, reads the string nodes that follow it from
/// <paramref name="reader"/> and returns their text joined by single spaces.
/// </summary>
/// <remarks>
/// Reading stops at the first element that is not a <c>StringNode</c>. The
/// collected text is returned when that element is <c>null</c> or an end tag
/// whose text equals <paramref name="tagName"/>; any other element, or an end
/// tag with a different name, yields <c>null</c>. A <c>ParserException</c>
/// thrown by the inner handler is caught and wrapped again by the outer one.
/// </remarks>
/// <param name="node">The node expected to be the opening tag.</param>
/// <param name="tagName">The tag name to look for.</param>
/// <param name="reader">The reader that supplies the following elements.</param>
/// <returns>The collected text, or <c>null</c> when the tag was not found or not properly closed.</returns>
public static string ExtractXMLData(Node node, string tagName, NodeReader reader)
{
    try
    {
        if (!IsXMLTagFound(node, tagName))
        {
            return null;
        }

        string collected = "";
        try
        {
            StringNode textNode;
            do
            {
                node = reader.ReadElement();
                textNode = node as StringNode;
                if (textNode != null)
                {
                    if (collected.Length > 0)
                    {
                        collected += " ";
                    }
                    collected += textNode.Text;
                }
            }
            while (textNode != null);
        }
        catch (System.Exception e)
        {
            throw new ParserException(
                "HTMLTagScanner.extractXMLData() : error while trying to find xml tag", e);
        }

        // Here node is the first element that was not a string node.
        if (node == null)
        {
            return collected;
        }

        org.htmlparser.tags.EndTag closingTag = node as org.htmlparser.tags.EndTag;
        if (closingTag == null)
        {
            // Something other than text or an end tag interrupted the data.
            return null;
        }

        return closingTag.Text.Equals(tagName) ? collected : null;
    }
    catch (System.Exception e)
    {
        throw new ParserException(
            "HTMLTagScanner.extractXMLData() : Error occurred while trying to extract xml tag", e);
    }
}