/// <summary>
/// Builds the <see cref="LinkTag"/> for a scanned anchor element.
/// </summary>
/// <remarks>
/// The link is obtained from <c>ExtractLink</c> using the start tag and the URL being parsed.
/// If it begins with <c>mailto</c>, everything up to and including the first colon is removed
/// and the link is flagged as a mail link. If the (possibly shortened) link then begins with
/// <c>javascript:</c>, that prefix is removed and the link is flagged as a JavaScript link.
/// Prefixes are matched ordinally and case-sensitively.
/// </remarks>
/// <param name="tagData">Tag data; supplies the URL being parsed and is passed to the new tag.</param>
/// <param name="compositeTagData">Composite data; supplies the start tag and the children.</param>
/// <returns>A <see cref="LinkTag"/> whose <c>ThisScanner</c> is set to this scanner.</returns>
public override Tag CreateTag(TagData tagData, CompositeTagData compositeTagData)
{
    const string MailtoPrefix = "mailto";
    const string JavascriptPrefix = "javascript:";

    string link = ExtractLink(compositeTagData.StartTag, tagData.UrlBeingParsed);

    // Ordinal prefix test: only position 0 matters, and culture rules must not influence it.
    bool mailLink = link.StartsWith(MailtoPrefix, System.StringComparison.Ordinal);
    if (mailLink)
    {
        // Keep only what follows the first colon. When there is no colon, IndexOf
        // returns -1 and Substring(0) leaves the link unchanged.
        link = link.Substring(link.IndexOf(':') + 1);
    }

    bool javascriptLink = link.StartsWith(JavascriptPrefix, System.StringComparison.Ordinal);
    if (javascriptLink)
    {
        link = link.Substring(JavascriptPrefix.Length);
    }

    string accessKey = GetAccessKey(compositeTagData.StartTag);
    string myLinkText = compositeTagData.Children.ToString();

    LinkData linkData = new LinkData(link, myLinkText, accessKey, mailLink, javascriptLink);
    LinkTag linkTag = new LinkTag(tagData, compositeTagData, linkData);
    linkTag.ThisScanner = this;
    return linkTag;
}
/// <summary>
/// Parses an anchor containing an image followed by text and verifies that the
/// resulting link tag holds exactly an image tag and a string node, in that order.
/// </summary>
public void Scan()
{
    string html = "<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>";
    CreateParser(html, "http://www.yahoo.com");

    // Register the link scanner and the image scanner it creates.
    LinkScanner scanner = new LinkScanner("-l");
    parser.AddScanner(scanner);
    parser.AddScanner(scanner.CreateImageScanner("-i"));

    ParseAndAssertNodeCount(1);
    Assert.IsTrue(node[0] is LinkTag, "Node should be a link node");
    LinkTag anchor = (LinkTag)node[0];

    // Copy the anchor's children into an array so they can be checked by position.
    Node[] children = new AbstractNode[10];
    int count = 0;
    foreach (Node child in anchor)
    {
        children[count] = child;
        count++;
    }

    Assert.AreEqual(2, count, "Number of data nodes");
    Assert.IsTrue(children[0] is ImageTag, "First data node should be an Image Node");
    Assert.IsTrue(children[1] is StringNode, "Second data node shouls be a String Node");

    // Check the contents of each child.
    ImageTag image = (ImageTag)children[0];
    Assert.AreEqual("http://www.yahoo.com/abcd.jpg", image.ImageURL, "Image URL");

    StringNode text = (StringNode)children[1];
    Assert.AreEqual("Hello World", text.Text, "String Contents");
}
/// <summary>
/// Parses an anchor whose nested image tag is never closed with '&gt;' and verifies
/// that the link still contains a single image tag with the expected resolved URL.
/// </summary>
public void BadImageInLinkBug()
{
    string html =
        "<a href=\"registration.asp?EventID=1272\"><img border=\"0\" src=\"\\images\\register.gif\"</a>";
    string pageUrl = "http://www.fedpage.com/Event.asp?EventID=1272";

    CreateParser(html, pageUrl);
    parser.RegisterScanners();
    ParseAndAssertNodeCount(1);

    Assert.IsTrue(node[0] is LinkTag, "Node should be a HTMLLinkTag");
    LinkTag anchor = (LinkTag)node[0];

    // Gather the nodes nested inside the link.
    Node[] nested = new AbstractNode[10];
    int nestedCount = 0;
    foreach (Node child in anchor)
    {
        nested[nestedCount] = child;
        nestedCount++;
    }

    Assert.AreEqual(1, nestedCount, "Number of contained internal nodes");
    Assert.IsTrue(nested[0] is ImageTag);

    ImageTag image = (ImageTag)nested[0];
    Assert.AreEqual("http://www.fedpage.com/images/register.gif", image.ImageURL, "Image Tag Location");
}
/// <summary>
/// Verifies that a relative href is resolved against the directory of a base URL
/// whose last path segment is a non-HTML page (<c>some.asp</c>).
/// </summary>
public void RelativeLinkNotHTMLBug()
{
    string html = "<A HREF=\"newpage.html\">New Page</A>";
    string pageUrl = "http://www.mysite.com/books/some.asp";

    CreateParser(html, pageUrl);
    parser.AddScanner(new LinkScanner("-l"));
    ParseAndAssertNodeCount(1);

    Assert.IsTrue(node[0] is LinkTag, "Node should be a HTMLLinkTag");
    LinkTag anchor = (LinkTag)node[0];

    string expectedLink = "http://www.mysite.com/books/newpage.html";
    Assert.AreEqual(expectedLink, anchor.Link, "Link");
}
/// <summary>
/// Verifies that an href which merely starts with the letters "ftp" (<c>ftp.html</c>)
/// is not reported as an FTP link.
/// </summary>
public void NotFTPLink()
{
    string html = "<A HREF=\"ftp.html\">my ftp</A>";

    CreateParser(html, "http://www.cj.com/");
    parser.AddScanner(new LinkScanner("-l"));
    ParseAndAssertNodeCount(1);

    Assert.IsTrue(node[0] is LinkTag, "Node should be a HTMLLinkTag");
    LinkTag anchor = (LinkTag)node[0];

    Assert.AreEqual("my ftp", anchor.ToPlainTextString(), "Link Plain Text");
    Assert.IsFalse(anchor.FTPLink, "Link is not a FTP site");
}
/// <summary>
/// Verifies that an href which merely starts with the word "javascript"
/// (<c>javascript_not.html</c>) is not reported as a JavaScript link.
/// </summary>
public void NotJavascriptLink()
{
    string html = "<A HREF=\"javascript_not.html\">say hello</A>";

    CreateParser(html, "http://www.cj.com/");
    parser.AddScanner(new LinkScanner("-l"));
    ParseAndAssertNodeCount(1);

    Assert.IsTrue(node[0] is LinkTag, "Node should be a HTMLLinkTag");
    LinkTag anchor = (LinkTag)node[0];

    Assert.AreEqual("say hello", anchor.ToPlainTextString(), "Link Plain Text");
    Assert.IsFalse(anchor.JavascriptLink, "Link is not a Javascript command");
}
/// <summary>
/// Verifies that an anchor with a <c>mailto:</c> href is reported as a mail link and
/// that its plain text equals the anchor's visible text.
/// </summary>
public void MailtoLink()
{
    // A single placeholder address is used for the href, the anchor text and the
    // expected value, so the fixture and the assertion agree with each other.
    const string address = "user@example.com";
    string html = "<A HREF=\"mailto:" + address + "\">" + address + "</A>";

    CreateParser(html, "http://www.cj.com/");
    parser.AddScanner(new LinkScanner("-l"));
    ParseAndAssertNodeCount(1);

    Assert.IsTrue(node[0] is LinkTag, "Node should be a HTMLLinkTag");
    LinkTag linkTag = (LinkTag)node[0];

    Assert.AreEqual(address, linkTag.ToPlainTextString(), "Link Plain Text");
    Assert.IsTrue(linkTag.MailLink, "Link is a mail link");
}
/// <summary>
/// Verifies that an href starting with <c>../</c> is resolved against the base URL
/// <c>http://www.yahoo.com/ghi</c> to the expected absolute link.
/// </summary>
public void RelativeLinkScan3()
{
    string html = "<A HREF=\"../abc/def/mytest.html\"> Hello World</A>";

    CreateParser(html, "http://www.yahoo.com/ghi");

    // Only the link scanner is needed for this document.
    parser.AddScanner(new LinkScanner("-l"));
    ParseAndAssertNodeCount(1);

    Assert.IsTrue(node[0] is LinkTag, "Node identified should be HTMLLinkTag");
    LinkTag anchor = (LinkTag)node[0];

    string expectedLink = "http://www.yahoo.com/abc/def/mytest.html";
    AssertStringEquals("Expected Link", expectedLink, anchor.Link);
}
/// <summary>
/// Verifies that an unquoted href containing an '=' character inside its query string
/// is extracted in full, along with the link text.
/// </summary>
public void LinkContainsEqualTo()
{
    string html =
        "<a class=rlbA href=/news/866201.asp?0sl=-"
        + "32>Shoe bomber handed life sentence</a>";

    CreateParser(html);
    parser.RegisterScanners();
    ParseAndAssertNodeCount(1);

    AssertType("node type", typeof(LinkTag), node[0]);
    LinkTag anchor = (LinkTag)node[0];

    AssertStringEquals("link text", "Shoe bomber handed life sentence", anchor.LinkText);
    AssertStringEquals("link url", "/news/866201.asp?0sl=-32", anchor.Link);
}
/// <summary>
/// Verifies that single quotes appearing in the link text are preserved in
/// <c>LinkText</c>.
/// </summary>
public void ExtractLinkInvertedCommasBug2()
{
    string html =
        "<a href=\"http://cbc.ca/artsCanada/stories/greatnorth271202\" class=\"lgblacku\">Vancouver schools plan 'Great Northern Way'</a>";

    CreateParser(html);
    parser.AddScanner(new LinkScanner("-l"));
    ParseAndAssertNodeCount(1);

    Assert.IsTrue(node[0] is LinkTag, "The node should be a link tag");
    LinkTag anchor = (LinkTag)node[0];

    string expectedText = "Vancouver schools plan 'Great Northern Way'";
    AssertStringEquals("Extracted Text", expectedText, anchor.LinkText);
}
/// <summary>
/// Verifies that '&lt;' characters inside the link text are kept as text rather
/// than being treated as tag openers.
/// </summary>
public void TagSymbolsInLinkText()
{
    string html =
        "<a href=\"/cataclysm/Langy-AnEmpireReborn-Ch2.shtml#story\""
        + "><< An Empire Reborn: Chapter 2 <<</a>";

    CreateParser(html);
    parser.RegisterScanners();
    ParseAndAssertNodeCount(1);

    AssertType("node", typeof(LinkTag), node[0]);
    LinkTag anchor = (LinkTag)node[0];

    Assert.AreEqual("<< An Empire Reborn: Chapter 2 <<", anchor.LinkText, "link text");
}
/// <summary>
/// Verifies that a quoted href containing a space is returned unchanged, and that the
/// link text is extracted even though the closing tag uses different letter case.
/// </summary>
public void LinkSpacesBug()
{
    string html =
        "<a href=\"http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph\">Click Here</A>";

    CreateParser(html);
    parser.AddScanner(new LinkScanner("-l"));
    ParseAndAssertNodeCount(1);

    Assert.IsTrue(node[0] is LinkTag, "The node should be a link tag");
    LinkTag anchor = (LinkTag)node[0];

    string expectedUrl = "http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph";
    Assert.AreEqual(expectedUrl, anchor.Link, "Link URL of link tag");
    Assert.AreEqual("Click Here", anchor.LinkText, "Link Text of link tag");
}
/// <summary>
/// Parses an anchor that wraps an image and empty font/bold markup, then verifies the
/// link URL, the (empty) link text, and the type and content of each of the five
/// nodes contained in the link.
/// </summary>
public void LinkDataContents()
{
    CreateParser(
        "<a href=\"http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689\" target=\"_new\"><img src=\"http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif\" width=468 height=60 border=\"0\" alt=\"See Signs in Theaters 8-2 - Starring Mel Gibson\" align=><font face=\"verdana,arial,helvetica\" SIZE=\"1\"><b></b></font></a>",
        "http://transfer.go.com");

    // Register the link scanner and the image scanner it creates.
    LinkScanner linkScanner = new LinkScanner("-l");
    parser.AddScanner(linkScanner);
    parser.AddScanner(linkScanner.CreateImageScanner("-i"));

    ParseAndAssertNodeCount(1);
    Assert.IsTrue(node[0] is LinkTag, "Node 0 should be a link tag");
    LinkTag linkTag = (LinkTag)node[0];

    Assert.AreEqual(
        "http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689",
        linkTag.Link,
        "Link URL");
    Assert.AreEqual("", linkTag.LinkText, "Link Text");

    // Copy the contained nodes into an array so they can be checked by position.
    Node[] containedNodes = new AbstractNode[10];
    int i = 0;
    foreach (Node nestedNode in linkTag)
    {
        containedNodes[i++] = nestedNode;
    }

    Assert.AreEqual(5, i, "There should be 5 contained nodes in the link tag");

    // Node 0: the image and its attributes.
    Assert.IsTrue(containedNodes[0] is ImageTag, "First contained node should be an image tag");
    ImageTag imageTag = (ImageTag)containedNodes[0];
    Assert.AreEqual(
        "http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif",
        imageTag.ImageURL,
        "Image Location");
    Assert.AreEqual("60", imageTag["HEIGHT"], "Image Height");
    Assert.AreEqual("468", imageTag["WIDTH"], "Image Width");
    Assert.AreEqual("0", imageTag["BORDER"], "Image Border");
    Assert.AreEqual("See Signs in Theaters 8-2 - Starring Mel Gibson", imageTag["ALT"], "Image Alt");

    // Node 1: opening font tag. The message literal is kept on one line; a regular
    // string literal cannot contain a raw line break.
    Assert.IsTrue(containedNodes[1] is Tag, "Second contained node should be Tag");
    Tag tag1 = (Tag)containedNodes[1];
    Assert.AreEqual("font face=\"verdana,arial,helvetica\" SIZE=\"1\"", tag1.Text, "Tag Contents");

    // Node 2: opening bold tag.
    Assert.IsTrue(containedNodes[2] is Tag, "Third contained node should be Tag");
    Tag tag2 = (Tag)containedNodes[2];
    Assert.AreEqual("b", tag2.Text, "Tag Contents");

    // Nodes 3 and 4: the closing bold and font tags.
    Assert.IsTrue(containedNodes[3] is EndTag, "Fourth contained node should be HTMLEndTag");
    EndTag endTag1 = (EndTag)containedNodes[3];
    Assert.AreEqual("b", endTag1.Text, "Fourth Tag contents");

    Assert.IsTrue(containedNodes[4] is EndTag, "Fifth contained node should be HTMLEndTag");
    EndTag endTag2 = (EndTag)containedNodes[4];
    Assert.AreEqual("font", endTag2.Text, "Fifth Tag contents");
}
/// <summary>
/// Parses an anchor that is never closed and verifies that a link tag is still
/// produced and that its <c>ToString()</c> result is not null.
/// </summary>
public void BrokenLink()
{
    string html =
        "<a href=\"faq.html\">"
        + "<br>\n"
        + "<img src=\"images/46revues.gif\" "
        + "width=\"100\" "
        + "height=\"46\" "
        + "border=\"0\" "
        + "alt=\"Rejoignez revues.org!\" "
        + "align=\"middle\">";

    CreateParser(html, "http://www.yahoo.com");

    // Only the link scanner is registered; no image scanner is added here.
    parser.AddScanner(new LinkScanner("-l"));
    ParseAndAssertNodeCount(1);

    Assert.IsTrue(node[0] is LinkTag, "Node 0 should be a link tag");
    LinkTag anchor = (LinkTag)node[0];
    Assert.IsNotNull(anchor.ToString());
}
/// <summary>
/// Verifies that an href whose query string contains '&gt;' and a nested URL is
/// resolved against the base URL without being truncated, and that the link text
/// is extracted.
/// </summary>
public void QueryLink()
{
    string html =
        "<A \n"
        + "HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>";

    CreateParser(html, "http://transfer.go.com");

    // Use the parser's standard scanner registration.
    parser.RegisterScanners();
    ParseAndAssertNodeCount(1);

    Assert.IsTrue(node[0] is LinkTag, "Node 1 should be a link tag");
    LinkTag anchor = (LinkTag)node[0];

    string expectedLink =
        "http://transfer.go.com/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html";
    AssertStringEquals("Resolved Link", expectedLink, anchor.Link);
    Assert.AreEqual("20020702 Report 1", anchor.LinkText, "Resolved Link Text");
}
/// <summary>
/// Parses table cells in which the first anchor is never closed and verifies that two
/// link tags are extracted, the first with the expected resolved link and empty text.
/// </summary>
public void ErroneousLinkBugFromYahoo2()
{
    string html =
        "<td>"
        + "<a href=s/8741>"
        + "<img src=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" height=16 width=16 border=0>"
        + "</td>"
        + "<td nowrap> \n"
        + "<a href=s/7509><b>Yahoo! Movies</b></a>"
        + "</td>";

    CreateParser(html, "http://www.yahoo.com");
    parser.RegisterScanners();

    Node[] links = parser.ExtractAllNodesThatAre(typeof(LinkTag));
    Assert.AreEqual(2, links.Length, "number of links");

    LinkTag firstLink = (LinkTag)links[0];
    AssertStringEquals("Link", "http://www.yahoo.com/s/8741", firstLink.Link);

    // The unclosed anchor holds only an image, so its text is expected to be empty.
    AssertStringEquals("Link Text", "", firstLink.LinkText);
}
/// <summary>
/// Parses an unclosed anchor followed directly by a second anchor and verifies that
/// both become link tags, and that the first has the expected link and text.
/// </summary>
public void ErroneousLinkBugFromYahoo()
{
    string html =
        "<a href=s/8741>"
        + "<img src=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" "
        + "height=16 "
        + "width=16 "
        + "border=0>"
        + "This is a test\n"
        + "<a href=s/7509>"
        + "<b>Yahoo! Movies</b>"
        + "</a>";

    CreateParser(html, "http://www.yahoo.com");
    parser.RegisterScanners();
    ParseAndAssertNodeCount(2);

    // Both top-level nodes are expected to be link tags.
    Assert.IsTrue(node[0] is LinkTag, "First node should be a HTMLLinkTag");
    Assert.IsTrue(node[1] is LinkTag, "Second node should be a HTMLLinkTag");

    LinkTag firstLink = (LinkTag)node[0];
    Assert.AreEqual("http://www.yahoo.com/s/8741", firstLink.Link, "Link");

    // The expected text ends in "\r\n" although the input line ends in "\n".
    Assert.AreEqual("This is a test\r\n", firstLink.LinkText, "Link Text");
}
/// <summary>
/// Parses an anchor whose tag and href value are split across several lines and
/// verifies that the href is rejoined into a single URL, and that the link text and
/// the trailing string node are as expected.
/// </summary>
public void MultipleLineBug()
{
    string html =
        "<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"
        + "href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"
        + "nical.html\"> Journalism 3.0</a> by Rajesh Jain";

    CreateParser(html);

    // NOTE(review): this looks like a static setting on Parser and it is not restored
    // afterwards - confirm that other tests do not depend on a different value.
    Parser.LineSeparator = "\r\n";
    parser.AddScanner(new LinkScanner("-l"));
    ParseAndAssertNodeCount(8);

    Assert.IsTrue(node[6] is LinkTag, "Seventh node should be a link tag");
    LinkTag anchor = (LinkTag)node[6];

    string expectedUrl = "http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/technical.html";
    AssertStringEquals("Link URL of link tag", expectedUrl, anchor.Link);
    Assert.AreEqual(" Journalism 3.0", anchor.LinkText, "Link Text of link tag");

    Assert.IsTrue(node[7] is StringNode, "Eight node should be a string node");
    StringNode trailingText = (StringNode)node[7];
    Assert.AreEqual(" by Rajesh Jain", trailingText.Text, "String node contents");
}
/// <summary>
/// Hook that receives a <see cref="LinkTag"/>. This implementation does nothing;
/// the method is virtual so that derived classes can override it.
/// </summary>
/// <param name="linkTag">The link tag handed to this method.</param>
public virtual void VisitLinkTag(LinkTag linkTag) { }