/// <summary> Regression check: a link whose nested image tag lacks its closing bracket
        /// must still parse to a single LinkTag that contains exactly one ImageTag.
        /// </summary>
        public void BadImageInLinkBug()
        {
            const string brokenHtml =
                "<a href=\"registration.asp?EventID=1272\"><img border=\"0\" src=\"\\images\\register.gif\"</a>";
            const string pageUrl = "http://www.fedpage.com/Event.asp?EventID=1272";

            CreateParser(brokenHtml, pageUrl);
            parser.RegisterScanners();
            ParseAndAssertNodeCount(1);

            Assert.IsTrue(node[0] is LinkTag, "Node should be a HTMLLinkTag");
            LinkTag anchor = (LinkTag)node[0];

            // Collect everything nested inside the link so it can be counted and inspected.
            Node[] children   = new AbstractNode[10];
            int    childCount = 0;

            foreach (Node child in anchor)
            {
                children[childCount] = child;
                childCount++;
            }

            Assert.AreEqual(1, childCount, "Number of contained internal nodes");
            Assert.IsTrue(children[0] is ImageTag);

            // The backslash-separated source path is expected to resolve against the page URL.
            ImageTag picture = (ImageTag)children[0];
            Assert.AreEqual("http://www.fedpage.com/images/register.gif", picture.ImageURL, "Image Tag Location");
        }
        /// <summary> Parses a link that wraps an image followed by text and verifies that the
        /// resulting LinkTag holds an ImageTag and a StringNode, in that order.
        /// </summary>
        public void Scan()
        {
            const string html    = "<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>";
            const string baseUrl = "http://www.yahoo.com";

            CreateParser(html, baseUrl);

            // Register the link scanner together with the image scanner it creates.
            LinkScanner anchorScanner = new LinkScanner("-l");
            parser.AddScanner(anchorScanner);
            parser.AddScanner(anchorScanner.CreateImageScanner("-i"));

            ParseAndAssertNodeCount(1);
            Assert.IsTrue(node[0] is LinkTag, "Node should be a link node");
            LinkTag anchor = (LinkTag)node[0];

            // Gather the nodes nested in the link for cross-checking.
            Node[] children   = new AbstractNode[10];
            int    childCount = 0;

            foreach (Node child in anchor)
            {
                children[childCount] = child;
                childCount++;
            }

            Assert.AreEqual(2, childCount, "Number of data nodes");
            Assert.IsTrue(children[0] is ImageTag, "First data node should be an Image Node");
            Assert.IsTrue(children[1] is StringNode, "Second data node shouls be a String Node");

            // Verify the payload of each nested node.
            ImageTag picture = (ImageTag)children[0];
            Assert.AreEqual("http://www.yahoo.com/abcd.jpg", picture.ImageURL, "Image URL");

            StringNode caption = (StringNode)children[1];
            Assert.AreEqual("Hello World", caption.Text, "String Contents");
        }
        /// <summary> Builds a copy of the current line with a closing anchor tag ("&lt;/A&gt;")
        /// placed immediately before the position at which the given node begins.
        /// </summary>
        /// <param name="node">Node whose ElementBegin marks the insertion offset
        /// </param>
        /// <param name="currentLine">Line of text into which the end tag is inserted
        /// </param>
        /// <returns>The line with "&lt;/A&gt;" inserted at node.ElementBegin
        /// </returns>
        public virtual string InsertEndTagBeforeNode(AbstractNode node, string currentLine)
        {
            int splitAt = node.ElementBegin;

            string head = currentLine.Substring(0, splitAt);
            string tail = currentLine.Substring(splitAt);

            return(head + "</A>" + tail);
        }
        /// <summary> Parses a link that wraps an image and an empty font/bold pair, then verifies
        /// the link URL, the empty link text and each of the five nodes nested in the link.
        /// </summary>
        public void LinkDataContents()
        {
            CreateParser(
                "<a href=\"http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689\" target=\"_new\"><img src=\"http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif\" width=468 height=60 border=\"0\" alt=\"See Signs in Theaters 8-2 - Starring Mel Gibson\" align=><font face=\"verdana,arial,helvetica\" SIZE=\"1\"><b></b></font></a>",
                "http://transfer.go.com");

            // Register the link scanner together with the image scanner it creates.
            LinkScanner anchorScanner = new LinkScanner("-l");
            parser.AddScanner(anchorScanner);
            parser.AddScanner(anchorScanner.CreateImageScanner("-i"));

            ParseAndAssertNodeCount(1);
            Assert.IsTrue(node[0] is LinkTag, "Node 0 should be a link tag");
            LinkTag anchor = (LinkTag)node[0];

            // Link-level properties.
            Assert.AreEqual(
                "http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689",
                anchor.Link,
                "Link URL");
            Assert.AreEqual("", anchor.LinkText, "Link Text");

            // Gather the nodes nested in the link.
            Node[] children   = new AbstractNode[10];
            int    childCount = 0;

            foreach (Node child in anchor)
            {
                children[childCount] = child;
                childCount++;
            }

            Assert.AreEqual(5, childCount, "There should be 5 contained nodes in the link tag");

            // Node 0: the image and its attributes.
            Assert.IsTrue(children[0] is ImageTag, "First contained node should be an image tag");
            ImageTag banner = (ImageTag)children[0];
            Assert.AreEqual(
                "http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif",
                banner.ImageURL,
                "Image Location");
            Assert.AreEqual("60", banner["HEIGHT"], "Image Height");
            Assert.AreEqual("468", banner["WIDTH"], "Image Width");
            Assert.AreEqual("0", banner["BORDER"], "Image Border");
            Assert.AreEqual("See Signs in Theaters 8-2 - Starring Mel Gibson", banner["ALT"], "Image Alt");

            // Node 1: opening font tag.
            Assert.IsTrue(children[1] is Tag, "Second contained node should be Tag");
            Tag fontOpen = (Tag)children[1];
            Assert.AreEqual("font face=\"verdana,arial,helvetica\" SIZE=\"1\"", fontOpen.Text, "Tag Contents");

            // Node 2: opening bold tag.
            Assert.IsTrue(children[2] is Tag, "Third contained node should be Tag");
            Tag boldOpen = (Tag)children[2];
            Assert.AreEqual("b", boldOpen.Text, "Tag Contents");

            // Node 3: closing bold tag.
            Assert.IsTrue(children[3] is EndTag, "Fourth contained node should be HTMLEndTag");
            EndTag boldClose = (EndTag)children[3];
            Assert.AreEqual("b", boldClose.Text, "Fourth Tag contents");

            // Node 4: closing font tag.
            Assert.IsTrue(children[4] is EndTag, "Fifth contained node should be HTMLEndTag");
            EndTag fontClose = (EndTag)children[4];
            Assert.AreEqual("font", fontClose.Text, "Fifth Tag contents");
        }
// Example #5
// 0
        /// <summary> Scan this tag using the supplied scanners and attempt identification.
        /// </summary>
        /// <param name="scanners">Lookup of scanners, keyed by the first word of the tag contents
        /// </param>
        /// <param name="url">URL at which HTML page is located
        /// </param>
        /// <param name="reader">The NodeReader that is to be used for reading the url
        /// </param>
        /// <returns>This tag when its contents are empty, no scanner is registered for its first
        /// word, or the scanner rejects it; otherwise whatever the matching scanner's
        /// CreateScannedNode returns
        /// </returns>
        /// <exception cref="ParserException">Wraps any exception raised while looking up or
        /// running the scanner
        /// </exception>
        public virtual AbstractNode Scan(IDictionary scanners, string url, NodeReader reader)
        {
            if (tagContents.Length == 0)
            {
                return(this);
            }
            try
            {
                string contents = tagContents.ToString();
                // The first word of the tag is the key under which its scanner is registered.
                TagScanner scanner = (TagScanner)scanners[ExtractWord(contents)];

                // Deep check: the scanner must also accept the full tag contents.
                if (scanner == null || !scanner.Evaluate(contents, reader.PreviousOpenScanner))
                {
                    return(this);
                }

                // Make this scanner the reader's "previous open scanner" while it runs, and
                // always put the old one back, even when the scanner throws, so a failed scan
                // does not leave the reader pointing at the wrong scanner.
                TagScanner save = reader.PreviousOpenScanner;
                reader.PreviousOpenScanner = scanner;
                try
                {
                    return(scanner.CreateScannedNode(this, url, reader, tagLine));
                }
                finally
                {
                    reader.PreviousOpenScanner = save;
                }
            }
            catch (System.Exception e)
            {
                string errorMsg = (tagContents != null) ? tagContents.ToString() : "null";

                throw new ParserException(
                          "Tag.scan() : Error while scanning tag, tag contents = " + errorMsg + ", tagLine = " + tagLine, e);
            }
        }
 /// <summary> Copies the first <c>size</c> entries of the backing <c>nodeData</c> storage
 /// into a newly allocated array.
 /// </summary>
 /// <returns>A new array of length <c>size</c> holding the copied nodes
 /// </returns>
 public virtual Node[] ToNodeArray()
 {
     Node[] snapshot = new AbstractNode[size];

     // Both source and destination start at index 0, so the three-argument overload suffices.
     Array.Copy(nodeData, snapshot, size);
     return(snapshot);
 }