Example #1
0
        /// <summary>
        /// Parses the "mshome.html" sample through the DOM parser obtained from
        /// ParserFactory and checks selected nodes and attributes of the resulting tree.
        /// </summary>
        public void TestParseHtml()
        {
            // The sample path is expanded to a full path before being handed to the parser.
            string htmlPath = Path.GetFullPath(TestDataSample.GetHtmlPath("mshome.html"));
            ParserContext parserContext = new ParserContext(htmlPath);
            IDomParser domParser = (IDomParser)ParserFactory.CreateDom(parserContext);
            ToxyDom dom = domParser.Parse();

            // Query for every <meta> node.
            List<ToxyNode> metaNodes = dom.Root.SelectNodes("//meta");
            Assert.AreEqual(7, metaNodes.Count);

            // Query for the anchor node; it is expected to carry a single href attribute.
            ToxyNode anchor = dom.Root.SingleSelect("//a");
            Assert.AreEqual(1, anchor.Attributes.Count);
            Assert.AreEqual("href", anchor.Attributes[0].Name);
            Assert.AreEqual("http://www.microsoft.com/en/us/default.aspx?redir=true", anchor.Attributes[0].Value);

            // Positional navigation: Root -> first child -> first child holds both
            // the title node (index 0) and the inspected meta node (index 7).
            ToxyNode container = dom.Root.ChildrenNodes[0].ChildrenNodes[0];

            ToxyNode title = container.ChildrenNodes[0];
            Assert.AreEqual("title", title.Name);
            Assert.AreEqual("Microsoft Corporation", title.ChildrenNodes[0].InnerText);

            ToxyNode meta = container.ChildrenNodes[7];
            Assert.AreEqual("meta", meta.Name);
            Assert.AreEqual(3, meta.Attributes.Count);
            Assert.AreEqual("name", meta.Attributes[0].Name);
            Assert.AreEqual("SearchDescription", meta.Attributes[0].Value);
            Assert.AreEqual("scheme", meta.Attributes[2].Name);
            Assert.AreEqual(string.Empty, meta.Attributes[2].Value);
        }
Example #2
0
        /// <summary>
        /// Recursively converts every child of <paramref name="ele"/> and appends the
        /// converted nodes, in document order, to <paramref name="tnode"/>.
        /// </summary>
        /// <param name="tnode">Node that receives the converted children.</param>
        /// <param name="ele">XML node whose children are walked.</param>
        void AppendChildren(ToxyNode tnode, XmlNode ele)
        {
            // A node without children yields an empty enumeration, so the loop body
            // never runs and the recursion ends here.
            foreach (XmlNode xmlChild in ele.ChildNodes)
            {
                ToxyNode toxyChild = ConvertToToxyNode(xmlChild);
                tnode.ChildrenNodes.Add(toxyChild);
                AppendChildren(toxyChild, xmlChild);
            }
        }
Example #3
0
        /// <summary>
        /// Walks the children of <paramref name="ele"/>, converting each one and
        /// attaching it (together with its own descendants) under <paramref name="tnode"/>.
        /// </summary>
        /// <param name="tnode">Parent node the converted children are added to.</param>
        /// <param name="ele">Source XML node.</param>
        void AppendChildren(ToxyNode tnode, XmlNode ele)
        {
            if (ele.ChildNodes.Count > 0)
            {
                foreach (XmlNode sourceChild in ele.ChildNodes)
                {
                    ToxyNode convertedChild = ConvertToToxyNode(sourceChild);

                    // Attach first, then descend into the child's own children.
                    tnode.ChildrenNodes.Add(convertedChild);
                    AppendChildren(convertedChild, sourceChild);
                }
            }
        }
Example #4
0
 /// <summary>
 /// Builds a ToxyNode from a single XML node. A node named "#text" receives its
 /// inner text; any other node receives a copy of its attributes, if it has any.
 /// Children are not processed here.
 /// </summary>
 /// <param name="ele">XML node to convert.</param>
 /// <returns>The newly created node.</returns>
 ToxyNode ConvertToToxyNode(XmlNode ele)
 {
     ToxyNode result = new ToxyNode();
     result.Name = ele.Name;

     bool isTextNode = ele.Name == "#text";
     if (isTextNode)
     {
         result.Text = ele.InnerText;
     }
     else if (ele.Attributes != null)
     {
         foreach (XmlAttribute attribute in ele.Attributes)
         {
             result.Attributes.Add(new ToxyAttribute(attribute.Name, attribute.Value));
         }
     }

     return result;
 }
Example #5
0
        /// <summary>
        /// Loads the XML file at Context.Path and converts it into a ToxyDom.
        /// </summary>
        /// <returns>
        /// A ToxyDom whose Root is built from the document element via
        /// ConvertToToxyNode and populated by AppendChildren.
        /// </returns>
        /// <exception cref="FileNotFoundException">No file exists at Context.Path.</exception>
        public ToxyDom Parse()
        {
            if (!File.Exists(Context.Path))
            {
                string message = "File " + Context.Path + " is not found";
                throw new FileNotFoundException(message);
            }

            XmlDocument xml = new XmlDocument();
            xml.Load(Context.Path);

            ToxyNode root = ConvertToToxyNode(xml.DocumentElement);

            ToxyDom result = new ToxyDom();
            result.Root = root;

            // Fill in the descendants once the root has been attached to the DOM.
            AppendChildren(root, xml.DocumentElement);
            return result;
        }
Example #6
0
        /// <summary>
        /// Creates a ToxyNode for one XML node: the name is always copied, "#text"
        /// nodes additionally get their inner text, and all other nodes get their
        /// attributes copied when an attribute collection is present.
        /// </summary>
        /// <param name="ele">XML node to convert; its children are not visited.</param>
        /// <returns>The converted node.</returns>
        ToxyNode ConvertToToxyNode(XmlNode ele)
        {
            ToxyNode converted = new ToxyNode { Name = ele.Name };

            if (ele.Name != "#text")
            {
                XmlAttributeCollection attributes = ele.Attributes;
                if (attributes == null)
                {
                    return converted;
                }

                foreach (XmlAttribute attribute in attributes)
                {
                    converted.Attributes.Add(new ToxyAttribute(attribute.Name, attribute.Value));
                }

                return converted;
            }

            // Text nodes carry only their text.
            converted.Text = ele.InnerText;
            return converted;
        }
Example #7
0
 /// <summary>
 /// Recursively adds one tree node under <paramref name="node"/> for every child of
 /// <paramref name="tnode"/>. Children named "#text" are labelled with their Text;
 /// all others are labelled with their NodeString.
 /// </summary>
 /// <param name="node">Tree node that receives the new child nodes.</param>
 /// <param name="tnode">Toxy node whose children are mirrored into the tree.</param>
 void AppendTree(TreeNode node, ToxyNode tnode)
 {
     if (tnode.ChildrenNodes == null || tnode.ChildrenNodes.Count == 0)
     {
         return;
     }

     foreach (ToxyNode toxyChild in tnode.ChildrenNodes)
     {
         string label = toxyChild.Name == "#text" ? toxyChild.Text : toxyChild.NodeString;
         TreeNode treeChild = node.Nodes.Add(label);
         AppendTree(treeChild, toxyChild);
     }
 }
Example #8
0
        /// <summary>
        /// Parses the HTML document at Context.Path into a ToxyDom.
        /// Note: Context.Path must be an absolute path, not a relative one.
        /// </summary>
        /// <returns>A ToxyDom whose Root corresponds to the HTML document node.</returns>
        /// <exception cref="FileNotFoundException">No file exists at Context.Path.</exception>
        public ToxyDom Parse()
        {
            string path = Context.Path;
            if (!File.Exists(path))
            {
                throw new FileNotFoundException("File " + path + " is not found");
            }

            HtmlDocument document = new HtmlWeb().Load(path);
            HtmlNode documentNode = document.DocumentNode;
            ToxyNode rootNode = ToxyNode.TransformHtmlNodeToToxyNode(documentNode);

            // Breadth-first walk: each queue entry pairs an HTML node with the
            // already-created Toxy node that will receive its converted children.
            var pending = new Queue<KeyValuePair<HtmlNode, ToxyNode>>();
            pending.Enqueue(new KeyValuePair<HtmlNode, ToxyNode>(documentNode, rootNode));

            while (pending.Count != 0)
            {
                KeyValuePair<HtmlNode, ToxyNode> current = pending.Dequeue();
                HtmlNode htmlParent = current.Key;
                ToxyNode toxyParent = current.Value;

                foreach (HtmlNode htmlChild in htmlParent.ChildNodes)
                {
                    ToxyNode toxyChild = ToxyNode.TransformHtmlNodeToToxyNode(htmlChild);

                    // Text nodes additionally get their inner text copied over.
                    if (htmlChild.Name == "#text")
                    {
                        toxyChild.Text = htmlChild.InnerText;
                    }

                    toxyParent.ChildrenNodes.Add(toxyChild);
                    pending.Enqueue(new KeyValuePair<HtmlNode, ToxyNode>(htmlChild, toxyChild));
                }
            }

            ToxyDom dom = new ToxyDom();
            dom.Root = rootNode;
            return dom;
        }