Example #1
0
        public void LastChildOfEmptyElementIsNull()
        {
            // Nothing has been appended to this document node, so it is expected
            // to report no last child.
            var emptyDocument = new HtmlDocumentNode();

            Assert.Null(emptyDocument.LastChild);
        }
Example #2
0
        public void MixedAttributes()
        {
            // One start tag mixing a double-quoted value, a bare attribute without a value
            // and a single-quoted empty value.
            string input = "<name attr1=\"value\" attr2 attr3=\'\'>";

            TextFormatter   formatter = new TextFormatter(input);
            NormalTagParser parser    = new NormalTagParser(formatter);

            // Assert the precondition instead of silently skipping Parse(): if the parser
            // rejects the input the test now fails here with a clear message rather than
            // later while reading a node that was never produced.
            Assert.IsTrue(parser.CanParse(), "NormalTagParser should accept the start tag.");
            parser.Parse();

            HtmlDocumentNode node = parser.ParsedNode;

            Assert.AreEqual("name", node.Name);
            Assert.AreEqual(3, node.Attributes.Count);

            // Attribute names are checked in the order they appear in the input.
            Assert.AreEqual("attr1", node.Attributes[0].Name);
            Assert.AreEqual("attr2", node.Attributes[1].Name);
            Assert.AreEqual("attr3", node.Attributes[2].Name);

            // A bare attribute carries no value (null); an empty quoted one carries "".
            Assert.AreEqual("value", node.Attributes[0].Value);
            Assert.IsNull(node.Attributes[1].Value);
            Assert.AreEqual("", node.Attributes[2].Value);

            Assert.AreEqual(1, node.Flags.Count);
            Assert.IsTrue(node.Flags.Contains(Flags.NormalTag));
        }
Example #3
0
        public void NodeDeleteTest()
        {
            string input = "<html><head>Header<br></head><body>Body<div>Content<hr></div></body></html>";

            HtmlDocument doc = new HtmlDocument(input);

            doc.Parse();

            HtmlDocumentNode root = doc.RootNode;

            // Expected value goes first so that a failure message labels expected and
            // actual correctly. The untouched tree must serialize back to the input.
            Assert.AreEqual(input, root.InnerHtml);

            // Delete an element that has children of its own.
            HtmlDocumentNode nodeToDelete = root.Descendants.FirstOrDefault(x => x.Name == "head");

            Assert.IsNotNull(nodeToDelete, "Expected to find a 'head' node to delete.");
            root.DeleteNode(nodeToDelete);

            Assert.AreEqual("<html><body>Body<div>Content<hr></div></body></html>", root.InnerHtml);

            // Delete a nested element that has no closing tag in the input.
            HtmlDocumentNode nodeToDelete2 = root.Descendants.FirstOrDefault(x => x.Name == "hr");

            Assert.IsNotNull(nodeToDelete2, "Expected to find an 'hr' node to delete.");
            root.DeleteNode(nodeToDelete2);

            Assert.AreEqual("<html><body>Body<div>Content</div></body></html>", root.InnerHtml);

            // Delete a text node, identified by its name and own text.
            HtmlDocumentNode nodeToDelete3 = root.Descendants.FirstOrDefault(x => x.Name == "#text" && x.OwnText == "Content");

            Assert.IsNotNull(nodeToDelete3, "Expected to find the 'Content' text node to delete.");
            root.DeleteNode(nodeToDelete3);

            Assert.AreEqual("<html><body>Body<div></div></body></html>", root.InnerHtml);
        }
Example #4
0
        public void FirstChildOfEmptyElementIsNull()
        {
            // Nothing has been appended to this document node, so it is expected
            // to report no first child.
            var emptyDocument = new HtmlDocumentNode();

            Assert.Null(emptyDocument.FirstChild);
        }
Example #5
0
        public void ParentNodeOfNotRootElementIsNotNull()
        {
            // Arrange: an anchor element appended directly to a document node.
            var document = new HtmlDocumentNode();
            var anchor   = new HtmlElementNode("a");
            document.AppendChild(anchor);

            // Assert: the appended element reports the document as its parent.
            Assert.Equal(document, anchor.ParentNode);
        }
Example #6
0
        public void LastChildIsBr()
        {
            // Arrange: a document whose only appended child is a <br> element.
            var document  = new HtmlDocumentNode();
            var lineBreak = new HtmlElementNode("br");
            document.AppendChild(lineBreak);

            // Assert: the last child is that <br>.
            Assert.Equal("br", document.LastChild.Name);
        }
        /// <summary>
        /// Gets the value of the first attribute of <paramref name="node"/> whose name
        /// equals <paramref name="attrName"/>.
        /// </summary>
        /// <param name="node">Node whose attributes are searched.</param>
        /// <param name="attrName">Attribute name to look for (compared with <c>==</c>).</param>
        /// <returns>
        /// The attribute value, or an empty string when no such attribute exists or the
        /// matching attribute has no value. Never null.
        /// </returns>
        public static string GetAttributeValueByName(HtmlDocumentNode node, string attrName)
        {
            // One pass over the attributes instead of Any() followed by First().
            string val = node.Attributes
                         .Where(a => a.Name == attrName)
                         .Select(a => a.Value)
                         .FirstOrDefault();

            // A missing attribute and an attribute with a null value both yield "", so
            // callers can use the result without a null check.
            return(val ?? string.Empty);
        }
Example #8
0
        public void PreviousSiblingIsNull()
        {
            // Arrange: the anchor is the only child appended to the document.
            var document = new HtmlDocumentNode();
            var anchor   = new HtmlElementNode("a");
            document.AppendChild(anchor);

            // Assert: nothing was appended before it, so no previous sibling is expected.
            Assert.Null(anchor.PreviousSibling);
        }
Example #9
0
        public void NextSiblingIsNull()
        {
            // Arrange: the anchor is the only child appended to the document.
            var document = new HtmlDocumentNode();
            var anchor   = new HtmlElementNode("a");
            document.AppendChild(anchor);

            // Assert: nothing was appended after it, so no next sibling is expected.
            Assert.Null(anchor.NextSibling);
        }
Example #10
0
        public void NextSiblingIsBr()
        {
            // Arrange: an anchor followed by a <br>, appended in that order.
            var document  = new HtmlDocumentNode();
            var anchor    = new HtmlElementNode("a");
            var lineBreak = new HtmlElementNode("br");
            document.AppendChild(anchor);
            document.AppendChild(lineBreak);

            // Assert: the anchor's next sibling is the <br>.
            Assert.Equal("br", anchor.NextSibling.Name);
        }
Example #11
0
        public void PreviousSiblingIsBr()
        {
            // Arrange: a <br> followed by an anchor, appended in that order.
            var document  = new HtmlDocumentNode();
            var anchor    = new HtmlElementNode("a");
            var lineBreak = new HtmlElementNode("br");
            document.AppendChild(lineBreak);
            document.AppendChild(anchor);

            // Assert: the anchor's previous sibling is the <br>.
            Assert.Equal("br", anchor.PreviousSibling.Name);
        }
Example #12
0
 /// <summary>
 /// Loads the page at <paramref name="url"/>, collects the href of every group-header
 /// link into <c>ResultGroups</c> and returns the result of <c>GetChipList()</c>.
 /// </summary>
 /// <param name="url">Address of the page to load.</param>
 /// <returns>The list produced by <c>GetChipList()</c>.</returns>
 public override List <Chip> Parse(string url)
 {
     HtmlDocumentNode = Web.Load(url).DocumentNode;

     var groupAnchors = HtmlDocumentNode.SelectNodes("//a[@class='link group-header']");

     // Test for the "no match" case explicitly instead of letting Select() throw
     // ArgumentNullException and catching it. SelectNodes presumably returns null
     // when nothing matches (HtmlAgilityPack behaviour) - confirm against the library in use.
     if (groupAnchors == null)
     {
         // Kept as null, as before, in case GetChipList() distinguishes it from empty.
         ResultGroups = null;
     }
     else
     {
         ResultGroups = groupAnchors.Select(n => n.GetAttributeValue("href", null));
     }
     return(GetChipList());
 }
Example #13
0
        public void InnerTextTest()
        {
            // Text fragments interleaved with nested, self-closed and unclosed tags.
            const string markup =
                "Text0<div id=\"divider\" class=\"big and small\">Text1<b>Text2</b>Text3<br/><i>Text4</i><img>Text5</div>Text6Text7<br>Text8";

            var document = new HtmlDocument(markup);
            document.Parse();

            HtmlDocumentNode rootNode = document.RootNode;

            // InnerText of the root is expected to be all text fragments concatenated,
            // with the tags dropped.
            Assert.AreEqual("Text0Text1Text2Text3Text4Text5Text6Text7Text8", rootNode.InnerText);
        }
        /// <summary>
        /// Creates <c>ParsedNode</c> for this parser's tag name at the current source
        /// position, flags it as a special tag and then processes the tag opener,
        /// content and closer in that order.
        /// </summary>
        public void Parse()
        {
            // Name, position and line are recorded before any of the helpers below run.
            var specialNode = new HtmlDocumentNode();
            specialNode.Name     = _name;
            specialNode.Position = _source.Position;
            specialNode.Line     = _source.Line;
            ParsedNode           = specialNode;

            ParsedNode.Flags.Add(Flags.SpecialTag);

            AddAndSkipTagOpener();
            AddAndSkipTagContent();
            AddAndSkipTagCloser();
        }
        public void JsComment()
        {
            string        input     = "/*Comment Text*/";
            TextFormatter formatter = new TextFormatter(input);

            SpecialTagParser parser = new SpecialTagParser("#jscomment", formatter, new SpecialTagParserConfiguration("/*", "*/", false));

            // Assert the precondition instead of silently skipping Parse(): a parser that
            // rejects the input now fails the test here with a clear message.
            Assert.IsTrue(parser.CanParse(), "SpecialTagParser should accept the JS comment.");
            parser.Parse();

            HtmlDocumentNode node = parser.ParsedNode;

            // The node carries the parser's name; its own text excludes the delimiters.
            Assert.AreEqual("#jscomment", node.Name);
            Assert.AreEqual("Comment Text", node.OwnText);
        }
        public void Doctype()
        {
            string        input     = "<!doctype doctype content>";
            TextFormatter formatter = new TextFormatter(input);

            SpecialTagParser parser = new SpecialTagParser("#doctype", formatter, new SpecialTagParserConfiguration("<!doctype ", ">", false));

            // Assert the precondition instead of silently skipping Parse(): a parser that
            // rejects the input now fails the test here with a clear message.
            Assert.IsTrue(parser.CanParse(), "SpecialTagParser should accept the doctype declaration.");
            parser.Parse();

            HtmlDocumentNode node = parser.ParsedNode;

            // The node carries the parser's name; its own text excludes the delimiters.
            Assert.AreEqual("#doctype", node.Name);
            Assert.AreEqual("doctype content", node.OwnText);
        }
        public void Script()
        {
            string        input     = "<script>script code</script>";
            TextFormatter formatter = new TextFormatter(input);

            SpecialTagParser parser = new SpecialTagParser("#script", formatter, new SpecialTagParserConfiguration("<script", "</script>", false));

            // Assert the precondition instead of silently skipping Parse(): a parser that
            // rejects the input now fails the test here with a clear message.
            Assert.IsTrue(parser.CanParse(), "SpecialTagParser should accept the script element.");
            parser.Parse();

            HtmlDocumentNode node = parser.ParsedNode;

            // The node carries the parser's name; its own text is the code between the tags.
            Assert.AreEqual("#script", node.Name);
            Assert.AreEqual("script code", node.OwnText);
        }
        public void Conditional()
        {
            string        input     = "<![if IE6]>Conditional Comment Text<!--Nested Comment--><![endif]>";
            TextFormatter formatter = new TextFormatter(input);

            SpecialTagParser parser = new SpecialTagParser("#conditional", formatter, new SpecialTagParserConfiguration("<![if", "<![endif]>", false));

            // Assert the precondition instead of silently skipping Parse(): a parser that
            // rejects the input now fails the test here with a clear message.
            Assert.IsTrue(parser.CanParse(), "SpecialTagParser should accept the conditional block.");
            parser.Parse();

            HtmlDocumentNode node = parser.ParsedNode;

            // The nested comment is expected to stay inside the own text verbatim.
            Assert.AreEqual("#conditional", node.Name);
            Assert.AreEqual("Conditional Comment Text<!--Nested Comment-->", node.OwnText);
        }
        /// <summary>
        /// Collects the outer HTML of the anchors below <paramref name="bodyNode"/> whose
        /// href starts with <c>Constants.A__START_URL</c>, i.e. the links taken to belong
        /// to the search result section of the page.
        /// </summary>
        /// <param name="bodyNode">Node whose descendants are scanned for anchors.</param>
        /// <returns>
        /// The matching anchors in the order <c>Descendants</c> yields them. For
        /// compatibility with existing callers the array has at least 200 slots; unused
        /// trailing slots are null. More than 200 matches are all returned (previously
        /// this overflowed the fixed-size buffer).
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="bodyNode"/> is null.</exception>
        public static string[] GetLinksFromSearchResult(HtmlDocumentNode bodyNode)
        {
            // Historical size of the result buffer; kept as the minimum result length.
            const int MinimumSlots = 200;

            if (bodyNode == null)
            {
                throw new System.ArgumentNullException("bodyNode");
            }

            string[] matches = bodyNode.Descendants
                               .Where(d => d.OwnHtml.StartsWith(Constants.A_START))
                               //Note: remove the href filter below to find all appearances of the searchLink (including in ads or social networking pages)
                               .Where(d =>
                               {
                                   // Guard against a null href before calling StartsWith on it.
                                   string href = GetAttributeValueByName(d, Constants.HREF);
                                   return(href != null && href.StartsWith(Constants.A__START_URL));
                               })
                               .Select(d => d.OuterHtml)
                               .ToArray();

            if (matches.Length >= MinimumSlots)
            {
                return(matches);
            }

            // Pad with nulls up to the historical length so callers see the same shape.
            string[] links = new string[MinimumSlots];
            matches.CopyTo(links, 0);
            return(links);
        }
        public void XmlProcessingInstruction()
        {
            string        input     = "<? Instruction content ?>";
            TextFormatter formatter = new TextFormatter(input);

            SpecialTagParser parser = new SpecialTagParser("#xmlprocessinginstruction", formatter, new SpecialTagParserConfiguration("<?", "?>", false));

            // Assert the precondition instead of silently skipping Parse(): a parser that
            // rejects the input now fails the test here with a clear message.
            Assert.IsTrue(parser.CanParse(), "SpecialTagParser should accept the processing instruction.");
            parser.Parse();

            HtmlDocumentNode node = parser.ParsedNode;

            // Leading and trailing spaces inside the delimiters are expected to be kept.
            Assert.AreEqual("#xmlprocessinginstruction", node.Name);
            Assert.AreEqual(" Instruction content ", node.OwnText);
        }
Example #21
0
        /// <summary>
        /// Reads text from the formatter up to the next "&lt;" or "/*" stop string, wraps it
        /// in a "#text" node flagged as text and adds that node to the current parent.
        /// </summary>
        private void ParseAsText()
        {
            // Position and line are captured before the text is read.
            // NOTE(review): the read below presumably advances the formatter - confirm.
            int    startPosition = _textFormatter.Position;
            int    startLine     = _textFormatter.Line;
            string content       = _textFormatter.GetTextFromCurrentPositionToAnyStopString("<", "/*");

            var textNode = new HtmlDocumentNode();
            textNode.Name       = "#text";
            textNode.Position   = startPosition;
            textNode.Line       = startLine;
            textNode.OwnText    = content;
            textNode.OuterHtml  = content;
            textNode.ParentNode = _currentParent;

            _currentNode = textNode;
            _currentNode.Flags.Add(Flags.Text);

            AddCurrentNodeToCurrentParent();
        }
        /// <summary>
        /// Creates <c>ParsedNode</c> at the current source position, skips the tag opener
        /// and then parses the tag as an end tag or a start tag, depending on
        /// <c>IsFrontslash()</c>.
        /// </summary>
        public void Parse()
        {
            // Position and line are recorded before the tag opener is skipped.
            var tagNode = new HtmlDocumentNode();
            tagNode.Position = _source.Position;
            tagNode.Line     = _source.Line;
            ParsedNode       = tagNode;

            SkipTagOpener();

            if (!IsFrontslash())
            {
                ParseStartTag();
            }
            else
            {
                ParseEndTag();
            }
        }
Example #23
0
 /// <summary>
 /// Sets up the parsing state: a text formatter over the document HTML, a "#root" node
 /// that is also the initial current parent, the normal tag parser and the list of
 /// special tag parsers.
 /// </summary>
 private void Initialize()
 {
     _textFormatter = new TextFormatter(_documentHtml);

     var root = new HtmlDocumentNode();
     root.Name      = "#root";
     RootNode       = root;
     _currentParent = RootNode;

     _normalTagParser = new NormalTagParser(_textFormatter);

     // Parsers are registered in the same order as before.
     // NOTE(review): the order presumably matters (e.g. "<!--[if" ahead of "<!--") - confirm where the list is consumed.
     var specialParsers = new List <SpecialTagParser>();
     specialParsers.Add(new SpecialTagParser("#doctype", _textFormatter, new SpecialTagParserConfiguration("<!doctype ", ">", false)));
     specialParsers.Add(new SpecialTagParser("#conditional", _textFormatter, new SpecialTagParserConfiguration("<![if", "<![endif]>", false)));
     specialParsers.Add(new SpecialTagParser("#conditionalcomment", _textFormatter, new SpecialTagParserConfiguration("<!--[if", "<![endif]-->", false)));
     specialParsers.Add(new SpecialTagParser("#comment", _textFormatter, new SpecialTagParserConfiguration("<!--", "-->", false)));
     specialParsers.Add(new SpecialTagParser("#jscomment", _textFormatter, new SpecialTagParserConfiguration("/*", "*/", false)));
     specialParsers.Add(new SpecialTagParser("#xmlprocessinginstruction", _textFormatter, new SpecialTagParserConfiguration("<?", "?>", false)));
     specialParsers.Add(new SpecialTagParser("script", _textFormatter, new SpecialTagParserConfiguration("<script", "</script>", false)));

     _specialTagParsers = specialParsers;
 }
Example #24
0
        public void NormalTag()
        {
            // A start tag with a name and no attributes.
            string input = "<name>";

            TextFormatter   formatter = new TextFormatter(input);
            NormalTagParser parser    = new NormalTagParser(formatter);

            // Assert the precondition instead of silently skipping Parse(): a parser that
            // rejects the input now fails the test here with a clear message.
            Assert.IsTrue(parser.CanParse(), "NormalTagParser should accept the start tag.");
            parser.Parse();

            HtmlDocumentNode node = parser.ParsedNode;

            Assert.AreEqual("name", node.Name);
            Assert.AreEqual(0, node.Attributes.Count);
            Assert.AreEqual(1, node.Flags.Count);
            Assert.IsTrue(node.Flags.Contains(Flags.NormalTag));
        }
        /// <summary>
        /// Downloads the page at <paramref name="webUrl"/>, parses it and reports where
        /// <paramref name="searchLink"/> appears among the search-result links of its body.
        /// </summary>
        /// <param name="webUrl">Address of the search page to download.</param>
        /// <param name="searchLink">Link to look for; passed on to <c>GetSearchURLPositions</c>.</param>
        /// <returns>
        /// The positions string produced by <c>GetSearchURLPositions</c>, or "0" when that
        /// string is null or empty or when the page has no body node.
        /// </returns>
        static string Scrap(string webUrl, string searchLink)
        {
            string pageHtml;

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(webUrl);

            // Dispose the response, stream and reader deterministically; previously the
            // connection was left open until finalization.
            // The content is still decoded as ASCII with BOM detection switched off, so
            // the decoded text is the same as with the former chunked read loop.
            using (HttpWebResponse resp = (HttpWebResponse)request.GetResponse())
            using (Stream resStream = resp.GetResponseStream())
            using (StreamReader reader = new StreamReader(resStream, Encoding.ASCII, false))
            {
                pageHtml = reader.ReadToEnd();
            }

            HtmlParser.HtmlDocumentStructure.HtmlDocument docx = new HtmlParser.HtmlDocumentStructure.HtmlDocument(pageHtml);
            docx.Parse();

            // The lambda parameter no longer reuses the name of an enclosing local, and
            // Descendants is queried directly instead of being cast to List<T> first.
            // SingleOrDefault still throws when more than one node matches.
            HtmlDocumentNode body = docx.RootNode.Descendants
                                    .Where(n => n.OwnHtml.StartsWith(Constants.BODY_START))
                                    .SingleOrDefault();

            if (body == null)
            {
                // No body means there is nothing to search: report "not found".
                return("0");
            }

            var anchors = GetLinksFromSearchResult(body);

            var positions = GetSearchURLPositions(anchors, searchLink);

            return(string.IsNullOrEmpty(positions) ? "0" : positions);
        }
Example #26
0
        public void AttributesWithQuotesWithoutValues()
        {
            // Two attributes with empty values: one double-quoted, one single-quoted.
            string input = "<name attr=\"\" attr2=\'\'>";

            TextFormatter   formatter = new TextFormatter(input);
            NormalTagParser parser    = new NormalTagParser(formatter);

            // Assert the precondition instead of silently skipping Parse(): a parser that
            // rejects the input now fails the test here with a clear message.
            Assert.IsTrue(parser.CanParse(), "NormalTagParser should accept the start tag.");
            parser.Parse();

            HtmlDocumentNode node = parser.ParsedNode;

            Assert.AreEqual("name", node.Name);
            Assert.AreEqual(2, node.Attributes.Count);
            Assert.AreEqual("attr", node.Attributes[0].Name);
            Assert.AreEqual("attr2", node.Attributes[1].Name);

            // Empty quotes yield an empty string value, for either quote style.
            Assert.AreEqual("", node.Attributes[0].Value);
            Assert.AreEqual("", node.Attributes[1].Value);

            Assert.AreEqual(1, node.Flags.Count);
            Assert.IsTrue(node.Flags.Contains(Flags.NormalTag));
        }
Example #27
0
 /// <summary>
 /// Makes the current node the current parent.
 /// </summary>
 private void ChooseCurrentNodeAsCurrentParent()
 {
     var promotedNode = _currentNode;

     _currentParent = promotedNode;
 }
Example #28
0
        /// <summary>
        /// Runs the current parser, takes its parsed node as the current node and places it
        /// in the tree: an end tag closes the matching parent (and is only added to the tree
        /// when the configuration asks for closing tags), any other node is added to the
        /// current parent and may become the new current parent.
        /// </summary>
        private void CreateNewNode()
        {
            _currentParser.Parse();
            _currentNode            = _currentParser.ParsedNode;
            _currentNode.ParentNode = _currentParent;

            if (IsEndTag())
            {
                if (EndTagMatchesCurrentParent())
                {
                    // The end tag closes the current parent directly.
                    _currentParent.Flags.Add(Flags.ContainsClosingTag);

                    if (Configuration.IncludeClosingTagsInNodeTree)
                    {
                        _currentParent.Flags.Add(Flags.ClosingTagIncudedInNodeTree);
                    }

                    // Continue one level up from the parent that was just closed.
                    ChooseCurrentParentParentAsCurrentParent();
                }
                else
                {
                    // Remember where we started so the walk below can be undone.
                    HtmlDocumentNode parent = _currentParent;

                    // Walk up one parent at a time until the end tag matches.
                    // NOTE(review): the helper predicates presumably read _currentParent,
                    // which this loop mutates - confirm against their definitions.
                    while (EndTagDoesNotMatchCurrentParent())
                    {
                        ChooseCurrentParentParentAsCurrentParent();

                        if (EndTagDoesNotMatchAnyParent())
                        {
                            // Stray end tag: restore the original parent and return without
                            // flagging anything or adding the end tag to the tree.
                            _currentParent = parent;
                            //Can do something with this
                            return;
                        }
                    }

                    // _currentParent is now the ancestor the end tag matches.
                    _currentParent.Flags.Add(Flags.ContainsClosingTag);

                    if (Configuration.IncludeClosingTagsInNodeTree)
                    {
                        _currentParent.Flags.Add(Flags.ClosingTagIncudedInNodeTree);
                    }

                    // Continue one level up from the ancestor that was just closed.
                    ChooseCurrentParentParentAsCurrentParent();
                }
                // At this point the closed node's parent is the current parent, so the
                // end-tag node (when included) is added there. Its ParentNode was set at
                // the top of the method and is not updated here.
                if (Configuration.IncludeClosingTagsInNodeTree)
                {
                    AddCurrentNodeToCurrentParent();
                }
            }
            else
            {
                // A non-end-tag node may close the current parent before being added;
                // in that case it is re-parented to the level above.
                if (CurrentParentCanClosedByOpeningTag() && CurrentNodeCanCloseCurrentParent())
                {
                    ChooseCurrentParentParentAsCurrentParent();
                    _currentNode.ParentNode = _currentParent;
                }

                AddCurrentNodeToCurrentParent();

                // Nodes that can have children become the parent for what follows.
                if (CurrentNodeCanHaveChildren())
                {
                    ChooseCurrentNodeAsCurrentParent();
                }
            }
        }
Example #29
0
 /// <summary>
 /// Selects every div with class 'denoPrice' below the loaded document node and
 /// projects each one to its inner text.
 /// </summary>
 /// <returns>A lazily evaluated sequence of the inner texts of the selected nodes.</returns>
 internal override IEnumerable <string> GetResultPrice()
 {
     var priceNodes = HtmlDocumentNode.SelectNodes("//div[@class='denoPrice']");

     return(priceNodes.Select(priceNode => priceNode.InnerText));
 }
Example #30
0
        public void ParentNodeOfRootElementIsNull()
        {
            // A document node that was never appended anywhere is expected to have no parent.
            var rootDocument = new HtmlDocumentNode();
            var parentOfRoot = rootDocument.ParentNode;

            Assert.Null(parentOfRoot);
        }