コード例 #1
0
        /// <summary>
        /// Builds a DOM from a leading text run, a <c>div</c> carrying three attributes and a
        /// self-closed <c>img</c>, then checks the three resulting top-level elements.
        /// </summary>
        public void When_read_a_simple_tag()
        {
            const string markup = " dada\n<div class=\"login box1\" id=\"div1\" data-tooltip=\"salut, ça va?\">login: \n\t romcy</div>"
                                  + "<img src=\"http://popo.fr/titi.gif\" />";

            var builder = new HtmlDomBuilder(new HtmlDeclarationReader(new CodeReader(markup)));
            var nodes = builder.BuildDom().ToList();

            Assert.AreEqual(3, nodes.Count);

            // Text that precedes the first tag is expected as an element with an empty name.
            var leadingText = nodes[0];
            Assert.AreEqual(" dada\n", leadingText.InnerText);
            Assert.IsEmpty(leadingText.Name);

            // The div must expose its three attributes with the quoted values intact.
            var div = nodes[1];
            Assert.AreEqual("div", div.Name);
            Assert.AreEqual(3, div.Attributes.Count);
            Assert.AreEqual("login box1", div.Attributes["class"]);
            Assert.AreEqual("div1", div.Attributes["id"]);
            Assert.AreEqual("salut, ça va?", div.Attributes["data-tooltip"]);

            Assert.AreEqual("login: \n\t romcy", div.InnerText);

            // The div's text content is also expected as a single unnamed child.
            Assert.AreEqual(1, div.Children.Count);
            var divText = div.Children[0];
            Assert.IsEmpty(divText.Name);
            Assert.AreEqual("login: \n\t romcy", divText.InnerText);

            // The self-closed img has one attribute and no children.
            var image = nodes[2];
            Assert.AreEqual("img", image.Name);
            Assert.AreEqual(1, image.Attributes.Count);
            Assert.AreEqual("http://popo.fr/titi.gif", image.Attributes["src"]);
            Assert.AreEqual(0, image.Children.Count);
        }
コード例 #2
0
ファイル: HDocument.cs プロジェクト: ziomyslaw/ScrapySharp
        /// <summary>
        /// Creates an <see cref="HDocument"/> from markup text.
        /// </summary>
        /// <param name="source">The text handed to the <c>CodeReader</c>.</param>
        /// <returns>
        /// A new document whose <c>Children</c> is the list materialized from <c>BuildDom()</c>.
        /// </returns>
        public static HDocument Parse(string source)
        {
            // Reader pipeline: raw text -> tag declarations -> DOM elements.
            var builder = new HtmlDomBuilder(new HtmlDeclarationReader(new CodeReader(source)));

            return new HDocument { Children = builder.BuildDom().ToList() };
        }
コード例 #3
0
        /// <summary>
        /// Reads <c>Html/ValidPage1.htm</c>, runs <c>TestWordsReading</c> on it, checks the first
        /// five declarations produced by the declaration reader, and then checks the element tree
        /// returned by <c>HDocument.Parse</c>.
        /// </summary>
        public void When_parsing_ValidPage1()
        {
            var html = File.ReadAllText("Html/ValidPage1.htm");

            TestWordsReading(html);

            var reader = new HtmlDeclarationReader(new CodeReader(html));

            // 1) <html>
            var htmlOpen = reader.ReadTagDeclaration();
            Assert.AreEqual("html", htmlOpen.Name);
            Assert.AreEqual(DeclarationType.OpenTag, htmlOpen.Type);

            // 2) whitespace between <html> and the first <div>
            var whitespace = reader.ReadTagDeclaration();
            Assert.IsNull(whitespace.Name);
            Assert.AreEqual(DeclarationType.TextElement, whitespace.Type);
            Assert.AreEqual("\r\n    ", whitespace.InnerText);

            // 3) <div>
            var divOpen = reader.ReadTagDeclaration();
            Assert.AreEqual("div", divOpen.Name);
            Assert.AreEqual(DeclarationType.OpenTag, divOpen.Type);

            // 4) the div's text
            var divText = reader.ReadTagDeclaration();
            Assert.IsNull(divText.Name);
            Assert.AreEqual(DeclarationType.TextElement, divText.Type);
            Assert.AreEqual("test", divText.InnerText);

            // 5) </div>
            var divClose = reader.ReadTagDeclaration();
            Assert.AreEqual("div", divClose.Name);
            Assert.AreEqual(DeclarationType.CloseTag, divClose.Type);


            var document = HDocument.Parse(html);

            Assert.AreEqual(1, document.Elements("html").Count());

            var divs = document.Elements("html").Elements("div");
            Assert.AreEqual(2, divs.Count());

            Assert.AreEqual("test", divs.ElementAt(0).InnerText);

            var spans = divs.ElementAt(1).Elements("span");
            Assert.AreEqual(2, spans.Count());
            Assert.AreEqual("Welcome", spans.ElementAt(0).InnerText);
            Assert.AreEqual(" toto", spans.ElementAt(1).InnerText);

            Assert.AreEqual(2, document.Descendants("div").Count());
            Assert.AreEqual(2, document.Descendants("span").Count());
        }
コード例 #4
0
        /// <summary>
        /// Feeds a leading text run, a <c>div</c> with three attributes and a self-closed
        /// <c>img</c> to the declaration reader and checks each declaration in order, ending
        /// with a <c>null</c> result once the input is used up.
        /// </summary>
        public void When_read_a_simple_tag()
        {
            const string markup = " dada\n<div class=\"login box1\" id=\"div1\" data-tooltip=\"salut, ça va?\">login: \n\t romcy</div>"
                                  + "<img src=\"http://popo.fr/titi.gif\" />";

            var reader = new HtmlDeclarationReader(new CodeReader(markup));

            // 1) text before the first tag
            var leadingText = reader.ReadTagDeclaration();
            Assert.AreEqual(" dada\n", leadingText.InnerText);
            Assert.AreEqual(DeclarationType.TextElement, leadingText.Type);

            // 2) <div ...> with its three attributes
            var divOpen = reader.ReadTagDeclaration();
            Assert.AreEqual("div", divOpen.Name);
            Assert.AreEqual(DeclarationType.OpenTag, divOpen.Type);
            Assert.AreEqual(3, divOpen.Attributes.Count);
            Assert.AreEqual("login box1", divOpen.Attributes["class"]);
            Assert.AreEqual("div1", divOpen.Attributes["id"]);
            Assert.AreEqual("salut, ça va?", divOpen.Attributes["data-tooltip"]);

            // 3) text inside the div
            var divText = reader.ReadTagDeclaration();
            Assert.IsNull(divText.Name);
            Assert.AreEqual("login: \n\t romcy", divText.InnerText);
            Assert.AreEqual(DeclarationType.TextElement, divText.Type);

            // 4) </div>
            var divClose = reader.ReadTagDeclaration();
            Assert.AreEqual("div", divClose.Name);
            Assert.AreEqual(DeclarationType.CloseTag, divClose.Type);

            // 5) <img ... />
            var image = reader.ReadTagDeclaration();
            Assert.AreEqual("img", image.Name);
            Assert.AreEqual(DeclarationType.SelfClosedTag, image.Type);
            Assert.AreEqual("http://popo.fr/titi.gif", image.Attributes["src"]);

            // 6) nothing left to read
            var afterEnd = reader.ReadTagDeclaration();
            Assert.IsNull(afterEnd);
        }