Exemple #1
0
        // Deserializes the same small HTML document twice -- first from a string
        // reader, then from an ASCII stream with RemoveUnknownOrInvalidEntities
        // switched on -- and checks the resulting tree, the decoded text and the
        // reported encoding. Finally checks that a null reader is rejected.
        public void TestDeserializeDocument()
        {
            const String html = "<html><body><h1>Document</h1><p>Content &amp; more &entity;.</p></body></html>";

            var serializer = new HSerializer();

            // ---- Pass 1: read from a string ----
            var document = serializer.DeserializeDocument(new StringReader(html));
            Assert.Same(Encoding.UTF8, document.Encoding);

            // The document has a single node: the <html> root.
            Assert.Equal(1, document.Nodes().Count());
            Assert.Equal("html", document.Root.Name);

            // The root has a single node: the <body> element.
            Assert.Equal(1, document.Root.Nodes().Count());
            var bodyElement = document.Root.FirstNode as HElement;
            Assert.NotNull(bodyElement);
            Assert.Equal("body", bodyElement.Name);

            // <body> has two nodes and both of them are elements.
            Assert.Equal(2, bodyElement.Nodes().Count());
            var children = bodyElement.Elements().ToArray();
            Assert.Equal(2, children.Length);

            // <h1> wraps one text node.
            var heading = children[0];
            Assert.Equal("h1", heading.Name);
            Assert.Equal(1, heading.Nodes().Count());
            var headingText = Assert.IsType <HText>(heading.FirstNode);
            Assert.Equal("Document", headingText.Value);

            // <p> wraps one text node; "&amp;" is decoded while the unknown
            // "&entity;" is expected to be kept verbatim.
            var paragraph = children[1];
            Assert.Equal("p", paragraph.Name);
            Assert.Equal(1, paragraph.Nodes().Count());
            var paragraphText = Assert.IsType <HText>(paragraph.FirstNode);
            Assert.Equal("Content & more &entity;.", paragraphText.Value);

            // ---- Pass 2: read from an ASCII stream ----
            using (var buffer = new MemoryStream(Encoding.ASCII.GetBytes(html)))
            {
                serializer.RemoveUnknownOrInvalidEntities = true;

                var streamDocument = serializer.DeserializeDocument(new StreamReader(buffer, Encoding.ASCII));
                Assert.Same(Encoding.ASCII, streamDocument.Encoding);
                Assert.Equal(6, streamDocument.DescendantNodes().Count());
                Assert.Equal(0, streamDocument.ParseErrors.Length);

                // With the option enabled, the unknown entity is expected to be
                // dropped from the paragraph text.
                var streamBody = streamDocument.Root.Elements("body").First();
                HElement lastChild = streamBody.Elements().Last();
                Assert.Equal("p", lastChild.Name);
                Assert.Equal(1, lastChild.Nodes().Count());
                var lastChildText = Assert.IsType <HText>(lastChild.FirstNode);
                Assert.Equal("Content & more .", lastChildText.Value);
            }

            // A null reader must raise ArgumentNullException.
            Assert.Throws <ArgumentNullException>(() => serializer.DeserializeDocument(null));
        }
Exemple #2
0
        // Deserializes a document whose <p> tag carries an attribute without a
        // value ("class=") and checks that the tree is still built as expected
        // and that exactly one parse error is reported.
        public void TestDeserializeDocument_WithErrors()
        {
            const String html = "<html><body><h1>Document</h1><p class=>Content &amp; more.</p></body></html>";

            var serializer = new HSerializer();
            var document   = serializer.DeserializeDocument(new StringReader(html));

            Assert.Same(Encoding.UTF8, document.Encoding);

            // The document has a single node: the <html> root.
            Assert.Equal(1, document.Nodes().Count());
            Assert.Equal("html", document.Root.Name);

            // The root has a single node: the <body> element.
            Assert.Equal(1, document.Root.Nodes().Count());
            var bodyElement = document.Root.FirstNode as HElement;
            Assert.NotNull(bodyElement);
            Assert.Equal("body", bodyElement.Name);

            // <body> has two nodes and both of them are elements.
            Assert.Equal(2, bodyElement.Nodes().Count());
            var children = bodyElement.Elements().ToArray();
            Assert.Equal(2, children.Length);

            // <h1> wraps one text node.
            var heading = children[0];
            Assert.Equal("h1", heading.Name);
            Assert.Equal(1, heading.Nodes().Count());
            var headingText = Assert.IsType <HText>(heading.FirstNode);
            Assert.Equal("Document", headingText.Value);

            // <p> wraps one text node despite the malformed attribute.
            var paragraph = children[1];
            Assert.Equal("p", paragraph.Name);
            Assert.Equal(1, paragraph.Nodes().Count());
            var paragraphText = Assert.IsType <HText>(paragraph.FirstNode);
            Assert.Equal("Content & more.", paragraphText.Value);

            // The malformed attribute is reported as the only parse error.
            var errors = document.ParseErrors;
            Assert.Equal(1, errors.Length);
            Assert.Equal("Attribute value expected.", errors[0].Message);
        }
Exemple #3
0
        // Round-trips the embedded TestPage1.html resource: the page is read once
        // as raw text and once through the serializer, the parsed tree is checked
        // (encoding, doctype, node and element counts), and the re-serialized
        // output is compared against the raw text after a few known adjustments.
        public void TestDeserializeTestPage1()
        {
            const String resourceName = "HDoc.Tests.Resources.TestPage1.html";
            var assembly = this.GetType().Assembly;

            // Raw page text, used further down to build the expected output.
            String pageContent;
            using (var pageStream = assembly.GetManifestResourceStream(resourceName))
            {
                // GetManifestResourceStream returns null when the resource is not
                // found; fail with a clear assertion instead of letting
                // StreamReader throw an ArgumentNullException.
                Assert.NotNull(pageStream);

                using (var reader = new StreamReader(pageStream))
                {
                    pageContent = reader.ReadToEnd();
                }
            }

            using (var pageStream = assembly.GetManifestResourceStream(resourceName))
            {
                Assert.NotNull(pageStream);

                using (var reader = new StreamReader(pageStream))
                {
                    var serializer = new HSerializer();
                    var doc = serializer.DeserializeDocument(reader);

                    Assert.Same(Encoding.UTF8, doc.Encoding);
                    Assert.Null(doc.XmlDeclaration);
                    Assert.Equal(StandardDoctype.Html5, doc.DocumentType.StandardType);

                    // Top level: doctype, a text node, then the root element.
                    var nodes = doc.Nodes().ToArray();
                    Assert.Equal(3, nodes.Length);
                    Assert.IsType <HDocumentType>(nodes[0]);
                    Assert.IsType <HText>(nodes[1]);
                    Assert.IsType <HElement>(nodes[2]);

                    nodes = doc.DescendantNodes().ToArray();
                    Assert.Equal(129, nodes.Length);

                    var elms = doc.Descendants().ToArray();
                    Assert.Equal(46, elms.Length);

                    // The serialized output is expected to differ from the raw
                    // source in these places: the image tag is self-closed, '<'
                    // and '"' in text are written as entities, and 'ç' is written
                    // as &ccedil;.
                    String expected = pageContent
                                      .Replace("src=\"http://placekitten.com/g/64/64\">", "src=\"http://placekitten.com/g/64/64\" />")
                                      .Replace("a = b < c;", "a = b &lt; c;")
                                      .Replace("s = \"&lt;html&gt;\";", "s = &quot;&lt;html&gt;&quot;;")
                                      .Replace("français", "fran&ccedil;ais");

                    Assert.Equal(expected, serializer.Serialize(doc));
                }
            }
        }