/// <summary>
/// Deserializes a small HTML document first from a <see cref="StringReader"/> and then
/// from an ASCII <see cref="StreamReader"/>, checking the resulting node tree, the
/// reported encoding, and that a null reader is rejected.
/// </summary>
public void TestDeserializeDocument()
{
    const string html = "<html><body><h1>Document</h1><p>Content & more &entity;.</p></body></html>";

    var serializer = new HSerializer();
    var hdoc = serializer.DeserializeDocument(new StringReader(html));
    Assert.Same(Encoding.UTF8, hdoc.Encoding);

    // The document holds exactly one node: the <html> root
    Assert.Equal(1, hdoc.Nodes().Count());
    Assert.Equal("html", hdoc.Root.Name);

    // The root holds exactly one node: the <body>
    Assert.Equal(1, hdoc.Root.Nodes().Count());
    HElement bodyElement = hdoc.Root.FirstNode as HElement;
    Assert.NotNull(bodyElement);
    Assert.Equal("body", bodyElement.Name);

    // The body holds two nodes, both of them elements
    Assert.Equal(2, bodyElement.Nodes().Count());
    var children = bodyElement.Elements().ToArray();
    Assert.Equal(2, children.Length);

    // First child: <h1> wrapping a single text node
    var heading = children[0];
    Assert.Equal("h1", heading.Name);
    Assert.Equal(1, heading.Nodes().Count());
    Assert.IsType<HText>(heading.FirstNode);
    Assert.Equal("Document", ((HText)heading.FirstNode).Value);

    // Second child: <p> wrapping a single text node; the unknown entity is kept as text
    var paragraph = children[1];
    Assert.Equal("p", paragraph.Name);
    Assert.Equal(1, paragraph.Nodes().Count());
    Assert.IsType<HText>(paragraph.FirstNode);
    Assert.Equal("Content & more &entity;.", ((HText)paragraph.FirstNode).Value);

    // Same markup read from an ASCII stream, with unknown entities stripped this time
    using (var stream = new MemoryStream(Encoding.ASCII.GetBytes(html)))
    {
        serializer.RemoveUnknownOrInvalidEntities = true;
        hdoc = serializer.DeserializeDocument(new StreamReader(stream, Encoding.ASCII));
        Assert.Same(Encoding.ASCII, hdoc.Encoding);
        Assert.Equal(6, hdoc.DescendantNodes().Count());
        Assert.Equal(0, hdoc.ParseErrors.Length);

        HElement lastBodyChild = hdoc.Root.Elements("body").First().Elements().Last();
        Assert.Equal("p", lastBodyChild.Name);
        Assert.Equal(1, lastBodyChild.Nodes().Count());
        Assert.IsType<HText>(lastBodyChild.FirstNode);
        Assert.Equal("Content & more .", ((HText)lastBodyChild.FirstNode).Value);
    }

    // A null reader must be rejected
    Assert.Throws<ArgumentNullException>(() => serializer.DeserializeDocument(null));
}
/// <summary>
/// Deserializes markup containing an attribute without a value (<c>class=</c>) and
/// checks that the node tree is still built and that exactly one parse error with
/// the expected message is recorded.
/// </summary>
public void TestDeserializeDocument_WithErrors()
{
    const string html = "<html><body><h1>Document</h1><p class=>Content & more.</p></body></html>";

    var serializer = new HSerializer();
    var hdoc = serializer.DeserializeDocument(new StringReader(html));
    Assert.Same(Encoding.UTF8, hdoc.Encoding);

    // The document holds exactly one node: the <html> root
    Assert.Equal(1, hdoc.Nodes().Count());
    Assert.Equal("html", hdoc.Root.Name);

    // The root holds exactly one node: the <body>
    Assert.Equal(1, hdoc.Root.Nodes().Count());
    HElement bodyElement = hdoc.Root.FirstNode as HElement;
    Assert.NotNull(bodyElement);
    Assert.Equal("body", bodyElement.Name);

    // The body holds two nodes, both of them elements
    Assert.Equal(2, bodyElement.Nodes().Count());
    var children = bodyElement.Elements().ToArray();
    Assert.Equal(2, children.Length);

    // First child: <h1> wrapping a single text node
    var heading = children[0];
    Assert.Equal("h1", heading.Name);
    Assert.Equal(1, heading.Nodes().Count());
    Assert.IsType<HText>(heading.FirstNode);
    Assert.Equal("Document", ((HText)heading.FirstNode).Value);

    // Second child: <p> wrapping a single text node
    var paragraph = children[1];
    Assert.Equal("p", paragraph.Name);
    Assert.Equal(1, paragraph.Nodes().Count());
    Assert.IsType<HText>(paragraph.FirstNode);
    Assert.Equal("Content & more.", ((HText)paragraph.FirstNode).Value);

    // Exactly one parse error is reported, with the expected message
    var errors = hdoc.ParseErrors;
    Assert.Equal(1, errors.Length);
    Assert.Equal("Attribute value expected.", errors[0].Message);
}
/// <summary>
/// Deserializes the embedded resource <c>HDoc.Tests.Resources.TestPage1.html</c>,
/// checks the encoding, doctype and node counts of the parsed document, then
/// serializes it again and compares the output with the adjusted page source.
/// </summary>
public void TestDeserializeTestPage1()
{
    const string resourceName = "HDoc.Tests.Resources.TestPage1.html";
    var assembly = this.GetType().Assembly;

    // Read the raw page text; it is the baseline for the round-trip comparison below.
    String pageContent;
    using (var pageStream = assembly.GetManifestResourceStream(resourceName))
    {
        // GetManifestResourceStream returns null for a missing resource; fail with a clear assertion.
        Assert.NotNull(pageStream);
        using (var reader = new StreamReader(pageStream))
        {
            pageContent = reader.ReadToEnd();
        }
    }

    // Re-open the resource so the serializer reads it through a StreamReader.
    using (var pageStream = assembly.GetManifestResourceStream(resourceName))
    {
        Assert.NotNull(pageStream);
        using (var reader = new StreamReader(pageStream))
        {
            var serializer = new HSerializer();
            var doc = serializer.DeserializeDocument(reader);

            Assert.Same(Encoding.UTF8, doc.Encoding);
            Assert.Null(doc.XmlDeclaration);
            Assert.Equal(StandardDoctype.Html5, doc.DocumentType.StandardType);

            // Top level: doctype, a text node, then the root element
            var nodes = doc.Nodes().ToArray();
            Assert.Equal(3, nodes.Length);
            Assert.IsType<HDocumentType>(nodes[0]);
            Assert.IsType<HText>(nodes[1]);
            Assert.IsType<HElement>(nodes[2]);

            nodes = doc.DescendantNodes().ToArray();
            Assert.Equal(129, nodes.Length);

            var elms = doc.Descendants().ToArray();
            Assert.Equal(46, elms.Length);

            // Adjust the raw page text before comparing it with the serialized output:
            // the unclosed <img> tag is expected to come back self-closed.
            // NOTE(review): the last three replacements have identical search and
            // replacement text, so they are currently no-ops. Presumably the
            // replacement side was meant to be an entity-encoded form that was lost;
            // confirm against the serializer's actual output.
            String expected = pageContent
                .Replace("src=\"http://placekitten.com/g/64/64\">", "src=\"http://placekitten.com/g/64/64\" />")
                .Replace("a = b < c;", "a = b < c;")
                // Inner quotes escaped: the unescaped form did not compile.
                .Replace("s = \"<html>\";", "s = \"<html>\";")
                .Replace("français", "français");

            Assert.Equal(expected, serializer.Serialize(doc));
        }
    }
}