/// <summary>
/// Walks the direct children of <paramref name="source"/> and rebuilds the whitelisted
/// part of that subtree underneath <paramref name="dest"/>.
/// </summary>
/// <remarks>
/// An element whose tag passes the whitelist is re-created via <c>CreateSafeElement</c> and its
/// children are copied into that new element. An element whose tag is rejected is dropped, but its
/// children are still examined and any safe content is attached to <paramref name="dest"/> itself.
/// Text nodes are always copied. Every other node kind is skipped and is not counted.
/// </remarks>
/// <param name="source">element whose children are inspected</param>
/// <param name="dest">element that receives the safe copies</param>
/// <returns>
/// number of rejected elements plus the discarded-attribute counts reported for each accepted element
/// </returns>
private int CopySafeNodes(Element source, Element dest)
{
    int numDiscarded = 0;
    IList<Node> children = source.ChildNodes;

    foreach (Node child in children)
    {
        if (child is Element)
        {
            Element childElement = (Element)child;
            bool tagAllowed = _whitelist.IsSafeTag(childElement.TagName());

            if (!tagAllowed)
            {
                // The element itself is dropped (counted once), but its descendants may still
                // be safe, so they are copied straight into the current destination.
                numDiscarded += 1 + CopySafeNodes(childElement, dest);
                continue;
            }

            // Allowed tag: build a sanitized copy, attach it, then descend into it.
            ElementMeta safe = CreateSafeElement(childElement);
            Element copy = safe.Element;
            dest.AppendChild(copy);
            numDiscarded += safe.NumAttributesDiscarded + CopySafeNodes(childElement, copy);
            continue;
        }

        if (child is TextNode)
        {
            TextNode text = (TextNode)child;
            dest.AppendChild(new TextNode(text.GetWholeText(), child.BaseUri));
        }

        // Anything else (comments, XML processing instructions, ...) is intentionally ignored.
    }

    return numDiscarded;
}
/// <summary>
/// Splits the single text node of a div at offset 6 and checks that both halves hold the
/// expected text, that editing the tail is reflected in the div's text, and that both
/// halves report the same parent node.
/// </summary>
public void testSplitText()
{
    const int splitOffset = 6;

    Document parsed = NSoup.NSoupClient.Parse("<div>Hello there</div>");
    Element container = parsed.Select("div").First;
    TextNode head = (TextNode)container.ChildNodes[0];

    TextNode rest = head.SplitText(splitOffset);

    // The original node keeps the leading part; the returned node holds the remainder.
    Assert.AreEqual("Hello ", head.GetWholeText());
    Assert.AreEqual("there", rest.GetWholeText());

    // Changing the tail must show up when reading the text through the parent element.
    rest.Text("there!");
    Assert.AreEqual("Hello there!", container.Text());

    Assert.IsTrue(head.ParentNode == rest.ParentNode);
}
/// <summary>
/// Parses markup whose comment is never closed and checks that the paragraph ends up with
/// a text node followed by a comment node holding the rest of the input.
/// </summary>
public void parsesUnterminatedComments()
{
    string markup = "<p>Hello<!-- <tr><td>";

    Document parsed = NSoupClient.Parse(markup);
    Element paragraph = parsed.GetElementsByTag("p")[0];

    // Only the text before the comment counts as the paragraph's text.
    Assert.AreEqual("Hello", paragraph.Text());

    // First child: the plain text.
    TextNode leadingText = (TextNode)paragraph.ChildNodes[0];
    Assert.AreEqual("Hello", leadingText.GetWholeText());

    // Second child: the unterminated comment, with the tag-like content kept as data.
    Comment trailingComment = (Comment)paragraph.ChildNodes[1];
    Assert.AreEqual(" <tr><td>", trailingComment.GetData());
}
/// <summary>
/// Parses a document with a comment placed after an img tag and checks that the comment is
/// a child of body with its data intact, and that the following paragraph's text is read
/// correctly.
/// </summary>
public void parsesComments()
{
    string markup = "<html><head></head><body><img src=foo><!-- <table><tr><td></table> --><p>Hello</p></body></html>";

    Document parsed = NSoupClient.Parse(markup);
    Element bodyElement = parsed.Body;

    // img is an empty tag, so the comment must be its sibling (body's second child node),
    // not nested inside it.
    Comment tableComment = (Comment)bodyElement.ChildNodes[1];
    Assert.AreEqual(" <table><tr><td></table> ", tableComment.GetData());

    Element paragraph = bodyElement.Child(1);
    TextNode paragraphText = (TextNode)paragraph.ChildNodes[0];
    Assert.AreEqual("Hello", paragraphText.GetWholeText());
}
/// <summary>
/// Handles one visited node: copies it into the current destination when it is considered
/// safe, otherwise counts it as discarded.
/// </summary>
/// <remarks>
/// An element with a whitelisted tag is re-created via <c>CreateSafeElement</c>, appended to the
/// destination, and then becomes the new destination. A non-whitelisted element is counted as
/// discarded unless it is the root. Text nodes are always copied. Data nodes are copied only
/// when their parent's node name is whitelisted. Every other node is counted as discarded.
/// </remarks>
/// <param name="source">node being visited</param>
/// <param name="depth">depth of the node; not used by this method</param>
public void Head(iText.StyledXmlParser.Jsoup.Nodes.Node source, int depth)
{
    if (source is iText.StyledXmlParser.Jsoup.Nodes.Element)
    {
        iText.StyledXmlParser.Jsoup.Nodes.Element element = (iText.StyledXmlParser.Jsoup.Nodes.Element)source;

        if (!this._enclosing.whitelist.IsSafeTag(element.TagName()))
        {
            // Not a safe tag, so it is not added. The root is not counted against discarded.
            if (source != this.root)
            {
                this.numDiscarded++;
            }
            return;
        }

        // Safe tag: clone it with its safe attributes and descend into the clone.
        Cleaner.ElementMeta safe = this._enclosing.CreateSafeElement(element);
        iText.StyledXmlParser.Jsoup.Nodes.Element copy = safe.el;
        this.destination.AppendChild(copy);
        this.numDiscarded += safe.numAttribsDiscarded;
        this.destination = copy;
        return;
    }

    if (source is TextNode)
    {
        TextNode text = (TextNode)source;
        this.destination.AppendChild(new TextNode(text.GetWholeText(), source.BaseUri()));
        return;
    }

    if (source is DataNode && this._enclosing.whitelist.IsSafeTag(source.Parent().NodeName()))
    {
        DataNode data = (DataNode)source;
        this.destination.AppendChild(new DataNode(data.GetWholeData(), source.BaseUri()));
        return;
    }

    // Comments, XML processing instructions, etc. are not copied.
    this.numDiscarded++;
}
/// <summary>
/// Returns the result of calling <c>GetWholeText()</c> on <c>textNode</c>.
/// </summary>
/// <remarks>
/// The original note on this member points to
/// <c>com.itextpdf.styledxmlparser.html.node.ITextNode#wholeText()</c>.
/// </remarks>
/// <returns>the text reported by <c>textNode.GetWholeText()</c></returns>
public virtual String WholeText()
{
    String wholeText = textNode.GetWholeText();
    return wholeText;
}