コード例 #1
0
ファイル: Cleaner.cs プロジェクト: milenstack/NSoup
        /// <summary>
        /// Walks the direct children of <paramref name="source"/> and appends a copy of every
        /// whitelisted element (recursively) and every text node to <paramref name="dest"/>.
        /// Children of a non-whitelisted element are still visited and are copied into the
        /// current destination instead.
        /// </summary>
        /// <param name="source">element whose child nodes are examined</param>
        /// <param name="dest">element that receives the copied nodes</param>
        /// <returns>
        /// running total of rejected elements plus the attributes reported as discarded
        /// while creating the safe copies
        /// </returns>
        private int CopySafeNodes(Element source, Element dest)
        {
            int          discarded = 0;
            IList <Node> children  = source.ChildNodes;

            foreach (Node child in children)
            {
                Element childElement = child as Element;
                if (childElement != null)
                {
                    if (!_whitelist.IsSafeTag(childElement.TagName()))
                    {
                        // The tag itself is rejected, but its descendants may still be
                        // acceptable, so they are copied straight into the current destination.
                        discarded += 1 + CopySafeNodes(childElement, dest);
                        continue;
                    }

                    // Whitelisted tag: build the sanitized copy, attach it, then descend into it.
                    ElementMeta safeMeta = CreateSafeElement(childElement);
                    Element     safeCopy = safeMeta.Element;
                    dest.AppendChild(safeCopy);

                    discarded += safeMeta.NumAttributesDiscarded;
                    discarded += CopySafeNodes(childElement, safeCopy);
                    continue;
                }

                TextNode childText = child as TextNode;
                if (childText != null)
                {
                    dest.AppendChild(new TextNode(childText.GetWholeText(), child.BaseUri));
                }
                // Any other node kind (comments, XML processing instructions, ...) is skipped.
            }

            return discarded;
        }
コード例 #2
0
ファイル: TextNodeTest.cs プロジェクト: yanyuanfen2015/NSoup
        public void testSplitText()
        {
            // Split the single text node of the div at offset 6 ("Hello " | "there").
            Document document  = NSoup.NSoupClient.Parse("<div>Hello there</div>");
            Element  container = document.Select("div").First;
            TextNode leading   = (TextNode)container.ChildNodes[0];
            TextNode trailing  = leading.SplitText(6);

            // Both halves hold their own part of the original text.
            Assert.AreEqual("Hello ", leading.GetWholeText());
            Assert.AreEqual("there", trailing.GetWholeText());

            // Editing the trailing half is visible through the parent's combined text.
            trailing.Text("there!");
            Assert.AreEqual("Hello there!", container.Text());

            // The two halves must hang off the same parent.
            bool sameParent = leading.ParentNode == trailing.ParentNode;
            Assert.IsTrue(sameParent);
        }
コード例 #3
0
ファイル: HtmlParserTest.cs プロジェクト: jonnybot0/NSoup
        public void parsesUnterminatedComments()
        {
            // The comment is never closed; everything after "<!--" is expected as comment data.
            Document parsed    = NSoupClient.Parse("<p>Hello<!-- <tr><td>");
            Element  paragraph = parsed.GetElementsByTag("p")[0];

            Assert.AreEqual("Hello", paragraph.Text());

            // First child node: the text that precedes the comment.
            TextNode leadingText = (TextNode)paragraph.ChildNodes[0];
            Assert.AreEqual("Hello", leadingText.GetWholeText());

            // Second child node: the unterminated comment itself.
            Comment openComment = (Comment)paragraph.ChildNodes[1];
            Assert.AreEqual(" <tr><td>", openComment.GetData());
        }
コード例 #4
0
ファイル: HtmlParserTest.cs プロジェクト: jonnybot0/NSoup
        public void parsesComments()
        {
            Document parsed = NSoupClient.Parse(
                "<html><head></head><body><img src=foo><!-- <table><tr><td></table> --><p>Hello</p></body></html>");
            Element bodyElement = parsed.Body;

            // img is an empty tag, so the comment must be its sibling (second body node),
            // not its child.
            Comment markupComment = (Comment)bodyElement.ChildNodes[1];
            Assert.AreEqual(" <table><tr><td></table> ", markupComment.GetData());

            // The element at child index 1 should carry the "Hello" text.
            Element  paragraph     = bodyElement.Child(1);
            TextNode paragraphText = (TextNode)paragraph.ChildNodes[0];
            Assert.AreEqual("Hello", paragraphText.GetWholeText());
        }
コード例 #5
0
ファイル: Cleaner.cs プロジェクト: ReaZhuang/itext7-dotnet
 /// <summary>
 /// Processes one node of the source tree: whitelisted elements are copied (via
 /// <c>CreateSafeElement</c>) under the current destination, which then becomes the new
 /// destination; text nodes are always copied; data nodes are copied only when their
 /// parent's node name is whitelisted. Everything else increments <c>numDiscarded</c>,
 /// except a rejected element that is the root itself.
 /// </summary>
 /// <param name="source">node being processed</param>
 /// <param name="depth">not used by this method</param>
 public void Head(iText.StyledXmlParser.Jsoup.Nodes.Node source, int depth)
 {
     iText.StyledXmlParser.Jsoup.Nodes.Element elementNode = source as iText.StyledXmlParser.Jsoup.Nodes.Element;
     if (elementNode != null)
     {
         if (!this._enclosing.whitelist.IsSafeTag(elementNode.TagName()))
         {
             // Rejected tag: nothing is added. The root is not counted as discarded.
             if (source != this.root)
             {
                 this.numDiscarded++;
             }
             return;
         }

         // Whitelisted tag: attach the sanitized copy and make it the new destination.
         Cleaner.ElementMeta safeMeta = this._enclosing.CreateSafeElement(elementNode);
         iText.StyledXmlParser.Jsoup.Nodes.Element safeCopy = safeMeta.el;
         this.destination.AppendChild(safeCopy);
         this.numDiscarded += safeMeta.numAttribsDiscarded;
         this.destination   = safeCopy;
         return;
     }

     TextNode textNode = source as TextNode;
     if (textNode != null)
     {
         this.destination.AppendChild(new TextNode(textNode.GetWholeText(), source.BaseUri()));
         return;
     }

     DataNode dataNode = source as DataNode;
     if (dataNode != null && this._enclosing.whitelist.IsSafeTag(source.Parent().NodeName()))
     {
         this.destination.AppendChild(new DataNode(dataNode.GetWholeData(), source.BaseUri()));
         return;
     }

     // Comments, XML processing instructions, data under a rejected parent, etc.
     this.numDiscarded++;
 }
コード例 #6
0
 /// <summary>
 /// Returns whatever <c>GetWholeText()</c> yields for the wrapped <c>textNode</c>.
 /// </summary>
 /// <remarks>See com.itextpdf.styledxmlparser.html.node.ITextNode#wholeText().</remarks>
 /// <returns>the value produced by the wrapped node's <c>GetWholeText()</c></returns>
 public virtual String WholeText()
 {
     String wholeText = textNode.GetWholeText();
     return wholeText;
 }