Exemple #1
0
        /// <summary>
        /// Cleans an anchor with a relative <c>href</c> using the basic whitelist and no base URI,
        /// and expects the <c>href</c> to be absent from the output.
        /// </summary>
        public virtual void DropsUnresolvableRelativeLinks()
        {
            const string expected = "<a rel=\"nofollow\">Link</a>";
            string relativeLinkMarkup = "<a href='/foo'>Link</a>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(relativeLinkMarkup, Whitelist.Basic());

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #2
0
        /// <summary>
        /// Cleans markup containing a <c>custom</c> element with the relaxed whitelist and expects
        /// only the element's text to remain inside the paragraph.
        /// </summary>
        public virtual void TestDropsUnknownTags()
        {
            const string expected = "<p>Test</p>";
            string dirty = "<p><custom foo=true>Test</custom></p>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #3
0
        /// <summary>
        /// Cleans an image whose <c>SRC</c> is a <c>javascript:</c> URL with the relaxed whitelist
        /// and expects a bare <c>img</c> tag in the output.
        /// </summary>
        public virtual void TestDropImageScript()
        {
            const string expected = "<img>";
            string dirty = "<IMG SRC=\"javascript:alert('XSS')\">";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #4
0
        /// <summary>
        /// Cleans an anchor whose <c>HREF</c> is a <c>javascript:</c> URL with the relaxed whitelist
        /// and expects the anchor to come back without any attributes.
        /// </summary>
        public virtual void TestCleanJavascriptHref()
        {
            const string expected = "<a>XSS</a>";
            string dirty = "<A HREF=\"javascript:document.location='http://www.google.com/'\">XSS</A>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #5
0
        /// <summary>
        /// Cleans markup that starts with a <c>&lt;?import ...&gt;</c> construct using the relaxed
        /// whitelist and expects only the paragraph to remain.
        /// </summary>
        public virtual void TestDropXmlProc()
        {
            const string expected = "<p>Hello</p>";
            string dirty = "<?import namespace=\"xss\"><p>Hello</p>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #6
0
        /// <summary>
        /// Cleans nested <c>SCRIPT</c> elements with the relaxed whitelist and expects an empty result.
        /// </summary>
        public virtual void TestDropScript()
        {
            const string expected = "";
            string dirty = "<SCRIPT SRC=//ha.ckers.org/.j><SCRIPT>alert(/XSS/.source)</SCRIPT>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #7
0
        /// <summary>
        /// Cleans a paragraph containing an HTML comment with the relaxed whitelist and expects
        /// the comment to be missing from the output.
        /// </summary>
        public virtual void TestDropComments()
        {
            const string expected = "<p>Hello</p>";
            string dirty = "<p>Hello<!-- no --></p>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #8
0
        /// <summary>
        /// Cleans nested block, link and bold markup with the simple-text whitelist and expects
        /// only the text and an attribute-free <c>b</c> element to survive (newlines ignored).
        /// </summary>
        public virtual void SimpleBehaviourTest()
        {
            const string expected = "Hello <b>there</b>!";
            string dirty = "<div><p class=foo><a href='http://evil.com'>Hello <b id=bar>there</b>!</a></div>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.SimpleText());
            string flattened = TextUtil.StripNewlines(sanitized);

            NUnit.Framework.Assert.AreEqual(expected, flattened);
        }
Exemple #9
0
        /// <summary>
        /// Cleans text that only uses a <c>b</c> element with the simple-text whitelist and expects
        /// the same markup back (newlines ignored).
        /// </summary>
        public virtual void SimpleBehaviourTest2()
        {
            const string expected = "Hello <b>there</b>!";
            string input = "Hello <b>there</b>!";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(input, Whitelist.SimpleText());
            string flattened = TextUtil.StripNewlines(sanitized);

            NUnit.Framework.Assert.AreEqual(expected, flattened);
        }
Exemple #10
0
        /// <summary>
        /// Cleans an image with an empty <c>alt</c>, a value-less <c>src</c> and an unknown attribute
        /// using the basic-with-images whitelist; only the empty <c>alt</c> is expected to remain.
        /// </summary>
        public virtual void TestHandlesEmptyAttributes()
        {
            const string expected = "<img alt=\"\">";
            string dirty = "<img alt=\"\" src= unknown=''>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.BasicWithImages());

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #11
0
        /// <summary>
        /// Cleans a fragment with a <c>javascript:</c> link and an upper-case HTTP link using the
        /// basic whitelist and compares the newline-stripped result with the expected markup.
        /// </summary>
        public virtual void BasicBehaviourTest()
        {
            const string expected =
                "<p><a rel=\"nofollow\">Dodgy</a> <a href=\"http://nice.com/\" rel=\"nofollow\">Nice</a></p><blockquote>Hello</blockquote>";
            string dirty =
                "<div><p><a href='javascript:sendAllMoney()'>Dodgy</a> <A HREF='HTTP://nice.com/'>Nice</a></p><blockquote>Hello</blockquote>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Basic());
            string flattened = TextUtil.StripNewlines(sanitized);

            NUnit.Framework.Assert.AreEqual(expected, flattened);
        }
Exemple #12
0
        /// <summary>
        /// Cleans a heading followed by a loosely written table with the relaxed whitelist and
        /// expects normalised table markup (with <c>tbody</c> and closed cells), newlines ignored.
        /// </summary>
        public virtual void TestRelaxed()
        {
            const string expected =
                "<h1>Head</h1><table><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>";
            string dirty = "<h1>Head</h1><table><tr><td>One<td>Two</td></tr></table>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());
            string flattened = TextUtil.StripNewlines(sanitized);

            NUnit.Framework.Assert.AreEqual(expected, flattened);
        }
Exemple #13
0
        /// <summary>
        /// Removes the <c>a</c> tag from the basic whitelist and expects anchors to be reduced to
        /// their text when cleaning (newlines ignored).
        /// </summary>
        public virtual void TestRemoveTags()
        {
            const string expected = "<p>Nice</p><blockquote>Hello</blockquote>";
            string dirty = "<div><p><A HREF='HTTP://nice.com'>Nice</a></p><blockquote>Hello</blockquote>";
            Whitelist withoutAnchors = Whitelist.Basic().RemoveTags("a");

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, withoutAnchors);
            string flattened = TextUtil.StripNewlines(sanitized);

            NUnit.Framework.Assert.AreEqual(expected, flattened);
        }
Exemple #14
0
        /// <summary>
        /// Adds <c>script</c> to the relaxed whitelist and expects markup containing a script
        /// element to be reported as valid.
        /// </summary>
        public virtual void TestScriptTagInWhiteList()
        {
            Whitelist relaxedWithScript = Whitelist.Relaxed();
            relaxedWithScript.AddTags("script");
            string markup = "Hello<script>alert('Doh')</script>World !";

            bool accepted = iText.StyledXmlParser.Jsoup.Jsoup.IsValid(markup, relaxedWithScript);

            NUnit.Framework.Assert.IsTrue(accepted);
        }
Exemple #15
0
        /// <summary>
        /// Cleans two images (one HTTP source, one FTP source) with the basic-with-images whitelist;
        /// the expected output keeps the HTTP <c>src</c> and <c>alt</c> and leaves the other image bare.
        /// </summary>
        public virtual void BasicWithImagesTest()
        {
            const string expected = "<p><img src=\"http://example.com/\" alt=\"Image\"></p><p><img></p>";
            string dirty =
                "<div><p><img src='http://example.com/' alt=Image></p><p><img src='ftp://ftp.example.com'></p></div>";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.BasicWithImages());
            string flattened = TextUtil.StripNewlines(sanitized);

            NUnit.Framework.Assert.AreEqual(expected, flattened);
        }
Exemple #16
0
        /// <summary>
        /// Removes the <c>cite</c> attribute of <c>blockquote</c> from the basic whitelist and expects
        /// the attribute to be absent from the cleaned output (newlines ignored).
        /// </summary>
        public virtual void TestRemoveAttributes()
        {
            const string expected = "<p>Nice</p><blockquote>Hello</blockquote>";
            string dirty = "<div><p>Nice</p><blockquote cite='http://example.com/quotations'>Hello</blockquote>";
            Whitelist withoutCite = Whitelist.Basic().RemoveAttributes("blockquote", "cite");

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, withoutCite);
            string flattened = TextUtil.StripNewlines(sanitized);

            NUnit.Framework.Assert.AreEqual(expected, flattened);
        }
Exemple #17
0
        /// <summary>
        /// Removes the <c>ftp</c> and <c>mailto</c> protocols for <c>a[href]</c> from the basic whitelist
        /// and expects a <c>mailto:</c> link to lose its <c>href</c> (newlines ignored).
        /// </summary>
        public virtual void TestRemoveProtocols()
        {
            const string expected = "<p>Contact me <a rel=\"nofollow\">here</a></p>";
            string dirty = "<p>Contact me <a href='mailto:[email protected]'>here</a></p>";
            Whitelist withoutMailAndFtp = Whitelist.Basic().RemoveProtocols("a", "href", "ftp", "mailto");

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, withoutMailAndFtp);
            string flattened = TextUtil.StripNewlines(sanitized);

            NUnit.Framework.Assert.AreEqual(expected, flattened);
        }
Exemple #18
0
        /// <summary>
        /// Builds a whitelist that only registers the <c>class</c> attribute for <c>p</c> (no explicit
        /// tag registration) and expects the paragraph and its <c>class</c> to be kept.
        /// </summary>
        public virtual void AddsTagOnAttributesIfNotSet()
        {
            const string expected = "<p class=\"foo\">One</p>";
            string dirty = "<p class='foo' src='bar'>One</p>";

            // No AddTags("p") call here: the tag is expected to be inferred from AddAttributes.
            Whitelist attributesOnly = new Whitelist().AddAttributes("p", "class");

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, attributesOnly);

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #19
0
        /// <summary>
        /// Registers <c>class</c> under the <c>:all</c> pseudo tag and expects it to be kept on both
        /// the <c>p</c> and <c>a</c> elements while the unlisted <c>src</c> is removed.
        /// </summary>
        public virtual void HandlesAllPseudoTag()
        {
            const string expected = "<p class=\"foo\"><a class=\"qux\">link</a></p>";
            string dirty = "<p class='foo' src='bar'><a class='qux'>link</a></p>";
            Whitelist classEverywhere = new Whitelist()
                .AddAttributes(":all", "class")
                .AddAttributes("p", "style")
                .AddTags("p", "a");

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, classEverywhere);

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #20
0
        /// <summary>
        /// Cleans images with <c>cid:</c> and <c>data:</c> sources twice: first with the stock
        /// basic-with-images whitelist (sources expected to be removed), then with those protocols
        /// added for <c>img[src]</c> (sources expected to be kept).
        /// </summary>
        public virtual void HandlesCustomProtocols()
        {
            string dirty = "<img src='cid:12345' /> <img src='data:gzzt' />";

            // Stock whitelist: neither protocol is accepted.
            string withoutCustomProtocols = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.BasicWithImages());
            NUnit.Framework.Assert.AreEqual("<img> \n<img>", withoutCustomProtocols);

            // Whitelist extended with both protocols.
            Whitelist extended = Whitelist.BasicWithImages().AddProtocols("img", "src", "cid", "data");
            string withCustomProtocols = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, extended);
            NUnit.Framework.Assert.AreEqual("<img src=\"cid:12345\"> \n<img src=\"data:gzzt\">", withCustomProtocols);
        }
Exemple #21
0
        /// <summary>
        /// Cleans a frameset document with the basic whitelist, both through the string-based
        /// <c>Jsoup.Clean</c> and through <c>Cleaner.Clean</c> on a parsed document.
        /// </summary>
        /// <remarks>
        /// The string result is expected to be empty, and the cleaned document is expected to be
        /// non-null with a body that has no child nodes.
        /// </remarks>
        public virtual void HandlesFramesets()
        {
            String dirty = "<html><head><script></script><noscript></noscript></head><frameset><frame src=\"foo\" /><frame src=\"foo\" /></frameset></html>";
            String clean = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Basic());

            // nothing good can come out of that
            NUnit.Framework.Assert.AreEqual("", clean);

            Document dirtyDoc = iText.StyledXmlParser.Jsoup.Jsoup.Parse(dirty);
            Document cleanDoc = new Cleaner(Whitelist.Basic()).Clean(dirtyDoc);

            // IsNotNull states the intent directly and reports a clearer failure than IsFalse(x == null).
            NUnit.Framework.Assert.IsNotNull(cleanDoc);
            NUnit.Framework.Assert.AreEqual(0, cleanDoc.Body().ChildNodeSize());
        }
Exemple #22
0
        /// <summary>
        /// Cleans a fragment-only link with and without the <c>#</c> protocol registered for
        /// <c>a[href]</c>, for both a well-formed anchor and one containing spaces.
        /// </summary>
        public virtual void TestCleanAnchorProtocol()
        {
            string wellFormed = "<a href=\"#valid\">Valid anchor</a>";
            string withSpaces = "<a href=\"#anchor with spaces\">Invalid anchor</a>";

            // The stock relaxed whitelist does not allow anchors, so the href is stripped in both cases.
            string strippedValid = iText.StyledXmlParser.Jsoup.Jsoup.Clean(wellFormed, Whitelist.Relaxed());
            NUnit.Framework.Assert.AreEqual("<a>Valid anchor</a>", strippedValid);

            string strippedInvalid = iText.StyledXmlParser.Jsoup.Jsoup.Clean(withSpaces, Whitelist.Relaxed());
            NUnit.Framework.Assert.AreEqual("<a>Invalid anchor</a>", strippedInvalid);

            // Once "#" is registered as a protocol, the well-formed anchor is kept unchanged.
            Whitelist anchorsAllowed = Whitelist.Relaxed().AddProtocols("a", "href", "#");

            string keptValid = iText.StyledXmlParser.Jsoup.Jsoup.Clean(wellFormed, anchorsAllowed);
            NUnit.Framework.Assert.AreEqual(wellFormed, keptValid);

            // The anchor containing spaces is rejected even then.
            string stillStripped = iText.StyledXmlParser.Jsoup.Jsoup.Clean(withSpaces, anchorsAllowed);
            NUnit.Framework.Assert.AreEqual("<a>Invalid anchor</a>", stillStripped);
        }
Exemple #23
0
        /// <summary>
        /// Cleans relative <c>href</c> and <c>src</c> values with a base URI supplied and expects
        /// both to appear as absolute URLs in the output.
        /// </summary>
        public virtual void ResolvesRelativeLinks()
        {
            const string expected =
                "<a href=\"http://example.com/foo\" rel=\"nofollow\">Link</a>\n<img src=\"http://example.com/bar\">";
            string dirty = "<a href='/foo'>Link</a><img src='/bar'>";
            string baseUri = "http://example.com/";

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, baseUri, Whitelist.BasicWithImages());

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #24
0
 /// <summary>
 /// Cleans plain Cyrillic text with the empty whitelist and expects it to come back unchanged.
 /// </summary>
 public virtual void CleansInternationalText()
 {
     const string expected = "привет";

     string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean("привет", Whitelist.None());

     NUnit.Framework.Assert.AreEqual(expected, sanitized);
 }
Exemple #25
0
        /// <summary>
        /// Verifies that caller-supplied output settings override the default document output
        /// settings used by <c>Jsoup.Clean</c>.
        /// </summary>
        /// <remarks>
        /// The same markup is cleaned with default settings and with custom settings (pretty-print
        /// off, ASCII charset, first the extended and then the base escape mode), and each result
        /// is compared with its expected serialisation.
        /// </remarks>
        public virtual void SupplyOutputSettings()
        {
            // test that one can override the default document output settings
            OutputSettings os = new OutputSettings();

            os.PrettyPrint(false);
            os.EscapeMode(Entities.EscapeMode.extended);
            os.Charset("ascii");
            String html      = "<div><p>&bernou;</p></div>";
            String customOut = iText.StyledXmlParser.Jsoup.Jsoup.Clean(html, "http://foo.com/", Whitelist.Relaxed(), os);
            String defaultOut = iText.StyledXmlParser.Jsoup.Jsoup.Clean(html, "http://foo.com/", Whitelist.Relaxed());

            // Compare by value: AreNotSame only compares references, which differ for any two
            // separately built strings and therefore would pass even if the outputs were identical.
            NUnit.Framework.Assert.AreNotEqual(defaultOut, customOut);
            NUnit.Framework.Assert.AreEqual("<div><p>&bernou;</p></div>", customOut);
            NUnit.Framework.Assert.AreEqual("<div>\n" + " <p>ℬ</p>\n" + "</div>", defaultOut);

            // Same settings object, now with the base escape mode: the character is expected
            // to be written as a numeric reference.
            os.Charset("ASCII");
            os.EscapeMode(Entities.EscapeMode.@base);
            String customOut2 = iText.StyledXmlParser.Jsoup.Jsoup.Clean(html, "http://foo.com/", Whitelist.Relaxed(), os);

            NUnit.Framework.Assert.AreEqual("<div><p>&#x212c;</p></div>", customOut2);
        }
Exemple #26
0
        /// <summary>
        /// Checks <c>Jsoup.IsValid</c> against the basic whitelist for one accepted fragment and
        /// three rejected ones (script element, <c>align</c> attribute, leading comment).
        /// </summary>
        public virtual void TestIsValid()
        {
            string accepted = "<p>Test <b><a href='http://example.com/'>OK</a></b></p>";
            string withScript = "<p><script></script>Not <b>OK</b></p>";
            string withAlign = "<p align=right>Test Not <b>OK</b></p>";
            // comments and the like will be cleaned
            string withComment = "<!-- comment --><p>Not OK</p>";

            bool acceptedIsValid = iText.StyledXmlParser.Jsoup.Jsoup.IsValid(accepted, Whitelist.Basic());
            NUnit.Framework.Assert.IsTrue(acceptedIsValid);

            bool scriptIsValid = iText.StyledXmlParser.Jsoup.Jsoup.IsValid(withScript, Whitelist.Basic());
            NUnit.Framework.Assert.IsFalse(scriptIsValid);

            bool alignIsValid = iText.StyledXmlParser.Jsoup.Jsoup.IsValid(withAlign, Whitelist.Basic());
            NUnit.Framework.Assert.IsFalse(alignIsValid);

            bool commentIsValid = iText.StyledXmlParser.Jsoup.Jsoup.IsValid(withComment, Whitelist.Basic());
            NUnit.Framework.Assert.IsFalse(commentIsValid);
        }
Exemple #27
0
        /// <summary>
        /// Enables <c>PreserveRelativeLinks</c> on the basic-with-images whitelist and expects
        /// relative <c>href</c>/<c>src</c> values to be kept as written, while a <c>javascript:</c>
        /// image source is still removed.
        /// </summary>
        public virtual void PreservesRelativeLinksIfConfigured()
        {
            const string expected = "<a href=\"/foo\" rel=\"nofollow\">Link</a>\n<img src=\"/bar\"> \n<img>";
            string dirty = "<a href='/foo'>Link</a><img src='/bar'> <img src='javascript:alert()'>";
            Whitelist keepRelative = Whitelist.BasicWithImages().PreserveRelativeLinks(true);

            string sanitized = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, "http://example.com/", keepRelative);

            NUnit.Framework.Assert.AreEqual(expected, sanitized);
        }
Exemple #28
0
 /// <summary>Creates a new cleaner that sanitizes documents using the supplied whitelist.</summary>
 /// <remarks>
 /// The whitelist is passed to <c>Validate.NotNull</c> before it is stored
 /// (presumably rejecting a null argument; confirm against <c>Validate</c>).
 /// </remarks>
 /// <param name="whitelist">white-list to clean with</param>
 public Cleaner(Whitelist whitelist)
 {
     // Validate first so the field is only assigned once the argument has been checked.
     Validate.NotNull(whitelist);
     this.whitelist = whitelist;
 }