// Cleans an anchor with a relative href using Whitelist.Basic() and no base URI,
// and expects the href to be gone while rel="nofollow" is present.
public virtual void DropsUnresolvableRelativeLinks() {
    String expected = "<a rel=\"nofollow\">Link</a>";
    String input = "<a href='/foo'>Link</a>";

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(input, Whitelist.Basic());

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans markup containing a non-whitelisted <custom> element with Whitelist.Relaxed()
// and expects only the surrounding paragraph and the text to remain.
public virtual void TestDropsUnknownTags() {
    String expected = "<p>Test</p>";
    String dirty = "<p><custom foo=true>Test</custom></p>";

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans an image whose SRC uses the javascript: scheme with Whitelist.Relaxed()
// and expects a bare <img> without the src attribute.
public virtual void TestDropImageScript() {
    String expected = "<img>";
    String dirty = "<IMG SRC=\"javascript:alert('XSS')\">";

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans an anchor whose HREF uses the javascript: scheme with Whitelist.Relaxed()
// and expects the anchor to survive without its href.
public virtual void TestCleanJavascriptHref() {
    String expected = "<a>XSS</a>";
    String dirty = "<A HREF=\"javascript:document.location='http://www.google.com/'\">XSS</A>";

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans input that starts with a "<?import ...>" construct using Whitelist.Relaxed()
// and expects only the paragraph to remain.
public virtual void TestDropXmlProc() {
    String expected = "<p>Hello</p>";
    String dirty = "<?import namespace=\"xss\"><p>Hello</p>";

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans input consisting solely of SCRIPT elements with Whitelist.Relaxed()
// and expects an empty result.
public virtual void TestDropScript() {
    String expected = "";
    String dirty = "<SCRIPT SRC=//ha.ckers.org/.j><SCRIPT>alert(/XSS/.source)</SCRIPT>";

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans a paragraph containing an HTML comment with Whitelist.Relaxed()
// and expects the comment to be absent from the output.
public virtual void TestDropComments() {
    String expected = "<p>Hello</p>";
    String dirty = "<p>Hello<!-- no --></p>";

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Relaxed());

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans nested div/p/a/b markup with Whitelist.SimpleText() and expects only the
// text and an attribute-less <b> to remain (compared after stripping newlines).
public virtual void SimpleBehaviourTest() {
    String expected = "Hello <b>there</b>!";
    String dirty = "<div><p class=foo><a href='http://evil.com'>Hello <b id=bar>there</b>!</a></div>";

    String cleaned = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.SimpleText());
    String actual = TextUtil.StripNewlines(cleaned);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans input that only uses <b> with Whitelist.SimpleText() and expects it to
// come back unchanged (compared after stripping newlines).
public virtual void SimpleBehaviourTest2() {
    String expected = "Hello <b>there</b>!";
    String input = "Hello <b>there</b>!";

    String cleaned = iText.StyledXmlParser.Jsoup.Jsoup.Clean(input, Whitelist.SimpleText());
    String actual = TextUtil.StripNewlines(cleaned);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans an <img> carrying an empty alt, a value-less src and an unknown attribute
// with Whitelist.BasicWithImages(); expects only the empty alt to be kept.
public virtual void TestHandlesEmptyAttributes() {
    String expected = "<img alt=\"\">";
    String dirty = "<img alt=\"\" src= unknown=''>";

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.BasicWithImages());

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans a mix of a javascript: link, an upper-case HTTP link and a blockquote with
// Whitelist.Basic(); expects the div removed, the javascript href dropped, the HTTP
// link normalised to lower case, and rel="nofollow" on both anchors.
public virtual void BasicBehaviourTest() {
    String expected = "<p><a rel=\"nofollow\">Dodgy</a> <a href=\"http://nice.com/\" rel=\"nofollow\">Nice</a></p><blockquote>Hello</blockquote>";
    String dirty = "<div><p><a href='javascript:sendAllMoney()'>Dodgy</a> <A HREF='HTTP://nice.com/'>Nice</a></p><blockquote>Hello</blockquote>";

    String cleaned = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.Basic());
    String actual = TextUtil.StripNewlines(cleaned);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans a heading and a table with an unclosed cell using Whitelist.Relaxed();
// expects the normalised table (tbody inserted, cells closed) to be preserved.
public virtual void TestRelaxed() {
    String expected = "<h1>Head</h1><table><tbody><tr><td>One</td><td>Two</td></tr></tbody></table>";
    String input = "<h1>Head</h1><table><tr><td>One<td>Two</td></tr></table>";

    String cleaned = iText.StyledXmlParser.Jsoup.Jsoup.Clean(input, Whitelist.Relaxed());
    String actual = TextUtil.StripNewlines(cleaned);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Removes the "a" tag from Whitelist.Basic() and expects anchors to be stripped
// from the cleaned output while their text is kept.
public virtual void TestRemoveTags() {
    String expected = "<p>Nice</p><blockquote>Hello</blockquote>";
    String dirty = "<div><p><A HREF='HTTP://nice.com'>Nice</a></p><blockquote>Hello</blockquote>";
    Whitelist basicWithoutAnchors = Whitelist.Basic().RemoveTags("a");

    String cleaned = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, basicWithoutAnchors);
    String actual = TextUtil.StripNewlines(cleaned);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Adds "script" to Whitelist.Relaxed() and expects markup containing a script
// element to be reported as valid against that whitelist.
public virtual void TestScriptTagInWhiteList() {
    Whitelist relaxedWithScript = Whitelist.Relaxed();
    relaxedWithScript.AddTags("script");
    String html = "Hello<script>alert('Doh')</script>World !";

    bool valid = iText.StyledXmlParser.Jsoup.Jsoup.IsValid(html, relaxedWithScript);

    NUnit.Framework.Assert.IsTrue(valid);
}
// Cleans two images with Whitelist.BasicWithImages(): the http source and alt text
// are expected to be kept, while the ftp source is expected to be removed.
public virtual void BasicWithImagesTest() {
    String expected = "<p><img src=\"http://example.com/\" alt=\"Image\"></p><p><img></p>";
    String dirty = "<div><p><img src='http://example.com/' alt=Image></p><p><img src='ftp://ftp.example.com'></p></div>";

    String cleaned = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, Whitelist.BasicWithImages());
    String actual = TextUtil.StripNewlines(cleaned);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Removes the "cite" attribute of "blockquote" from Whitelist.Basic() and expects
// the cleaned blockquote to carry no cite attribute.
public virtual void TestRemoveAttributes() {
    String expected = "<p>Nice</p><blockquote>Hello</blockquote>";
    String dirty = "<div><p>Nice</p><blockquote cite='http://example.com/quotations'>Hello</blockquote>";
    Whitelist basicWithoutCite = Whitelist.Basic().RemoveAttributes("blockquote", "cite");

    String cleaned = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, basicWithoutCite);
    String actual = TextUtil.StripNewlines(cleaned);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Removes the "ftp" and "mailto" protocols for a[href] from Whitelist.Basic() and
// expects a mailto: link to lose its href while keeping rel="nofollow".
public virtual void TestRemoveProtocols() {
    String expected = "<p>Contact me <a rel=\"nofollow\">here</a></p>";
    String dirty = "<p>Contact me <a href='mailto:[email protected]'>here</a></p>";
    Whitelist basicWithoutMail = Whitelist.Basic().RemoveProtocols("a", "href", "ftp", "mailto");

    String cleaned = iText.StyledXmlParser.Jsoup.Jsoup.Clean(dirty, basicWithoutMail);
    String actual = TextUtil.StripNewlines(cleaned);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Builds a whitelist that only registers an attribute for "p" and expects the
// paragraph itself to be kept, with the unlisted src attribute removed.
public virtual void AddsTagOnAttributesIfNotSet() {
    String expected = "<p class=\"foo\">One</p>";
    String input = "<p class='foo' src='bar'>One</p>";
    // The whitelist never adds the p tag explicitly; it is inferred from AddAttributes.
    Whitelist attributesOnly = new Whitelist().AddAttributes("p", "class");

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(input, attributesOnly);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Registers "class" under the ":all" pseudo tag and expects the class attribute to
// be kept on both whitelisted tags (p and a), while the unlisted src is removed.
public virtual void HandlesAllPseudoTag() {
    String expected = "<p class=\"foo\"><a class=\"qux\">link</a></p>";
    String input = "<p class='foo' src='bar'><a class='qux'>link</a></p>";
    Whitelist custom = new Whitelist()
        .AddAttributes(":all", "class")
        .AddAttributes("p", "style")
        .AddTags("p", "a");

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(input, custom);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans images that use the cid: and data: schemes twice: first with the stock
// Whitelist.BasicWithImages() (sources expected to be dropped), then with those
// protocols added for img[src] (sources expected to be preserved).
public virtual void HandlesCustomProtocols() {
    String input = "<img src='cid:12345' /> <img src='data:gzzt' />";

    String withoutCustomProtocols = iText.StyledXmlParser.Jsoup.Jsoup.Clean(input, Whitelist.BasicWithImages());
    NUnit.Framework.Assert.AreEqual("<img> \n<img>", withoutCustomProtocols);

    Whitelist imagesWithCidAndData = Whitelist.BasicWithImages().AddProtocols("img", "src", "cid", "data");
    String withCustomProtocols = iText.StyledXmlParser.Jsoup.Jsoup.Clean(input, imagesWithCidAndData);
    NUnit.Framework.Assert.AreEqual("<img src=\"cid:12345\"> \n<img src=\"data:gzzt\">", withCustomProtocols);
}
// Cleans a frameset document with Whitelist.Basic(), both through the string API
// and through a Cleaner on a parsed Document; expects an empty string and a
// non-null cleaned document whose body has no child nodes.
public virtual void HandlesFramesets() {
    String framesetHtml = "<html><head><script></script><noscript></noscript></head><frameset><frame src=\"foo\" /><frame src=\"foo\" /></frameset></html>";

    // Nothing good can come out of that markup.
    String cleanedText = iText.StyledXmlParser.Jsoup.Jsoup.Clean(framesetHtml, Whitelist.Basic());
    NUnit.Framework.Assert.AreEqual("", cleanedText);

    Document parsed = iText.StyledXmlParser.Jsoup.Jsoup.Parse(framesetHtml);
    Cleaner cleaner = new Cleaner(Whitelist.Basic());
    Document cleanedDoc = cleaner.Clean(parsed);
    NUnit.Framework.Assert.IsFalse(cleanedDoc == null);
    NUnit.Framework.Assert.AreEqual(0, cleanedDoc.Body().ChildNodeSize());
}
// Checks how "#fragment" hrefs are treated: removed under the stock
// Whitelist.Relaxed(), kept once "#" is added as a protocol for a[href], and
// removed in both cases when the fragment contains spaces.
public virtual void TestCleanAnchorProtocol() {
    String anchorOk = "<a href=\"#valid\">Valid anchor</a>";
    String anchorWithSpaces = "<a href=\"#anchor with spaces\">Invalid anchor</a>";
    String result;

    // A Whitelist that does not allow anchors will strip them out.
    result = iText.StyledXmlParser.Jsoup.Jsoup.Clean(anchorOk, Whitelist.Relaxed());
    NUnit.Framework.Assert.AreEqual("<a>Valid anchor</a>", result);
    result = iText.StyledXmlParser.Jsoup.Jsoup.Clean(anchorWithSpaces, Whitelist.Relaxed());
    NUnit.Framework.Assert.AreEqual("<a>Invalid anchor</a>", result);

    // A Whitelist that allows them will keep them.
    Whitelist anchorsAllowed = Whitelist.Relaxed().AddProtocols("a", "href", "#");
    result = iText.StyledXmlParser.Jsoup.Jsoup.Clean(anchorOk, anchorsAllowed);
    NUnit.Framework.Assert.AreEqual(anchorOk, result);

    // An invalid anchor is never valid.
    result = iText.StyledXmlParser.Jsoup.Jsoup.Clean(anchorWithSpaces, anchorsAllowed);
    NUnit.Framework.Assert.AreEqual("<a>Invalid anchor</a>", result);
}
// Cleans relative href/src values with a base URI supplied and expects them to be
// emitted as absolute URLs under Whitelist.BasicWithImages().
public virtual void ResolvesRelativeLinks() {
    String expected = "<a href=\"http://example.com/foo\" rel=\"nofollow\">Link</a>\n<img src=\"http://example.com/bar\">";
    String input = "<a href='/foo'>Link</a><img src='/bar'>";
    String baseUri = "http://example.com/";

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(input, baseUri, Whitelist.BasicWithImages());

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
// Cleans plain Cyrillic text with Whitelist.None() and expects it to come back
// unchanged.
public virtual void CleansInternationalText() {
    String text = "привет";

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(text, Whitelist.None());

    NUnit.Framework.Assert.AreEqual("привет", actual);
}
// Verifies that caller-supplied OutputSettings override the default document
// output settings used by Clean: pretty-printing is disabled and the escape
// mode / charset are taken from the supplied settings.
public virtual void SupplyOutputSettings() {
    // test that one can override the default document output settings
    OutputSettings os = new OutputSettings();
    os.PrettyPrint(false);
    os.EscapeMode(Entities.EscapeMode.extended);
    os.Charset("ascii");

    String html = "<div><p>ℬ</p></div>";
    String customOut = iText.StyledXmlParser.Jsoup.Jsoup.Clean(html, "http://foo.com/", Whitelist.Relaxed(), os);
    String defaultOut = iText.StyledXmlParser.Jsoup.Jsoup.Clean(html, "http://foo.com/", Whitelist.Relaxed());

    // Compare by value: AreNotSame only checks reference identity, which holds for
    // any two separately produced strings and therefore proves nothing here.
    NUnit.Framework.Assert.AreNotEqual(defaultOut, customOut);
    NUnit.Framework.Assert.AreEqual("<div><p>ℬ</p></div>", customOut);
    NUnit.Framework.Assert.AreEqual("<div>\n" + " <p>ℬ</p>\n" + "</div>", defaultOut);

    // Reuse the same settings object with a different charset spelling and escape mode.
    os.Charset("ASCII");
    os.EscapeMode(Entities.EscapeMode.@base);
    String customOut2 = iText.StyledXmlParser.Jsoup.Jsoup.Clean(html, "http://foo.com/", Whitelist.Relaxed(), os);
    NUnit.Framework.Assert.AreEqual("<div><p>ℬ</p></div>", customOut2);
}
// Checks IsValid against Whitelist.Basic(): one acceptable fragment must be valid,
// while fragments with a script tag, a non-whitelisted attribute, or a comment
// must be reported as invalid.
public virtual void TestIsValid() {
    String acceptable = "<p>Test <b><a href='http://example.com/'>OK</a></b></p>";
    String withScript = "<p><script></script>Not <b>OK</b></p>";
    String withAlignAttribute = "<p align=right>Test Not <b>OK</b></p>";
    // comments and the like will be cleaned
    String withComment = "<!-- comment --><p>Not OK</p>";

    bool acceptableIsValid = iText.StyledXmlParser.Jsoup.Jsoup.IsValid(acceptable, Whitelist.Basic());
    NUnit.Framework.Assert.IsTrue(acceptableIsValid);

    bool scriptIsValid = iText.StyledXmlParser.Jsoup.Jsoup.IsValid(withScript, Whitelist.Basic());
    NUnit.Framework.Assert.IsFalse(scriptIsValid);

    bool alignIsValid = iText.StyledXmlParser.Jsoup.Jsoup.IsValid(withAlignAttribute, Whitelist.Basic());
    NUnit.Framework.Assert.IsFalse(alignIsValid);

    bool commentIsValid = iText.StyledXmlParser.Jsoup.Jsoup.IsValid(withComment, Whitelist.Basic());
    NUnit.Framework.Assert.IsFalse(commentIsValid);
}
// Enables PreserveRelativeLinks on Whitelist.BasicWithImages() and expects relative
// href/src values to be emitted unchanged, while a javascript: src is still removed.
public virtual void PreservesRelativeLinksIfConfigured() {
    String expected = "<a href=\"/foo\" rel=\"nofollow\">Link</a>\n<img src=\"/bar\"> \n<img>";
    String input = "<a href='/foo'>Link</a><img src='/bar'> <img src='javascript:alert()'>";
    Whitelist keepRelative = Whitelist.BasicWithImages().PreserveRelativeLinks(true);

    String actual = iText.StyledXmlParser.Jsoup.Jsoup.Clean(input, "http://example.com/", keepRelative);

    NUnit.Framework.Assert.AreEqual(expected, actual);
}
/// <summary>
/// Constructs a cleaner that sanitizes documents according to the given whitelist.
/// </summary>
/// <remarks>
/// The argument is passed through <c>Validate.NotNull</c> before being stored;
/// presumably that call rejects a null whitelist (confirm against Validate).
/// </remarks>
/// <param name="whitelist">white-list to clean with</param>
public Cleaner(Whitelist whitelist) {
    Validate.NotNull(whitelist);
    this.whitelist = whitelist;
}