/// <summary>
/// Produces safe HTML from an untrusted body fragment: the input is parsed,
/// passed through a cleaner built from the given white-list, and the body of the
/// cleaned document is rendered using the supplied output settings.
/// </summary>
/// <param name="bodyHtml">input untrusted HTML (body fragment)</param>
/// <param name="baseUri">URL to resolve relative URLs against</param>
/// <param name="whitelist">white-list of permitted HTML elements</param>
/// <param name="outputSettings">document output settings; use to control pretty-printing and entity escape modes</param>
/// <returns>safe HTML (body fragment)</returns>
/// <seealso cref="Supremes.Safety.Cleaner.Clean(Supremes.Nodes.Document)">Supremes.Safety.Cleaner.Clean(Supremes.Nodes.Document)</seealso>
public static string Clean(string bodyHtml, string baseUri, Whitelist whitelist, DocumentOutputSettings outputSettings)
{
    // Parse first, then build the cleaner, so argument problems surface in the same order as before.
    Document untrusted = ParseBodyFragment(bodyHtml, baseUri);
    Document sanitized = new Cleaner(whitelist).Clean(untrusted);

    // The output settings are applied to the cleaned document before its body is serialized.
    sanitized.OutputSettings = outputSettings;
    return sanitized.Body.Html;
}
/// <summary>
/// Checks whether the input HTML contains only tags and attributes permitted by the Whitelist.
/// </summary>
/// <remarks>
/// Handy for form validation. Even when this returns true, the input HTML should
/// still be run through the cleaner to set up enforced attributes, and to tidy the output.
/// </remarks>
/// <param name="bodyHtml">HTML to test</param>
/// <param name="whitelist">whitelist to test against</param>
/// <returns>true if no tags or attributes were removed; false otherwise</returns>
/// <seealso cref="Clean(string, Supremes.Safety.Whitelist)"></seealso>
public static bool IsValid(string bodyHtml, Whitelist whitelist)
{
    // The fragment is parsed with an empty base URI.
    Document candidate = ParseBodyFragment(bodyHtml, string.Empty);
    return new Cleaner(whitelist).IsValid(candidate);
}
/// <summary>
/// Produces safe HTML from an untrusted body fragment by parsing it and filtering it
/// through a white-list of permitted tags and attributes. An empty base URI is used.
/// </summary>
/// <param name="bodyHtml">input untrusted HTML (body fragment)</param>
/// <param name="whitelist">white-list of permitted HTML elements</param>
/// <returns>safe HTML (body fragment)</returns>
/// <seealso cref="Supremes.Safety.Cleaner.Clean(Supremes.Nodes.Document)">
/// Supremes.Safety.Cleaner.Clean(Supremes.Nodes.Document)
/// </seealso>
public static string Clean(string bodyHtml, Whitelist whitelist)
{
    // Delegate to the overload that takes a base URI, passing an empty one.
    string emptyBaseUri = string.Empty;
    return Clean(bodyHtml, emptyBaseUri, whitelist);
}
/// <summary>
/// Initializes a cleaner that sanitizes documents using the supplied whitelist.
/// </summary>
/// <param name="whitelist">white-list to clean with; checked with <c>Validate.NotNull</c></param>
public Cleaner(Whitelist whitelist)
{
    // Validate before storing the reference.
    Validate.NotNull(whitelist);

    this.whitelist = whitelist;
}
public void AddsTagOnAttributesIfNotSet()
{
    // The whitelist never adds the p tag explicitly; the tag is expected to be
    // inferred from the AddAttributes call.
    Whitelist attributesOnly = new Whitelist()
        .AddAttributes("p", "class");
    string input = "<p class='foo' src='bar'>One</p>";

    string output = Dcsoup.Clean(input, attributesOnly);

    // The p element and its class attribute survive; src is dropped.
    Assert.AreEqual("<p class=\"foo\">One</p>", output);
}
public void HandlesAllPseudoTag()
{
    // "class" is registered under the ":all" pseudo tag, "style" only under p;
    // both p and a are permitted tags.
    Whitelist allowClassEverywhere = new Whitelist()
        .AddAttributes(":all", "class")
        .AddAttributes("p", "style")
        .AddTags("p", "a");
    string input = "<p class='foo' src='bar'><a class='qux'>link</a></p>";

    string output = Dcsoup.Clean(input, allowClassEverywhere);

    // class is kept on both elements; the src attribute is removed.
    Assert.AreEqual("<p class=\"foo\"><a class=\"qux\">link</a></p>", output);
}