/// <summary>
/// Checks whether the supplied HTML uses only the tags and attributes that the Whitelist permits.
/// </summary>
/// <remarks>
/// Handy for validating form input. Passing this check does not replace cleaning:
/// the HTML should still go through the cleaner so that enforced attributes are applied
/// and the output is tidied.
/// </remarks>
/// <param name="bodyHtml">HTML to test</param>
/// <param name="whitelist">whitelist to test against</param>
/// <returns>true if no tags or attributes were removed; false otherwise</returns>
/// <seealso cref="Clean(string, Supremes.Safety.Whitelist)"></seealso>
public static bool IsValid(string bodyHtml, Whitelist whitelist)
{
    // Parse as a body fragment with an empty base URI before building the cleaner.
    Document parsed = ParseBodyFragment(bodyHtml, string.Empty);
    return new Cleaner(whitelist).IsValid(parsed);
}
/// <summary>
/// Creates a visitor and records the objects it works with.
/// </summary>
/// <param name="_enclosing">the enclosing <c>Cleaner</c> instance, kept for later use</param>
/// <param name="root">root element handed to the visitor (presumably the source tree being walked; confirm against the traversal code)</param>
/// <param name="destination">current element to append nodes to</param>
internal CleaningVisitor(Cleaner _enclosing, Element root, Element destination)
{
    this.destination = destination;
    this.root = root;
    this._enclosing = _enclosing;
}
/// <summary>
/// Produces safe HTML from untrusted input by parsing it as a body fragment and
/// filtering the result through a white-list of permitted tags and attributes.
/// </summary>
/// <param name="bodyHtml">input untrusted HTML (body fragment)</param>
/// <param name="baseUri">URL to resolve relative URLs against</param>
/// <param name="whitelist">white-list of permitted HTML elements</param>
/// <param name="outputSettings">document output settings; use to control pretty-printing and entity escape modes</param>
/// <returns>safe HTML (body fragment)</returns>
/// <seealso cref="Supremes.Safety.Cleaner.Clean(Supremes.Nodes.Document)">Supremes.Safety.Cleaner.Clean(Supremes.Nodes.Document)</seealso>
public static string Clean(string bodyHtml, string baseUri, Whitelist whitelist, DocumentOutputSettings outputSettings)
{
    Document parsed = ParseBodyFragment(bodyHtml, baseUri);
    Document sanitized = new Cleaner(whitelist).Clean(parsed);

    // The output settings are applied to the cleaned document before its body is serialized.
    sanitized.OutputSettings = outputSettings;
    return sanitized.Body.Html;
}
// Cleaning a frameset document with Whitelist.Basic is expected to leave nothing behind,
// both through the string-based API and through the Document-based Cleaner.
public void HandlesFramesets()
{
    const string framesetHtml = "<html><head><script></script><noscript></noscript></head><frameset><frame src=\"foo\" /><frame src=\"foo\" /></frameset></html>";

    // String-in, string-out path.
    string cleanedHtml = Dcsoup.Clean(framesetHtml, Whitelist.Basic);
    Assert.AreEqual("", cleanedHtml); // nothing good can come out of that

    // Document-in, document-out path.
    Document parsedDoc = Dcsoup.Parse(framesetHtml);
    var cleaner = new Cleaner(Whitelist.Basic);
    Document cleanedDoc = cleaner.Clean(parsedDoc);
    Assert.IsFalse(cleanedDoc == null);
    Assert.AreEqual(0, cleanedDoc.Body.ChildNodeSize);
}