/// <summary>
/// Determines whether the input document is valid against the whitelist.
/// </summary>
/// <remarks>
/// The document is considered valid if all the tags and attributes
/// in the input HTML are allowed by the whitelist. Only the content of the
/// document's <c>body</c> is inspected.
/// <para>
/// This method can be used as a validator for user input forms.
/// An invalid document will still be cleaned successfully using the
/// <see cref="Clean(Supremes.Nodes.Document)"/> method. If using this as a
/// validator, it is recommended to still clean the document to ensure
/// enforced attributes are set correctly, and that the output is tidied.
/// </para>
/// <para>
/// A document without a body (for example a frameset document) has nothing
/// to copy, so no nodes are discarded and it is reported as valid. This
/// mirrors <see cref="Clean(Supremes.Nodes.Document)"/>, which produces an
/// empty body for such input.
/// </para>
/// </remarks>
/// <param name="dirtyDocument">document to test; must not be null</param>
/// <returns>true if no tags or attributes need to be removed; false if they do</returns>
public bool IsValid(Document dirtyDocument)
{
    Validate.NotNull(dirtyDocument);

    // The shell is only a scratch target for the copy; it is discarded afterwards.
    Document clean = Document.CreateShell(dirtyDocument.BaseUri);

    // Frameset documents won't have a body. Guard here the same way Clean does,
    // instead of handing a null source element to CopySafeNodes.
    if (dirtyDocument.Body == null)
    {
        return true;
    }

    int numDiscarded = CopySafeNodes(dirtyDocument.Body, clean.Body);
    return numDiscarded == 0;
}
// current doc we are building into
// the stack of open elements
// current base uri, for creating new elements
// currentToken is used only for error tracking.
// null when not tracking errors

/// <summary>
/// Resets the parser state so that a fresh parse of <paramref name="input"/> can begin.
/// </summary>
/// <remarks>
/// Creates a new document for <paramref name="baseUri"/>, a character reader over
/// <paramref name="input"/>, a tokeniser bound to that reader, and an empty element stack.
/// </remarks>
/// <param name="input">markup to parse; must not be null</param>
/// <param name="baseUri">base URI handed to the new document; must not be null</param>
/// <param name="errors">error list passed on to the tokeniser and stored on this instance</param>
internal virtual void InitialiseParse(string input, string baseUri, ParseErrorList errors)
{
    // Reject bad arguments before any state is touched.
    Validate.NotNull(input, "String input must not be null");
    Validate.NotNull(baseUri, "BaseURI must not be null");

    // Remember the caller-supplied values.
    this.baseUri = baseUri;
    this.errors = errors;

    // Fresh output document and an empty stack of elements.
    doc = new Document(baseUri);
    stack = new DescendableLinkedList<Element>();

    // The tokeniser consumes the reader, so the reader has to exist first.
    reader = new CharacterReader(input);
    tokeniser = new Tokeniser(reader, errors);
}
/// <summary>
/// Builds a new, clean document from the supplied dirty document,
/// keeping only the elements permitted by the whitelist.
/// </summary>
/// <remarks>
/// The original document is not modified.
/// Only elements from the dirty document's <c>body</c> are used.
/// When the dirty document has no body, the returned document is the
/// untouched shell.
/// </remarks>
/// <param name="dirtyDocument">Untrusted base document to clean; must not be null.</param>
/// <returns>The cleaned document.</returns>
public Document Clean(Document dirtyDocument)
{
    Validate.NotNull(dirtyDocument);

    Document sanitized = Document.CreateShell(dirtyDocument.BaseUri);

    // Frameset documents won't have a body; in that case the shell is
    // returned as it was created.
    if (dirtyDocument.Body == null)
    {
        return sanitized;
    }

    CopySafeNodes(dirtyDocument.Body, sanitized.Body);
    return sanitized;
}
/// <summary>
/// Parses <c>htmlString</c> with <c>Dcsoup.Parse</c> and stores the result in <c>html</c>.
/// </summary>
public void Init()
{
    var parsed = Dcsoup.Parse(htmlString);
    html = parsed;
}