// Matches a width/height value made only of digits with an optional trailing '%'.
// Anchored on both sides so that a value merely *containing* a number is rejected.
private static readonly Regex dimensionPattern = new Regex("^\\s*\\d+%?\\s*$");

/// <summary>
/// Tokenizes <paramref name="html"/> and rebuilds it keeping only the tags matched by
/// <paramref name="allowedTags"/>, with their attributes filtered.
/// </summary>
/// <param name="html">Markup to sanitize. A null or empty value yields a fresh, untouched result.</param>
/// <param name="allowedTags">Pattern matched against the lower-cased tag name of tags that may be kept.</param>
/// <param name="forbiddenTags">Pattern matched against the lower-cased tag name of tags that are always dropped and reported.</param>
/// <param name="isConsiderAllowedTag">
/// When false (the default) every opening tag matched by <paramref name="allowedTags"/> is reported in
/// <c>invalidTags</c> and dropped; when true such tags are cleaned and kept.
/// </param>
/// <returns>
/// A <c>SanitizeResult</c> where: <c>val</c> accumulates comments and cleaned tokens; <c>html</c> accumulates
/// accepted tags verbatim and every other surviving token passed through <c>htmlEncodeApexesAndTags</c>;
/// <c>text</c> accumulates the encoded, line-feed-stripped non-accepted tokens; <c>invalidTags</c> lists
/// everything that was rejected; <c>isValid</c> is true only when <c>invalidTags</c> is empty.
/// </returns>
public static SanitizeResult sanitizer(String html, Regex allowedTags, Regex forbiddenTags, bool isConsiderAllowedTag = false)
{
    SanitizeResult ret = new SanitizeResult();
    if (String.IsNullOrEmpty(html))
    {
        return ret;
    }

    // Names of accepted tags that still need a closing tag, innermost on top.
    Stack<String> openTags = new Stack<String>();
    List<String> tokens = tokenize(html);

    // ------------------- LOOP for every token --------------------------
    foreach (String rawToken in tokens)
    {
        String token = rawToken;
        bool isAcceptedToken = false;

        // ------------------------------------------------ COMMENT <!-- ......... -->
        if (commentPattern.Match(token).Success)
        {
            // Make sure the comment is terminated before recording it.
            String closedComment = token.EndsWith("-->", StringComparison.Ordinal) ? token : token + "-->";
            ret.val += closedComment;
            ret.invalidTags.Add(closedComment);
            continue;
        }

        Match startMatcher = tagStartPattern.Match(token);
        if (startMatcher.Success)
        {
            // ------------------------------------------------ OPEN TAG <tag .........>
            // Invariant lower-casing: culture-sensitive casing (e.g. Turkish dotless i)
            // would let "SCRIPT" slip past the tag patterns.
            String tag = startMatcher.Groups[1].Value.ToLowerInvariant();

            // FORBIDDEN TAG <script .........>
            if (forbiddenTags.Match(tag).Success)
            {
                ret.invalidTags.Add("<" + tag + ">");
                continue;
            }

            // UNKNOWN TAG: silently dropped.
            if (!allowedTags.Match(tag).Success)
            {
                continue;
            }

            // WELL KNOWN TAG
            if (!isConsiderAllowedTag)
            {
                ret.invalidTags.Add("<" + tag + ">");
                continue;
            }

            // Table consistency: row groups and rows need an open <table>, cells need an open <tr>.
            if (tag == "thead" || tag == "tbody" || tag == "tfoot" || tag == "tr")
            {
                if (!openTags.Any(t => t == "table"))
                {
                    ret.invalidTags.Add("<" + tag + ">");
                    continue;
                }
            }
            else if (tag == "td" || tag == "th")
            {
                if (!openTags.Any(t => t == "tr"))
                {
                    ret.invalidTags.Add("<" + tag + ">");
                    continue;
                }
            }

            String cleanToken = "<" + tag;
            String tokenBody = startMatcher.Groups[2].Value;
            bool foundURL = false; // set once an <a href> / <img src> / <embed src> carries an accepted URL

            foreach (Match attribute in attributesPattern.Matches(tokenBody))
            {
                String attr = attribute.Groups[1].Value.ToLowerInvariant();
                String val = attribute.Groups[2].Value;

                if (val.Contains("&{"))
                {
                    // Reported as invalid, so it must not be emitted either.
                    ret.invalidTags.Add(attr + " " + val);
                    continue;
                }
                else if (tag == "a" && attr == "href")
                {
                    // <a href="......">: http, https and mailto are accepted.
                    if (hasAllowedScheme(val, true))
                    {
                        foundURL = true;
                    }
                    else
                    {
                        ret.invalidTags.Add(attr + " " + val);
                        val = "";
                    }
                }
                else if ((tag == "img" || tag == "embed") && attr == "src")
                {
                    // <img src="......">: only http and https are accepted.
                    if (hasAllowedScheme(val, false))
                    {
                        foundURL = true;
                    }
                    else
                    {
                        ret.invalidTags.Add(attr + " " + val);
                        val = "";
                    }
                }
                else if (attr == "href" || attr == "src")
                {
                    // <tag src/href="......"> on any other tag is skipped.
                    ret.invalidTags.Add(tag + " " + attr + " " + val);
                    continue;
                }
                else if (attr == "width" || attr == "height")
                {
                    // The value is emitted as-is, so it must be purely numeric (optionally a percentage).
                    if (!dimensionPattern.IsMatch(val))
                    {
                        ret.invalidTags.Add(tag + " " + attr + " " + val);
                        continue;
                    }
                }
                else if (attr == "style")
                {
                    val = cleanStyleAttribute(tag, attr, val, ret);
                }
                else if (attr.StartsWith("on", StringComparison.Ordinal))
                {
                    // Skip all javascript event handlers.
                    ret.invalidTags.Add(tag + " " + attr + " " + val);
                    continue;
                }
                else
                {
                    // By default encode all properties.
                    val = encode(val);
                }

                cleanToken += " " + attr + "=\"" + val + "\"";
            }

            cleanToken += ">";
            isAcceptedToken = true;

            // <a>, <img> and <embed> are useless (and dropped) without an accepted URL.
            if ((tag == "a" || tag == "img" || tag == "embed") && !foundURL)
            {
                isAcceptedToken = false;
                cleanToken = "";
            }

            token = cleanToken;

            // Remember the tag if it was accepted and requires a closing tag.
            if (isAcceptedToken && !(standAloneTags.Match(tag).Success || selfClosed.Match(tag).Success))
            {
                openTags.Push(tag);
            }
        }
        else
        {
            Match endMatcher = tagClosePattern.Match(token);
            if (endMatcher.Success)
            {
                // ------------------------------------------------ CLOSE TAG </tag>
                String tag = endMatcher.Groups[1].Value.ToLowerInvariant();

                // A self-closing tag must not have an explicit closing tag.
                if (selfClosed.Match(tag).Success)
                {
                    ret.invalidTags.Add(token);
                    continue;
                }

                if (forbiddenTags.Match(tag).Success)
                {
                    ret.invalidTags.Add("/" + tag);
                    continue;
                }

                // Unknown closing tag: silently dropped.
                if (!allowedTags.Match(tag).Success)
                {
                    continue;
                }

                // Count how many open tags sit on the stack down to and including this one.
                int depth = 0;
                bool found = false;
                foreach (String open in openTags)
                {
                    depth++;
                    if (open == tag)
                    {
                        found = true;
                        break;
                    }
                }

                // Close every tag opened after the matching one, then the matching one itself.
                // A closing tag with no matching open tag produces nothing.
                String cleanToken = "";
                if (found)
                {
                    for (int k = 0; k < depth; k++)
                    {
                        cleanToken += "</" + openTags.Pop() + ">";
                    }
                    isAcceptedToken = true;
                }

                token = cleanToken;
            }
        }

        ret.val += token;

        if (isAcceptedToken)
        {
            ret.html += token;
        }
        else
        {
            // Not an accepted tag (plain text or a rejected tag): emit encoded.
            ret.html += htmlEncodeApexesAndTags(token);
            ret.text += htmlEncodeApexesAndTags(removeLineFeed(token));
        }
    }

    // Close whatever is still open, innermost first.
    while (openTags.Count > 0)
    {
        String poppedTag = openTags.Pop();
        ret.html += "</" + poppedTag + ">";
        ret.val += "</" + poppedTag + ">";
    }

    ret.isValid = ret.invalidTags.Count == 0;
    return ret;
}

// Returns true when candidate parses as an absolute URI whose scheme is http or https
// (or mailto, when allowMailto is set).
private static bool hasAllowedScheme(String candidate, bool allowMailto)
{
    Uri parsed;
    if (!Uri.TryCreate(candidate, UriKind.Absolute, out parsed))
    {
        return false;
    }

    return parsed.Scheme == Uri.UriSchemeHttp
        || parsed.Scheme == Uri.UriSchemeHttps
        || (allowMailto && parsed.Scheme == Uri.UriSchemeMailto);
}

// Rebuilds a style attribute value from the declarations that pass the forbidden-value and
// url() checks; every rejected declaration is reported in ret.invalidTags.
private static String cleanStyleAttribute(String tag, String attr, String styleText, SanitizeResult ret)
{
    String cleanStyle = "";

    foreach (Match style in stylePattern.Matches(styleText))
    {
        String styleName = style.Groups[1].Value.ToLowerInvariant();
        String styleValue = style.Groups[2].Value;

        // Suppress invalid style values.
        if (forbiddenStylePattern.Match(styleValue).Success)
        {
            ret.invalidTags.Add(tag + " " + attr + " " + styleValue);
            continue;
        }

        // A url(...) inside the value must point to an http/https resource.
        Match urlStyleMatcher = urlStylePattern.Match(styleValue);
        if (urlStyleMatcher.Success && !hasAllowedScheme(urlStyleMatcher.Groups[1].Value, false))
        {
            ret.invalidTags.Add(tag + " " + attr + " " + styleValue);
            continue;
        }

        cleanStyle += styleName + ":" + encode(styleValue) + ";";
    }

    return cleanStyle;
}