/// <summary>Truncates a tag: every attribute is stripped (each one reported as not being in the
/// policy) and every child node that is not a text node is removed.</summary>
/// <param name="node">The node to truncate.</param>
/// <param name="tagName">The tag name used when reporting errors.</param>
private void TruncateTag(HtmlNode node, string tagName)
{
    HtmlAttributeCollection attributes = node.Attributes;

    // Always look at the head of the collection; it shrinks on every pass.
    while (attributes.Count > 0)
    {
        HtmlAttribute head = attributes[0];
        AddError(
            Constants.ERROR_ATTRIBUTE_NOT_IN_POLICY,
            HtmlEntityEncoder.HtmlEntityEncode(tagName),
            HtmlEntityEncoder.HtmlEntityEncode(head.Name),
            HtmlEntityEncoder.HtmlEntityEncode(head.Value));
        node.Attributes.Remove(head.Name);
    }

    // Take a snapshot of the non-text children first so the live collection
    // can be modified safely while we walk the snapshot in document order.
    foreach (HtmlNode child in node.ChildNodes.Where(c => c.NodeType != HtmlNodeType.Text).ToList())
    {
        node.RemoveChild(child);
    }
}
/// <summary>Invokes the CSS parser on the element.</summary>
/// <param name="node">The <see cref="HtmlNode"/> to scan.</param>
/// <param name="parentNode">The parent of the node.</param>
/// <returns><see langword="true"/> if processing ended with no exceptions (including when the
/// element has no content to scan); <see langword="false"/> if the style sheet was malformed and
/// the element was removed from <paramref name="parentNode"/>.</returns>
private bool ProcessStyleTag(HtmlNode node, HtmlNode parentNode)
{
    HtmlNode styleContent = node.FirstChild;
    if (styleContent == null)
    {
        // An element without children has no style sheet to scan. Dereferencing
        // FirstChild here used to throw a NullReferenceException.
        return true;
    }

    var styleScanner = new CssScanner(Policy);

    try
    {
        CleanResults cleanStyleSheet = styleScanner.ScanStyleSheet(styleContent.InnerHtml);
        errorMessages.AddRange(cleanStyleSheet.GetErrorMessages());

        /*
         * If IE gets an empty style tag, i.e. <style/> it will break all CSS on the page. I wish I
         * was kidding. So, if after validation no CSS properties are left, we would normally be left
         * with an empty style tag and break all CSS. To prevent that, we have this check.
         */
        string cleanHtml = cleanStyleSheet.GetCleanHtml();
        styleContent.InnerHtml = string.IsNullOrEmpty(cleanHtml) ? EMPTY_CSS_COMMENT : cleanHtml;
    }
    catch (Exception exc) when (exc is ScanException || exc is ParseException)
    {
        // Only scanning/parsing failures are handled here; anything else propagates untouched.
        AddError(Constants.ERROR_CSS_TAG_MALFORMED, HtmlEntityEncoder.HtmlEntityEncode(styleContent.InnerHtml));
        parentNode.RemoveChild(node);
        return false;
    }

    return true;
}
/// <summary>Scans a CSS rule and tells if it is valid. Even if it is valid, internally it
/// may have some of its properties removed if dangerous parts are detected.</summary>
/// <param name="rule">The CSS rule to scan.</param>
/// <returns><c>true</c> if the rule is valid, <c>false</c> if it must be removed.</returns>
private bool ScanStyleRule(ICssRule rule) {
    if (rule is ICssStyleRule styleRule) {
        // Plain style rule: scan its declaration block.
        ScanStyleDeclaration(styleRule.Style);
    } else if (rule is ICssGroupingRule groupingRule) {
        // Grouping rule: scan every nested rule. The nested results are not inspected here.
        foreach (ICssRule childRule in groupingRule.Rules) {
            ScanStyleRule(childRule);
        }
    } else if (rule is ICssPageRule pageRule) {
        ScanStyleDeclaration(pageRule.Style);
    } else if (rule is ICssKeyframesRule keyFramesRule) {
        // The keyframe rules are copied into a list before iterating
        // (presumably so scanning cannot disturb the enumeration; confirm).
        foreach (ICssKeyframeRule childRule in keyFramesRule.Rules.OfType <ICssKeyframeRule>().ToList()) {
            ScanStyleRule(childRule);
        }
    } else if (rule is ICssKeyframeRule keyFrameRule) {
        ScanStyleDeclaration(keyFrameRule.Style);
    } else if (rule is ICssImportRule importRule && policy.EmbedsStyleSheets) {
        // Imports are only followed when the policy embeds style sheets.
        if (importRule.Sheet == null) {
            // No sheet is available for this import: report the failure and reject the rule.
            AddError(Constants.ERROR_CSS_IMPORT_FAILURE, HtmlEntityEncoder.HtmlEntityEncode(importRule.Href));
            return(false);
        }
        if (ScannedImportedStyleSheets.Count == policy.MaxStyleSheetImports) {
            // The number of already scanned imports has reached the policy maximum.
            AddError(Constants.ERROR_CSS_IMPORT_EXCEEDED, HtmlEntityEncoder.HtmlEntityEncode(importRule.Href), policy.MaxStyleSheetImports);
            return(false);
        }
        int sheetLength = importRule.Sheet.ToCss().Length;
        if (sheetLength > SizeLimit) {
            // The imported sheet does not fit in the remaining size budget.
            AddError(Constants.ERROR_CSS_IMPORT_TOOLARGE, HtmlEntityEncoder.HtmlEntityEncode(importRule.Href), policy.MaxInputSize);
            return(false);
        }
        // Charge the sheet against the remaining budget, scan it, and record it
        // keyed by the import rule's CSS text.
        SizeLimit -= sheetLength;
        ScanStyleSheet(importRule.Sheet);
        ScannedImportedStyleSheets.Add(importRule.ToCss(), importRule.Sheet);
    }
/// <summary>Reports a CDATA section and replaces it, in place, with a text node that carries
/// the section's inner text.</summary>
/// <param name="node">The CDATA node to replace.</param>
private void StripCData(HtmlNode node)
{
    string encodedContent = HtmlEntityEncoder.HtmlEntityEncode(node.InnerHtml);
    AddError(Constants.ERROR_CDATA_FOUND, encodedContent);

    HtmlNode container = node.ParentNode;

    // Put the replacement right before the original so the position is kept, then drop the original.
    HtmlTextNode replacement = container.OwnerDocument.CreateTextNode(node.InnerText);
    container.InsertBefore(replacement, node);
    container.RemoveChild(node);
}
/// <summary>Removes the node when its tag is not allowed to be empty.</summary>
/// <param name="node">The empty node to check.</param>
/// <returns><c>true</c> if the node was removed, <c>false</c> if it was left in place.</returns>
private bool RemoveDisallowedEmpty(HtmlNode node)
{
    if (IsAllowedEmptyTag(node.Name))
    {
        return false;
    }

    // Not in the list of tags that may be empty: report it and drop it.
    string encodedName = HtmlEntityEncoder.HtmlEntityEncode(node.Name);
    AddError(Constants.ERROR_TAG_EMPTY, encodedName);
    RemoveNode(node);
    return true;
}
/// <summary>Applies the "encode" action to a tag: reports it, processes its children and then
/// turns the tag itself into encoded text while promoting the children.</summary>
/// <param name="node">The node whose tag gets encoded.</param>
/// <param name="tagName">The tag name used when reporting the error.</param>
private void EncodeTag(HtmlNode node, string tagName)
{
    string encodedTagName = HtmlEntityEncoder.HtmlEntityEncode(tagName);
    AddError(Constants.ERROR_TAG_ENCODED, encodedTagName);

    // The children are processed first, while they are still attached to the node.
    ProcessChildren(node);

    // Transform the tag to text, HTML-encode it and promote the children.
    // The tag will be kept in the fragment as one or two text nodes located
    // before and after the children, representing how the tag used to wrap them.
    EncodeAndPromoteChildren(node);
}
/// <summary>Builds the textual opening tag of a node, with attribute names and values
/// HTML-entity-encoded. The tag is written as self-closing when the node has no children.</summary>
/// <param name="node">The node to render.</param>
/// <returns>The opening tag as text.</returns>
private static string NodeToString(HtmlNode node)
{
    var builder = new StringBuilder();
    builder.Append('<').Append(node.Name);

    foreach (HtmlAttribute attr in node.GetAttributes())
    {
        string encodedName = HtmlEntityEncoder.HtmlEntityEncode(attr.Name);
        string encodedValue = HtmlEntityEncoder.HtmlEntityEncode(attr.Value);

        builder.Append(' ');
        builder.Append(encodedName);
        builder.Append("=\"");
        builder.Append(encodedValue);
        builder.Append('"');
    }

    builder.Append(node.HasChildNodes ? ">" : "/>");
    return builder.ToString();
}
/// <summary>Validates every attribute of a node against the policy, cleaning or removing the
/// attributes (or the whole tag) as the policy dictates.</summary>
/// <remarks>
/// For each attribute the policy entry is looked up on the tag first, then among the global
/// attributes and finally, when the policy allows them, among the dynamic attributes.
/// A <c>style</c> attribute with a policy entry is cleaned by the CSS scanner; any other attribute
/// is checked against the entry's allowed values and regular expressions. Attributes without a
/// policy entry are removed.
/// </remarks>
/// <param name="node">The node whose attributes are validated.</param>
/// <param name="tag">The policy tag that applies to the node.</param>
/// <returns><c>false</c> if an invalid attribute caused the tag itself to be removed, filtered or
/// encoded (no further processing of the node is possible); <c>true</c> otherwise.</returns>
private bool ProcessAttributes(HtmlNode node, Tag tag)
{
    string tagName = tag.Name;
    int currentAttributeIndex = 0;

    while (currentAttributeIndex < node.Attributes.Count)
    {
        HtmlAttribute htmlAttribute = node.Attributes[currentAttributeIndex];
        string name = htmlAttribute.Name;
        string value = htmlAttribute.Value;

        Attribute attribute = tag.GetAttributeByName(name);
        if (attribute == null)
        {
            attribute = Policy.GetGlobalAttributeByName(name);

            // Not a global attribute, perhaps it is a dynamic attribute, if allowed.
            if (attribute == null && Policy.AllowsDynamicAttributes)
            {
                attribute = Policy.GetDynamicAttributeByName(name);
            }
        }

        if (name.ToLowerInvariant() == "style" && attribute != null)
        {
            var styleScanner = new CssScanner(Policy);

            try
            {
                CleanResults cleanInlineStyle = styleScanner.ScanInlineStyle(value, tagName);
                htmlAttribute.Value = cleanInlineStyle.GetCleanHtml();
                errorMessages.AddRange(cleanInlineStyle.GetErrorMessages());
            }
            catch (Exception exc) when (exc is ScanException || exc is ParseException)
            {
                // Only scanning/parsing failures are handled; anything else propagates untouched.
                AddError(Constants.ERROR_CSS_ATTRIBUTE_MALFORMED,
                    HtmlEntityEncoder.HtmlEntityEncode(value),
                    HtmlEntityEncoder.HtmlEntityEncode(tagName));
                node.Attributes.Remove(name);
                currentAttributeIndex--; // Compensates the increment at the end of the loop.
            }
        }
        else if (attribute == null)
        {
            AddError(Constants.ERROR_ATTRIBUTE_NOT_IN_POLICY,
                HtmlEntityEncoder.HtmlEntityEncode(tagName),
                HtmlEntityEncoder.HtmlEntityEncode(name),
                HtmlEntityEncoder.HtmlEntityEncode(value));
            node.Attributes.Remove(name);
            currentAttributeIndex--;
        }
        else
        {
            value = HtmlEntity.DeEntitize(value);
            string lowerCaseValue = value.ToLowerInvariant();

            // The pattern is wrapped in a non-capturing group so that the anchors apply to the
            // whole expression; without it a top-level alternation such as "a|b" would only be
            // anchored on its first and last alternatives.
            bool isAttributeValid =
                attribute.AllowedValues.Any(v => v != null && v.ToLowerInvariant() == lowerCaseValue)
                || attribute.AllowedRegExp.Any(r => r != null && Regex.IsMatch(value, "^(?:" + r + ")$"));

            if (!isAttributeValid)
            {
                switch (attribute.OnInvalid)
                {
                    case "removeTag":
                        RemoveNode(node);
                        AddError(Constants.ERROR_ATTRIBUTE_INVALID_REMOVED,
                            HtmlEntityEncoder.HtmlEntityEncode(tagName),
                            HtmlEntityEncoder.HtmlEntityEncode(name),
                            HtmlEntityEncoder.HtmlEntityEncode(value));
                        return false; // Can't process any more once the tag is removed.

                    case "filterTag":
                        // Remove the node and move up the rest that was inside the tag after processing.
                        ProcessChildren(node);
                        PromoteChildren(node);
                        AddError(Constants.ERROR_ATTRIBUTE_CAUSE_FILTER,
                            HtmlEntityEncoder.HtmlEntityEncode(tagName),
                            HtmlEntityEncoder.HtmlEntityEncode(name),
                            HtmlEntityEncoder.HtmlEntityEncode(value));
                        return false; // Can't process any more once the tag is filtered.

                    case "encodeTag":
                        // Encode the node and move up the rest that was inside the tag after processing.
                        ProcessChildren(node);
                        EncodeAndPromoteChildren(node);
                        AddError(Constants.ERROR_ATTRIBUTE_CAUSE_ENCODE,
                            HtmlEntityEncoder.HtmlEntityEncode(tagName),
                            HtmlEntityEncoder.HtmlEntityEncode(name),
                            HtmlEntityEncoder.HtmlEntityEncode(value));
                        return false; // Can't process any more once the tag is encoded.

                    default:
                        // Just remove the attribute. It is removed by the name found in the markup:
                        // the policy entry's name may not be identical to it (e.g. presumably for
                        // dynamic attributes), in which case nothing would be removed and this loop
                        // would revisit the same attribute forever.
                        node.Attributes.Remove(name);
                        currentAttributeIndex--;
                        AddError(Constants.ERROR_ATTRIBUTE_INVALID,
                            HtmlEntityEncoder.HtmlEntityEncode(tagName),
                            HtmlEntityEncoder.HtmlEntityEncode(name),
                            HtmlEntityEncoder.HtmlEntityEncode(value));
                        break;
                }
            }
        }

        currentAttributeIndex++;
    }

    return true;
}
/// <summary>Applies the "filter" action to a tag: reports it, processes its children and then
/// promotes them in place of the tag.</summary>
/// <param name="node">The node to filter.</param>
/// <param name="tag">The policy tag for the node, or <c>null</c> when the tag is not in the policy.</param>
/// <param name="tagName">The tag name used when reporting the error.</param>
private void FilterTag(HtmlNode node, Tag tag, string tagName)
{
    // An unknown tag and a known-but-filtered tag are reported with different messages.
    bool isUnknownTag = tag == null;
    var errorKey = isUnknownTag ? Constants.ERROR_TAG_NOT_IN_POLICY : Constants.ERROR_TAG_FILTERED;
    AddError(errorKey, HtmlEntityEncoder.HtmlEntityEncode(tagName));

    ProcessChildren(node);
    PromoteChildren(node);
}
/// <summary>Reports a processing instruction and removes it.</summary>
/// <param name="node">The processing instruction node.</param>
private void RemoveProcessingInstruction(HtmlNode node)
{
    // The outer HTML is reported because the inner part probably won't have any text.
    string encodedInstruction = HtmlEntityEncoder.HtmlEntityEncode(node.OuterHtml);
    AddError(Constants.ERROR_PI_FOUND, encodedInstruction);

    RemoveNode(node);
}
/// <summary>The workhorse of the scanner. Recursively scans document elements according to the policy.
/// This should be called implicitly through the <c>AntiSamy.Scan()</c> method.</summary>
/// <param name="node">The node to validate.</param>
private void RecursiveValidateTag(HtmlNode node)
{
    HtmlNode parentNode = node.ParentNode;
    string tagName = node.Name;

    // Text, processing instructions and comments have dedicated handling and end the scan here.
    if (node is HtmlTextNode)
    {
        if (HtmlNode.IsCDataElement(node.Name))
        {
            StripCData(node);
        }

        node.InnerHtml = System.Net.WebUtility.HtmlDecode(node.InnerHtml);
        return;
    }

    if (IsProcessingInstruction(node))
    {
        RemoveProcessingInstruction(node);
        return;
    }

    if (node is HtmlCommentNode commentNode)
    {
        ProcessCommentNode(commentNode);
        return;
    }

    // An element without children is dropped unless its tag is allowed to be empty.
    bool isEmptyElement = node.NodeType == HtmlNodeType.Element && node.ChildNodes.Count == 0;
    if (isEmptyElement && RemoveDisallowedEmpty(node))
    {
        return;
    }

    string lowerCaseTagName = tagName.ToLowerInvariant();
    Tag tag = Policy.GetTagByName(lowerCaseTagName);

    // If <param> and no policy and ValidatesParamAsEmbed and policy in place for <embed> and <embed>
    // policy is to validate, use custom policy to get the tag through to the validator.
    bool isMasqueradingParam = IsMasqueradingParam(tag, Policy.GetTagByName("embed"), lowerCaseTagName);
    if (isMasqueradingParam)
    {
        tag = BASIC_EMBED_TAG;
    }

    // A known tag is handled by its own action; an unknown one by the policy's unknown-tag action.
    bool isUnknownTag = tag == null;

    if (isUnknownTag ? Policy.OnUnknownTagAction == Constants.ACTION_ENCODE : tag.Action == Constants.ACTION_ENCODE)
    {
        EncodeTag(node, tagName);
    }
    else if (isUnknownTag ? Policy.OnUnknownTagAction == Constants.ACTION_FILTER : tag.Action == Constants.ACTION_FILTER)
    {
        FilterTag(node, tag, tagName);
    }
    else if (!isUnknownTag && tag.Action == Constants.ACTION_VALIDATE)
    {
        // Validation needs a policy tag, so it never applies to unknown tags.
        ValidateTag(node, parentNode, tagName, tag, isMasqueradingParam);
    }
    else if (isUnknownTag ? Policy.OnUnknownTagAction == Constants.ACTION_TRUNCATE : tag.Action == Constants.ACTION_TRUNCATE)
    {
        TruncateTag(node, tagName);
    }
    else
    {
        // If we reached this it means the tag's action is "remove" or the tag is unknown,
        // which means to remove the tag (including its contents).
        var errorKey = isUnknownTag ? Constants.ERROR_TAG_NOT_IN_POLICY : Constants.ERROR_TAG_DISALLOWED;
        AddError(errorKey, HtmlEntityEncoder.HtmlEntityEncode(tagName));
        RemoveNode(node);
    }
}