public RemoveChild ( |
||
oldChild | The node being removed. May not be null. | |
return |
/// <summary>
/// Removes <paramref name="oldChild"/> from <paramref name="parent"/> but keeps its
/// children: each one is inserted into <paramref name="parent"/>, in order, starting
/// right after the previous sibling of the node being removed.
/// </summary>
/// <param name="parent">The node that currently contains <paramref name="oldChild"/>.</param>
/// <param name="oldChild">The node to remove; its children are handed to <paramref name="parent"/>.</param>
public static void RemoveChildKeepGrandChildren(HtmlNode parent, HtmlNode oldChild)
{
    var grandChildren = oldChild.ChildNodes;
    if (grandChildren != null)
    {
        // Insertion anchor: every grandchild goes after the previously inserted one.
        HtmlNode anchor = oldChild.PreviousSibling;
        foreach (HtmlNode grandChild in grandChildren)
        {
            parent.InsertAfter(grandChild, anchor);

            // Missing line in HtmlAgilityPack: advance the anchor so order is kept.
            anchor = grandChild;
        }
    }

    parent.RemoveChild(oldChild);
}
/// <summary>
/// Recursively tidies the markup below <paramref name="node"/>:
/// unwraps <c>span</c> children that carry a <c>lang</c> attribute, replaces
/// <c>font</c> children with <c>&lt;span class="terminal-symbol"&gt;</c>, and removes
/// children whose trimmed text is an ellipsis (plus a <c>br</c> that directly follows one).
/// </summary>
/// <param name="node">The node to clean in place. Must not be null.</param>
/// <returns>The same <paramref name="node"/> instance, after clean-up.</returns>
static HtmlNode CleanupHtmlNode(HtmlNode node)
{
    Contract.Requires(node != null);
    // This is a postcondition: Contract.Result is only valid inside Contract.Ensures,
    // not inside Contract.Requires.
    Contract.Ensures(Contract.Result<HtmlNode>() != null);

    // Snapshots (ToList) are taken because each loop mutates node.ChildNodes.
    foreach (var langSpan in node.ChildNodes.Where(n => n.Name == "span" && n.Attributes.Contains("lang")).ToList())
    {
        langSpan.ReplaceWithChildNodes();
    }

    foreach (var fontChildNode in node.ChildNodes.Where(n => n.Name == "font").ToList())
    {
        var replacingNode = node.OwnerDocument.CreateElement("span");
        replacingNode.Attributes.Add("class", "terminal-symbol");

        // the font node is doubled, so the content is taken from its first child.
        // An empty <font> has no first child; fall back to empty content instead of throwing.
        replacingNode.InnerHtml = fontChildNode.FirstChild?.InnerHtml ?? string.Empty;

        node.ChildNodes.Insert(node.ChildNodes.GetNodeIndex(fontChildNode), replacingNode);
        node.RemoveChild(fontChildNode);
    }

    var ellipsises = new List<string> { "…", "..." };
    var ellipsisLineNodes = node.ChildNodes
        .Where(n => ellipsises.Contains(n.InnerText.Trim())
                    || (n.Name == "br" && ellipsises.Contains(n.PreviousSibling?.InnerText?.Trim())))
        .ToList();
    foreach (var ellipsisLineNode in ellipsisLineNodes)
    {
        ellipsisLineNode.Remove();
    }

    // Recursion only changes each child's own children, so enumerating node.ChildNodes is safe here.
    foreach (var childNode in node.ChildNodes)
    {
        CleanupHtmlNode(childNode);
    }

    return node;
}
/// <summary>
/// Validates <paramref name="node"/> against the policy and then recurses into its children.
/// Depending on the action the policy assigns to the tag, the node is filtered
/// (removed, children kept), validated (attributes and embedded CSS checked),
/// truncated (attributes and non-text/non-comment children removed) or removed outright.
/// A human-readable message is appended to <c>errorMessages</c> for every change made.
/// </summary>
/// <param name="node">The node to validate. Text nodes are returned untouched.</param>
private void recursiveValidateTag(HtmlNode node)
{
    int maxinputsize = int.Parse(policy.getDirective("maxInputSize"));

    num++;

    HtmlNode parentNode = node.ParentNode;
    string tagName = node.Name;

    //check this out
    //might not be robust enough
    if (tagName.ToLower().Equals("#text")) // || tagName.ToLower().Equals("#comment"))
    {
        return;
    }

    Tag tag = policy.getTagByName(tagName.ToLower());

    if (tag == null || "filter".Equals(tag.Action))
    {
        StringBuilder errBuff = new StringBuilder();
        if (tagName == null || tagName.Trim().Equals(""))
        {
            errBuff.Append("An unprocessable ");
        }
        else
        {
            errBuff.Append("The <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName.ToLower()) + "</b> ");
        }

        errBuff.Append("tag has been filtered for security reasons. The contents of the tag will ");
        errBuff.Append("remain in place.");

        errorMessages.Add(errBuff.ToString());

        // Validate the children first, then lift them into the parent in place of this node.
        validateChildNodes(node);
        promoteChildren(node);
        return;
    }
    else if ("validate".Equals(tag.Action))
    {
        if ("style".Equals(tagName.ToLower()) && policy.getTagByName("style") != null)
        {
            if (node.FirstChild == null)
            {
                // An empty style element has no content node to scan; dereferencing
                // FirstChild would throw. Write the same placeholder that is used
                // below when validation leaves no CSS.
                node.InnerHtml = "/* */";
            }
            else
            {
                CssScanner styleScanner = new CssScanner(policy);
                try
                {
                    CleanResults cr = styleScanner.scanStyleSheet(node.FirstChild.InnerHtml, maxinputsize);
                    foreach (string msg in cr.getErrorMessages())
                    {
                        errorMessages.Add(msg.ToString());
                    }

                    /*
                     * If IE gets an empty style tag, i.e. <style/>
                     * it will break all CSS on the page. I wish I
                     * was kidding. So, if after validation no CSS
                     * properties are left, we would normally be left
                     * with an empty style tag and break all CSS. To
                     * prevent that, we have this check.
                     */
                    if (cr.getCleanHTML() == null || cr.getCleanHTML().Equals(""))
                    {
                        node.FirstChild.InnerHtml = "/* */";
                    }
                    else
                    {
                        node.FirstChild.InnerHtml = cr.getCleanHTML();
                    }
                }
                catch (ScanException e)
                {
                    Console.WriteLine("Scan Exception: " + e.Message);
                    parentNode.RemoveChild(node);
                }
            }
        }

        for (int currentAttributeIndex = 0; currentAttributeIndex < node.Attributes.Count; currentAttributeIndex++)
        {
            HtmlAttribute attribute = node.Attributes[currentAttributeIndex];

            string name = attribute.Name;
            string _value = attribute.Value;

            // Tag-specific rule first, then the policy-wide (global) rule.
            Attribute attr = tag.getAttributeByName(name);
            if (attr == null)
            {
                attr = policy.getGlobalAttributeByName(name);
            }

            bool isAttributeValid = false;

            if ("style".Equals(name.ToLower()) && attr != null)
            {
                CssScanner styleScanner = new CssScanner(policy);
                try
                {
                    CleanResults cr = styleScanner.scanInlineStyle(_value, tagName, maxinputsize);
                    attribute.Value = cr.getCleanHTML();
                    foreach (string msg in cr.getErrorMessages())
                    {
                        errorMessages.Add(msg.ToString());
                    }
                }
                catch (ScanException ex)
                {
                    Console.WriteLine(ex.Message);

                    // The attribute must actually be removed before stepping the index back.
                    // Decrementing without removing makes the loop rescan the same failing
                    // attribute forever.
                    node.Attributes.Remove(name);
                    currentAttributeIndex--;
                }
            }
            else
            {
                if (attr != null)
                {
                    //try to find out how robust this is - do I need to do this in a loop?
                    _value = HtmlEntity.DeEntitize(_value);

                    foreach (string allowedValue in attr.AllowedValues)
                    {
                        if (isAttributeValid)
                        {
                            break;
                        }

                        if (allowedValue != null && allowedValue.ToLower().Equals(_value.ToLower()))
                        {
                            isAttributeValid = true;
                        }
                    }

                    foreach (string ptn in attr.AllowedRegExp)
                    {
                        if (isAttributeValid)
                        {
                            break;
                        }

                        // Anchor the pattern so the whole value has to match.
                        string pattern = "^" + ptn + "$";
                        Match m = Regex.Match(_value, pattern);
                        if (m.Success)
                        {
                            isAttributeValid = true;
                        }
                    }

                    if (!isAttributeValid)
                    {
                        string onInvalidAction = attr.OnInvalid;
                        StringBuilder errBuff = new StringBuilder();

                        errBuff.Append("The <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag contained an attribute that we couldn't process. ");
                        errBuff.Append("The <b>" + HTMLEntityEncoder.htmlEntityEncode(name) + "</b> attribute had a value of <u>" + HTMLEntityEncoder.htmlEntityEncode(_value) + "</u>. ");
                        errBuff.Append("This value could not be accepted for security reasons. We have chosen to ");

                        if ("removeTag".Equals(onInvalidAction))
                        {
                            parentNode.RemoveChild(node);
                            errBuff.Append("remove the <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag and its contents in order to process this input. ");
                        }
                        else if ("filterTag".Equals(onInvalidAction))
                        {
                            validateChildNodes(node);
                            promoteChildren(node);

                            errBuff.Append("filter the <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag and leave its contents in place so that we could process this input.");
                        }
                        else
                        {
                            node.Attributes.Remove(attr.Name);
                            currentAttributeIndex--;
                            errBuff.Append("remove the <b>" + HTMLEntityEncoder.htmlEntityEncode(name) + "</b> attribute from the tag and leave everything else in place so that we could process this input.");
                        }

                        errorMessages.Add(errBuff.ToString());

                        if ("removeTag".Equals(onInvalidAction) || "filterTag".Equals(onInvalidAction))
                        {
                            return; // can't process any more if we remove/filter the tag
                        }
                    }
                }
                else
                {
                    // The attribute is not known to the policy at all: drop it.
                    StringBuilder errBuff = new StringBuilder();

                    errBuff.Append("The <b>" + HTMLEntityEncoder.htmlEntityEncode(name));
                    errBuff.Append("</b> attribute of the <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag has been removed for security reasons. ");
                    errBuff.Append("This removal should not affect the display of the HTML submitted.");

                    errorMessages.Add(errBuff.ToString());
                    node.Attributes.Remove(name);
                    currentAttributeIndex--;
                } // end if attribute is or is not found in policy file
            } // end if style.equals("name")
        } // end loop through attributes

        validateChildNodes(node);
    }
    else if ("truncate".Equals(tag.Action))
    {
        Console.WriteLine("truncate");

        HtmlAttributeCollection nnmap = node.Attributes;
        while (nnmap.Count > 0)
        {
            StringBuilder errBuff = new StringBuilder();

            errBuff.Append("The <b>" + HTMLEntityEncoder.htmlEntityEncode(nnmap[0].Name));
            errBuff.Append("</b> attribute of the <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag has been removed for security reasons. ");
            errBuff.Append("This removal should not affect the display of the HTML submitted.");
            node.Attributes.Remove(nnmap[0].Name);
            errorMessages.Add(errBuff.ToString());
        }

        HtmlNodeCollection cList = node.ChildNodes;

        // i counts the children that existed on entry; j is the live index, which only
        // advances past children that are kept (text and comment nodes).
        int i = 0;
        int j = 0;
        int length = cList.Count;

        while (i < length)
        {
            HtmlNode nodeToRemove = cList[j];
            if (nodeToRemove.NodeType != HtmlNodeType.Text && nodeToRemove.NodeType != HtmlNodeType.Comment)
            {
                node.RemoveChild(nodeToRemove);
            }
            else
            {
                j++;
            }

            i++;
        }
    }
    else
    {
        errorMessages.Add("The <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag has been removed for security reasons.");
        parentNode.RemoveChild(node);
    }
}

// Validates every child of the given node. When validating a child detaches it
// (its ParentNode becomes null), the same index is visited again because the
// following nodes have shifted into its place.
private void validateChildNodes(HtmlNode node)
{
    for (int i = 0; i < node.ChildNodes.Count; i++)
    {
        HtmlNode child = node.ChildNodes[i];
        recursiveValidateTag(child);
        if (child.ParentNode == null)
        {
            i--;
        }
    }
}
/// <summary>
/// Replaces <paramref name="node"/> with its own children: each child is detached from
/// <paramref name="node"/> and inserted into the parent immediately before it, after
/// which <paramref name="node"/> itself is removed from the parent.
/// </summary>
/// <param name="node">The node whose children are moved up one level.</param>
private void promoteChildren(HtmlNode node)
{
    HtmlNodeCollection children = node.ChildNodes;
    HtmlNode parent = node.ParentNode;

    // Always take the current first child so the original order is preserved.
    while (children.Count != 0)
    {
        HtmlNode firstChild = children[0];
        HtmlNode detached = node.RemoveChild(firstChild);
        parent.InsertBefore(detached, node);
    }

    parent.RemoveChild(node);
}
/// <summary>
/// Removes every node selected by the XPath expression <paramref name="subNodeToRemove"/>,
/// evaluated against <paramref name="curHtmlNode"/>.
/// </summary>
/// <param name="curHtmlNode">The node the XPath expression is evaluated on.</param>
/// <param name="subNodeToRemove">XPath expression selecting the nodes to remove.</param>
/// <remarks>
/// Exceptions from the underlying calls propagate unchanged. The former
/// <c>catch (Exception ex) { throw ex; }</c> wrapper added nothing except resetting
/// the stack trace, so it has been dropped.
/// </remarks>
public static void RemoveSubHtmlNode(HtmlNode curHtmlNode, string subNodeToRemove)
{
    var foundAllSub = curHtmlNode.SelectNodes(subNodeToRemove);
    if (foundAllSub == null)
    {
        return;
    }

    foreach (HtmlNode subNode in foundAllSub)
    {
        // A match is not necessarily a direct child of curHtmlNode, so remove it from
        // the node that actually contains it. Fall back to curHtmlNode (the previous
        // behaviour) when the match reports no parent.
        HtmlNode owner = subNode.ParentNode ?? curHtmlNode;
        owner.RemoveChild(subNode);
    }
}
/// <summary>
/// Strips a job-offer element down to its content rows: removes <c>a</c>, <c>img</c>,
/// <c>script</c> and <c>style</c> descendants (via <c>RemoveDescendants</c>), drops the
/// first <c>tr</c> child, and then drops every <c>tr</c> child from the first one that
/// contains an element whose <c>class</c> attribute contains <c>button_new</c> onwards.
/// </summary>
/// <param name="JobOfferElement">The element to clean.</param>
/// <returns>The cleaned element as returned by <c>RemoveDescendants</c>.</returns>
private static HtmlNode ClearNodes(HtmlNode JobOfferElement)
{
    JobOfferElement = RemoveDescendants(JobOfferElement, new string[] { "a", "img", "script", "style" });

    // Element returns null when there is no <tr> child; RemoveChild must not be called with null.
    var firstRow = JobOfferElement.Element("tr");
    if (firstRow != null)
    {
        JobOfferElement.RemoveChild(firstRow);
    }

    // Snapshot the rows because rows are removed while iterating.
    var trS = JobOfferElement.Elements("tr").ToList();
    bool removeNext = false;

    foreach (var item in trS)
    {
        // Once the marker row is found, it and every following row are removed.
        if (!removeNext)
        {
            removeNext = item.Descendants().Any(
                d => d.Attributes.Contains("class") && d.Attributes["class"].Value.Contains("button_new"));
        }

        if (removeNext)
        {
            JobOfferElement.RemoveChild(item);
        }
    }

    return JobOfferElement;
}
/// <summary>
/// Removes all attributes and child elements, but keeps text and comment nodes.
/// </summary>
/// <param name="node">The node to truncate in place.</param>
void TruncateAction(HtmlNode node)
{
    // Strip attributes from the front until none are left.
    HtmlAttributeCollection attributes = node.Attributes;
    while (attributes.Count > 0)
    {
        string firstName = attributes[0].Name;
        node.Attributes.Remove(firstName);
    }

    HtmlNodeCollection children = node.ChildNodes;
    int index = 0;
    while (index < children.Count)
    {
        HtmlNode child = children[index];
        bool keep = child.NodeType == HtmlNodeType.Text || child.NodeType == HtmlNodeType.Comment;
        if (keep)
        {
            // Kept nodes stay in place, so step over them.
            index++;
        }
        else
        {
            // Removal shifts the next child into this index; do not advance.
            node.RemoveChild(child);
        }
    }
}
/// <summary>
/// Removes the given node from its parent while keeping its child nodes.
/// </summary>
/// <param name="node">The node to remove; its children take its place in the parent.</param>
void PromoteChildren(HtmlNode node)
{
    // Filter the child nodes first.
    FiltersTags(node.ChildNodes);

    HtmlNodeCollection children = node.ChildNodes;
    HtmlNode parent = node.ParentNode;

    // Move every child up into the parent, placing it in front of the node.
    while (children.Count != 0)
    {
        HtmlNode firstChild = children[0];
        HtmlNode detached = node.RemoveChild(firstChild);
        parent.InsertBefore(detached, node);
    }

    // Finally delete the node itself.
    parent.RemoveChild(node);
}
/// <summary>
/// Removes every node selected by the XPath expression <paramref name="subNodeToRemove"/>,
/// evaluated against <paramref name="curHtmlNode"/>.
/// For example, <c>"script"</c> removes <c>&lt;script type="text/javascript"&gt;</c> children.
/// </summary>
/// <param name="curHtmlNode">The node the XPath expression is evaluated on.</param>
/// <param name="subNodeToRemove">XPath expression selecting the nodes to remove.</param>
/// <returns>The same <paramref name="curHtmlNode"/> instance, after removal.</returns>
public HtmlNode removeSubHtmlNode(HtmlNode curHtmlNode, string subNodeToRemove)
{
    // SelectNodes returns a separate collection, so nodes can be removed while iterating
    // it. Iterating Descendants() directly and removing from it throws
    // "Collection was modified; enumeration operation may not execute."
    HtmlNodeCollection foundAllSub = curHtmlNode.SelectNodes(subNodeToRemove);
    if (foundAllSub != null)
    {
        foreach (HtmlNode subNode in foundAllSub)
        {
            // A match is not necessarily a direct child of curHtmlNode, so remove it from
            // the node that actually contains it. Fall back to curHtmlNode (the previous
            // behaviour) when the match reports no parent.
            HtmlNode owner = subNode.ParentNode ?? curHtmlNode;
            owner.RemoveChild(subNode);
        }
    }

    return curHtmlNode;
}
/// <summary>
/// Builds an <c>Entry</c> from <paramref name="node"/>: its links, its first link, an
/// optional description taken from the first <c>ul</c> found below the node, and the
/// remaining text (without a trailing colon) as <c>Year</c>.
/// </summary>
/// <param name="node">The node to read. The first <c>ul</c> found below it is removed from the tree.</param>
/// <returns>The populated entry.</returns>
private static Entry ExtractHolidayFromNode(HtmlNode node)
{
    var entry = new Entry();
    entry.Links = ExtractAllLinksFromHtmlNode(node);
    entry.Link = ExtractFirstLink(node, entry);

    // put sublists into a description
    if (node.HasChildNodes)
    {
        // TODO: redo this as node parsing
        HtmlNode extraListNode = node.Descendants("ul").FirstOrDefault();
        if (extraListNode != null)
        {
            entry.Description = HttpUtility.HtmlDecode(extraListNode.InnerText).Trim();

            // Descendants() can return a list nested below an intermediate element, so
            // detach it from its actual parent; node.RemoveChild only accepts direct children.
            HtmlNode owner = extraListNode.ParentNode ?? node;
            owner.RemoveChild(extraListNode);
        }
    }

    // With the sublist removed, the remaining text is used as the Year value.
    entry.Year = HttpUtility.HtmlDecode(node.InnerText.Trim().TrimEnd(':'));
    return entry;
}
/// <summary>
/// Recursively removes <c>input</c>, <c>textarea</c>, <c>button</c>, <c>script</c> and
/// <c>form</c> children, as well as <c>a</c> children whose <c>href</c> is missing or
/// does not start with <c>http</c>.
/// </summary>
/// <param name="n">The node whose subtree is cleaned in place.</param>
void KillElems(HtmlNode n)
{
    // Work on a snapshot because children are removed while iterating.
    var snapshot = n.ChildNodes.Cast<HtmlNode>().ToList();
    foreach (var child in snapshot)
    {
        switch (child.Name.ToLowerInvariant())
        {
            case "input":
            case "textarea":
            case "button":
            case "script":
            case "form":
            {
                n.RemoveChild(child);
                break;
            }

            case "a":
            {
                var href = child.Attributes["href"];
                bool keepLink = href != null && href.Value.StartsWith("http");
                if (!keepLink)
                {
                    n.RemoveChild(child);
                }

                break;
            }
        }
    }

    // Descend into whatever children are left.
    foreach (var remaining in n.ChildNodes)
    {
        KillElems(remaining);
    }
}