RemoveChild() public method

Removes the specified child node.
public RemoveChild ( HtmlNode oldChild ) : HtmlNode
oldChild HtmlNode The node being removed. May not be null.
return HtmlNode
Esempio n. 1
0
		/// <summary>
		/// Removes <paramref name="oldChild"/> from <paramref name="parent"/> after inserting
		/// each of its child nodes into <paramref name="parent"/>, starting right after the
		/// sibling that preceded <paramref name="oldChild"/>.
		/// </summary>
		/// <param name="parent">The node that <paramref name="oldChild"/> is removed from.</param>
		/// <param name="oldChild">The node to remove; its children are inserted into <paramref name="parent"/>.</param>
		public static void RemoveChildKeepGrandChildren ( HtmlNode parent , HtmlNode oldChild )
		{
			var grandChildren = oldChild.ChildNodes;
			if ( grandChildren != null )
			{
				// Insertion anchor: starts at the sibling preceding the removed node.
				HtmlNode anchor = oldChild.PreviousSibling;
				foreach ( HtmlNode grandChild in grandChildren )
				{
					parent.InsertAfter ( grandChild , anchor );
					// Advance the anchor so the next grandchild lands after this one
					// (the step the original author noted as missing in HtmlAgilityPack).
					anchor = grandChild;
				}
			}
			parent.RemoveChild ( oldChild );
		}
        /// <summary>
        /// Recursively cleans the children of <paramref name="node"/>:
        /// unwraps <c>span</c> children that carry a <c>lang</c> attribute, replaces <c>font</c>
        /// children with <c>span class="terminal-symbol"</c> elements, and removes ellipsis-only
        /// children together with a <c>br</c> that directly follows an ellipsis.
        /// </summary>
        /// <param name="node">The node whose children are cleaned in place. Must not be null.</param>
        /// <returns>The same <paramref name="node"/> instance, after cleanup.</returns>
        static HtmlNode CleanupHtmlNode(HtmlNode node)
        {
            Contract.Requires(node != null);
            // A postcondition has to be declared with Ensures; Contract.Result is only valid there.
            Contract.Ensures(Contract.Result<HtmlNode>() != null);

            // Each query is materialized with ToList() because the loop bodies modify node.ChildNodes.
            foreach(var langSpan in node.ChildNodes.Where(n => n.Name == "span" && n.Attributes.Contains("lang")).ToList()) {
                langSpan.ReplaceWithChildNodes();
            }
            foreach(var fontChildNode in node.ChildNodes.Where(n => n.Name == "font").ToList()) {
                var replacingNode = node.OwnerDocument.CreateElement("span");
                replacingNode.Attributes.Add("class", "terminal-symbol");
                // The font node is doubled, so the content is taken from its first child.
                // An empty font element has no first child; fall back to its own (empty) markup
                // instead of throwing a NullReferenceException.
                replacingNode.InnerHtml = fontChildNode.FirstChild?.InnerHtml ?? fontChildNode.InnerHtml;
                node.ChildNodes.Insert(node.ChildNodes.GetNodeIndex(fontChildNode), replacingNode);
                node.RemoveChild(fontChildNode);
            }
            var ellipsises = new List<string> { "&hellip;", "..." };
            // Matches children whose trimmed text is an ellipsis, plus any <br> whose previous sibling is one.
            foreach(var ellipsisLineNode in node.ChildNodes.Where(n => ellipsises.Contains(n.InnerText.Trim()) || n.Name == "br" && ellipsises.Contains(n.PreviousSibling?.InnerText?.Trim())).ToList()) {
                ellipsisLineNode.Remove();
            }
            // Recurse into the remaining children; each call only edits that child's own children.
            foreach(var childNode in node.ChildNodes) {
                CleanupHtmlNode(childNode);
            }
            return node;
        }
        /// <summary>
        /// Validates <paramref name="node"/> against the policy and recurses into its children.
        /// Depending on the action the policy assigns to the tag, the node is:
        /// filtered (tag removed, children promoted), validated (style content and attributes
        /// checked, invalid attributes removed), truncated (all attributes and all non-text,
        /// non-comment children removed), or removed entirely. Human-readable messages describing
        /// each change are appended to <c>errorMessages</c>.
        /// </summary>
        /// <param name="node">The node to validate. Nodes named <c>#text</c> are returned untouched.</param>
        private void recursiveValidateTag(HtmlNode node)
        {
            int maxinputsize = int.Parse(policy.getDirective("maxInputSize"));

            num++;

            HtmlNode parentNode = node.ParentNode;
            HtmlNode tmp = null;
            string tagName = node.Name;

            // Text nodes are not validated.
            // NOTE(review): the original author flagged this check as possibly not robust enough.
            if (tagName.ToLower().Equals("#text"))  // || tagName.ToLower().Equals("#comment"))
            {
                return;
            }

            Tag tag = policy.getTagByName(tagName.ToLower());

            if (tag == null || "filter".Equals(tag.Action))
            {
                // Unknown or filtered tag: drop the tag itself but keep its (validated) contents.
                StringBuilder errBuff = new StringBuilder();
                if (tagName == null || tagName.Trim().Equals(""))
                    errBuff.Append("An unprocessable ");
                else
                    errBuff.Append("The <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName.ToLower()) + "</b> ");

                errBuff.Append("tag has been filtered for security reasons. The contents of the tag will ");
                errBuff.Append("remain in place.");

                errorMessages.Add(errBuff.ToString());

                for (int i = 0; i < node.ChildNodes.Count; i++)
                {
                    tmp = node.ChildNodes[i];
                    recursiveValidateTag(tmp);

                    // The child was detached during validation, so the next child now sits at index i.
                    if (tmp.ParentNode == null)
                    {
                        i--;
                    }
                }
                promoteChildren(node);
                return;
            }
            else if ("validate".Equals(tag.Action))
            {
                if ("style".Equals(tagName.ToLower()) && policy.getTagByName("style") != null)
                {
                    CssScanner styleScanner = new CssScanner(policy);
                    try
                    {

                        CleanResults cr = styleScanner.scanStyleSheet(node.FirstChild.InnerHtml, maxinputsize);

                        foreach (string msg in cr.getErrorMessages())
                            errorMessages.Add(msg.ToString());

                        /*
                         * If IE gets an empty style tag, i.e. <style/>
                         * it will break all CSS on the page. I wish I
                         * was kidding. So, if after validation no CSS
                         * properties are left, we would normally be left
                         * with an empty style tag and break all CSS. To
                         * prevent that, we have this check.
                         */

                        if (cr.getCleanHTML() == null || cr.getCleanHTML().Equals(""))
                        {
                            node.FirstChild.InnerHtml = "/* */";
                        }
                        else
                        {
                            node.FirstChild.InnerHtml = cr.getCleanHTML();
                        }

                    }
                    catch (ScanException e)
                    {
                        Console.WriteLine("Scan Exception: " + e.Message);

                        // The style sheet could not be scanned: remove the whole style tag.
                        parentNode.RemoveChild(node);
                    }
                }

                HtmlAttribute attribute = null;
                for (int currentAttributeIndex = 0; currentAttributeIndex < node.Attributes.Count; currentAttributeIndex++)
                {
                    attribute = node.Attributes[currentAttributeIndex];

                    string name = attribute.Name;
                    string _value = attribute.Value;

                    // Tag-specific attribute rules win; fall back to the policy's global attributes.
                    Attribute attr = tag.getAttributeByName(name);

                    if (attr == null)
                    {
                        attr = policy.getGlobalAttributeByName(name);
                    }

                    bool isAttributeValid = false;

                    if ("style".Equals(name.ToLower()) && attr != null)
                    {

                        CssScanner styleScanner = new CssScanner(policy);

                        try
                        {

                            CleanResults cr = styleScanner.scanInlineStyle(_value, tagName, maxinputsize);

                            attribute.Value = cr.getCleanHTML();

                            foreach (string msg in cr.getErrorMessages())
                                errorMessages.Add(msg.ToString());

                        }
                        catch (ScanException ex)
                        {
                            Console.WriteLine(ex.Message);
                            // Remove the style attribute that could not be scanned. The index is
                            // decremented because the collection shrank; without the removal the
                            // decrement would make the loop rescan this same attribute forever.
                            node.Attributes.Remove(name);
                            currentAttributeIndex--;
                        }

                    }
                    else
                    {
                        if (attr != null)
                        {
                            //try to find out how robust this is - do I need to do this in a loop?
                            _value = HtmlEntity.DeEntitize(_value);

                            // Valid if the value equals (case-insensitively) one of the allowed literals...
                            foreach (string allowedValue in attr.AllowedValues)
                            {
                                if (isAttributeValid) break;

                                if (allowedValue != null && allowedValue.ToLower().Equals(_value.ToLower()))
                                {
                                    isAttributeValid = true;
                                }
                            }

                            // ...or if it fully matches one of the allowed regular expressions.
                            foreach (string ptn in attr.AllowedRegExp)
                            {
                                if (isAttributeValid) break;
                                string pattern = "^" + ptn + "$";
                                Match m = Regex.Match(_value, pattern);
                                if (m.Success)
                                {
                                    isAttributeValid = true;
                                }
                            }

                            if (!isAttributeValid)
                            {
                                string onInvalidAction = attr.OnInvalid;
                                StringBuilder errBuff = new StringBuilder();

                                errBuff.Append("The <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag contained an attribute that we couldn't process. ");
                                errBuff.Append("The <b>" + HTMLEntityEncoder.htmlEntityEncode(name) + "</b> attribute had a value of <u>" + HTMLEntityEncoder.htmlEntityEncode(_value) + "</u>. ");
                                errBuff.Append("This value could not be accepted for security reasons. We have chosen to ");

                                if ("removeTag".Equals(onInvalidAction))
                                {
                                    parentNode.RemoveChild(node);
                                    errBuff.Append("remove the <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag and its contents in order to process this input. ");
                                }
                                else if ("filterTag".Equals(onInvalidAction))
                                {
                                    for (int i = 0; i < node.ChildNodes.Count; i++)
                                    {
                                        tmp = node.ChildNodes[i];
                                        recursiveValidateTag(tmp);
                                        // The child was detached during validation; stay on the same index.
                                        if (tmp.ParentNode == null)
                                        {
                                            i--;
                                        }
                                    }

                                    promoteChildren(node);

                                    errBuff.Append("filter the <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag and leave its contents in place so that we could process this input.");
                                }
                                else
                                {
                                    // Default: drop only the offending attribute and revisit this index.
                                    node.Attributes.Remove(attr.Name);
                                    currentAttributeIndex--;
                                    errBuff.Append("remove the <b>" + HTMLEntityEncoder.htmlEntityEncode(name) + "</b> attribute from the tag and leave everything else in place so that we could process this input.");

                                }

                                errorMessages.Add(errBuff.ToString());

                                if ("removeTag".Equals(onInvalidAction) || "filterTag".Equals(onInvalidAction))
                                {
                                    return; // can't process any more if we remove/filter the tag	
                                }
                            }
                        }
                        else
                        {
                            // The attribute is not mentioned in the policy at all: remove it.
                            StringBuilder errBuff = new StringBuilder();

                            errBuff.Append("The <b>" + HTMLEntityEncoder.htmlEntityEncode(name));
                            errBuff.Append("</b> attribute of the <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag has been removed for security reasons. ");
                            errBuff.Append("This removal should not affect the display of the HTML submitted.");

                            errorMessages.Add(errBuff.ToString());
                            node.Attributes.Remove(name);
                            currentAttributeIndex--;

                        } // end if attribute is or is not found in policy file
                    } // end if style.equals("name") 
                } // end loop through attributes 


                // Validate the children; stay on the same index whenever a child was detached.
                for (int i = 0; i < node.ChildNodes.Count; i++)
                {
                    tmp = node.ChildNodes[i];
                    recursiveValidateTag(tmp);
                    if (tmp.ParentNode == null)
                    {
                        i--;
                    }
                }

            }
            else if ("truncate".Equals(tag.Action))
            {
                Console.WriteLine("truncate");
                HtmlAttributeCollection nnmap = node.Attributes;

                // Remove every attribute, recording one message per removal.
                while (nnmap.Count > 0)
                {

                    StringBuilder errBuff = new StringBuilder();

                    errBuff.Append("The <b>" + HTMLEntityEncoder.htmlEntityEncode(nnmap[0].Name));
                    errBuff.Append("</b> attribute of the <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag has been removed for security reasons. ");
                    errBuff.Append("This removal should not affect the display of the HTML submitted.");
                    node.Attributes.Remove(nnmap[0].Name);
                    errorMessages.Add(errBuff.ToString());
                }

                HtmlNodeCollection cList = node.ChildNodes;

                // i counts the children originally present; j is the current position in the
                // shrinking list and only advances past children that are kept.
                int i = 0;
                int j = 0;
                int length = cList.Count;

                while (i < length)
                {

                    HtmlNode nodeToRemove = cList[j];
                    if (nodeToRemove.NodeType != HtmlNodeType.Text && nodeToRemove.NodeType != HtmlNodeType.Comment)
                    {
                        node.RemoveChild(nodeToRemove);
                    }
                    else
                    {
                        j++;
                    }
                    i++;
                }

            }
            else
            {
                // Any other action: remove the tag together with its contents.
                errorMessages.Add("The <b>" + HTMLEntityEncoder.htmlEntityEncode(tagName) + "</b> tag has been removed for security reasons.");
                parentNode.RemoveChild(node);
            }
        }
        /// <summary>
        /// Moves every child of <paramref name="node"/> into the node's parent, inserting each one
        /// before <paramref name="node"/>, and then removes <paramref name="node"/> from that parent.
        /// </summary>
        /// <param name="node">The node to remove while keeping its children.</param>
        private void promoteChildren(HtmlNode node)
        {
            HtmlNodeCollection children = node.ChildNodes;
            HtmlNode container = node.ParentNode;

            // Always take the first remaining child: each removal shrinks the collection.
            while (children.Count != 0)
            {
                HtmlNode detached = node.RemoveChild(children[0]);
                container.InsertBefore(detached, node);
            }

            container.RemoveChild(node);
        }
Esempio n. 5
0
        /// <summary>
        /// Removes from <paramref name="curHtmlNode"/> every node returned by
        /// <c>curHtmlNode.SelectNodes(subNodeToRemove)</c>. Does nothing when the selection is null.
        /// </summary>
        /// <param name="curHtmlNode">The node to select from and remove children of.</param>
        /// <param name="subNodeToRemove">The selection expression passed to <c>SelectNodes</c>.</param>
        public static void RemoveSubHtmlNode(HtmlNode curHtmlNode, string subNodeToRemove)
        {
            // Exceptions are deliberately left to propagate: catching and rethrowing with
            // "throw ex;" would reset the stack trace without adding any handling.
            var foundAllSub = curHtmlNode.SelectNodes(subNodeToRemove);
            if (foundAllSub == null)
            {
                return;
            }

            foreach (HtmlNode subNode in foundAllSub)
            {
                curHtmlNode.RemoveChild(subNode);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Strips a job-offer element: removes the "a", "img", "script" and "style" descendants via
        /// <c>RemoveDescendants</c>, removes the first <c>tr</c> child, and then removes every
        /// remaining <c>tr</c> child from the first one that contains a descendant whose
        /// <c>class</c> attribute includes "button_new" onwards.
        /// </summary>
        /// <param name="JobOfferElement">The element to clean.</param>
        /// <returns>The element returned by <c>RemoveDescendants</c>, after the rows were removed.</returns>
        private static HtmlNode ClearNodes(HtmlNode JobOfferElement)
        {
            JobOfferElement = RemoveDescendants(JobOfferElement, new string[] { "a", "img", "script", "style" });

            var firstRow = JobOfferElement.Element("tr");
            JobOfferElement.RemoveChild(firstRow);

            // Snapshot the rows so that removing them does not disturb the iteration.
            var rows = JobOfferElement.Elements("tr").ToList();

            bool buttonRowReached = false;
            for (int index = 0; index < rows.Count; index++)
            {
                var row = rows[index];

                // Once the marker row is found the flag stays set and no further rows are inspected.
                if (!buttonRowReached)
                {
                    buttonRowReached = row.Descendants().Any(
                        d => d.Attributes.Contains("class") &&
                             d.Attributes["class"].Value.Contains("button_new"));
                }

                if (buttonRowReached)
                {
                    JobOfferElement.RemoveChild(row);
                }
            }

            return JobOfferElement;
        }
Esempio n. 7
0
 /// <summary>
 /// Removes all attributes and all child nodes of the node, except that text and
 /// comment child nodes are kept.
 /// </summary>
 /// <param name="node">The node to truncate in place.</param>
 void TruncateAction(HtmlNode node)
 {
     HtmlAttributeCollection attributes = node.Attributes;
     while (attributes.Count != 0)
     {
         string firstName = attributes[0].Name;
         node.Attributes.Remove(firstName);
     }

     HtmlNodeCollection children = node.ChildNodes;
     int index = 0;
     while (index < children.Count)
     {
         HtmlNode candidate = children[index];
         switch (candidate.NodeType)
         {
             case HtmlNodeType.Text:
             case HtmlNodeType.Comment:
                 // Kept: step past it.
                 index++;
                 break;
             default:
                 // Removed: the next child now occupies the same index.
                 node.RemoveChild(candidate);
                 break;
         }
     }
 }
Esempio n. 8
0
 /// <summary>
 /// Removes the given node from its parent while keeping its child nodes.
 /// </summary>
 /// <param name="node">The node to remove; its children are moved into its parent.</param>
 void PromoteChildren(HtmlNode node)
 {
     // Filter the child nodes first.
     FiltersTags(node.ChildNodes);

     HtmlNodeCollection children = node.ChildNodes;
     HtmlNode container = node.ParentNode;

     // Move each child up into the parent, placing it in front of the node.
     while (children.Count != 0)
     {
         HtmlNode detached = node.RemoveChild(children[0]);
         container.InsertBefore(detached, node);
     }

     // Finally remove the node itself.
     container.RemoveChild(node);
 }
Esempio n. 9
0
    /// <summary>
    /// Removes sub nodes from the current HTML node: every node returned by
    /// <c>curHtmlNode.SelectNodes(subNodeToRemove)</c> is removed from <paramref name="curHtmlNode"/>.
    /// Example: pass "script" to remove &lt;script type="text/javascript"&gt; children.
    /// </summary>
    /// <param name="curHtmlNode">The node to select from and remove children of.</param>
    /// <param name="subNodeToRemove">The selection expression passed to <c>SelectNodes</c>.</param>
    /// <returns>The same <paramref name="curHtmlNode"/> instance.</returns>
    public HtmlNode removeSubHtmlNode(HtmlNode curHtmlNode, string subNodeToRemove)
    {
        // The matches are collected up front with SelectNodes. The original author noted that
        // removing nodes while enumerating Descendants(subNodeToRemove) failed with
        // InvalidOperationException: "Collection was modified; enumeration operation may not execute."
        HtmlNodeCollection matches = curHtmlNode.SelectNodes(subNodeToRemove);

        bool nothingToRemove = (matches == null) || (matches.Count <= 0);
        if (nothingToRemove)
        {
            return curHtmlNode;
        }

        foreach (HtmlNode match in matches)
        {
            curHtmlNode.RemoveChild(match);
        }

        return curHtmlNode;
    }
Esempio n. 10
0
        /// <summary>
        /// Builds an <c>Entry</c> from <paramref name="node"/>: fills in the links, moves the text
        /// of the first <c>ul</c> descendant (if any) into <c>Description</c> and detaches that
        /// list, then stores the node's remaining text (trimmed, trailing ':' stripped,
        /// HTML-decoded) in <c>Year</c>.
        /// </summary>
        /// <param name="node">The node to extract from. The first <c>ul</c> descendant is removed from it.</param>
        /// <returns>The populated entry.</returns>
        private static Entry ExtractHolidayFromNode(HtmlNode node)
        {
            var entry = new Entry();
            entry.Links = ExtractAllLinksFromHtmlNode(node);
            entry.Link = ExtractFirstLink(node, entry);

            // put sublists into a description
            if (node.HasChildNodes)
            {
                // TODO: redo this as node parsing
                HtmlNode extraListNode = node.Descendants("ul").FirstOrDefault();
                if (extraListNode != null)
                {
                    entry.Description = HttpUtility.HtmlDecode(extraListNode.InnerText).Trim();
                    // Descendants() may return a list nested below a child of node, and
                    // RemoveChild only accepts direct children, so detach the list from
                    // its actual parent.
                    extraListNode.ParentNode.RemoveChild(extraListNode);
                }
            }

            entry.Year = HttpUtility.HtmlDecode(node.InnerText.Trim().TrimEnd(':'));

            return entry;
        }
Esempio n. 11
0
 /// <summary>
 /// Recursively removes unwanted elements below <paramref name="n"/>: children named
 /// input, textarea, button, script or form are removed, as are <c>a</c> children that have
 /// no href attribute or whose href does not start with "http".
 /// </summary>
 /// <param name="n">The node whose children are filtered in place.</param>
 void KillElems(HtmlNode n)
 {
     // Work on a snapshot, since children are removed from n while iterating.
     var snapshot = n.ChildNodes.Cast<HtmlNode> ().ToList ();
     foreach (var child in snapshot) {
         switch (child.Name.ToLowerInvariant ()) {
             case "input":
             case "textarea":
             case "button":
             case "script":
             case "form":
                 n.RemoveChild (child);
                 break;
             case "a": {
                 var link = child.Attributes["href"];
                 bool keepLink = link != null && link.Value.StartsWith ("http");
                 if (!keepLink) {
                     n.RemoveChild (child);
                 }
                 break;
             }
         }
     }
     // Recurse into the children that survived the pass above.
     foreach (var survivor in n.ChildNodes) {
         KillElems (survivor);
     }
 }