/// <summary>
/// Empties <paramref name="OriginalForm"/> and refills it with one node per
/// entry of <paramref name="InputElementStrings"/>.
/// </summary>
/// <param name="OriginalForm">Form node to rebuild; it is modified in place.</param>
/// <param name="InputElementStrings">
/// Markup strings; each one is wrapped in an <c>HTML</c> object and the first
/// child of its document node is appended to the form.
/// </param>
/// <returns>The same <paramref name="OriginalForm"/> instance that was passed in.</returns>
HtmlNode GetStrippedForm(HtmlNode OriginalForm, List<string> InputElementStrings)
{
    // Start from an empty form; the original children are discarded.
    OriginalForm.RemoveAllChildren();

    foreach (string markup in InputElementStrings)
    {
        // Only the first top-level node of each fragment is kept.
        HTML fragment = new HTML(markup);
        HtmlNode firstNode = fragment.Html.DocumentNode.FirstChild;
        OriginalForm.AppendChild(firstNode);
    }

    return OriginalForm;
}
/// <summary>
/// Replaces all child nodes with the supplied nodes and indents them +1 tab.
/// </summary>
/// <param name="parent">Node whose children are replaced.</param>
/// <param name="nodes">Nodes that become the new children, in enumeration order.</param>
private void ReplaceAllChildren(HtmlNode parent, IEnumerable<HtmlNode> nodes)
{
    // Preserve the indentation of the parent: take it from the text node that
    // precedes the parent, or, when the parent has no previous sibling, from
    // the text node that precedes the parent's own parent.
    var parentScope = CreateTextNode();
    var prev = parent.PreviousSibling;
    if (prev == null && parent.ParentNode != null)
    {
        prev = parent.ParentNode.PreviousSibling;
    }

    if (prev != null && prev.NodeType == HtmlNodeType.Text)
    {
        var m = TabsAndSpaces.Match(prev.InnerText);
        if (m.Success)
        {
            parentScope.InnerHtml += m.Value;
        }
    }

    // Add one tab of indentation for the children.
    var childIndent = parentScope.InnerText + "\t";

    // Snapshot the replacements before clearing the parent, so a lazily
    // evaluated sequence that is backed by the parent's current children is
    // not emptied before it has been enumerated.
    var replacements = new List<HtmlNode>(nodes);

    // Replace all children with supplied nodes (indented).
    parent.RemoveAllChildren();
    foreach (var option in replacements)
    {
        // Each child gets its own indentation text node. Appending one shared
        // instance repeatedly would place the same node object at several
        // positions in the child list, with every append overwriting its
        // sibling/parent links.
        parent.AppendChild(CreateTextNode(childIndent));
        parent.AppendChild(option);
    }

    // Put the closing tag at the same indentation as the opening tag.
    parent.AppendChild(parentScope);
}
/// <summary>
/// Walks the tree below <paramref name="node"/> depth-first looking for a
/// <c>table</c> element whose <c>class</c> attribute equals <c>CLASS_TIMETABLE</c>.
/// </summary>
/// <remarks>
/// A table is only accepted once a title cell has been seen: a <c>td</c> whose
/// <c>class</c> equals <c>CLASS_TITLE</c> and whose inner text is one of the
/// known titles sets <c>mWeekday</c> and <c>mTitleFound</c>. Every visited node
/// whose subtree yields no timetable has all of its children removed before
/// the method returns.
/// </remarks>
/// <param name="node">Root of the subtree to search; it may be modified.</param>
/// <param name="outText">Writer that receives progress messages.</param>
/// <returns>The timetable node, or <c>null</c> if this subtree does not contain one.</returns>
private HtmlNode FindTimetableRecursive(HtmlNode node, TextWriter outText)
{
    if (node.NodeType == HtmlNodeType.Element)
    {
        string tagName = node.Name;

        if (tagName == "table")
        {
            // Tables seen before any title cell are never candidates.
            if (mTitleFound)
            {
                foreach (HtmlAttribute classAttribute in node.Attributes.AttributesWithName("class"))
                {
                    if (CLASS_TIMETABLE.Equals(classAttribute.Value))
                    {
                        outText.WriteLine("Found timetable!");
                        return node;
                    }
                }
            }
        }
        else if (tagName == "td")
        {
            foreach (HtmlAttribute classAttribute in node.Attributes.AttributesWithName("class"))
            {
                if (!CLASS_TITLE.Equals(classAttribute.Value))
                {
                    continue;
                }

                string title = node.InnerText;
                bool isKnownTitle = title == TITLE_WEEKDAYS
                    || title == TITLE_SATURDAY
                    || title == TITLE_SUNDAY;
                if (isKnownTitle)
                {
                    outText.WriteLine("Found " + title + "!");
                    mWeekday = GetWeekdayEnum(title);
                    mTitleFound = true;
                }
            }
        }
    }

    // Not the timetable itself: descend into the children in document order.
    foreach (HtmlNode child in node.ChildNodes)
    {
        HtmlNode found = FindTimetableRecursive(child, outText);
        if (found != null)
        {
            return found;
        }
    }

    // Nothing below this node matched; drop its children before reporting failure.
    node.RemoveAllChildren();
    return null;
}
/// <summary>
/// Looks up the element whose <c>diffengineindexer</c> attribute equals
/// <paramref name="index"/> and hands back a childless copy of it with that
/// attribute removed.
/// </summary>
/// <param name="index">Value of the <c>diffengineindexer</c> attribute to search for.</param>
/// <param name="node">
/// Receives the stripped copy, or <c>null</c> when no element matches.
/// </param>
/// <returns><c>true</c> if a matching element was found; otherwise <c>false</c>.</returns>
private bool TryGetNode(int index, out HtmlNode node)
{
    string query = string.Format("//*[@diffengineindexer='{0}']", index);
    var match = XpathUtil.GetNode(_html, query);
    if (match == null)
    {
        node = null;
        return false;
    }

    // Deep-copy the match; the copy, not the node found in _html, is what
    // gets modified below.
    HtmlNode copy = match.CloneNode(true);

    // The indexing attribute was set by the scraper and is not part of the result.
    copy.Attributes.Remove("diffengineindexer");

    // Only the element itself is wanted, not its content.
    copy.RemoveAllChildren();

    node = copy;
    return true;
}
/// <summary>
/// Recursively filters <paramref name="node"/> and its descendants using
/// <c>ElementFilter.IsValid</c>.
/// </summary>
/// <param name="node">Root of the subtree to filter.</param>
/// <returns>
/// <c>true</c> if <paramref name="node"/> failed the filter and was removed
/// (together with all of its children); <c>false</c> if it was kept.
/// </returns>
private bool filterElement(HtmlNode node)
{
    if (!ElementFilter.IsValid(node))
    {
        node.RemoveAllChildren();
        node.Remove();
        return true;
    }

    // Removing a child shifts the following siblings down by one, so the
    // position only advances when the current child was kept.
    int position = 0;
    while (position < node.ChildNodes.Count)
    {
        bool childRemoved = filterElement(node.ChildNodes[position]);
        if (!childRemoved)
        {
            position++;
        }
    }

    return false;
}