/// <summary>
/// Finds the next element node among the children of <paramref name="parent"/>, searching
/// forward from the position immediately after <paramref name="currentIndex"/>.
/// </summary>
/// <param name="parent">The container whose <c>ChildNodes</c> are scanned.</param>
/// <param name="currentIndex">The index to search after; pass -1 to start at the first child.</param>
/// <param name="newIndex">Receives the index at which the element was found, or -1 if none was found.</param>
/// <returns>The element that was found, or null when no further element exists.</returns>
private IDomElement GetNextChild(IDomContainer parent, int currentIndex, out int newIndex)
{
    var siblings = parent.ChildNodes;
    int total = siblings.Count;

    for (int i = currentIndex + 1; i < total; i++)
    {
        // The node at each position is resolved through GetEffectiveChild rather than read directly.
        IDomObject candidate = GetEffectiveChild(siblings, i);
        if (candidate.NodeType == NodeType.ELEMENT_NODE)
        {
            newIndex = i;
            return (IDomElement)candidate;
        }
    }

    newIndex = -1;
    return null;
}
/// <summary>
/// Trims an HTML email body at the first detected message boundary and renders the result.
/// </summary>
/// <param name="rawBody">The HTML body; it is converted to a <c>CQ</c> document.</param>
/// <returns>The output of <c>Render()</c> on the (possibly trimmed) document.</returns>
public static string GetLastMessageText_Html(string rawBody)
{
    const string messageSeparatorStyle = "border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in";

    CQ dom = rawBody;

    foreach (IDomObject node in dom["*"])
    {
        // Lots of email clients insert styled elements with no inner text as message delimiters.
        // Check the known DIV patterns (two ids, one exact style string) first.
        bool isSeparatorDiv = node.NodeName == "DIV"
            && (node.Id == "divRplyFwdMsg"
                || node.Id == "x_divRplyFwdMsg"
                || messageSeparatorStyle.Equals(node.GetAttribute("style")));

        if (isSeparatorDiv)
        {
            // Capture the parent before removal, then drop the delimiter and what follows.
            IDomContainer parent = node.ParentNode;
            node.Remove();
            RemoveSubsequent(parent);
            break;
        }

        // Only leaf elements that carry some text are inspected for textual markers.
        if (node.ChildElements.Any() || string.IsNullOrWhiteSpace(node.InnerText))
        {
            continue;
        }

        var markerIndex = IndexOfAny(node.InnerText, MessageBorderMarkers);
        if (markerIndex.HasValue)
        {
            // Keep only the text that precedes the marker.
            node.InnerText = node.InnerText.Substring(0, markerIndex.Value);
            RemoveSubsequent(node);
            break;
        }
    }

    return dom.Render();
}
/// <summary>
/// Return DOM position matches (other than Nth Child).
/// </summary>
/// <param name="elm">The container whose children are examined.</param>
/// <param name="position">The position type to match.</param>
/// <returns>
/// For FirstChild/LastChild, at most one element; for Odd/Even/All, the nodes of
/// <c>ChildNodes</c> selected by their zero-based position.
/// </returns>
/// <exception cref="NotImplementedException">
/// Thrown during enumeration when <paramref name="position"/> is not one of the handled
/// values and the container has at least one child node.
/// </exception>
protected IEnumerable<IDomObject> GetSimpleDomPostionMatches(IDomContainer elm, PositionType position)
{
    if (position == PositionType.FirstChild)
    {
        IDomObject child = elm.FirstChild;

        // Guard against a container with no children before reading NodeType.
        if (child != null && child.NodeType != NodeType.ELEMENT_NODE)
        {
            child = child.NextElementSibling;
        }

        if (child != null)
        {
            yield return child;
        }
    }
    else if (position == PositionType.LastChild)
    {
        IDomObject child = elm.LastChild;

        // Guard against a container with no children before reading NodeType.
        if (child != null && child.NodeType != NodeType.ELEMENT_NODE)
        {
            child = child.PreviousElementSibling;
        }

        if (child != null)
        {
            yield return child;
        }
    }
    else
    {
        // NOTE(review): the index counts every entry of ChildNodes, not only elements - confirm
        // that this is the intended basis for Odd/Even.
        int index = 0;

        foreach (var child in elm.ChildNodes)
        {
            switch (position)
            {
                case PositionType.Odd:
                    if (index % 2 != 0)
                    {
                        yield return child;
                    }
                    break;

                case PositionType.Even:
                    if (index % 2 == 0)
                    {
                        yield return child;
                    }
                    break;

                case PositionType.All:
                    yield return child;
                    break;

                default:
                    throw new NotImplementedException("Unimplemented position type selector");
            }

            // Advance the position; without this Odd never matched and Even matched everything.
            index++;
        }
    }
}
/// <summary>
/// Yields the first element child of the parent, if it has one.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// A sequence containing <c>FirstElementChild</c>, or an empty sequence when it is null.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    IDomObject first = element.FirstElementChild;
    if (first == null)
    {
        yield break;
    }

    yield return first;
}
/// <summary>
/// Yields the value returned by <c>OnlyChildOrNull</c> for the parent, or nothing when that
/// value is null.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// A sequence with zero or one item.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    IDomObject only = OnlyChildOrNull(element);
    if (only == null)
    {
        yield break;
    }

    yield return only;
}
/// <summary>
/// Records the node-name filter and counting direction, then returns the children matching
/// the formula via the (obj, formula) overload.
/// </summary>
///
/// <param name="obj">
/// The parent object.
/// </param>
/// <param name="formula">
/// The formula for determining n.
/// </param>
/// <param name="onlyNodeName">
/// Value stored in <c>OnlyNodeName</c> before matching.
/// </param>
/// <param name="fromLast">
/// Value stored in <c>FromLast</c> before matching.
/// </param>
///
/// <returns>
/// The sequence produced by the (obj, formula) overload.
/// </returns>
public IEnumerable<IDomObject> GetMatchingChildren(IDomContainer obj, string formula, string onlyNodeName = null, bool fromLast = false)
{
    // Both settings are stored before delegating to the overload.
    OnlyNodeName = onlyNodeName;
    FromLast = fromLast;

    IEnumerable<IDomObject> matches = GetMatchingChildren(obj, formula);
    return matches;
}
/// <summary>
/// Yields the value returned by <c>OnlyChildOrNull</c> for the parent, or nothing when that
/// value is null.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// A sequence with zero or one item.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    IDomObject soleChild = OnlyChildOrNull(element);
    if (soleChild == null)
    {
        yield break;
    }

    yield return soleChild;
}
/// <summary>
/// Yields the first element child of the parent, if it has one.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// A sequence containing <c>FirstElementChild</c>, or an empty sequence when it is null.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    // Test the same value that is yielded. Checking FirstChild instead could yield a null
    // item when the parent has only non-element children.
    IDomObject child = element.FirstElementChild;
    if (child != null)
    {
        yield return child;
    }
}
/// <summary>
/// Yields each child element that is the last one carrying its <c>NodeNameID</c>.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// The matching children, produced in reverse order of <c>ChildElements</c>.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    var seenTypes = new HashSet<ushort>();

    // Walking backwards, the first time a node name id is seen marks the last child of that type.
    foreach (var child in element.ChildElements.Reverse())
    {
        if (seenTypes.Add(child.NodeNameID))
        {
            yield return child;
        }
    }
}
/// <summary>
/// Returns the element child at the given 1-based position among the element children of
/// <paramref name="parent"/>, stepping through them with <c>GetNextChild</c>.
/// </summary>
/// <param name="parent">The container to search.</param>
/// <param name="index">The 1-based element position to find.</param>
/// <returns>The element at that position, or null when the children run out first.</returns>
private IDomElement GetNthChild(IDomContainer parent, int index)
{
    int position;
    IDomElement candidate = GetNextChild(parent, -1, out position);

    // The ordinal starts at 1 for the first element found.
    for (int ordinal = 1; candidate != null && ordinal != index; ordinal++)
    {
        candidate = GetNextChild(parent, position, out position);
    }

    return candidate;
}
/// <summary>
/// Yields each child element that is the last one carrying its <c>NodeNameID</c>.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// The matching children, produced in reverse order of <c>ChildElements</c>.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    var encountered = new HashSet<ushort>();

    // Iterating in reverse means the first occurrence of an id is the last child of that type.
    foreach (var candidate in element.ChildElements.Reverse())
    {
        bool firstOccurrence = encountered.Add(candidate.NodeNameID);
        if (firstOccurrence)
        {
            yield return candidate;
        }
    }
}
/// <summary>
/// Yields each child element that is the first one carrying its <c>NodeNameID</c>.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// The matching children, in the order of <c>ChildElements</c>.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    var seenTypes = new HashSet<ulong>();

    foreach (var child in element.ChildElements)
    {
        // Add returns false when the id was already recorded by an earlier sibling.
        if (seenTypes.Add(child.NodeNameID))
        {
            yield return child;
        }
    }
}
/// <summary>
/// Attempts to remove this node from the DOM.
/// </summary>
/// <remarks>
/// Per the original note: once a node is removed from the DOM, the node and its child nodes
/// are all marked as destroyed. (Only this node's own flag is set in this method; how
/// children are marked is not visible here.)
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Thrown when the node is not yet removed but has no container.
/// </exception>
public virtual void Remove()
{
    // Removing an already-removed node is a no-op.
    if (removed)
    {
        return;
    }

    if (_container == null)
    {
        throw new InvalidOperationException();
    }

    // Detach under the container's SyncRoot, then clear the back-reference and set the flag.
    lock (_container.SyncRoot)
    {
        _container.NodeCollection.Remove(this);
        _container = null;
        removed = true;
    }
}
/// <summary>
/// Trims an HTML email body at the first detected message boundary and renders the result.
/// </summary>
/// <remarks>
/// There is no well-defined way to extract the latest message from a thread, so heuristics
/// covering different email clients are used.
/// </remarks>
/// <param name="rawBody">The HTML body; it is converted to a <c>CQ</c> document.</param>
/// <returns>The output of <c>Render()</c> on the (possibly trimmed) document.</returns>
public static string GetLastMessageText_Html(string rawBody)
{
    const string outlookDesktopSeparatorStyle = "border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in";
    const string outlookMobileSeparatorStyle = "display:inline-block;width:98%";

    CQ dom = rawBody;

    foreach (IDomObject node in dom["*"])
    {
        // Lots of email clients insert styled elements with no inner text as message delimiters.
        // Pattern 1: a DIV with a known id or the desktop separator style.
        bool isDiv = string.Equals(node.NodeName, "div", StringComparison.OrdinalIgnoreCase);
        if (isDiv
            && (node.Id == "divRplyFwdMsg"
                || node.Id == "x_divRplyFwdMsg"
                || outlookDesktopSeparatorStyle.Equals(node.GetAttribute("style"))))
        {
            // Here the delimiter's parent is the cut point.
            IDomContainer parent = node.ParentNode;
            RemoveSubsequent(parent);
            parent.Remove();
            break;
        }

        // Pattern 2: an HR with the mobile separator style.
        if (string.Equals(node.NodeName, "hr", StringComparison.OrdinalIgnoreCase)
            && outlookMobileSeparatorStyle.Equals(node.GetAttribute("style")))
        {
            RemoveSubsequent(node);
            node.Remove();
            break;
        }

        // Pattern 3: a textual marker inside a leaf element that carries some text.
        if (node.ChildElements.Any() || string.IsNullOrWhiteSpace(node.InnerText))
        {
            continue;
        }

        var markerIndex = IndexOfAny(node.InnerText, MessageBorderMarkers);
        if (markerIndex.HasValue)
        {
            // Keep only the text that precedes the marker.
            node.InnerText = node.InnerText.Substring(0, markerIndex.Value);
            RemoveSubsequent(node);
            break;
        }
    }

    return dom.Render();
}
/// <summary>
/// For each item in <paramref name="list"/>, yields every element that follows it among its
/// parent's child nodes.
/// </summary>
/// <param name="list">The nodes whose following siblings are wanted.</param>
/// <returns>
/// The following sibling elements, item by item; results for different items are not
/// de-duplicated.
/// </returns>
protected IEnumerable<IDomElement> GetSiblings(IEnumerable<IDomObject> list)
{
    foreach (var origin in list)
    {
        IDomContainer parent = origin.ParentNode;
        int start = origin.Index + 1;
        int total = parent.ChildNodes.Count;

        for (int i = start; i < total; i++)
        {
            // Non-element nodes fail the cast and are skipped.
            IDomElement sibling = parent.ChildNodes[i] as IDomElement;
            if (sibling == null)
            {
                continue;
            }

            yield return sibling;
        }
    }
}
/// <summary>
/// Attempts to remove this node from the DOM.
/// </summary>
/// <remarks>
/// Per the original note: once a node is removed from the DOM, the node and its child nodes
/// are all marked as destroyed. (Only this node's own flag is set in this method; how
/// children are marked is not visible here.)
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Thrown when the node is not yet removed but has no container.
/// </exception>
public virtual void Remove()
{
    // Nothing to do when the node has already been removed.
    if (!removed)
    {
        if (_container == null)
        {
            throw new InvalidOperationException();
        }

        // Detach under the container's SyncRoot, then clear the back-reference and set the flag.
        lock (_container.SyncRoot)
        {
            _container.NodeCollection.Remove(this);
            _container = null;
            removed = true;
        }
    }
}
/// <summary>
/// Returns the zero-based position of an element among its parent's child elements
/// (non-element nodes are not counted).
/// </summary>
/// <param name="element">The element to locate.</param>
/// <returns>
/// The position of the element, -1 when it has no parent, or the number of child elements
/// when it is not found among them.
/// </returns>
protected int GetElementIndex(IDomObject element)
{
    IDomContainer parent = element.ParentNode;
    if (parent == null)
    {
        return -1;
    }

    int position = 0;
    foreach (IDomElement sibling in parent.ChildElements)
    {
        // Identity comparison: the same node instance, not an equal one.
        if (ReferenceEquals(sibling, element))
        {
            return position;
        }

        position++;
    }

    return position;
}
/// <summary>
/// Yields each element child whose 1-based element position is selected by the current formula.
/// </summary>
///
/// <param name="obj">
/// The parent object.
/// </param>
///
/// <returns>
/// Sequence of matching children; empty when the parent has no children.
/// </returns>
public IEnumerable<IDomObject> GetMatchingChildren(IDomContainer obj)
{
    if (!obj.HasChildren)
    {
        yield break;
    }

    if (IsJustNumber)
    {
        // A plain-number formula selects at most one child, fetched directly.
        IDomElement single = GetNthChild(obj, MatchOnlyIndex);
        if (single != null)
        {
            yield return single;
        }

        yield break;
    }

    UpdateCacheInfo(obj.ChildNodes.Count);

    int position;
    IDomElement current = GetNextChild(obj, -1, out position);

    // GetNextChild sets the position to -1 once no further element exists.
    for (int ordinal = 1; position >= 0; ordinal++)
    {
        if (cacheInfo.MatchingIndices.Contains(ordinal))
        {
            yield return current;
        }

        current = GetNextChild(obj, position, out position);
    }
}
/// <summary>
/// Returns the result of <c>NthC.NthChildsOfType</c> for the parent, using the first
/// selector parameter as the formula and counting from the last element.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// A sequence of matching elements.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    // The final argument (fromLast) is true: positions are counted from the end.
    IEnumerable<IDomObject> matches = NthC.NthChildsOfType(element, Parameters[0], true);
    return matches;
}
/// <summary>
/// Creates a child node list bound to the given owner.
/// </summary>
///
/// <param name="owner">
/// The container stored in <c>Owner</c> (the parent of this list).
/// </param>
public ChildNodeList(IDomContainer owner)
{
    Owner = owner;
}
/// <summary>
/// Default implementation of ChildMatches: tests each child element with <c>Matches</c> and
/// keeps those that pass. Override this when the result can be picked more directly, for
/// example for selectors based on an element's index.
///
/// Only element children (<c>ChildElements</c>) are tested. A filter that must consider
/// other node types needs to override this and walk all child nodes instead.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// A lazily evaluated sequence of children that match.
/// </returns>
public virtual IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    return from child in element.ChildElements
           where Matches(child)
           select child;
}
/// <summary>
/// Return DOM position matches (other than Nth Child).
/// </summary>
/// <param name="elm">The container whose children are examined.</param>
/// <param name="position">The position type to match.</param>
/// <returns>
/// For FirstChild/LastChild, at most one element; for Odd/Even/All, the nodes of
/// <c>ChildNodes</c> selected by their zero-based position.
/// </returns>
/// <exception cref="NotImplementedException">
/// Thrown during enumeration when <paramref name="position"/> is not one of the handled
/// values and the container has at least one child node.
/// </exception>
protected IEnumerable<IDomObject> GetSimpleDomPostionMatches(IDomContainer elm, PositionType position)
{
    if (position == PositionType.FirstChild)
    {
        IDomObject child = elm.FirstChild;

        // Guard against a container with no children before reading NodeType.
        if (child != null && child.NodeType != NodeType.ELEMENT_NODE)
        {
            child = child.NextElementSibling;
        }

        if (child != null)
        {
            yield return child;
        }
    }
    else if (position == PositionType.LastChild)
    {
        IDomObject child = elm.LastChild;

        // Guard against a container with no children before reading NodeType.
        if (child != null && child.NodeType != NodeType.ELEMENT_NODE)
        {
            child = child.PreviousElementSibling;
        }

        if (child != null)
        {
            yield return child;
        }
    }
    else
    {
        // NOTE(review): the index counts every entry of ChildNodes, not only elements - confirm
        // that this is the intended basis for Odd/Even.
        int index = 0;

        foreach (var child in elm.ChildNodes)
        {
            switch (position)
            {
                case PositionType.Odd:
                    if (index % 2 != 0)
                    {
                        yield return child;
                    }
                    break;

                case PositionType.Even:
                    if (index % 2 == 0)
                    {
                        yield return child;
                    }
                    break;

                case PositionType.All:
                    yield return child;
                    break;

                default:
                    throw new NotImplementedException("Unimplemented position type selector");
            }

            // Advance the position; without this Odd never matched and Even matched everything.
            index++;
        }
    }
}
/// <summary>
/// Creates a node collection for the given container. The container's <c>SyncRoot</c> is
/// passed to the base constructor and the container itself is stored in <c>Container</c>.
/// </summary>
/// <param name="container">The container that owns this collection.</param>
public DomNodeCollection(IDomContainer container)
    : base(container.SyncRoot)
{
    Container = container;
}
/// <summary>
/// Finds the next element node among the children of <paramref name="parent"/>, searching
/// forward from the position immediately after <paramref name="currentIndex"/>.
/// </summary>
/// <param name="parent">The container whose <c>ChildNodes</c> are scanned.</param>
/// <param name="currentIndex">The index to search after; pass -1 to start at the first child.</param>
/// <param name="newIndex">Receives the index at which the element was found, or -1 if none was found.</param>
/// <returns>The element that was found, or null when no further element exists.</returns>
private IDomElement GetNextChild(IDomContainer parent, int currentIndex, out int newIndex)
{
    var nodes = parent.ChildNodes;
    int nodeCount = nodes.Count;

    for (int cursor = currentIndex + 1; cursor < nodeCount; cursor++)
    {
        // The node at each position is resolved through GetEffectiveChild rather than read directly.
        IDomObject effective = GetEffectiveChild(nodes, cursor);
        if (effective.NodeType == NodeType.ELEMENT_NODE)
        {
            newIndex = cursor;
            return (IDomElement)effective;
        }
    }

    newIndex = -1;
    return null;
}
/// <summary>
/// Returns the element child at the given 1-based position among the element children of
/// <paramref name="parent"/>, stepping through them with <c>GetNextChild</c>.
/// </summary>
/// <param name="parent">The container to search.</param>
/// <param name="index">The 1-based element position to find.</param>
/// <returns>The element at that position, or null when the children run out first.</returns>
private IDomElement GetNthChild(IDomContainer parent, int index)
{
    int cursor;
    IDomElement found = GetNextChild(parent, -1, out cursor);

    // The ordinal starts at 1 for the first element found.
    for (int ordinal = 1; found != null && ordinal != index; ordinal++)
    {
        found = GetNextChild(parent, cursor, out cursor);
    }

    return found;
}
/// <summary>
/// Yields each element child whose 1-based element position is selected by the current formula.
/// </summary>
///
/// <param name="obj">
/// The parent object.
/// </param>
///
/// <returns>
/// Sequence of matching children; empty when the parent has no children.
/// </returns>
public IEnumerable<IDomObject> GetMatchingChildren(IDomContainer obj)
{
    if (!obj.HasChildren)
    {
        yield break;
    }

    if (IsJustNumber)
    {
        // A plain-number formula selects at most one child, fetched directly.
        IDomElement nth = GetNthChild(obj, MatchOnlyIndex);
        if (nth != null)
        {
            yield return nth;
        }

        yield break;
    }

    UpdateCacheInfo(obj.ChildNodes.Count);

    int cursor;
    IDomElement current = GetNextChild(obj, -1, out cursor);

    // GetNextChild sets the cursor to -1 once no further element exists.
    for (int ordinal = 1; cursor >= 0; ordinal++)
    {
        if (cacheInfo.MatchingIndices.Contains(ordinal))
        {
            yield return current;
        }

        current = GetNextChild(obj, cursor, out cursor);
    }
}
/// <summary>
/// Default implementation of ChildMatches: tests each child element with <c>Matches</c> and
/// keeps those that pass. Override this when the result can be picked more directly, for
/// example for selectors based on an element's index.
///
/// Only element children (<c>ChildElements</c>) are tested. A filter that must consider
/// other node types needs to override this and walk all child nodes instead.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// A lazily evaluated sequence of children that match.
/// </returns>
public virtual IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    return from candidate in element.ChildElements
           where Matches(candidate)
           select candidate;
}
/// <summary>
/// Returns the children of the parent that match the formula, with no node-name filter.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
/// <param name="formula">
/// The formula for determining n.
/// </param>
/// <param name="fromLast">
/// When true, count from the last element instead of the first.
/// </param>
///
/// <returns>
/// The sequence produced by <c>GetMatchingChildren</c>.
/// </returns>
public IEnumerable<IDomObject> NthChilds(IDomContainer element, string formula, bool fromLast = false)
{
    // A null node name means the match is not restricted to one node type.
    return GetMatchingChildren(element, formula, onlyNodeName: null, fromLast: fromLast);
}
/// <summary>
/// Inserts a node into the container's <c>NodeCollection</c> at the given index and returns
/// the same node so calls can be chained.
/// </summary>
/// <typeparam name="T">The concrete node type.</typeparam>
/// <param name="container">The container receiving the node.</param>
/// <param name="index">The position passed to <c>NodeCollection.Insert</c>.</param>
/// <param name="node">The node to insert.</param>
/// <returns>The <paramref name="node"/> argument.</returns>
public static T InsertNode<T>(this IDomContainer container, int index, T node)
    where T : DomNode
{
    var nodes = container.NodeCollection;
    nodes.Insert(index, node);

    return node;
}
/// <summary>
/// Stores the formula in <c>Text</c> and returns the children of the parent that match it.
/// </summary>
///
/// <param name="obj">
/// The parent object.
/// </param>
/// <param name="formula">
/// The formula for determining n.
/// </param>
///
/// <returns>
/// The sequence produced by the single-argument overload.
/// </returns>
public IEnumerable<IDomObject> GetMatchingChildren(IDomContainer obj, string formula)
{
    // The formula is assigned before the matching overload is called.
    Text = formula;

    IEnumerable<IDomObject> matches = GetMatchingChildren(obj);
    return matches;
}
/// <summary>
/// Returns the result of <c>NthC.NthChilds</c> for the parent, using the first selector
/// parameter as the formula and counting from the first element.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// A sequence of children that match.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    // The final argument (fromLast) is false: positions are counted from the start.
    IEnumerable<IDomObject> matches = NthC.NthChilds(element, Parameters[0], false);
    return matches;
}
/// <summary>
/// Returns the result of <c>OnlyChildOfAnyType</c> for the parent: the children that are the
/// only element of their type among their siblings.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// A sequence of children that match.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    IEnumerable<IDomObject> matches = OnlyChildOfAnyType(element);
    return matches;
}
/// <summary>
/// Returns the result of <c>OnlyChildOfAnyType</c> for the parent: the children that are the
/// only element of their type among their siblings.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
///
/// <returns>
/// A sequence of children that match.
/// </returns>
public override IEnumerable<IDomObject> ChildMatches(IDomContainer element)
{
    IEnumerable<IDomObject> onlyOfType = OnlyChildOfAnyType(element);
    return onlyOfType;
}
/// <summary>
/// Filters the child elements of the parent, keeping those for which
/// <c>IsNthChildOfType</c> returns true for the given formula.
/// </summary>
///
/// <remarks>
/// Each child is tested individually. A more efficient implementation is possible, but it
/// would need to keep track of n separately for each type of element.
/// </remarks>
/// <param name="element">
/// The parent element.
/// </param>
/// <param name="formula">
/// The formula for determining n.
/// </param>
/// <param name="fromLast">
/// Count from the last element instead of the first.
/// </param>
///
/// <returns>
/// A lazily evaluated sequence of matching elements.
/// </returns>
public IEnumerable<IDomObject> NthChildsOfType(IDomContainer element, string formula, bool fromLast = false)
{
    return from child in element.ChildElements
           where IsNthChildOfType(child, formula, fromLast)
           select child;
}
/// <summary>
/// Filters the child elements of the parent, keeping those for which
/// <c>IsNthChildOfType</c> returns true for the given formula.
/// </summary>
///
/// <remarks>
/// Each child is tested individually. A more efficient implementation is possible, but it
/// would need to keep track of n separately for each type of element.
/// </remarks>
/// <param name="element">
/// The parent element.
/// </param>
/// <param name="formula">
/// The formula for determining n.
/// </param>
/// <param name="fromLast">
/// Count from the last element instead of the first.
/// </param>
///
/// <returns>
/// A lazily evaluated sequence of matching elements.
/// </returns>
public IEnumerable<IDomObject> NthChildsOfType(IDomContainer element, string formula, bool fromLast = false)
{
    return from candidate in element.ChildElements
           where IsNthChildOfType(candidate, formula, fromLast)
           select candidate;
}
/// <summary>
/// Adds a node to the container's <c>NodeCollection</c> and returns the same node so calls
/// can be chained.
/// </summary>
/// <typeparam name="T">The concrete node type.</typeparam>
/// <param name="container">The container receiving the node.</param>
/// <param name="node">The node to add.</param>
/// <returns>The <paramref name="node"/> argument.</returns>
public static T AddNode<T>(this IDomContainer container, T node)
    where T : DomNode
{
    var nodes = container.NodeCollection;
    nodes.Add(node);

    return node;
}
/// <summary>
/// Returns the children of the parent that match the formula, with no node-name filter.
/// </summary>
///
/// <param name="element">
/// The parent element.
/// </param>
/// <param name="formula">
/// The formula for determining n.
/// </param>
/// <param name="fromLast">
/// When true, count from the last element instead of the first.
/// </param>
///
/// <returns>
/// The sequence produced by <c>GetMatchingChildren</c>.
/// </returns>
public IEnumerable<IDomObject> NthChilds(IDomContainer element, string formula, bool fromLast = false)
{
    // A null node name means the match is not restricted to one node type.
    IEnumerable<IDomObject> matches = GetMatchingChildren(element, formula, onlyNodeName: null, fromLast: fromLast);
    return matches;
}
/// <summary>
/// Records the node-name filter and counting direction, then returns the children matching
/// the formula via the (obj, formula) overload.
/// </summary>
///
/// <param name="obj">
/// The parent object.
/// </param>
/// <param name="formula">
/// The formula for determining n.
/// </param>
/// <param name="onlyNodeName">
/// Value stored in <c>OnlyNodeName</c> before matching.
/// </param>
/// <param name="fromLast">
/// Value stored in <c>FromLast</c> before matching.
/// </param>
///
/// <returns>
/// The sequence produced by the (obj, formula) overload.
/// </returns>
public IEnumerable<IDomObject> GetMatchingChildren(IDomContainer obj, string formula, string onlyNodeName = null, bool fromLast = false)
{
    // Both settings are stored before delegating to the overload.
    OnlyNodeName = onlyNodeName;
    FromLast = fromLast;

    IEnumerable<IDomObject> result = GetMatchingChildren(obj, formula);
    return result;
}
/// <summary>
/// Stores the formula in <c>Text</c> and returns the children of the parent that match it.
/// </summary>
///
/// <param name="obj">
/// The parent object.
/// </param>
/// <param name="formula">
/// The formula for determining n.
/// </param>
///
/// <returns>
/// The sequence produced by the single-argument overload.
/// </returns>
public IEnumerable<IDomObject> GetMatchingChildren(IDomContainer obj, string formula)
{
    // The formula is assigned before the matching overload is called.
    Text = formula;

    IEnumerable<IDomObject> result = GetMatchingChildren(obj);
    return result;
}
/// <summary>
/// Returns a sequence of all children of the parent that match this selector.
/// Abstract: each selector implementation supplies its own matching logic.
/// </summary>
///
/// <param name="element">
/// The parent element whose children are tested.
/// </param>
///
/// <returns>
/// A sequence of children that match.
/// </returns>
public abstract IEnumerable <IDomObject> ChildMatches(IDomContainer element);
/// <summary>
/// Returns a sequence of all children of the parent that match this selector.
/// Abstract: each selector implementation supplies its own matching logic.
/// </summary>
///
/// <param name="element">
/// The parent element whose children are tested.
/// </param>
///
/// <returns>
/// A sequence of children that match.
/// </returns>
public abstract IEnumerable<IDomObject> ChildMatches(IDomContainer element);