// Builds an excerpt from the content that precedes the first separator comment.
//
// Call this only after attempting to find the excerpt element: no clone is taken,
// so nodes are removed from the live document passed in and the document is left
// modified afterwards.
//
// Returns the InnerHtml of the element containing the separator once everything
// from the separator onward has been removed, or null when no separators are
// configured or no matching comment is found.
private string GetSeparatorExcerpt(IHtmlDocument htmlDocument)
{
    // Nothing to look for when no separators have been configured
    if (!(_separators?.Length > 0))
    {
        return null;
    }

    // Scan the comment nodes for the first one whose trimmed text is a separator
    // (compared case-insensitively)
    ITreeWalker commentWalker = htmlDocument.CreateTreeWalker(htmlDocument.DocumentElement, FilterSettings.Comment);
    IComment separator;
    for (separator = (IComment)commentWalker.ToFirst(); separator != null; separator = (IComment)commentWalker.ToNext())
    {
        if (_separators.Contains(separator.NodeValue.Trim(), StringComparer.OrdinalIgnoreCase))
        {
            break;
        }
    }

    if (separator == null)
    {
        return null;
    }

    // The excerpt container is the separator's parent element, unless that parent
    // is a paragraph, in which case the paragraph's own parent is used instead
    IElement container = separator.ParentElement;
    bool insideParagraph = container.TagName.Equals("p", StringComparison.OrdinalIgnoreCase);
    if (insideParagraph)
    {
        container = container.ParentElement;
    }

    // Walk the container and collect what has to go; removal is deferred so the
    // tree is not modified while it is being traversed
    bool filterTopLevel = !string.IsNullOrEmpty(_querySelector);
    bool pastSeparator = false;
    List<INode> doomed = new List<INode>();
    ITreeWalker contentWalker = htmlDocument.CreateTreeWalker(container);
    for (INode current = contentWalker.ToFirst(); current != null; current = contentWalker.ToNext())
    {
        // The separator itself and every node visited after it are discarded
        if (current == separator)
        {
            pastSeparator = true;
        }

        bool discard = pastSeparator;

        // Before the separator, also discard direct child elements of the container
        // that do not match the query selector (only when a selector is set)
        if (!discard && current.Parent == container && current is IElement element && filterTopLevel)
        {
            discard = !element.Matches(_querySelector);
        }

        if (discard)
        {
            doomed.Add(current);
        }
    }

    // Detach in reverse collection order (last collected first)
    for (int index = doomed.Count - 1; index >= 0; index--)
    {
        INode victim = doomed[index];
        victim.Parent.RemoveChild(victim);
    }

    return container.InnerHtml;
}