/// <summary>
/// Converts the direct children of <paramref name="node"/> into a list of
/// <see cref="SimplifiedHtmlContentItem"/> entries, recursing into children that
/// report <c>IsBlock()</c>.
/// </summary>
/// <remarks>
/// Consecutive children that report <c>IsTextOrInline()</c> are merged into a single
/// item flagged <c>IsLeaf</c> whose <c>Content</c> is the concatenation of their
/// <c>OuterHtml</c>. A block child ends the current run, so text before and after a
/// block ends up in separate leaf items. Children matching neither predicate are skipped.
/// </remarks>
/// <param name="node">Node whose <c>ChildNodes</c> are walked; must not be null.</param>
/// <returns>
/// A new list, in document order, of block items (with <c>Name</c>, <c>Attributes</c> and
/// recursively extracted <c>ChildElements</c>) interleaved with aggregated leaf items.
/// </returns>
private static IList<SimplifiedHtmlContentItem> ExtactContent(HtmlNode node)
{
    var list = new List<SimplifiedHtmlContentItem>();

    // Collects the current run of adjacent text/inline children; null when no run is open.
    SimplifiedHtmlContentItem aggregator = null;

    foreach (var child in node.ChildNodes)
    {
        if (child.IsBlock())
        {
            // A block terminates any open text/inline run: flush it first so the
            // output keeps document order.
            if (aggregator != null)
            {
                list.Add(aggregator);
                aggregator = null;
            }

            var contentItem = new SimplifiedHtmlContentItem() { Name = child.Name };
            contentItem.Attributes = child.ExtractAttributes();
            contentItem.ChildElements = ExtactContent(child);
            list.Add(contentItem);
        }
        else if (child.IsTextOrInline())
        {
            // Open a new run lazily on the first text/inline child after a block
            // (or at the start of the node).
            if (aggregator == null)
            {
                aggregator = new SimplifiedHtmlContentItem() { IsLeaf = true };
            }

            aggregator.Content += child.OuterHtml;
        }
    }

    // Flush a run that extends to the last child.
    if (aggregator != null)
    {
        list.Add(aggregator);
    }

    return list;
}
/// <summary>
/// Produces a normalised copy of <paramref name="list"/> in which every item whose only
/// child is a leaf is replaced by a new item carrying that leaf's <c>Content</c> directly.
/// </summary>
/// <remarks>
/// Items that are not collapsed are reused, not cloned: their <c>ChildElements</c>
/// property is overwritten in place with the normalised child list. Collapsed
/// replacements copy <c>Name</c> and <c>Attributes</c> from the original item and
/// <c>Content</c> from the single leaf child; no other property is set on them.
/// </remarks>
/// <param name="list">Items to normalise; must not be null.</param>
/// <returns>A new list with the same number of entries, in the same order.</returns>
private static IList<SimplifiedHtmlContentItem> NormalizeList(IList<SimplifiedHtmlContentItem> list)
{
    var newList = new List<SimplifiedHtmlContentItem>();

    foreach (var item in list)
    {
        var children = item.ChildElements;

        // Leaf items built by ExtactContent never get ChildElements assigned, so it may
        // be null here (unless the type initialises it itself — not visible from this
        // method). Such items have nothing to collapse or recurse into; keep them as-is
        // instead of failing on a null dereference.
        if (children == null)
        {
            newList.Add(item);
            continue;
        }

        if (children.Count == 1 && children[0].IsLeaf)
        {
            // Single leaf child: hoist its content onto a fresh item and drop the
            // intermediate level.
            var collapsed = new SimplifiedHtmlContentItem() { Name = item.Name };
            collapsed.Content = children[0].Content;
            collapsed.Attributes = item.Attributes;
            newList.Add(collapsed);
        }
        else
        {
            // Zero or several children (or a single non-leaf child): normalise the
            // subtree and keep the original item, mutating it in place.
            item.ChildElements = NormalizeList(children);
            newList.Add(item);
        }
    }

    return newList;
}