/// <summary>
/// Runs <see cref="ParseHtml"/> on every direct child of <paramref name="node"/>,
/// in the order the children are enumerated.
/// </summary>
/// <param name="format">Formatting state handed unchanged to each child.</param>
/// <param name="node">Node whose <c>ChildNodes</c> are visited.</param>
/// <param name="sb">Builder that receives the generated text.</param>
private void ParseHtmlChildren(HtmlFormatPlainText format, HtmlNode node, StringBuilder sb)
{
    var children = node.ChildNodes;

    foreach (HtmlNode current in children)
    {
        ParseHtml(format, current, sb);
    }
}
/// <summary>
/// Loads <paramref name="text"/> as an HTML document, renders it through
/// <see cref="ParseHtml"/> starting at the document node, and returns the
/// rendered text after passing it through <c>TrimLines</c>.
/// </summary>
/// <param name="format">Initial formatting state for the top of the document.</param>
/// <param name="text">HTML markup to convert.</param>
/// <returns>The value produced by <c>TrimLines</c> for the rendered text.</returns>
private string ConvertHtmlToPlainText(HtmlFormatPlainText format, string text)
{
    // Parse the markup first.
    var document = new HtmlDocument();
    document.LoadHtml(text);

    // Walk the tree from the root, collecting output as we go.
    var plainText = new StringBuilder();
    ParseHtml(format, document.DocumentNode, plainText);

    string rendered = plainText.ToString();
    return TrimLines(rendered);
}
/// <summary>
/// Appends the plain-text rendering of <paramref name="node"/> to <paramref name="sb"/>.
/// </summary>
/// <remarks>
/// Handling by node type:
/// <list type="bullet">
/// <item><description>Document: its children are rendered with the same format.</description></item>
/// <item><description>Text: de-entitized and appended, unless the parent is named
/// <c>script</c> or <c>style</c>, or the text is an overlapped closing element.</description></item>
/// <item><description>Element: <c>p</c> appends a line break; <c>ol</c>/<c>ul</c> start a new
/// list format one indent level deeper; <c>li</c> bumps the counter, appends the indent and
/// the list marker. Children are then rendered with the resulting format.</description></item>
/// <item><description>Anything else (including comments): nothing is appended.</description></item>
/// </list>
/// </remarks>
/// <param name="format">Formatting state in effect for this node.</param>
/// <param name="node">Node to render.</param>
/// <param name="sb">Builder that receives the generated text.</param>
private void ParseHtml(HtmlFormatPlainText format, HtmlNode node, StringBuilder sb)
{
    HtmlNodeType kind = node.NodeType;

    if (kind == HtmlNodeType.Document)
    {
        ParseHtmlChildren(format, node, sb);
        return;
    }

    if (kind == HtmlNodeType.Text)
    {
        // Script and style bodies are not output.
        string container = node.ParentNode.Name;
        bool insideIgnoredElement = container.Equals("script") || container.Equals("style");
        if (insideIgnoredElement)
        {
            return;
        }

        // A special closing node can surface as text; it is skipped as well.
        string raw = ((HtmlTextNode)node).Text;
        if (!HtmlNode.IsOverlappedClosingElement(raw))
        {
            sb.Append(HtmlEntity.DeEntitize(raw));
        }

        return;
    }

    if (kind != HtmlNodeType.Element)
    {
        // Comments (and any other node type) produce no output.
        return;
    }

    // Format handed to this element's children; starts out as the caller's format.
    HtmlFormatPlainText childFormat = format;
    string tag = node.Name;

    if (tag == "p")
    {
        sb.Append(Environment.NewLine);
    }
    else if (tag == "ol" || tag == "ul")
    {
        HtmlFormatMode listMode = tag == "ol"
            ? HtmlFormatMode.OrderedList
            : HtmlFormatMode.UnorderedList;

        // A nested list gets its own format, one indent level deeper.
        childFormat = new HtmlFormatPlainText(listMode) { Indent = format.Indent + 1 };
    }
    else if (tag == "li")
    {
        // NOTE(review): the counter is bumped on the format received from the caller;
        // presumably sibling items share that instance so numbering advances - confirm.
        childFormat.Counter += 1;
        sb.Append(' ', childFormat.Indent);

        HtmlFormatMode mode = childFormat.Mode;
        if (mode == HtmlFormatMode.OrderedList)
        {
            sb.AppendFormat("{0}. ", childFormat.Counter);
        }
        else if (mode == HtmlFormatMode.UnorderedList)
        {
            sb.Append("* ");
        }
    }

    if (node.HasChildNodes)
    {
        ParseHtmlChildren(childFormat, node, sb);
    }
}