示例#1
0
 /// <summary>
 /// Renders every direct child of <paramref name="node"/> by handing each one,
 /// in collection order, to <c>ParseHtml</c>.
 /// </summary>
 /// <param name="format">Format state forwarded unchanged to each child.</param>
 /// <param name="node">Node whose <c>ChildNodes</c> are visited.</param>
 /// <param name="sb">Output buffer forwarded unchanged to each child.</param>
 private void ParseHtmlChildren(HtmlFormatPlainText format, HtmlNode node, StringBuilder sb)
 {
     var children = node.ChildNodes;

     foreach (HtmlNode childNode in children)
     {
         // The same format instance and buffer are shared by all siblings.
         ParseHtml(format, childNode, sb);
     }
 }
示例#2
0
        /// <summary>
        /// Converts an HTML string to plain text: the markup is loaded into an
        /// <c>HtmlDocument</c>, its node tree is rendered through <c>ParseHtml</c>
        /// into a buffer, and the buffered text is passed through <c>TrimLines</c>.
        /// </summary>
        /// <param name="format">Initial format state handed to <c>ParseHtml</c>.</param>
        /// <param name="text">HTML markup to convert.</param>
        /// <returns>The value returned by <c>TrimLines</c> for the rendered text.</returns>
        private string ConvertHtmlToPlainText(HtmlFormatPlainText format, string text)
        {
            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(text);

            // Render starting from the document root; ParseHtml recurses from there.
            StringBuilder plainText = new StringBuilder();
            ParseHtml(format, document.DocumentNode, plainText);

            string rendered = plainText.ToString();
            return TrimLines(rendered);
        }
示例#3
0
        /// <summary>
        /// Recursively renders <paramref name="node"/> and its descendants as plain
        /// text into <paramref name="sb"/>.
        /// </summary>
        /// <remarks>
        /// Comment nodes produce no output. Text nodes whose parent is a
        /// <c>script</c> or <c>style</c> element are skipped; other text is
        /// entity-decoded and appended. For elements: <c>p</c> appends a newline
        /// before its content, <c>ol</c>/<c>ul</c> switch to a new list format one
        /// indent level deeper for their children, and <c>li</c> increments the
        /// current format's counter and writes the indent followed by either
        /// "N. " (ordered) or "* " (unordered). Every other element only has its
        /// children rendered.
        /// </remarks>
        /// <param name="format">Format state (mode, indent, counter) in effect for this node.</param>
        /// <param name="node">Node to render.</param>
        /// <param name="sb">Buffer that receives the plain-text output.</param>
        private void ParseHtml(HtmlFormatPlainText format, HtmlNode node, StringBuilder sb)
        {
            switch (node.NodeType)
            {
            case HtmlNodeType.Comment:
                // Comments never reach the output.
                break;

            case HtmlNodeType.Document:
                ParseHtmlChildren(format, node, sb);
                break;

            case HtmlNodeType.Text:
                // A text node without a parent cannot be inside <script>/<style>,
                // so it is treated as ordinary text instead of dereferencing null.
                HtmlNode parent = node.ParentNode;
                if (parent != null &&
                    (string.Equals(parent.Name, "script", StringComparison.Ordinal) ||
                     string.Equals(parent.Name, "style", StringComparison.Ordinal)))
                {
                    // Ignore scripts and styles
                    break;
                }

                string html = ((HtmlTextNode)node).Text;

                if (HtmlNode.IsOverlappedClosingElement(html))
                {
                    // The parser surfaced a stray closing tag as text; drop it.
                    break;
                }

                sb.Append(HtmlEntity.DeEntitize(html));
                break;

            case HtmlNodeType.Element:
                // Children inherit the current format unless this element starts a list.
                HtmlFormatPlainText nextFormat = format;

                switch (node.Name)
                {
                case "p":
                    sb.Append(Environment.NewLine);
                    break;

                case "ol":
                    nextFormat = new HtmlFormatPlainText(HtmlFormatMode.OrderedList)
                    {
                        Indent = format.Indent + 1
                    };
                    break;

                case "ul":
                    nextFormat = new HtmlFormatPlainText(HtmlFormatMode.UnorderedList)
                    {
                        Indent = format.Indent + 1
                    };
                    break;

                case "li":
                    // nextFormat still refers to the format passed in by the enclosing list.
                    // NOTE(review): numbering across sibling items presumably relies on
                    // HtmlFormatPlainText being a reference type - confirm.
                    nextFormat.Counter += 1;
                    sb.Append(' ', nextFormat.Indent);
                    switch (nextFormat.Mode)
                    {
                    case HtmlFormatMode.OrderedList:
                        // AppendFormat writes straight into the buffer, avoiding the
                        // temporary string that string.Format would allocate.
                        sb.AppendFormat("{0}. ", nextFormat.Counter);
                        break;

                    case HtmlFormatMode.UnorderedList:
                        sb.Append("* ");
                        break;
                    }
                    break;
                }

                if (node.HasChildNodes)
                {
                    ParseHtmlChildren(nextFormat, node, sb);
                }
                break;
            }
        }