Example #1
0
 //common part
 /// <summary>
 /// Appends <paramref name="chars"/> to <paramref name="builder"/>, collapsing runs of
 /// ordinary whitespace into a single space that is only emitted once more visible
 /// output follows.
 /// </summary>
 /// <param name="builder">Receives the normalized text.</param>
 /// <param name="state">
 /// Whitespace state carried across calls. It is read to decide whether a pending
 /// space must be written, and updated as characters are consumed.
 /// </param>
 /// <param name="chars">Characters to process, in order.</param>
 public static void Process(StringBuilder builder, ref ToPlainTextState state, params char[] chars)
 {
     for (int index = 0; index < chars.Length; index++)
     {
         char current = chars[index];
         bool isSpace = char.IsWhiteSpace(current);

         // IsHardSpace is only consulted for whitespace characters.
         bool isCollapsible = isSpace && !IsHardSpace(current);

         if (isCollapsible)
         {
             // Only remember that a space is pending; nothing is written yet, and
             // any state other than NotWhiteSpace is left untouched.
             if (state == ToPlainTextState.NotWhiteSpace)
             {
                 state = ToPlainTextState.WhiteSpace;
             }

             continue;
         }

         // Flush the pending collapsed space before writing visible output.
         if (state == ToPlainTextState.WhiteSpace)
         {
             builder.Append(' ');
         }

         // A hard space is written as a plain ' '; any other character is copied as-is.
         builder.Append(isSpace ? ' ' : current);
         state = ToPlainTextState.NotWhiteSpace;
     }
 }
Example #2
0
        /// <summary>
        /// Parses <paramref name="htmlDoc"/> as HTML and returns the text produced by
        /// walking the document tree with <c>Plain</c>.
        /// </summary>
        /// <param name="htmlDoc">HTML markup to convert.</param>
        /// <returns>The accumulated plain-text output.</returns>
        public static string HtmlToPlainText(string htmlDoc)
        {
            var document = new HtmlDocument();
            document.LoadHtml(htmlDoc);

            var output = new StringBuilder();

            // The walk starts as if at the beginning of a fresh line.
            var lineState = ToPlainTextState.StartLine;
            Plain(output, ref lineState, new[] { document.DocumentNode });

            return output.ToString();
        }
        /// <summary>
        /// Recursively appends the plain-text rendering of <paramref name="nodes"/> to
        /// <paramref name="builder"/>.
        /// </summary>
        /// <remarks>
        /// Text nodes are passed to <c>Process</c>. Elements are dispatched on their
        /// lower-cased local name: <c>br</c> forces a line break, names in
        /// <c>NonVisibleTags</c> are skipped together with their content, names in
        /// <c>InlineTags</c> are rendered in the current line, and every other element
        /// is placed on its own line(s). Nodes that are neither text nor elements are
        /// ignored.
        /// </remarks>
        /// <param name="builder">Receives the generated text.</param>
        /// <param name="state">Line/whitespace state shared across the whole traversal.</param>
        /// <param name="nodes">Nodes to render, in document order.</param>
        private static void Plain(StringBuilder builder, ref ToPlainTextState state, IEnumerable<XNode> nodes)
        {
            foreach (var node in nodes)
            {
                var text = node as XText;
                if (text != null)
                {
                    Process(builder, ref state, text.Value.ToCharArray());
                    continue;
                }

                var element = node as XElement;
                if (element == null)
                {
                    // Neither text nor element: contributes nothing to the output.
                    continue;
                }

                // Tag names are identifiers, not linguistic text, so lower-case them with
                // the invariant culture. A culture-sensitive ToLower() maps 'I' to a
                // dotless 'ı' under Turkish/Azeri cultures, so names such as "SCRIPT" or
                // "DIV" would no longer match the lower-case tag tables.
                var tag = element.Name.LocalName.ToLowerInvariant();

                if (tag == "br")
                {
                    builder.AppendLine();
                    state = ToPlainTextState.StartLine;
                }
                else if (NonVisibleTags.Contains(tag))
                {
                    // Non-visible element: skip it and everything beneath it.
                }
                else if (InlineTags.Contains(tag))
                {
                    Plain(builder, ref state, element.Nodes());
                }
                else
                {
                    // Any other element starts on a fresh line...
                    if (state != ToPlainTextState.StartLine)
                    {
                        builder.AppendLine();
                        state = ToPlainTextState.StartLine;
                    }

                    Plain(builder, ref state, element.Nodes());

                    // ...and is terminated by a line break if it produced any output.
                    if (state != ToPlainTextState.StartLine)
                    {
                        builder.AppendLine();
                        state = ToPlainTextState.StartLine;
                    }
                }
            }
        }
        /// <summary>
        /// Recursively appends the plain-text rendering of HtmlAgilityPack
        /// <paramref name="nodes"/> to <paramref name="builder"/>.
        /// </summary>
        /// <remarks>
        /// Text nodes are entity-decoded and passed to <c>Process</c>. Every other node is
        /// dispatched on its lower-cased name: <c>br</c> forces a line break, names in
        /// <c>NonVisibleTags</c> are skipped, names in <c>InlineTags</c> are rendered in
        /// the current line, and everything else is placed on its own line(s).
        /// </remarks>
        /// <param name="builder">Receives the generated text.</param>
        /// <param name="state">Line/whitespace state shared across the whole traversal.</param>
        /// <param name="nodes">Nodes to render, in document order.</param>
        private static void Plain(StringBuilder builder, ref ToPlainTextState state, IEnumerable<HtmlAgilityPack.HtmlNode> nodes)
        {
            foreach (var current in nodes)
            {
                var textNode = current as HtmlAgilityPack.HtmlTextNode;
                if (textNode != null)
                {
                    var decoded = HtmlAgilityPack.HtmlEntity.DeEntitize(textNode.Text);
                    Process(builder, ref state, decoded.ToCharArray());
                    continue;
                }

                var name = current.Name.ToLower();

                if (name == "br")
                {
                    builder.AppendLine();
                    state = ToPlainTextState.StartLine;
                    continue;
                }

                if (NonVisibleTags.Contains(name))
                {
                    // Non-visible element: skip it and everything beneath it.
                    continue;
                }

                // Anything not listed as inline is surrounded by line breaks.
                bool isBlock = !InlineTags.Contains(name);

                if (isBlock && state != ToPlainTextState.StartLine)
                {
                    builder.AppendLine();
                    state = ToPlainTextState.StartLine;
                }

                Plain(builder, ref state, current.ChildNodes);

                if (isBlock && state != ToPlainTextState.StartLine)
                {
                    builder.AppendLine();
                    state = ToPlainTextState.StartLine;
                }
            }
        }
Example #5
0
        /// <summary>
        /// Recursively appends the plain-text rendering of <paramref name="nodes"/> to
        /// <paramref name="builder"/>, collapsing ordinary whitespace runs in text nodes.
        /// </summary>
        /// <remarks>
        /// Text nodes are entity-decoded and normalized character by character. Every
        /// other node is dispatched on its lower-cased name: <c>br</c> forces a line
        /// break, names in <c>NonVisibleTags</c> are skipped, names in <c>InlineTags</c>
        /// are rendered in the current line, and everything else is placed on its own
        /// line(s).
        /// </remarks>
        /// <param name="builder">Receives the generated text.</param>
        /// <param name="state">Line/whitespace state shared across the whole traversal.</param>
        /// <param name="nodes">Nodes to render, in document order.</param>
        private static void Plain(StringBuilder builder, ref ToPlainTextState state, IEnumerable<HtmlNode> nodes)
        {
            foreach (HtmlNode current in nodes)
            {
                HtmlTextNode textNode = current as HtmlTextNode;
                if (textNode != null)
                {
                    foreach (char ch in HtmlEntity.DeEntitize(textNode.Text))
                    {
                        bool isSpace = char.IsWhiteSpace(ch);

                        // U+00A0, U+2007 and U+202F are kept as a space instead of
                        // being collapsed with neighbouring whitespace.
                        bool isHardSpace = isSpace && (ch == 0xA0 || ch == 0x2007 || ch == 0x202F);

                        if (isSpace && !isHardSpace)
                        {
                            // Collapsible whitespace: only remember that a space is pending.
                            if (state == ToPlainTextState.NotWhiteSpace)
                            {
                                state = ToPlainTextState.WhiteSpace;
                            }

                            continue;
                        }

                        // Flush the pending collapsed space before writing visible output.
                        if (state == ToPlainTextState.WhiteSpace)
                        {
                            builder.Append(' ');
                        }

                        // Hard spaces are written as a plain ' '; other characters as-is.
                        builder.Append(isHardSpace ? ' ' : ch);
                        state = ToPlainTextState.NotWhiteSpace;
                    }

                    continue;
                }

                string name = current.Name.ToLower();

                if (name == "br")
                {
                    builder.AppendLine();
                    state = ToPlainTextState.StartLine;
                    continue;
                }

                if (NonVisibleTags.Contains(name))
                {
                    // Non-visible element: skip it and everything beneath it.
                    continue;
                }

                // Anything not listed as inline gets a line break before and after.
                bool needsOwnLine = !InlineTags.Contains(name);

                if (needsOwnLine && state != ToPlainTextState.StartLine)
                {
                    builder.AppendLine();
                    state = ToPlainTextState.StartLine;
                }

                Plain(builder, ref state, current.ChildNodes);

                if (needsOwnLine && state != ToPlainTextState.StartLine)
                {
                    builder.AppendLine();
                    state = ToPlainTextState.StartLine;
                }
            }
        }