Example #1
0
        /// <summary>
        /// Converts an HTML node to plain text and writes the result to <paramref name="outText"/>.
        /// </summary>
        /// <remarks>
        /// Comment nodes are skipped. A text node is written de-entitized unless it is empty or
        /// whitespace-only, its parent is a <c>script</c> or <c>style</c> element, or the factory
        /// reports it as an overlapped closing element. For any other node a line break is written
        /// first when the node is a <c>p</c> element, and then, if the node has children, it is
        /// passed on to <c>ConvertContentTo</c>.
        /// </remarks>
        /// <param name="node">Node to convert.</param>
        /// <param name="outText">Writer that receives the resulting text.</param>
        /// <param name="documentFactory">
        /// Document factory; used here to detect overlapped closing elements and to de-entitize text.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="documentFactory"/>, <paramref name="node"/> or
        /// <paramref name="outText"/> is <c>null</c>.
        /// </exception>
        public static void ConvertHtmlToText(IHtmlNode node, TextWriter outText, IHtmlDocumentFactory documentFactory)
        {
            // The factory is validated first so the pre-existing exception precedence is preserved.
            if (documentFactory == null)
            {
                throw new ArgumentNullException(nameof(documentFactory));
            }

            // A null node would otherwise reach the default branch and fail on node.Name.
            if (node == null)
            {
                throw new ArgumentNullException(nameof(node));
            }

            if (outText == null)
            {
                throw new ArgumentNullException(nameof(outText));
            }

            switch (node)
            {
            case IHtmlCommentNode _:
                // Comments never appear in the text output.
                break;

            case IHtmlTextNode textNode:
                // Script and style content must not be output. A text node without a parent
                // cannot belong to either, so a missing parent is treated as "no match".
                string parentName = textNode.ParentNode?.Name;
                if ((parentName == "script") || (parentName == "style"))
                {
                    break;
                }

                string html = textNode.Text;
                if (html == null)
                {
                    // Nothing to write.
                    break;
                }

                // Is it in fact a special closing node that was emitted as text?
                if (documentFactory.IsOverlappedClosingElement(html))
                {
                    break;
                }

                // Only write text that is meaningful, not a run of whitespace.
                if (!string.IsNullOrWhiteSpace(html))
                {
                    outText.Write(documentFactory.DeEntitize(html));
                }
                break;

            default:
                // Paragraphs are rendered as a line break ahead of their content.
                if (node.Name == "p")
                {
                    outText.WriteLine();
                }

                if (node.HasChildNodes)
                {
                    ConvertContentTo(node, outText, documentFactory);
                }
                break;
            }
        }