Exemple #1
0
        /// <summary>
        /// Attempts to get the text representation of a node. Nodes that have no text
        /// representation (comments and non-text elements) yield null.
        /// </summary>
        /// <param name="node">The node whose text representation is requested.</param>
        /// <returns>
        /// For a text node: the raw HTML text when the parent is listed in
        /// <c>HtmlSpecification.cdataTags</c>; the HTML-decoded text when the parent is listed in
        /// <c>HtmlSpecification.preformatedElements</c>; otherwise the HTML-decoded text after every
        /// match of <c>whitespaceRegex</c> has been replaced by a single space.
        /// For a comment or an element listed in <c>HtmlSpecification.nonTextElements</c>: null.
        /// For a <c>br</c> element: <see cref="Environment.NewLine"/>.
        /// For any other container: the concatenation of the inner text of its child nodes.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
        /// <exception cref="InvalidOperationException">A text node has no parent.</exception>
        /// <exception cref="NotSupportedException">
        /// The node is neither a text node, a comment, nor a container.
        /// </exception>
        public static string InnerText(this IHtmlNode node)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException further down.
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            var textNode = node as IHtmlTextNode;

            if (textNode != null)
            {
                // How the text is interpreted depends on the parent element, so a parent is required.
                var parent = textNode.Parent();
                if (parent == null)
                {
                    throw new InvalidOperationException();
                }

                // CDATA-style parents: the text is returned verbatim, without entity decoding.
                if (HtmlSpecification.cdataTags.Contains(parent.Name, StringComparer.OrdinalIgnoreCase))
                {
                    return textNode.HtmlText;
                }

                // Preformatted parents: decode entities but leave whitespace untouched.
                if (HtmlSpecification.preformatedElements.Contains(parent.Name, StringComparer.OrdinalIgnoreCase))
                {
                    return HtmlEncoding.HtmlDecode(textNode.HtmlText);
                }

                // Everything else: replace whitespaceRegex matches with one space, then decode.
                return HtmlEncoding.HtmlDecode(whitespaceRegex.Replace(textNode.HtmlText, " "));
            }

            // Comments never contribute text.
            if (node is IHtmlComment)
            {
                return null;
            }

            var element = node as IHtmlElement;

            if (element != null)
            {
                if (element.Name.EqualsIgnoreCase("br"))
                {
                    return Environment.NewLine;
                }

                if (HtmlSpecification.nonTextElements.Contains(element.Name, StringComparer.OrdinalIgnoreCase))
                {
                    return null;
                }
            }

            var container = node as IHtmlContainer;

            // Previously a non-container node reached container.Nodes() and crashed with a
            // NullReferenceException; report the unsupported node type explicitly instead.
            if (container == null)
            {
                throw new NotSupportedException();
            }

            // Null results from children are treated as empty strings by string.Join.
            return string.Join("", container.Nodes().Select(n => InnerText(n)).ToArray());
        }
Exemple #2
0
        /// <summary>
        /// Attempts to get the text representation of a node. Nodes that have no text
        /// representation yield null.
        /// </summary>
        /// <param name="node">The node whose text representation is requested.</param>
        /// <returns>
        /// For a text node, depending on the text mode the document's HTML specification reports
        /// for the parent: the raw HTML text (<c>TextMode.CData</c>), the HTML-decoded text
        /// (<c>TextMode.Preformated</c>), the HTML-decoded text after every match of
        /// <c>whitespaceRegex</c> has been replaced by a single space (<c>TextMode.Normal</c>),
        /// or null for any other mode.
        /// For a comment or an element whose text mode is <c>TextMode.NonText</c>: null.
        /// For a <c>br</c> element: <see cref="Environment.NewLine"/>.
        /// For any other container: the concatenation of the inner text of its child nodes.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
        /// <exception cref="InvalidOperationException">A text node has no parent.</exception>
        /// <exception cref="NotSupportedException">
        /// The node is neither a text node, a comment, nor a container.
        /// </exception>
        public static string InnerText(this IHtmlNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            // Resolved up front: both the text-node and the element branches consult it.
            var spec = node.Document.HtmlSpecification;

            var text = node as IHtmlTextNode;
            if (text != null)
            {
                // The parent element decides how the text is interpreted.
                var owner = text.Parent();
                if (owner == null)
                {
                    throw new InvalidOperationException();
                }

                var mode = spec.ElementTextMode(owner);

                if (mode == TextMode.CData)
                {
                    // Returned verbatim, without entity decoding.
                    return text.HtmlText;
                }

                if (mode == TextMode.Preformated)
                {
                    // Entities are decoded; whitespace is left untouched.
                    return HtmlEncoding.HtmlDecode(text.HtmlText);
                }

                if (mode == TextMode.Normal)
                {
                    // Replace whitespaceRegex matches with one space before decoding.
                    var collapsed = whitespaceRegex.Replace(text.HtmlText, " ");
                    return HtmlEncoding.HtmlDecode(collapsed);
                }

                // Any other text mode has no text representation.
                return null;
            }

            // Comments never contribute text.
            if (node is IHtmlComment)
            {
                return null;
            }

            var tag = node as IHtmlElement;
            if (tag != null)
            {
                if (tag.Name.EqualsIgnoreCase("br"))
                {
                    return Environment.NewLine;
                }

                if (spec.ElementTextMode(tag) == TextMode.NonText)
                {
                    return null;
                }
            }

            var parentNode = node as IHtmlContainer;
            if (parentNode == null)
            {
                throw new NotSupportedException();
            }

            // Null results from children are treated as empty strings by string.Join.
            var pieces = parentNode.Nodes().Select(child => InnerText(child)).ToArray();
            return string.Join("", pieces);
        }