Exemplo n.º 1
0
        /// <summary>
        /// Converts plain text into an HTML fragment that renders with the same visible text flow.
        /// </summary>
        /// <remarks>
        /// The text is HTML-encoded first. Runs of spaces are then rewritten with <c>&amp;nbsp;</c>
        /// so that no two ordinary spaces remain adjacent, and every line break (CRLF, CR or LF)
        /// is turned into a <c>&lt;br /&gt;</c> tag before the result is handed to the manager.
        /// </remarks>
        /// <param name="text">The text to convert.</param>
        /// <param name="manager">The HTML fragment manager used to parse the generated markup.</param>
        /// <returns>The fragment produced by <paramref name="manager"/> for the generated markup.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="text"/> or <paramref name="manager"/> is null.
        /// </exception>
        public static IHtmlFragment ParseText(string text, IHtmlFragmentManager manager)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            if (manager == null)
            {
                throw new ArgumentNullException("manager");
            }

            var parsed = HtmlEncoding.HtmlEncode(text);

            // First pass: every pair of spaces becomes a non-breaking space followed by a plain one.
            parsed = parsed.Replace("  ", "&nbsp; ");

            // An odd run at the very end leaves one surplus plain space; turn it into &nbsp;.
            // Ordinal comparison keeps culture rules (ignorable characters) from matching a suffix
            // that is not literally two spaces, which would make Substring drop the wrong character.
            if (parsed.EndsWith("  ", StringComparison.Ordinal))
            {
                parsed = parsed.Substring(0, parsed.Length - 1) + "&nbsp;";
            }

            // A single pass turns an odd run such as "a   b" into "a&nbsp;  b", which still holds
            // two adjacent plain spaces that collapse into one. Repeat until none are left.
            // Each pass removes at least one plain space, so the loop terminates.
            while (parsed.IndexOf("  ", StringComparison.Ordinal) >= 0)
            {
                parsed = parsed.Replace("  ", "&nbsp; ");
            }

            // Normalize every line-break flavour to "\n", then emit explicit <br /> tags.
            parsed = parsed.Replace("\r\n", "\n").Replace("\r", "\n");
            parsed = parsed.Replace("\n", "<br />");

            return manager.ParseFragment(parsed);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Attempts to get the textual representation of a node. Nodes that have no textual
        /// representation (comments and non-text elements) yield null.
        /// </summary>
        /// <param name="node">The node whose text representation is requested.</param>
        /// <returns>
        /// For a text node: its raw HTML text inside a CDATA-style parent, the decoded text inside a
        /// preformatted parent, or the decoded text with whitespace runs replaced by a single space
        /// otherwise. For a <c>br</c> element: <see cref="Environment.NewLine"/>. For any other
        /// container: the concatenated text of its child nodes. Null for comments and non-text elements.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
        /// <exception cref="InvalidOperationException">A text node has no parent.</exception>
        /// <exception cref="NotSupportedException">
        /// The node is neither a text node, a comment, a handled element nor a container.
        /// </exception>
        public static string InnerText(this IHtmlNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            var textNode = node as IHtmlTextNode;

            if (textNode != null)
            {
                var parent = textNode.Parent();
                if (parent == null)
                {
                    throw new InvalidOperationException();
                }

                if (HtmlSpecification.cdataTags.Contains(parent.Name, StringComparer.OrdinalIgnoreCase))
                {
                    // CDATA-style content is returned as written, without entity decoding.
                    return textNode.HtmlText;
                }
                else if (HtmlSpecification.preformatedElements.Contains(parent.Name, StringComparer.OrdinalIgnoreCase))
                {
                    // Preformatted content keeps its whitespace; only entities are decoded.
                    return HtmlEncoding.HtmlDecode(textNode.HtmlText);
                }
                else
                {
                    // Normal flow: replace whitespace matches with a single space before decoding.
                    return HtmlEncoding.HtmlDecode(whitespaceRegex.Replace(textNode.HtmlText, " "));
                }
            }

            var commentNode = node as IHtmlComment;

            if (commentNode != null)
            {
                return null;
            }

            var element = node as IHtmlElement;

            if (element != null)
            {
                if (element.Name.EqualsIgnoreCase("br"))
                {
                    return Environment.NewLine;
                }
                else if (HtmlSpecification.nonTextElements.Contains(element.Name, StringComparer.OrdinalIgnoreCase))
                {
                    return null;
                }
            }

            var container = node as IHtmlContainer;

            // A node that is not a container has no children to collect text from; fail explicitly
            // instead of dereferencing the null result of the cast.
            if (container == null)
            {
                throw new NotSupportedException();
            }

            return string.Join("", container.Nodes().Select(n => InnerText(n)).ToArray());
        }
Exemplo n.º 3
0
        /// <summary>
        /// Replaces the content of an element with the given text.
        /// Warning: all existing child nodes of the element are removed first.
        /// </summary>
        /// <typeparam name="T">The concrete element type; the same instance is returned.</typeparam>
        /// <param name="element">The element whose content is replaced.</param>
        /// <param name="text">The new text content; null or empty only clears the element.</param>
        /// <param name="encodeWhiteSpaces">
        /// Whether space runs and line breaks are converted to <c>&amp;nbsp;</c> and
        /// <c>&lt;br /&gt;</c> for elements in normal text mode.
        /// </param>
        /// <returns>The <paramref name="element"/> that was passed in.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="encodeWhiteSpaces"/> is true and the element's text mode is neither
        /// CData, Preformated nor Normal.
        /// </exception>
        public static T InnerText <T>(this T element, string text, bool encodeWhiteSpaces) where T : IHtmlElement
        {
            if (element == null)
            {
                throw new ArgumentNullException("element");
            }

            var modifier = EnsureModifiable(element);
            var mode     = element.ElementTextMode();

            lock (element.SyncRoot)
            {
                ClearNodes(element);

                // For empty input, clearing the element is all that is needed.
                if (string.IsNullOrEmpty(text))
                {
                    return element;
                }

                if (mode == TextMode.CData)
                {
                    // CDATA-style content is stored verbatim, without encoding.
                    modifier.AddTextNode(element, text);
                }
                else if (mode == TextMode.Preformated || !encodeWhiteSpaces)
                {
                    modifier.AddTextNode(element, HtmlEncoding.HtmlEncode(text));
                }
                else if (mode == TextMode.Normal)
                {
                    var encoded = HtmlEncoding.HtmlEncode(text);

                    // First pass: every pair of spaces becomes "&nbsp; ".
                    encoded = encoded.Replace("  ", "&nbsp; ");

                    // An odd run at the very end leaves one surplus plain space; turn it into &nbsp;.
                    if (encoded.EndsWith("  ", StringComparison.Ordinal))
                    {
                        encoded = encoded.Substring(0, encoded.Length - 1) + "&nbsp;";
                    }

                    // A single pass turns an odd interior run such as "a   b" into "a&nbsp;  b",
                    // which still holds two adjacent plain spaces that collapse into one.
                    // Repeat until none are left; each pass removes at least one plain space.
                    while (encoded.IndexOf("  ", StringComparison.Ordinal) >= 0)
                    {
                        encoded = encoded.Replace("  ", "&nbsp; ");
                    }

                    // Normalize line breaks, then emit explicit <br /> tags.
                    encoded = encoded.Replace("\r\n", "\n").Replace("\r", "\n");
                    encoded = encoded.Replace("\n", "<br />");

                    element.Document.ParseFragment(encoded).Into(element, 0);
                }
                else
                {
                    throw new InvalidOperationException("元素不包含任何文本内容,无法设置 InnerText");
                }
            }

            return element;
        }
Exemplo n.º 4
0
        /*
         * /// <summary>
         * /// 使用指定文本替换元素内容(警告,此方法会清除元素所有内容)
         * /// </summary>
         * /// <param name="element">要替换内容的元素</param>
         * /// <param name="text">文本内容</param>
         * public static void ReplaceChildsWithText( this IHtmlElement element, string text )
         * {
         * if ( element == null )
         *  throw new ArgumentNullException( "element" );
         *
         * var factory = element.Document.GetNodeFactory();
         * if ( factory == null )
         *  throw new NotSupportedException();
         *
         * lock ( element.SyncRoot )
         * {
         *  ClearNodes( element );
         *
         *  if ( !HtmlSpecification.cdataTags.Contains( element.Name, StringComparer.OrdinalIgnoreCase ) )
         *  {
         *    var fragment = HtmlEncode( text, factory );
         *
         *    fragment.InsertTo( element, 0 );
         *  }
         *  else
         *  {
         *    var textNode = factory.CreateTextNode( text );
         *    textNode.Into( element, 0 );
         *  }
         * }
         *
         * }
         *
         * /// <summary>
         * /// 使用指定的HTML文本替换元素内容(警告,此方法会清除元素所有内容)
         * /// </summary>
         * /// <param name="element">要替换内容的元素</param>
         * /// <param name="html">要替换的HTML代码</param>
         * public static void ReplaceChildsWithHtml( this IHtmlElement element, string html )
         * {
         * if ( element == null )
         *  throw new ArgumentNullException( "element" );
         *
         * var factory = element.Document.GetNodeFactory();
         * if ( factory == null )
         *  throw new NotSupportedException();
         *
         * lock ( element.SyncRoot )
         * {
         *  ClearNodes( element );
         *
         *  if ( HtmlSpecification.cdataTags.Contains( element.Name, StringComparer.OrdinalIgnoreCase ) )
         *  {
         *    var textNode = factory.CreateTextNode( html );
         *    textNode.Into( element, 0 );
         *  }
         *  else
         *  {
         *    var fragment = factory.ParseFragment( html );
         *    fragment.InsertTo( element, 0 );
         *  }
         * }
         * }
         */



        /// <summary>
        /// Builds a fragment from plain text: the text is HTML-encoded, each non-empty line becomes
        /// a text node, and each line break (CRLF, CR or LF) becomes a <c>br</c> element.
        /// </summary>
        /// <param name="text">The text to encode.</param>
        /// <param name="factory">The node factory used to create the fragment's nodes.</param>
        /// <returns>A new fragment holding the generated text nodes and <c>br</c> elements.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="text"/> or <paramref name="factory"/> is null.
        /// </exception>
        private static HtmlFragment HtmlEncode(string text, IHtmlNodeFactory factory)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            if (factory == null)
            {
                throw new ArgumentNullException("factory");
            }

            var result = new HtmlFragment(factory);

            // Collapse every line-break flavour to '\n' and cut the encoded text into lines.
            var lines = HtmlEncoding.HtmlEncode(text)
                        .Replace("\r\n", "\n")
                        .Replace("\r", "\n")
                        .Split('\n');

            var lastLine = lines.Length - 1;

            for (var i = 0; i <= lastLine; i++)
            {
                // Empty lines produce no text node.
                if (lines[i].Length > 0)
                {
                    result.AddNode(factory.CreateTextNode(lines[i]));
                }

                // Every line except the last was terminated by a line break.
                if (i < lastLine)
                {
                    result.AddNode(factory.CreateElement("br"));
                }
            }

            return result;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Replaces the content of an element with the given text.
        /// Warning: all existing child nodes of the element are removed first.
        /// </summary>
        /// <typeparam name="T">The concrete element type; the same instance is returned.</typeparam>
        /// <param name="element">The element whose content is replaced.</param>
        /// <param name="text">The new text content; null or empty only clears the element.</param>
        /// <param name="encodeWhiteSpaces">
        /// Whether space pairs and line breaks are converted to <c>&amp;nbsp;</c> and
        /// <c>&lt;br /&gt;</c> for elements that are neither CDATA-style nor preformatted.
        /// </param>
        /// <returns>The <paramref name="element"/> that was passed in.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
        public static T InnerText <T>(this T element, string text, bool encodeWhiteSpaces) where T : IHtmlElement
        {
            if (element == null)
            {
                throw new ArgumentNullException("element");
            }

            var modifier = EnsureModifiable(element);

            lock (element.SyncRoot)
            {
                ClearNodes(element);

                // Nothing to insert: clearing the element was the whole job.
                if (string.IsNullOrEmpty(text))
                {
                    return element;
                }

                // CDATA-style elements take the text verbatim.
                if (HtmlSpecification.cdataTags.Contains(element.Name, StringComparer.OrdinalIgnoreCase))
                {
                    modifier.AddTextNode(element, text);
                    return element;
                }

                // Preformatted elements, or callers opting out of whitespace encoding,
                // get a single HTML-encoded text node.
                var encodeOnly = HtmlSpecification.preformatedElements.Contains(element.Name, StringComparer.OrdinalIgnoreCase) || !encodeWhiteSpaces;

                if (encodeOnly)
                {
                    modifier.AddTextNode(element, HtmlEncoding.HtmlEncode(text));
                    return element;
                }

                // Otherwise: encode, replace space pairs, normalize line breaks and emit <br /> tags.
                var markup = HtmlEncoding.HtmlEncode(text)
                             .Replace("  ", " &nbsp;")
                             .Replace("\r\n", "\n")
                             .Replace("\r", "\n")
                             .Replace("\n", "<br />");

                element.Document.ParseFragment(markup).Into(element, 0);
                return element;
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Replaces the content of an element with the given text.
        /// Warning: all existing child nodes of the element are removed first.
        /// </summary>
        /// <typeparam name="T">The concrete element type; the same instance is returned.</typeparam>
        /// <param name="element">The element whose content is replaced.</param>
        /// <param name="text">The new text content; null or empty only clears the element.</param>
        /// <param name="encodeWhiteSpaces">
        /// When false, the text is added as one HTML-encoded text node unless the element is in
        /// CData mode. When true, elements in normal text mode receive the fragment produced by
        /// <c>ParseText</c>.
        /// </param>
        /// <returns>The <paramref name="element"/> that was passed in.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="encodeWhiteSpaces"/> is true and the element's text mode is neither
        /// CData, Preformated nor Normal.
        /// </exception>
        public static T InnerText <T>(this T element, string text, bool encodeWhiteSpaces) where T : IHtmlElement
        {
            if (element == null)
            {
                throw new ArgumentNullException("element");
            }

            var modifier = EnsureModifiable(element);
            var textMode = element.ElementTextMode();

            lock (element.SyncRoot)
            {
                ClearNodes(element);

                // Nothing to insert: clearing the element was the whole job.
                if (string.IsNullOrEmpty(text))
                {
                    return element;
                }

                // CData content is stored verbatim, without encoding.
                if (textMode == TextMode.CData)
                {
                    modifier.AddTextNode(element, text);
                    return element;
                }

                // Preformatted elements, or callers opting out of whitespace encoding,
                // get a single HTML-encoded text node.
                if (textMode == TextMode.Preformated || !encodeWhiteSpaces)
                {
                    modifier.AddTextNode(element, HtmlEncoding.HtmlEncode(text));
                    return element;
                }

                // Any remaining mode other than Normal cannot hold text.
                if (textMode != TextMode.Normal)
                {
                    throw new InvalidOperationException("元素不包含任何文本内容,无法设置 InnerText");
                }

                ParseText(text, element.Document.FragmentManager).Into(element, 0);
                return element;
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Generates the HTML text of an element's start tag, including its attributes.
        /// </summary>
        /// <param name="element">The element whose start tag is generated.</param>
        /// <param name="selfClosed">Whether the tag is terminated with a self-closing <c> /&gt;</c>.</param>
        /// <returns>The start tag as HTML text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
        public static string GenerateTagHtml(IHtmlElement element, bool selfClosed)
        {
            if (element == null)
            {
                throw new ArgumentNullException("element");
            }

            var html = new StringBuilder(20);

            html.Append("<").Append(element.Name);

            foreach (var attribute in element.Attributes())
            {
                html.Append(" ").Append(attribute.Name);

                // Attributes without a value are written as a bare name.
                var value = attribute.AttributeValue;
                if (value == null)
                {
                    continue;
                }

                var specification = element.Document.HtmlSpecification;

                // URI and script values are written unencoded as long as they contain no double
                // quote; everything else goes through attribute encoding.
                var writeRaw = (specification.IsUriValue(attribute) || specification.IsScriptValue(attribute)) && !value.Contains('"');

                html.Append("=\"");

                if (writeRaw)
                {
                    html.Append(value);
                }
                else
                {
                    html.Append(HtmlEncoding.HtmlAttributeEncode(value));
                }

                html.Append("\"");
            }

            html.Append(selfClosed ? " />" : ">");

            return html.ToString();
        }
Exemplo n.º 8
0
 /// <summary>
 /// Appends the value of an <see cref="XText"/> node to the container as an HTML-encoded text node.
 /// </summary>
 /// <param name="container">The container that receives the text node.</param>
 /// <param name="text">The XML text node whose value is added.</param>
 /// <returns>The text node returned by <c>AddTextNode</c>.</returns>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="container"/> or <paramref name="text"/> is null.
 /// </exception>
 public static IHtmlTextNode Add(this IHtmlContainer container, XText text)
 {
     if (container == null)
     {
         throw new ArgumentNullException("container");
     }

     // Fail with a descriptive exception instead of a null dereference on text.Value.
     if (text == null)
     {
         throw new ArgumentNullException("text");
     }

     return container.AddTextNode(HtmlEncoding.HtmlEncode(text.Value));
 }
Exemplo n.º 9
0
        /// <summary>
        /// Attempts to get the textual representation of a node. Nodes that have no textual
        /// representation yield null.
        /// </summary>
        /// <param name="node">The node whose text representation is requested.</param>
        /// <returns>
        /// For a text node, depending on the parent's text mode: the raw HTML text (CData), the
        /// decoded text (Preformated), the decoded text with whitespace matches replaced by a single
        /// space (Normal), or null for any other mode. For a <c>br</c> element:
        /// <see cref="Environment.NewLine"/>. For any other container: the concatenated text of its
        /// child nodes. Null for comments and for elements in NonText mode.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
        /// <exception cref="InvalidOperationException">A text node has no parent.</exception>
        /// <exception cref="NotSupportedException">
        /// The node is neither a text node, a comment, a handled element nor a container.
        /// </exception>
        public static string InnerText(this IHtmlNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            var specification = node.Document.HtmlSpecification;

            var text = node as IHtmlTextNode;

            if (text != null)
            {
                var owner = text.Parent();
                if (owner == null)
                {
                    throw new InvalidOperationException();
                }

                var ownerMode = specification.ElementTextMode(owner);

                // CData content is returned as written, without entity decoding.
                if (ownerMode == TextMode.CData)
                {
                    return text.HtmlText;
                }

                // Preformatted content keeps its whitespace; only entities are decoded.
                if (ownerMode == TextMode.Preformated)
                {
                    return HtmlEncoding.HtmlDecode(text.HtmlText);
                }

                // Normal flow: replace whitespace matches with a single space before decoding.
                if (ownerMode == TextMode.Normal)
                {
                    return HtmlEncoding.HtmlDecode(whitespaceRegex.Replace(text.HtmlText, " "));
                }

                return null;
            }

            // Comments never contribute text.
            if (node is IHtmlComment)
            {
                return null;
            }

            var element = node as IHtmlElement;

            if (element != null)
            {
                if (element.Name.EqualsIgnoreCase("br"))
                {
                    return Environment.NewLine;
                }

                if (specification.ElementTextMode(element) == TextMode.NonText)
                {
                    return null;
                }
            }

            var container = node as IHtmlContainer;

            if (container == null)
            {
                throw new NotSupportedException();
            }

            // Concatenate the text of every child node.
            var parts = container.Nodes().Select(child => InnerText(child)).ToArray();

            return string.Join("", parts);
        }