Exemplo n.º 1
0
        /// <summary>
        /// Cleans article content node by node and concatenates the cleaned fragments.
        /// According to the original author's notes, the rules were distilled from a
        /// "TOP20" sample and are intended to be broadly applicable.
        /// </summary>
        /// <param name="nodes">
        /// Nodes selected via ItemContentXPath. May be null (for example when the XPath
        /// matched nothing); in that case an empty string is returned.
        /// </param>
        /// <param name="Url">The page URL, passed to HtmlFormatter.FormatHtml when tidying each fragment.</param>
        /// <param name="Format">
        /// When true (the default), the formatted HTML is kept. When false, each fragment is
        /// additionally passed through TextCleaner.FullClean (per the original notes, this
        /// strips tags such as p and br). HtmlFormatter.FormatHtml is applied in both cases.
        /// </param>
        /// <returns>The concatenation of all cleaned fragments, or an empty string if there are no nodes.</returns>
        public static string CleanContent(HtmlNodeCollection nodes, string Url, bool Format = true)
        {
            // An XPath selection that matches nothing can hand us null rather than an
            // empty collection; treat that as "no content" instead of crashing.
            if (nodes == null)
            {
                return string.Empty;
            }

            // Accumulate in a builder to avoid re-copying the whole string on every node.
            System.Text.StringBuilder content = new System.Text.StringBuilder();

            foreach (HtmlNode cnode in nodes)
            {
                string fragment = HtmlFormatter.FormatHtml(cnode.InnerHtml, Url);
                fragment = CleanContent_CleanEditor(fragment);
                fragment = CleanContent_CleanA(fragment);

                if (!Format)
                {
                    fragment = TextCleaner.FullClean(fragment);
                }

                // Append(string) ignores null, matching the behaviour of string concatenation.
                content.Append(fragment);
            }

            return content.ToString();
        }