/// <summary>
/// Cleans article content. Per the original author's note, the rules applied here were
/// summarized from the "TOP20" and are meant to be fairly widely applicable.
/// Each node's inner HTML is passed through <c>HtmlFormatter.FormatHtml</c>,
/// <c>CleanContent_CleanEditor</c> and <c>CleanContent_CleanA</c> in that order, and the
/// per-node results are concatenated in iteration order.
/// </summary>
/// <param name="nodes">
/// The nodes selected via ItemContentXPath. May be null (e.g. when the XPath matched nothing),
/// in which case an empty string is returned.
/// </param>
/// <param name="Url">The page URL, forwarded to <c>HtmlFormatter.FormatHtml</c>.</param>
/// <param name="Format">
/// When true (the default), the formatted HTML is kept as-is. When false, each node's result is
/// additionally passed through <c>TextCleaner.FullClean</c> (presumably stripping remaining tags
/// such as p and br -- confirm against TextCleaner). Note that <c>FormatHtml</c> is invoked
/// regardless of this flag.
/// </param>
/// <returns>The concatenated cleaned content of all nodes; empty when there are no nodes.</returns>
public static string CleanContent(HtmlNodeCollection nodes, string Url, bool Format = true)
{
    // An XPath selection that matches nothing yields null rather than an empty collection
    // (HtmlAgilityPack's SelectNodes behaves this way), so guard before enumerating.
    if (nodes == null)
    {
        return string.Empty;
    }

    // Accumulate in a builder instead of repeated string concatenation inside the loop.
    System.Text.StringBuilder content = new System.Text.StringBuilder();

    foreach (HtmlNode cnode in nodes)
    {
        string cleaned = HtmlFormatter.FormatHtml(cnode.InnerHtml, Url);
        cleaned = CleanContent_CleanEditor(cleaned);
        cleaned = CleanContent_CleanA(cleaned);

        if (!Format)
        {
            // Caller opted out of keeping formatted HTML: apply the full text clean as well.
            cleaned = TextCleaner.FullClean(cleaned);
        }

        content.Append(cleaned);
    }

    return content.ToString();
}