/// <summary>
/// Builds the product-description HTML fragment from a scraped product page
/// and sanitizes it (links, inline styles, scripts and source-site branding
/// are stripped or rewritten).
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <param name="version">
/// Page layout selector: <c>0</c> for general products; any other value
/// (callers pass <c>1</c>) selects the book layout.
/// </param>
/// <returns>
/// The sanitized description HTML. If any step throws, the exception is logged
/// through <c>FileHelper.WriteException</c> and whatever had been assembled up
/// to that point is returned (possibly an empty string, possibly unsanitized).
/// </returns>
public static string GetProDesc(string docHtml, int version)
{
    // Matches an <a ... href="..."> ... </a> element; group 2 is the inner content.
    const string anchorPattern = "<a[^>]*href=[\"']([^'\"]+?)[\"'][^>]*>(.*?)</a>";

    // Tag stripping must not depend on tag casing or on the content being on
    // a single line, otherwise <SCRIPT> blocks or multi-line links slip through.
    const RegexOptions tagOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    string area = "";
    try
    {
        docHtml = RegexHelper.ClearTrn(docHtml);

        if (version == 0)
        {
            // Specification table first (added 2012-02-29, shy), when present.
            string pt = HtmlCls.GetHtmlByCss(docHtml, "Ptable").FirstOrDefault();
            if (!string.IsNullOrEmpty(pt))
            {
                area += pt;
            }

            // Then the first "content" fragment (appending null is a no-op).
            area += HtmlCls.GetHtmlByCss(docHtml, "content").FirstOrDefault();
        }
        else
        {
            // Book pages: concatenate every "m m1" fragment.
            var list = HtmlCls.GetHtmlByCss(docHtml, "m m1");
            area = string.Concat(list);

            // Drop the first "list-h" fragment (all of its occurrences in the text).
            string listH = HtmlCls.GetHtmlByCss(area, "list-h").FirstOrDefault();
            if (!string.IsNullOrEmpty(listH))
            {
                area = area.Replace(listH, "");
            }

            // Drop the "other works by this author" section.
            listH = HtmlCls.GetHtmlById(area, "related-works");
            if (!string.IsNullOrEmpty(listH))
            {
                area = area.Replace(listH, "");
            }

            // Prepend the book information: up to nine <li> entries of the
            // "summary" element, each wrapped in a <div>, with links unwrapped.
            // Guarded so a page without a summary cannot abort the method
            // before the sanitizing steps below have run.
            string sum = HtmlCls.GetHtmlById(docHtml, "summary");
            if (!string.IsNullOrEmpty(sum))
            {
                var sumList = RegexHelper.Matches(sum, "<li[^>]*>(.*?)</li>").Take(9);
                sum = string.Concat(sumList.Select(s => "<div>" + s + "</div>"));
                sum = Regex.Replace(sum, anchorPattern, "$2", tagOptions);
                area = sum + area;
            }
        }

        // Remove the authorization notice (first fragment carrying color="red").
        string red = HtmlCls.GetHtmlByAttr(area, "color=\"red\"").FirstOrDefault();
        if (!string.IsNullOrEmpty(red))
        {
            area = area.Replace(red, "");
        }

        // Avoid clashing with the host page's own "content" style rule.
        // Only this exact attribute text is removed; other class attributes stay.
        area = area.Replace("class=\"content\"", "");

        // Unwrap links: keep the inner content, drop the <a> element itself.
        area = Regex.Replace(area, anchorPattern, "$2", tagOptions);

        // Remove inline style attributes.
        area = Regex.Replace(area, "\\sstyle=(['\"])[^'\"]+?\\1", "");

        // Remove script blocks.
        area = Regex.Replace(area, "<script[^>]*>(.*?)</script>", "", tagOptions);

        // Rewrite numbered src attributes (src2=, src3=, ...) to a plain src=
        // so the images are displayed.
        area = Regex.Replace(area, "src\\d=", "src=");

        // Replace the source site's brand name with the generic shop wording.
        area = Regex.Replace(area, "京东商城|京东", "本商城");
    }
    catch (Exception ex)
    {
        // Best effort: log and fall through to return what was built so far.
        FileHelper.WriteException(ex);
    }

    return area;
}