/// <summary>
/// Builds the big-picture HTML table for a product.
/// </summary>
/// <param name="jdNum">Product id substituted into the <c>/bigimage.aspx?id={0}</c> URL.</param>
/// <returns>
/// A table with one image row per matched picture, or an empty string when the page
/// cannot be fetched, has no "right" block, yields no matches, or an exception occurs
/// (exceptions are logged through <c>FileHelper.WriteException</c>).
/// </returns>
public static string GetProBigPics(string jdNum)
{
    string picArea = "";
    try
    {
        const string bigUrl = BaseUrl + "/bigimage.aspx?id={0}";
        string picHtml = HtmlCls.GetHtmlByUrl(String.Format(bigUrl, jdNum));
        if (string.IsNullOrEmpty(picHtml))
        {
            return picArea;
        }

        // FirstOrDefault() yields null when no "right" block exists; bail out
        // instead of handing null to the regex helper.
        string biger = HtmlCls.GetHtmlByCss(picHtml, "right").FirstOrDefault();
        if (string.IsNullOrEmpty(biger))
        {
            return picArea;
        }

        // Materialize once so the matches are not enumerated twice.
        var bigList = RegexHelper.Matches(biger, "http://img10.360buyimg.com/n5([^'\"]*)").ToList();
        if (bigList.Count > 0)
        {
            // Each captured suffix (the part after ".../n5") is re-rooted under ".../n0".
            var rows = bigList
                .Select(s => "<tr><td><img src=\"http://img10.360buyimg.com/n0" + s + "\" /></td></tr>")
                .ToArray();

            picArea = "<table width=\"750\" align=\"center\" border=\"0\" cellSpacing=\"0\" cellPadding=\"0\">"
                      + string.Join("", rows)
                      + "</table>";
        }
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
    }

    return picArea;
}
/// <summary>
/// Downloads a listing page and extracts the distinct link targets found in its
/// product image cells.
/// </summary>
/// <param name="url">Address of the listing page.</param>
/// <returns>
/// The distinct first-group matches of the anchor regex (the href without its
/// <c>#fragment</c>), or an empty list when the page or its list container is missing.
/// </returns>
private static IEnumerable<string> GetUrlsFromHtml(string url)
{
    string docHtml = HtmlCls.GetHtmlByUrl(url, SiteEncoding);
    if (string.IsNullOrEmpty(docHtml))
    {
        return new List<string>();
    }

    docHtml = RegexHelper.ClearTrn(docHtml);

    // Default layout: container id "plist" with "p-img" cells.
    var cssName = "p-img";
    var listHtml = HtmlCls.GetHtmlById(docHtml, "plist");
    if (listHtml.IsNullOrEmpty())
    {
        // Alternative layout: "list-h" container with "i-img" cells
        // (the original note says some book pages use different markup).
        cssName = "i-img";
        listHtml = HtmlCls.GetHtmlByCss(docHtml, "list-h").FirstOrDefault();
    }

    // Neither layout was found: nothing to parse.
    if (listHtml.IsNullOrEmpty())
    {
        return new List<string>();
    }

    return HtmlCls.GetHtmlByCss(listHtml, cssName)
        .Select(t => RegexHelper.Match(t, "<a[^>]*href=[\"']?([^\"'>#]+)(#[^\"'>]*)?[\"']?[^>]*>"))
        .Distinct()
        .ToList();
}
/// <summary>
/// Builds the product description HTML from the "detailBox" blocks of a product page.
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <param name="sanId">Product id used as the <c>vid</c> query value of the thumbnail request.</param>
/// <returns>
/// The cleaned description (anchors unwrapped, scripts/class/style attributes removed,
/// root-relative image sources made absolute, brand name replaced), or an empty string
/// when the page has no "detailBox" block.
/// </returns>
public static string GetProDesc(string docHtml, string sanId)
{
    // Materialize once; the original enumerated the sequence twice.
    var blocks = HtmlCls.GetHtmlByCss(docHtml, "detailBox").ToList();
    if (blocks.Count == 0)
    {
        return "";
    }

    var desc = string.Join("", blocks.ToArray());

    // The product images are served by a separate request.
    const string imgUrl = "http://www.sanfo.com/shop/product.info.asp?command=findthumb&vid={0}";
    var imgs = HtmlCls.GetHtmlByUrl(String.Format(imgUrl, sanId), Encoding.UTF8);

    // Literal replace: the placeholder has no regex metacharacters, and using
    // downloaded markup as a regex replacement would interpret any "$" in it.
    desc = desc.Replace(
        "<dt class=\"detailImg\" id=\"item_product_images\"></dt>",
        "<dt class=\"detailImg\" id=\"item_product_images\">" + imgs + "</dt>");

    // Unwrap <a> tags, keeping their inner content. The original pattern began
    // with "<a[^]*href=[\"|']", which .NET parses as a single negated character
    // class; "[^>]*" is what was intended.
    desc = Regex.Replace(desc, "<a[^>]*href=[\"'][^'\"]*[\"'][^>]*>(.*?)</a>", "$1");

    // Remove <script> elements, including bodies that contain '<' or line breaks.
    desc = Regex.Replace(desc, "<script[^>]*>.*?</script>", "",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Strip class and style attributes.
    desc = Regex.Replace(desc, "(\\s*class=\"[^\"]+\")|(\\s*style=\"[^\"]+\")", "");

    // Turn root-relative image sources into absolute URLs.
    desc = Regex.Replace(desc, "src=\"(/[^\"]+)\"", "src=\"" + SanfoUrl + "$1\"");

    // Replace the brand name ("三夫" optionally followed by "户外"). The original
    // "户外?" made only the last character optional.
    desc = Regex.Replace(desc, "三夫(户外)?", "本商城");

    return desc;
}
/// <summary>
/// Downloads a listing page and extracts one <c>TamllBase</c> entry (url, title, price)
/// per "product" block inside the "J_itemList" container.
/// </summary>
/// <param name="url">Address of the listing page.</param>
/// <param name="next">
/// Receives the URL-decoded href of the "ui-page-s-next" link as returned by the regex
/// helper; set to an empty string when the page could not be downloaded.
/// </param>
/// <returns>The extracted entries; empty when the page could not be downloaded.</returns>
private static IEnumerable<TamllBase> GetTUrlsFromHtml(string url, out string next)
{
    next = "";
    var urls = new List<TamllBase>();

    // The site renders differently depending on the cookie, so a fixed one is sent.
    // NOTE(review): this looks like a captured user session cookie embedded in source —
    // consider moving it to configuration.
    const string cookie = "x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; cna=qMo3B45XYmoCAct2enaIrZoT; t=9bfd6b376a1f1e450056f0e1b1c54240; tracknick=luoyong87610; mpp=t%3D0%26m%3D%26h%3D0%26l%3D0; uc1=x; cookie2=22291aea11e397a82512118642ac0abe; passtime=1341285069752; isFirstOpen=true";

    string docHtml = HtmlCls.GetHtmlByUrl(url, Encoding.Default, cookie);
    if (string.IsNullOrEmpty(docHtml))
    {
        return urls;
    }

    docHtml = RegexHelper.ClearBr(docHtml);
    next = Utils.UrlDecode(RegexHelper.Match(docHtml, "<a[^>]*href=['\"]([^'\"\\s]+)['\"][^>]*class=['\"]ui-page-s-next['\"][^>]*>"));

    var listHtml = HtmlCls.GetHtmlById(docHtml, "J_itemList");
    var list = HtmlCls.GetHtmlByCss(listHtml, "product");

    // Group 1: url, group 2: name.
    const string regStr = "<a[^>]*href=['\"]([^'\"\\s]+?)['\"][^>]*class=['\"]product-title['\"][^>]*title=['\"]([^'\"]+?)['\"][^>]*>";
    // Price is read from the title attribute of the "product-normal" span.
    const string priceReg = "<span[^>]*class=['\"]product-normal['\"][^>]*title=['\"]([^'\"\\s]+)['\"][^>]*>";

    foreach (var item in list)
    {
        // Parse culture-independently and fall back to 0 so that one item with a
        // missing or malformed price no longer aborts the whole page.
        decimal price;
        if (!decimal.TryParse(
                RegexHelper.Match(item, priceReg),
                System.Globalization.NumberStyles.Number,
                System.Globalization.CultureInfo.InvariantCulture,
                out price))
        {
            price = 0m;
        }

        urls.Add(new TamllBase
        {
            Url = Utils.UrlDecode(RegexHelper.Match(item, regStr, 1)),
            Title = RegexHelper.Match(item, regStr, 2),
            Price = price
        });
    }

    return urls;
}
/// <summary>
/// Runs a keyword search on the site and returns the product links found in the
/// "atfResults" and "btfResults" containers.
/// </summary>
/// <param name="word">Search keyword; URL-encoded with <c>SiteEncoding</c> before use.</param>
/// <returns>
/// The distinct, non-empty link targets of the "productImage" blocks, or an empty list
/// when the page is empty or an exception occurs (exceptions are logged through
/// <c>FileHelper.WriteException</c>).
/// </returns>
public override IEnumerable<string> SearchWord(string word)
{
    try
    {
        string searchUrl = GetWebSiteInfo().BaseUrl + "/s/ref=nb_sb_noss_1?__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&url=search-alias%3Daps&field-keywords={0}";
        var url = String.Format(searchUrl, Utils.UrlEncode(word, SiteEncoding));

        using (var http = new HttpHelper(url, SiteEncoding))
        {
            var html = http.GetHtml();

            // Nothing was downloaded: do not feed a null/empty document to the parsers.
            if (html.IsNullOrEmpty())
            {
                return new List<string>();
            }

            html = RegexHelper.ClearTrn(html);

            var showList = HtmlCls.GetHtmlById(html, "atfResults") + HtmlCls.GetHtmlById(html, "btfResults");

            // NOTE(review): the first capture group does not exclude '#', so the
            // optional "#fragment" group can never match — confirm whether fragments
            // should be stripped here.
            return HtmlCls.GetHtmlByCss(showList, "productImage")
                .Select(t => RegexHelper.Match(t, "<a[^>]*href=[\"']?([^\"'>]+)(#[^\"'>]*)?[\"']?[^>]*>"))
                .Where(t => !t.IsNullOrEmpty()) // drop blocks in which no link was found
                .Distinct()
                .ToList();
        }
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
        return new List<string>();
    }
}
/// <summary>
/// Extracts the text of the third "mc tabcon hide" block inside the element with
/// id "detail".
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <returns>
/// The trimmed regex match taken from the third block, or an empty string when the
/// "detail" element is missing or contains fewer than three such blocks.
/// </returns>
public static string GetAftersaleService(string docHtml)
{
    string detail = HtmlCls.GetHtmlById(docHtml, "detail");
    if (string.IsNullOrEmpty(detail))
    {
        return "";
    }

    var tabs = HtmlCls.GetHtmlByCss(detail, "mc tabcon hide").ToList();
    if (tabs.Count < 3)
    {
        return "";
    }

    // Text between the first pair of tags in the third tab.
    var serviceText = RegexHelper.Match(tabs[2], "<[^>]*>([^<]+)<[^>]*>");
    return serviceText.Trim();
}
/// <summary>
/// Reports stock availability based on whether the page contains an "availGreen" block.
/// </summary>
/// <returns>
/// 1 when a non-empty "availGreen" block is found, 0 when it is not, and -1 when an
/// exception occurs (the exception is logged through <c>FileHelper.WriteException</c>).
/// </returns>
public override int GetStockCode()
{
    try
    {
        GetHtml(SiteEncoding);

        var availability = HtmlCls.GetHtmlByCss(DocHtml, "availGreen").FirstOrDefault();
        return availability.IsNullOrEmpty() ? 0 : 1;
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
        return -1;
    }
}
/// <summary>
/// Reports stock availability by querying the site's <c>exactMethod.jsp</c> endpoint
/// with the product number read from the second "prodNum" block of the page.
/// </summary>
/// <returns>
/// 1 when the response's "result" value is "Y", 0 for any other value, and -1 when any
/// exception occurs (including a page with fewer than two "prodNum" blocks).
/// </returns>
public override int GetStockCode()
{
    try
    {
        GetHtml(SiteEncoding);

        // The product number is taken from the second "prodNum" block.
        var numberBlocks = HtmlCls.GetHtmlByCss(DocHtml, "prodNum").ToList();
        var withoutTags = Regex.Replace(numberBlocks[1], "</?[0-9a-zA-Z]+[^>]*>", "");
        var goodsNo = withoutTags.Replace("商品编号:", "").Trim();

        var urlTemplate = GetWebSiteInfo().BaseUrl + "/ec/homeus/browse/exactMethod.jsp?goodsNo={0}&city=71010000";
        var stockUrl = String.Format(urlTemplate, goodsNo);

        using (var http = new HttpHelper(stockUrl, SiteEncoding))
        {
            var response = http.GetHtml();
            var flag = RegexHelper.Match(response, "\"result\":\"([a-zA-Z])\"");
            if (flag == "Y")
            {
                return 1;
            }

            return 0;
        }
    }
    catch (Exception)
    {
        return -1;
    }
}
/// <summary>
/// Downloads a listing page and returns the absolute URLs linked from its "pic" blocks.
/// </summary>
/// <param name="listUrl">Address of the listing page.</param>
/// <returns>
/// The distinct, non-empty link targets (the part of each href before any ';'), made
/// absolute against the site's base URL; an empty list when the page is empty or any
/// exception occurs.
/// </returns>
public override IEnumerable<string> GetUrlList(string listUrl)
{
    try
    {
        using (var http = new HttpHelper(listUrl, SiteEncoding))
        {
            var html = http.GetHtml();
            if (html.IsNullOrEmpty())
            {
                // Retry once when the first download came back empty.
                html = http.GetHtml();
            }

            if (html.IsNullOrEmpty())
            {
                return new List<string>();
            }

            // Normalize AFTER the retry: previously a successful second download
            // skipped ClearTrn and was parsed un-normalized.
            html = RegexHelper.ClearTrn(html);
            if (html.IsNullOrEmpty())
            {
                return new List<string>();
            }

            var baseUrl = GetWebSiteInfo().BaseUrl;

            return HtmlCls.GetHtmlByCss(html, "pic")
                .Select(t => RegexHelper.Match(t, "<a[^>]*href=[\"']?([^\"'>;]+)(;[^\"'>]*)?[\"']?[^>]*>"))
                .Distinct()
                .Where(t => !t.IsNullOrEmpty())
                .Select(t => Utils.GetAbsoluteUrl(baseUrl, t))
                .ToList();
        }
    }
    catch (Exception)
    {
        return new List<string>();
    }
}
/// <summary>
/// Gets the HTML of the right-hand area of a page.
/// </summary>
/// <param name="docHtml">HTML of the page.</param>
/// <returns>
/// The first "right-extra" block returned by <c>HtmlCls.GetHtmlByCss</c>, or the
/// default value (null for a string) when there is none.
/// </returns>
public static string GetRigthArea(string docHtml)
{
    var rightExtraBlocks = HtmlCls.GetHtmlByCss(docHtml, "right-extra");
    return rightExtraBlocks.FirstOrDefault();
}
/// <summary>
/// Builds the product description HTML from a product page.
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <param name="version">0 for general products; any other value selects the book layout.</param>
/// <returns>
/// The cleaned description. When an exception occurs it is logged through
/// <c>FileHelper.WriteException</c> and whatever had been assembled up to that point
/// is returned.
/// </returns>
public static string GetProDesc(string docHtml, int version)
{
    string area = "";
    try
    {
        docHtml = RegexHelper.ClearTrn(docHtml);

        if (version != 0)
        {
            // Book layout: concatenate every "m m1" block.
            var sections = HtmlCls.GetHtmlByCss(docHtml, "m m1");
            area = string.Join("", sections.ToArray());

            // Drop the first "list-h" block, if any.
            string unwanted = HtmlCls.GetHtmlByCss(area, "list-h").FirstOrDefault();
            if (!string.IsNullOrEmpty(unwanted))
            {
                area = area.Replace(unwanted, "");
            }

            // Drop the "other works by this author" region.
            unwanted = HtmlCls.GetHtmlById(area, "related-works");
            if (!string.IsNullOrEmpty(unwanted))
            {
                area = area.Replace(unwanted, "");
            }

            // Prepend the book information: the first nine <li> entries of the
            // "summary" element, each wrapped in a <div>, with anchors unwrapped.
            string summaryHtml = HtmlCls.GetHtmlById(docHtml, "summary");
            string summary = "";
            foreach (var entry in RegexHelper.Matches(summaryHtml, "<li[^>]*>(.*?)</li>").Take(9))
            {
                summary = summary + "<div>" + entry + "</div>";
            }

            summary = Regex.Replace(summary, "<a[^>]*href=[\"']([^'\"]+?)[\"'][^>]*>(.*?)</a>", "$2");
            area = summary + area;
        }
        else
        {
            // General layout: specification table (added 2012-02-29) followed by
            // the first "content" block.
            string specTable = HtmlCls.GetHtmlByCss(docHtml, "Ptable").FirstOrDefault();
            if (!string.IsNullOrEmpty(specTable))
            {
                area += specTable;
            }

            area += HtmlCls.GetHtmlByCss(docHtml, "content").FirstOrDefault();
        }

        // Remove the authorization notice (first element carrying color="red").
        string redNotice = HtmlCls.GetHtmlByAttr(area, "color=\"red\"").FirstOrDefault();
        if (!string.IsNullOrEmpty(redNotice))
        {
            area = area.Replace(redNotice, "");
        }

        // Avoid a style clash with the host page.
        area = area.Replace("class=\"content\"", "");

        // Ordered clean-up rules, applied one after another: { pattern, replacement }.
        var cleanupRules = new[]
        {
            new[] { "<a[^>]*href=[\"']([^'\"]+?)[\"'][^>]*>(.*?)</a>", "$2" }, // unwrap <a> tags
            new[] { "\\sstyle=(['\"])[^'\"]+?\\1", "" },                        // strip inline styles
            new[] { "<script[^>]*>(.*?)</script>", "" },                        // strip <script> tags
            new[] { "src\\d=", "src=" },                                        // turn srcN= into src=
            new[] { "京东商城|京东", "本商城" }                                     // replace the site name
        };

        foreach (var rule in cleanupRules)
        {
            area = Regex.Replace(area, rule[0], rule[1]);
        }
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
    }

    return area;
}