/// <summary>
/// Collects product page URLs from a listing page.
/// </summary>
/// <param name="listUrl">URL of the page to download and scan.</param>
/// <returns>
/// A single-element list holding the request URL when that URL is itself a product page;
/// otherwise the distinct, non-empty product URLs found in the "itemGrid1" block, passed
/// through <c>Utils.GetAbsoluteUrl</c>. An empty list is returned when any exception occurs.
/// </returns>
public override IEnumerable<string> GetUrlList(string listUrl)
{
    // Dots are escaped so that "." matches only a literal dot in the host name and extension.
    const string productUrlPattern = "http://www\\.newegg\\.com\\.cn/Product/[0-9a-zA-Z\\-]+\\.htm";

    try
    {
        using (var http = new HttpHelper(listUrl, SiteEncoding))
        {
            var html = http.GetHtml();
            var url = http.GetRequestUrl();

            // The URL reported by the helper is a product page: it is the only result.
            if (Regex.IsMatch(url, "^" + productUrlPattern + "$"))
            {
                return new List<string> { url };
            }

            if (!html.IsNullOrEmpty())
            {
                html = RegexHelper.ClearTrn(html);
            }

            var showList = HtmlCls.GetHtmlById(html, "itemGrid1");
            return RegexHelper.Matches(showList, "(" + productUrlPattern + ")")
                .Distinct()
                .Where(t => !t.IsNullOrEmpty())
                .Select(t => Utils.GetAbsoluteUrl(GetWebSiteInfo().BaseUrl, t))
                .ToList();
        }
    }
    catch (Exception)
    {
        // Best effort: any failure yields an empty result.
        return new List<string>();
    }
}
/// <summary>
/// Reads the product name from the loaded page.
/// </summary>
/// <returns>
/// Whatever <c>RegexHelper.Match</c> extracts with the <c>&lt;h1&gt;</c> pattern from the
/// block that <c>HtmlCls.GetHtmlById</c> returns for id "name".
/// </returns>
public override string GetProName()
{
    // Make sure DocHtml is populated before it is queried.
    GetHtml(SiteEncoding);

    var nameBlock = HtmlCls.GetHtmlById(DocHtml, "name");
    var title = RegexHelper.Match(nameBlock, "<h1>([^<]*)<");
    return title;
}
/// <summary>
/// Downloads a listing page and builds one <c>TamllBase</c> (url, title, price) per product entry.
/// </summary>
/// <param name="url">Listing page URL.</param>
/// <param name="next">
/// Receives the URL-decoded link matched by the "ui-page-s-next" pattern; stays empty when
/// the page could not be downloaded.
/// </param>
/// <returns>The products found on the page; empty when the download returned nothing.</returns>
private static IEnumerable<TamllBase> GetTUrlsFromHtml(string url, out string next)
{
    // tmall renders the front end differently depending on the cookie, so a fixed one is sent.
    const string cookie =
        "x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; cna=qMo3B45XYmoCAct2enaIrZoT; t=9bfd6b376a1f1e450056f0e1b1c54240; tracknick=luoyong87610; mpp=t%3D0%26m%3D%26h%3D0%26l%3D0; uc1=x; cookie2=22291aea11e397a82512118642ac0abe; passtime=1341285069752; isFirstOpen=true";
    const string nextPageReg =
        "<a[^>]*href=['\"]([^'\"\\s]+)['\"][^>]*class=['\"]ui-page-s-next['\"][^>]*>";
    // Group 1: url, group 2: name.
    const string regStr =
        "<a[^>]*href=['\"]([^'\"\\s]+?)['\"][^>]*class=['\"]product-title['\"][^>]*title=['\"]([^'\"]+?)['\"][^>]*>";
    // Price.
    const string priceReg =
        "<span[^>]*class=['\"]product-normal['\"][^>]*title=['\"]([^'\"\\s]+)['\"][^>]*>";

    next = "";
    var products = new List<TamllBase>();

    string pageHtml = HtmlCls.GetHtmlByUrl(url, Encoding.Default, cookie);
    if (string.IsNullOrEmpty(pageHtml))
    {
        return products;
    }

    pageHtml = RegexHelper.ClearBr(pageHtml);
    next = Utils.UrlDecode(RegexHelper.Match(pageHtml, nextPageReg));

    var listHtml = HtmlCls.GetHtmlById(pageHtml, "J_itemList");
    foreach (var item in HtmlCls.GetHtmlByCss(listHtml, "product"))
    {
        products.Add(new TamllBase
        {
            Url = Utils.UrlDecode(RegexHelper.Match(item, regStr, 1)),
            Title = RegexHelper.Match(item, regStr, 2),
            // Convert.ToDecimal throws when the matched price text is not numeric.
            Price = Convert.ToDecimal(RegexHelper.Match(item, priceReg))
        });
    }

    return products;
}
/// <summary>
/// Builds the product description HTML from a product page.
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <param name="sanId">Product id substituted into the thumbnail request URL.</param>
/// <returns>
/// The concatenated "detailBox" blocks with the thumbnail markup injected, links unwrapped,
/// scripts and class/style attributes removed, root-relative <c>src</c> values prefixed with
/// <c>SanfoUrl</c> and the shop name replaced; an empty string when no "detailBox" block exists.
/// </returns>
public static string GetProDesc(string docHtml, string sanId)
{
    // Materialise once so the source is not enumerated twice.
    var parts = HtmlCls.GetHtmlByCss(docHtml, "detailBox").ToList();
    if (parts.Count == 0)
    {
        return "";
    }

    var desc = string.Concat(parts.ToArray());

    // The images are served by a separate request.
    const string imgUrl = "http://www.sanfo.com/shop/product.info.asp?command=findthumb&vid={0}";
    var imgs = HtmlCls.GetHtmlByUrl(String.Format(imgUrl, sanId), Encoding.UTF8);

    // Plain string replacement: the placeholder is a literal, and the downloaded markup must
    // not be interpreted as a regex replacement pattern (a "$1" or "$&" inside it would be).
    desc = desc.Replace(
        "<dt class=\"detailImg\" id=\"item_product_images\"></dt>",
        "<dt class=\"detailImg\" id=\"item_product_images\">" + imgs + "</dt>");

    // Unwrap <a> tags, keeping their inner content. "[^>]*" replaces the former "[^]*",
    // which .NET parsed as one negated character class swallowing the "href=" part.
    desc = Regex.Replace(desc, "<a[^>]*href=[\"|'][^'\"]*[\"'][^>]*>(.*?)</a>", "$1");
    // Drop <script> blocks.
    desc = Regex.Replace(desc, "<script[^>]*>[^<]*</script>", "");
    // Strip class and style attributes.
    desc = Regex.Replace(desc, "(\\s*class=\"[^\"]+\")|(\\s*style=\"[^\"]+\")", "");
    // Turn root-relative src values into absolute ones.
    desc = Regex.Replace(desc, "src=\"(/[^\"]+)\"", "src=\"" + SanfoUrl + "$1\"");
    // Replace the source shop's name.
    desc = Regex.Replace(desc, "(三夫(户外?)?)", "本商城");

    return desc;
}
/// <summary>
/// Runs a keyword search on the site and collects the links of the result entries.
/// </summary>
/// <param name="word">Search keyword; URL-encoded with <c>SiteEncoding</c> before use.</param>
/// <returns>
/// Distinct values matched from each "productImage" block inside the "atfResults" and
/// "btfResults" sections; an empty list (after logging) when an exception occurs.
/// </returns>
public override IEnumerable<string> SearchWord(string word)
{
    try
    {
        // The base URL is part of the format string, exactly as the search template expects.
        string template = GetWebSiteInfo().BaseUrl +
                          "/s/ref=nb_sb_noss_1?__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&url=search-alias%3Daps&field-keywords={0}";
        var requestUrl = String.Format(template, Utils.UrlEncode(word, SiteEncoding));

        using (var http = new HttpHelper(requestUrl, SiteEncoding))
        {
            var page = http.GetHtml();
            if (!page.IsNullOrEmpty())
            {
                page = RegexHelper.ClearTrn(page);
            }

            // Results are split over two containers; scan both.
            var resultsHtml = HtmlCls.GetHtmlById(page, "atfResults") +
                              HtmlCls.GetHtmlById(page, "btfResults");

            var links = HtmlCls.GetHtmlByCss(resultsHtml, "productImage")
                .Select(block => RegexHelper.Match(block, "<a[^>]*href=[\"']?([^\"'>]+)(#[^\"'>]*)?[\"']?[^>]*>"))
                .Distinct()
                .ToList();
            return links;
        }
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
        return new List<string>();
    }
}
/// <summary>
/// Gets the first (main) picture of a product page.
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <returns>
/// Whatever <c>RegexHelper.Match</c> extracts with the <c>&lt;img src&gt;</c> pattern from the
/// block returned for id "spec-n1".
/// </returns>
public static string GetBigPic(string docHtml)
{
    var pictureBlock = HtmlCls.GetHtmlById(docHtml, "spec-n1");
    return RegexHelper.Match(pictureBlock, "<img[^>]*src=['\"]([^'\"]*)['\"][^>]*>");
}
/// <summary>
/// Downloads a listing page and extracts the product links from it.
/// </summary>
/// <param name="url">Listing page URL.</param>
/// <returns>
/// Distinct link values matched inside the listing container; an empty list when the page
/// could not be downloaded or no listing container was found.
/// </returns>
private static IEnumerable<string> GetUrlsFromHtml(string url)
{
    string docHtml = HtmlCls.GetHtmlByUrl(url, SiteEncoding);
    if (string.IsNullOrEmpty(docHtml))
    {
        return new List<string>();
    }

    docHtml = RegexHelper.ClearTrn(docHtml);

    var cssName = "p-img";
    var listHtml = HtmlCls.GetHtmlById(docHtml, "plist");
    if (listHtml.IsNullOrEmpty())
    {
        // Some book pages use a different HTML layout.
        cssName = "i-img";
        listHtml = HtmlCls.GetHtmlByCss(docHtml, "list-h").FirstOrDefault();
    }

    // FirstOrDefault yields null when neither layout is present; there is nothing to scan then.
    if (listHtml.IsNullOrEmpty())
    {
        return new List<string>();
    }

    return HtmlCls.GetHtmlByCss(listHtml, cssName)
        .Select(t => RegexHelper.Match(t, "<a[^>]*href=[\"']?([^\"'>#]+)(#[^\"'>]*)?[\"']?[^>]*>"))
        .Distinct()
        .ToList();
}
/// <summary>
/// Builds an HTML table holding the large pictures of a product.
/// </summary>
/// <param name="jdNum">Product id substituted into the big-image page URL.</param>
/// <returns>
/// A <c>&lt;table&gt;</c> with one row per matched picture, each pointing at the "n0" image
/// path; an empty string when nothing is found or an exception occurs (the exception is logged).
/// </returns>
public static string GetProBigPics(string jdNum)
{
    string picArea = "";
    try
    {
        const string bigUrl = BaseUrl + "/bigimage.aspx?id={0}";
        string picHtml = HtmlCls.GetHtmlByUrl(String.Format(bigUrl, jdNum));
        if (string.IsNullOrEmpty(picHtml))
        {
            return picArea;
        }

        string rightBlock = HtmlCls.GetHtmlByCss(picHtml, "right").FirstOrDefault();

        // Each match is the part after the "n5" prefix; it is re-attached to the "n0" prefix.
        var rows = RegexHelper.Matches(rightBlock, "http://img10.360buyimg.com/n5([^'\"]*)")
            .Select(s => "<tr><td><img src=\"http://img10.360buyimg.com/n0" + s + "\" /></td></tr>")
            .ToArray();

        if (rows.Length > 0)
        {
            picArea = "<table width=\"750\" align=\"center\" border=\"0\" cellSpacing=\"0\" cellPadding=\"0\">"
                      + string.Concat(rows)
                      + "</table>";
        }
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
    }

    return picArea;
}
/// <summary>
/// Gets the main picture of the loaded product page.
/// </summary>
/// <returns>
/// Whatever <c>RegexHelper.Match</c> extracts with the <c>&lt;img src&gt;</c> pattern from the
/// block returned for id "spec-n1".
/// </returns>
public override string GetProPic()
{
    // Make sure DocHtml is populated before it is queried.
    GetHtml(SiteEncoding);

    var pictureBlock = HtmlCls.GetHtmlById(DocHtml, "spec-n1");
    return RegexHelper.Match(pictureBlock, "<img[^>]*src=['\"]([^'\"]*)['\"][^>]*>");
}
/// <summary>
/// Resolves a product link from a product number (the cooperation mode changed).
/// No category number is available, so the site's search page is used instead.
/// </summary>
/// <param name="num">Product number.</param>
/// <returns>
/// The first "globalProductName" link of the search result page, passed through
/// <c>Utils.GetAbsoluteUrl</c> with <c>RedBabyUrl</c>.
/// </returns>
public static string GetProUrlFromNum(string num)
{
    const string proReg = "<div[^>]*class=\"globalProductName\"[^>]*>\\s*<a[^>]*href=['|\"]([^'\"]*)['|\"][^>]*>";

    string resultPage = HtmlCls.GetHtmlByUrl(RedBabyUrl + "/search?keyword=" + num, Encoding.UTF8);
    string relativeLink = RegexHelper.Match(resultPage, proReg);
    return Utils.GetAbsoluteUrl(RedBabyUrl, relativeLink);
}
/// <summary>
/// Extracts the packing-list text from a product page.
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <returns>
/// The trimmed text matched between the first pair of tags in the "bzqd" block;
/// an empty string when that block is missing.
/// </returns>
public static string GetPackingList(string docHtml)
{
    string block = HtmlCls.GetHtmlById(docHtml, "bzqd");
    if (string.IsNullOrEmpty(block))
    {
        return "";
    }

    string text = RegexHelper.Match(block, "<[^>]*>([^<]+)<[^>]*>");
    return text.Trim();
}
/// <summary>
/// Extracts the manufacturer/brand name from a product page.
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <returns>
/// The trimmed link text matched by the manufacturer pattern in the "i-detail" block;
/// an empty string when that block is missing.
/// </returns>
public static string GetBrandName(string docHtml)
{
    string block = HtmlCls.GetHtmlById(docHtml, "i-detail");
    if (string.IsNullOrEmpty(block))
    {
        return "";
    }

    string brand = RegexHelper.Match(block, "<li[^>]*>生产厂家:<a[^>]*brand[^>]*>([^<]+)</a>");
    return brand.Trim();
}
/// <summary>
/// Gets the main picture of the loaded product page.
/// </summary>
/// <returns>
/// The <c>src</c> value matched inside the "bgPics" block; an empty string when any
/// exception occurs (the exception is not logged).
/// </returns>
public override string GetProPic()
{
    string source;
    try
    {
        GetHtml(SiteEncoding);
        var pictureBlock = HtmlCls.GetHtmlById(DocHtml, "bgPics");
        source = RegexHelper.Match(pictureBlock, "\\s+src=[\"']([^\"'>]+)[\"']");
    }
    catch (Exception)
    {
        source = "";
    }

    return source;
}
/// <summary>
/// Gets the product name of the loaded product page.
/// </summary>
/// <returns>
/// The "title-descript" block with every tag removed and surrounding whitespace trimmed;
/// an empty string when any exception occurs (the exception is not logged).
/// </returns>
public override string GetProName()
{
    string title;
    try
    {
        GetHtml(SiteEncoding);
        var titleBlock = HtmlCls.GetHtmlById(DocHtml, "title-descript");
        // Strip opening and closing tags, keeping only the text.
        title = Regex.Replace(titleBlock, "</?[0-9a-zA-Z]+[^>]*>", "").Trim();
    }
    catch (Exception)
    {
        title = "";
    }

    return title;
}
/// <summary>
/// Downloads a listing page and extracts the product links from its "Id_prodItemList" block.
/// </summary>
/// <param name="url">Listing page URL.</param>
/// <returns>
/// The matched links, with <c>SanfoUrl</c> prepended to those starting with "/";
/// an empty list when the download returned nothing.
/// </returns>
private static IEnumerable<string> GetUrlsFromHtml(string url)
{
    const string regStr = "<div[^>]*class=['\"]proPic['\"][^>]*><a[^>]*href=['\"]([^'\"]+)['\"][^>]*>";

    string pageHtml = HtmlCls.GetHtmlByUrl(url);
    if (string.IsNullOrEmpty(pageHtml))
    {
        return new List<string>();
    }

    var listHtml = HtmlCls.GetHtmlById(pageHtml, "Id_prodItemList");
    return RegexHelper.Matches(listHtml, regStr)
        .Select(link => !link.StartsWith("/") ? link : SanfoUrl + link)
        .ToList();
}
/// <summary>
/// Gets the stock code for the Chengdu area.
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <returns>-1: unknown; 0: out of stock; 1: in stock.</returns>
public static int GetCdStockCode(string docHtml)
{
    try
    {
        docHtml = RegexHelper.ClearBr(docHtml);
        string stockUrl = RegexHelper.Match(docHtml, "(http://price.360buy.com/ows/stock/pshow-[a-zA-Z0-9]*.html)");

        string stockHtml;
        string codePattern;
        if (!string.IsNullOrEmpty(stockUrl))
        {
            // The page source contains a stock link such as
            // <script type="text/javascript" src="http://price.360buy.com/ows/stock/pshow-0F5D9F92C79383CED35C7903D3927987.html"></script>
            // whose content looks like:
            // var stockdata = [{"Wid":183192,"Rid":6,"Stock":34,"Days":0,"Purchase":0,"IsPop":false},{"Wid":183192,"Rid":3,"Stock":33,"Days":0,"Purchase":0,"IsPop":false},{"Wid":183192,"Rid":10,"Stock":34,"Days":0,"Purchase":0,"IsPop":false},{"Wid":183192,"Rid":4,"Stock":33,"Days":0,"Purchase":0,"IsPop":false}];
            // The site's js maps the ids as follows:
            // var orgname = { 6: "北京仓", 3: "上海仓", 10: "广州仓", 4: "成都仓", 5: "武汉仓", 7: "南京仓", 8: "济南仓", 9: "沈阳仓" };
            // var stockstatus = { 33: "现货", 34: "无货", 36: "预定", 39: "在途", 0: "统计中" };
            stockHtml = HtmlCls.GetHtmlByUrl(stockUrl, SiteEncoding);
            // Only the Chengdu warehouse (Rid 4) is considered.
            codePattern = "\"Rid\":4,\"Stock\":([^,]+),";
        }
        else
        {
            string skUid = RegexHelper.Match(docHtml, "wareinfo.*sid[^\"]*\"([0-9a-zA-Z]*)\"");
            // Province-level stock.
            string purl =
                "http://price.360buy.com/stocksoa/StockHandler.ashx?callback=getProvinceStockCallback&type=provincestock&skuid=" +
                skUid + "&provinceid=22";
            // City-level stock (disabled):
            //string url =
            //    "http://price.360buy.com/stocksoa/StockHandler.ashx?callback=getProvinceStockCallback&type=pcstock&skuid=" +
            //    skUid + "&provinceid=22&cityid=1930";
            stockHtml = HtmlCls.GetHtmlByUrl(purl, SiteEncoding);
            codePattern = "\"StockState\":(\\w+),";
        }

        if (string.IsNullOrEmpty(stockHtml))
        {
            return -1;
        }

        // Only code "33" counts as in stock.
        return RegexHelper.Match(stockHtml, codePattern) == "33" ? 1 : 0;
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
        return -1;
    }
}
/// <summary>
/// Gets the main picture of the loaded product page.
/// </summary>
/// <returns>
/// The <c>src</c> value matched inside the "prodImageCell" block; an empty string
/// (after logging) when an exception occurs.
/// </returns>
public override string GetProPic()
{
    try
    {
        GetHtml(SiteEncoding);
        var imageCell = HtmlCls.GetHtmlById(DocHtml, "prodImageCell");
        return RegexHelper.Match(imageCell, "\\s+src=[\"']([^\"'>]+)[\"']");
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
        return "";
    }
}
/// <summary>
/// Downloads a product page and then the separate description resource it points to.
/// </summary>
/// <param name="link">Product page URL.</param>
/// <returns>
/// The description payload with the leading <c>var desc='</c> removed and trailing single
/// quotes trimmed; an empty string when the page exposes no "apiItemDesc" URL or the
/// description download returns nothing.
/// </returns>
public static string GetProDesc(string link)
{
    // The description is rendered into id=J_DivItemDesc from a separate resource.
    var docHtml = HtmlCls.GetHtmlByUrl(link);
    var desurl = RegexHelper.Match(docHtml, "\"apiItemDesc\":\"([^\"]+?)\"");
    if (string.IsNullOrEmpty(desurl))
    {
        return "";
    }

    // The URL is embedded in a script, so its slashes are backslash-escaped.
    desurl = desurl.Replace("\\", "");

    var desc = HtmlCls.GetHtmlByUrl(desurl);
    if (string.IsNullOrEmpty(desc))
    {
        // A failed download must not surface as a NullReferenceException.
        return "";
    }

    return desc.Replace("var desc='", "").TrimEnd('\'');
}
/// <summary>
/// Gets the main picture of the loaded product page.
/// </summary>
/// <returns>
/// The "src340" attribute value of the "midImg" block with any query string removed;
/// an empty string (after logging) when an exception occurs.
/// </returns>
public override string GetProPic()
{
    try
    {
        GetHtml(SiteEncoding);
        var imageTag = HtmlCls.GetHtmlById(DocHtml, "midImg");
        var source = HtmlCls.GetAttrValue(imageTag, "src340");
        // Drop everything from the first "?" on.
        return Regex.Replace(source, "\\?.*$", "");
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
        return "";
    }
}
/// <summary>
/// Gets the product name of the loaded product page.
/// </summary>
/// <returns>
/// The "btAsinTitle" block with every tag removed (not trimmed); an empty string
/// (after logging) when an exception occurs.
/// </returns>
public override string GetProName()
{
    try
    {
        GetHtml(SiteEncoding);
        var titleBlock = HtmlCls.GetHtmlById(DocHtml, "btAsinTitle");
        // Strip opening and closing tags, keeping only the text.
        return Regex.Replace(titleBlock, "</?[0-9a-zA-Z]+[^>]*>", "");
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
        return "";
    }
}
/// <summary>
/// Extracts and cleans the product description from a product page.
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <returns>
/// The "productDescription" block with the source shop's name replaced, its id attribute
/// removed, links and scripts dropped and numbered <c>srcN=</c> attributes turned into
/// <c>src=</c>; the unmodified (null or empty) lookup result when the block is missing.
/// </returns>
public static string GetDescFromHtml(string docHtml)
{
    string desc = HtmlCls.GetHtmlById(docHtml, "productDescription");
    if (string.IsNullOrEmpty(desc))
    {
        return desc;
    }

    desc = Regex.Replace(desc, @"红孩子母婴商城|红孩子", "本商场");
    // Remove the id to avoid style conflicts on the target page.
    desc = desc.Replace("id=\"productDescription\"", "");
    // Stronger variant, kept for reference: strip every class attribute.
    //area = Regex.Replace(area, "class=['\"][^'\"]*['\"]", "");
    // Remove <a> elements together with their content. "[^>]*" replaces the former "[^]*",
    // which .NET parsed as one negated character class swallowing the "href=" part.
    desc = Regex.Replace(desc, "<a[^>]*href=[\"|'][^'\"]*[\"'][^>]*>(.*?)</a>", "");
    // Remove <script> blocks.
    desc = Regex.Replace(desc, "<script[^>]*>[^<]*</script>", "");
    // Turn numbered src attributes (e.g. src2=) into src= so the images are displayed.
    desc = Regex.Replace(desc, "src\\d=", "src=");

    return desc;
}
/// <summary>
/// Extracts the after-sale service text from a product page.
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <returns>
/// The trimmed text matched between the first pair of tags in the third "mc tabcon hide"
/// block of the "detail" section; an empty string when the section is missing or holds
/// fewer than three such blocks.
/// </returns>
public static string GetAftersaleService(string docHtml)
{
    string detail = HtmlCls.GetHtmlById(docHtml, "detail");
    if (string.IsNullOrEmpty(detail))
    {
        return "";
    }

    var tabs = HtmlCls.GetHtmlByCss(detail, "mc tabcon hide").ToList();
    return tabs.Count < 3
        ? ""
        : RegexHelper.Match(tabs[2], "<[^>]*>([^<]+)<[^>]*>").Trim();
}
// Disabled: gets the price information [market_price], [price].
// rNum is the product id on site 002.
//public static JsonCls.JsonObject GetPriceInfo(string rNum)
//{
//    const string baseUrl = "http://www.redbaby.com.cn/catalog/category/getPriceInfo?ids={0}";
//    string url = String.Format(baseUrl, rNum);
//    string info = HtmlCls.GetHtmlByUrl(url, Encoding.UTF8);
//    if (!string.IsNullOrEmpty(info) && info != "[]")
//    {
//        JsonCls.JsonObject json = JsonCls.JsonConvert.DeserializeObject(info);
//        // initialise
//        JsonCls.JsonConvert.SetJson(new JsonCls.JsonObject());
//        return (JsonCls.JsonObject)json[rNum];
//    }
//    return null;
//}

/// <summary>
/// Checks whether a product is in stock in Sichuan.
/// </summary>
/// <param name="proId">Product id on site 002.</param>
/// <returns>
/// <c>true</c> when the stock response contains both "有货" and "四川";
/// <c>false</c> otherwise, including when the download returned nothing.
/// </returns>
public static bool CheckScStock(string proId)
{
    const string baseUrl = "http://www.redbaby.com.cn/catalog/product/getStockInfo?id={0}";

    string info = HtmlCls.GetHtmlByUrl(String.Format(baseUrl, proId), Encoding.UTF8);

    // Check before re-encoding: GetBytes(null) would throw ArgumentNullException.
    if (string.IsNullOrEmpty(info))
    {
        return false;
    }

    // NOTE(review): round-trips the text through the default code page back to UTF-8,
    // presumably to repair a mis-decoded response — confirm against HtmlCls.GetHtmlByUrl.
    info = Encoding.UTF8.GetString(Encoding.Default.GetBytes(info));

    return info.Contains("有货") && info.Contains("四川");
}
/// <summary>
/// Collects product links starting from a listing page, following the "pN" paging suffix
/// until enough links have been gathered.
/// </summary>
/// <param name="listUrl">Listing page URL.</param>
/// <param name="deepth">Scan depth: the maximum number of links to return.</param>
/// <returns>
/// At most <paramref name="deepth"/> links; fewer when the listing runs out. When an
/// exception occurs it is logged and the links gathered so far are returned.
/// </returns>
public static IEnumerable<string> GetUrlListFromList(string listUrl, int deepth)
{
    var listArea = new List<string>();
    try
    {
        if (string.IsNullOrEmpty(listUrl))
        {
            return listArea;
        }

        listUrl = listUrl.TrimEnd('/');
        var docHtml = HtmlCls.GetHtmlByUrl(listUrl, Encoding.UTF8);

        // The download result is what must be checked here, not the URL.
        if (string.IsNullOrEmpty(docHtml))
        {
            return listArea;
        }

        docHtml = RegexHelper.ClearTrn(docHtml);
        const string prolink = "<div[^>]*class=\"globalProductName\">\\s*<a[^>]*href=['|\"]([^'\"]*)['|\"][^>]*>";
        var page = 1;
        listArea = RegexHelper.Matches(docHtml, prolink);

        // Paging: keep requesting the next page until enough links are collected.
        while (listArea.Count < deepth)
        {
            // Continue from the page number already present at the end of the URL, if any.
            var pstr = RegexHelper.Match(listUrl, @"p(\d+)?$");
            if (!string.IsNullOrEmpty(pstr))
            {
                page = Convert.ToInt32(pstr);
            }

            page++;
            listUrl = Regex.Replace(listUrl, @"p(\d+)$", "") + "p" + page;

            docHtml = HtmlCls.GetHtmlByUrl(listUrl, Encoding.UTF8);
            if (string.IsNullOrEmpty(docHtml))
            {
                break;
            }

            var pageLinks = RegexHelper.Matches(docHtml, prolink);
            if (pageLinks.Count == 0)
            {
                // A page without product links means the listing is exhausted; without this
                // guard the loop would keep requesting pages forever.
                break;
            }

            listArea.AddRange(pageLinks);
        }

        listArea = listArea.Take(deepth).ToList();
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(Err, ex);
    }

    return listArea;
}
/// <summary>
/// Gets the stock code of the loaded product page.
/// </summary>
/// <returns>
/// 1 when a non-empty "availGreen" block is present, 0 when it is not,
/// -1 (after logging) when an exception occurs.
/// </returns>
public override int GetStockCode()
{
    try
    {
        GetHtml(SiteEncoding);
        var availability = HtmlCls.GetHtmlByCss(DocHtml, "availGreen").FirstOrDefault();
        return availability.IsNullOrEmpty() ? 0 : 1;
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
        return -1;
    }
}
/// <summary>
/// Collects product page URLs from a listing page.
/// </summary>
/// <param name="listUrl">Listing page URL.</param>
/// <returns>
/// Distinct product links (site base URL followed by the "/emall/prd_…" path) found in the
/// "proShow" block; an empty list (after logging) when an exception occurs.
/// </returns>
public override IEnumerable<string> GetUrlList(string listUrl)
{
    try
    {
        using (var http = new HttpHelper(listUrl, SiteEncoding))
        {
            var page = RegexHelper.ClearTrn(http.GetHtml());
            var productArea = HtmlCls.GetHtmlById(page, "proShow");

            // The base URL is spliced into the pattern as-is.
            var linkReg = "<a[^>]*href=[\"']?(" + GetWebSiteInfo().BaseUrl +
                          "/emall/prd_\\d+_\\d+_-\\d+_\\d+_.html)[\"']?[^>]*>";

            return RegexHelper.Matches(productArea, linkReg).Distinct().ToList();
        }
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
        return new List<string>();
    }
}
/// <summary>
/// Gets the stock code of the loaded product by querying the site's stock endpoint.
/// </summary>
/// <returns>
/// 1 when the endpoint's "result" value is "Y", 0 for any other value,
/// -1 when any exception occurs (the exception is not logged).
/// </returns>
public override int GetStockCode()
{
    try
    {
        GetHtml(SiteEncoding);

        // The second "prodNum" block carries the product number; a missing block throws
        // and is reported as -1 by the catch below.
        var numberBlock = HtmlCls.GetHtmlByCss(DocHtml, "prodNum").ElementAt(1);
        var proNum = Regex.Replace(numberBlock, "</?[0-9a-zA-Z]+[^>]*>", "")
            .Replace("商品编号:", "")
            .Trim();

        // The base URL is part of the format string.
        var template = GetWebSiteInfo().BaseUrl + "/ec/homeus/browse/exactMethod.jsp?goodsNo={0}&city=71010000";
        var stockUrl = String.Format(template, proNum);

        using (var http = new HttpHelper(stockUrl, SiteEncoding))
        {
            var flag = RegexHelper.Match(http.GetHtml(), "\"result\":\"([a-zA-Z])\"");
            if (flag == "Y")
            {
                return 1;
            }

            return 0;
        }
    }
    catch (Exception)
    {
        return -1;
    }
}
/// <summary>
/// Reads the struck-through (market) price from a product page.
/// </summary>
/// <param name="docHtml">HTML of the product page.</param>
/// <returns>
/// The price found in the "summary" block, or in the "book-price" block as a fallback,
/// rounded to whole units above 100 and to one decimal otherwise; 0 when no price can be
/// read or an exception occurs.
/// </returns>
public static decimal GetMarketerPrice(string docHtml)
{
    const string priceReg = "<del>¥([^<]+)</del>";

    decimal mprice = 0;
    try
    {
        string str = RegexHelper.Match(HtmlCls.GetHtmlById(docHtml, "summary"), priceReg);
        if (string.IsNullOrEmpty(str))
        {
            // Book pages use a different container for the price.
            str = RegexHelper.Match(HtmlCls.GetHtmlById(docHtml, "book-price"), priceReg);
        }

        // Parse with the invariant culture so "." is always the decimal separator, whatever
        // the server locale; a missing or malformed price is an expected case, not an exception.
        if (string.IsNullOrEmpty(str) ||
            !decimal.TryParse(
                str.Replace(",", ""),
                System.Globalization.NumberStyles.Number,
                System.Globalization.CultureInfo.InvariantCulture,
                out mprice))
        {
            mprice = 0;
        }
    }
    catch (Exception)
    {
        // Best effort: failures in the HTML helpers yield a zero price.
        mprice = 0;
    }

    return mprice > 100 ? Math.Round(mprice, 0) : Math.Round(mprice, 1);
}
/// <summary>
/// Downloads a listing page and extracts the product links from its "J_itemList" block.
/// </summary>
/// <param name="url">Listing page URL.</param>
/// <param name="next">
/// Receives the URL-decoded link matched by the "ui-page-s-next" pattern; stays empty when
/// the page could not be downloaded.
/// </param>
/// <returns>
/// The URL-decoded product links, with <c>BaseUrl</c> prepended to those starting with "/";
/// an empty list when the download returned nothing.
/// </returns>
private static IEnumerable<string> GetUrlsFromHtml(string url, out string next)
{
    // tmall renders the front end differently depending on the cookie, so a fixed one is sent.
    const string cookie =
        "x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; cna=qMo3B45XYmoCAct2enaIrZoT; t=9bfd6b376a1f1e450056f0e1b1c54240; tracknick=luoyong87610; mpp=t%3D0%26m%3D%26h%3D0%26l%3D0; uc1=x; cookie2=8eb29ff22cbe3bddcad34d264d01806f; passtime=1341280687588; isFirstOpen=false; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0";
    const string nextPageReg =
        "<a[^>]*href=['\"]([^'\"\\s]+)['\"][^>]*class=['\"]ui-page-s-next['\"][^>]*>";
    // Alternative pattern, kept for reference:
    // "<a[^>]*class=['\"]product-title['\"][^>]*href=['\"]([^'\"]+)['\"][^>]*>"
    const string regStr =
        "<a[^>]*href=['\"]([^'\"\\s]+?)['\"][^>]*class=['\"]product-Img['\"][^>]*>";

    next = "";

    string pageHtml = HtmlCls.GetHtmlByUrl(url, Encoding.Default, cookie);
    if (string.IsNullOrEmpty(pageHtml))
    {
        return new List<string>();
    }

    pageHtml = RegexHelper.ClearBr(pageHtml);
    next = Utils.UrlDecode(RegexHelper.Match(pageHtml, nextPageReg));

    var listHtml = HtmlCls.GetHtmlById(pageHtml, "J_itemList");
    return RegexHelper.Matches(listHtml, regStr)
        .Select(link => (link.StartsWith("/") ? BaseUrl : "") + Utils.UrlDecode(link))
        .ToList();
}
/// <summary>
/// Collects product page URLs from a listing page.
/// </summary>
/// <param name="listUrl">Listing page URL.</param>
/// <returns>
/// Distinct, non-empty links matched in the "pic" blocks, passed through
/// <c>Utils.GetAbsoluteUrl</c>; an empty list when the page stays empty after one retry or
/// when any exception occurs (the exception is not logged).
/// </returns>
public override IEnumerable<string> GetUrlList(string listUrl)
{
    try
    {
        using (var http = new HttpHelper(listUrl, SiteEncoding))
        {
            var page = http.GetHtml();
            if (page.IsNullOrEmpty())
            {
                // First attempt came back empty: ask once more (the retry is not cleaned).
                page = http.GetHtml();
            }
            else
            {
                page = RegexHelper.ClearTrn(page);
            }

            if (page.IsNullOrEmpty())
            {
                return new List<string>();
            }

            return HtmlCls.GetHtmlByCss(page, "pic")
                .Select(block => RegexHelper.Match(block, "<a[^>]*href=[\"']?([^\"'>;]+)(;[^\"'>]*)?[\"']?[^>]*>"))
                .Distinct()
                .Where(link => !link.IsNullOrEmpty())
                .Select(link => Utils.GetAbsoluteUrl(GetWebSiteInfo().BaseUrl, link))
                .ToList();
        }
    }
    catch (Exception)
    {
        return new List<string>();
    }
}