/// <summary>
/// Downloads a product listing page and collects the distinct product link
/// matches found in its thumbnail fragments.
/// </summary>
/// <param name="url">Address of the listing page to download.</param>
/// <returns>
/// The distinct results of applying the anchor pattern to every thumbnail
/// fragment; an empty list when the page could not be downloaded or no listing
/// container could be located.
/// </returns>
private static IEnumerable<string> GetUrlsFromHtml(string url)
{
    string docHtml = HtmlCls.GetHtmlByUrl(url, SiteEncoding);
    if (string.IsNullOrEmpty(docHtml))
    {
        return new List<string>();
    }

    docHtml = RegexHelper.ClearTrn(docHtml);

    // Some listings (e.g. books) use a different markup: the default layout keeps
    // its items in the element with id "plist" and thumbnails under "p-img";
    // the alternative layout uses the "list-h" / "i-img" css classes instead.
    var cssName = "p-img";
    var listHtml = HtmlCls.GetHtmlById(docHtml, "plist");
    if (listHtml.IsNullOrEmpty())
    {
        cssName = "i-img";
        listHtml = HtmlCls.GetHtmlByCss(docHtml, "list-h").FirstOrDefault();
    }

    // Neither layout was found (FirstOrDefault yields null in that case):
    // there is nothing to scan, so do not hand a null fragment to the parser.
    if (string.IsNullOrEmpty(listHtml))
    {
        return new List<string>();
    }

    // The pattern captures the href value without its "#fragment" part
    // (presumably RegexHelper.Match returns that first capture group - confirm).
    const string linkPattern = "<a[^>]*href=[\"']?([^\"'>#]+)(#[^\"'>]*)?[\"']?[^>]*>";

    return HtmlCls.GetHtmlByCss(listHtml, cssName)
        .Select(t => RegexHelper.Match(t, linkPattern))
        .Distinct()
        .ToList();
}
/// <summary>
/// Downloads a Tmall listing page and extracts url, title and price for every
/// product fragment on it.
/// </summary>
/// <param name="url">Address of the listing page to download.</param>
/// <param name="next">
/// Receives the URL-decoded result of matching the "next page" anchor
/// (class "ui-page-s-next"); stays an empty string when the page could not be
/// downloaded.
/// </param>
/// <returns>
/// One <c>TamllBase</c> per product fragment; an empty list when the page could
/// not be downloaded. A product whose price text cannot be parsed gets a price
/// of 0 instead of aborting the whole page.
/// </returns>
private static IEnumerable<TamllBase> GetTUrlsFromHtml(string url, out string next)
{
    next = "";
    var urls = new List<TamllBase>();

    // Tmall renders different markup depending on the cookie, so a fixed one is sent.
    const string cookie = "x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; cna=qMo3B45XYmoCAct2enaIrZoT; t=9bfd6b376a1f1e450056f0e1b1c54240; tracknick=luoyong87610; mpp=t%3D0%26m%3D%26h%3D0%26l%3D0; uc1=x; cookie2=22291aea11e397a82512118642ac0abe; passtime=1341285069752; isFirstOpen=true";

    string docHtml = HtmlCls.GetHtmlByUrl(url, Encoding.Default, cookie);
    if (string.IsNullOrEmpty(docHtml))
    {
        return urls;
    }

    docHtml = RegexHelper.ClearBr(docHtml);
    next = Utils.UrlDecode(RegexHelper.Match(docHtml, "<a[^>]*href=['\"]([^'\"\\s]+)['\"][^>]*class=['\"]ui-page-s-next['\"][^>]*>"));

    var listHtml = HtmlCls.GetHtmlById(docHtml, "J_itemList");
    var list = HtmlCls.GetHtmlByCss(listHtml, "product");

    // Group 1: product url, group 2: product name.
    const string regStr = "<a[^>]*href=['\"]([^'\"\\s]+?)['\"][^>]*class=['\"]product-title['\"][^>]*title=['\"]([^'\"]+?)['\"][^>]*>";
    // Price is carried in the title attribute of the "product-normal" span.
    const string priceReg = "<span[^>]*class=['\"]product-normal['\"][^>]*title=['\"]([^'\"\\s]+)['\"][^>]*>";

    foreach (var item in list)
    {
        // The site emits prices with '.' as decimal separator, so parse with the
        // invariant culture; a missing or malformed price must not throw and
        // discard every other product on the page.
        decimal price;
        if (!decimal.TryParse(
                RegexHelper.Match(item, priceReg),
                System.Globalization.NumberStyles.Number,
                System.Globalization.CultureInfo.InvariantCulture,
                out price))
        {
            price = 0m;
        }

        urls.Add(new TamllBase
        {
            Url = Utils.UrlDecode(RegexHelper.Match(item, regStr, 1)),
            Title = RegexHelper.Match(item, regStr, 2),
            Price = price
        });
    }

    return urls;
}
/// <summary>
/// Builds the html table holding the large product pictures.
/// </summary>
/// <param name="jdNum">Product id placed into the "bigimage.aspx?id=" address.</param>
/// <returns>
/// A 750px wide table with one image row per picture found, or an empty string
/// when the page is empty, no picture is found or an exception was logged.
/// </returns>
public static string GetProBigPics(string jdNum)
{
    string picArea = "";
    try
    {
        string picHtml = HtmlCls.GetHtmlByUrl(String.Format(BaseUrl + "/bigimage.aspx?id={0}", jdNum));
        if (string.IsNullOrEmpty(picHtml))
        {
            return picArea;
        }

        // Only the first fragment with css class "right" is scanned for pictures.
        string rightColumn = HtmlCls.GetHtmlByCss(picHtml, "right").FirstOrDefault();
        var pictureTails = RegexHelper.Matches(rightColumn, "http://img10.360buyimg.com/n5([^'\"]*)");
        if (pictureTails.Any())
        {
            // Each match found under the ".../n5" path is re-emitted under ".../n0".
            string[] rows = pictureTails
                .Select(tail => "<tr><td><img src=\"http://img10.360buyimg.com/n0" + tail + "\" /></td></tr>")
                .ToArray();

            picArea = "<table width=\"750\" align=\"center\" border=\"0\" cellSpacing=\"0\" cellPadding=\"0\">"
                      + string.Join("", rows)
                      + "</table>";
        }
    }
    catch (Exception ex)
    {
        // Best effort: failures are logged and an empty string is returned.
        FileHelper.WriteException(ex);
    }

    return picArea;
}
/// <summary>
/// Builds the product description html from the "detailBox" fragments of a
/// product page and cleans it up for re-publication.
/// </summary>
/// <param name="docHtml">Html of the product page.</param>
/// <param name="sanId">Product id used to request the product images.</param>
/// <returns>
/// The cleaned description html, or an empty string when the page has no
/// "detailBox" fragment.
/// </returns>
public static string GetProDesc(string docHtml, string sanId)
{
    var area = HtmlCls.GetHtmlByCss(docHtml, "detailBox");
    if (area == null)
    {
        return "";
    }

    // Materialise once so the fragments are not enumerated twice.
    var fragments = area.ToList();
    if (fragments.Count == 0)
    {
        return "";
    }

    var desc = string.Join("", fragments.ToArray());

    // The images are served by a separate request.
    const string imgUrl = "http://www.sanfo.com/shop/product.info.asp?command=findthumb&vid={0}";
    var imgs = HtmlCls.GetHtmlByUrl(String.Format(imgUrl, sanId), Encoding.UTF8);

    // Plain string replacement: the downloaded html must not be used as a regex
    // replacement pattern, where any '$' in it would be read as a substitution.
    desc = desc.Replace(
        "<dt class=\"detailImg\" id=\"item_product_images\"></dt>",
        "<dt class=\"detailImg\" id=\"item_product_images\">" + imgs + "</dt>");

    // Unwrap links, keeping their inner html. (The previous pattern contained
    // "[^]*", which .NET parses as one large negated character class.)
    desc = Regex.Replace(
        desc,
        "<a\\b[^>]*href=[\"'][^'\"]*[\"'][^>]*>(.*?)</a>",
        "$1",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Drop script elements, including those whose body contains '<' or line breaks.
    desc = Regex.Replace(
        desc,
        "<script\\b[^>]*>.*?</script>",
        "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Strip class and style attributes.
    desc = Regex.Replace(desc, "(\\s*class=\"[^\"]+\")|(\\s*style=\"[^\"]+\")", "");

    // Make root-relative sources absolute; protocol-relative "//host/..." sources
    // are already absolute and are left alone.
    desc = Regex.Replace(desc, "src=\"(/(?!/)[^\"]+)\"", "src=\"" + SanfoUrl + "$1\"");

    // Replace the shop name, with or without the "户外" suffix.
    desc = Regex.Replace(desc, "三夫(户外)?", "本商城");

    return desc;
}
/// <summary>
/// Looks up a product link by its product number (the cooperation model
/// changed: category numbers are no longer provided, so the site search is
/// used instead).
/// </summary>
/// <param name="num">Product number used as the search keyword.</param>
/// <returns>
/// The result of <c>Utils.GetAbsoluteUrl</c> applied to the site root and the
/// link matched in the first "globalProductName" block of the search page.
/// </returns>
public static string GetProUrlFromNum(string num)
{
    // Escape the keyword so characters such as '&', '#' or spaces cannot
    // truncate or alter the query string.
    string searchUrl = RedBabyUrl + "/search?keyword=" + System.Uri.EscapeDataString(num ?? "");
    string html = HtmlCls.GetHtmlByUrl(searchUrl, Encoding.UTF8);

    const string proReg = "<div[^>]*class=\"globalProductName\"[^>]*>\\s*<a[^>]*href=['|\"]([^'\"]*)['|\"][^>]*>";

    return Utils.GetAbsoluteUrl(RedBabyUrl, RegexHelper.Match(html, proReg));
}
/// <summary>
/// Downloads a listing page and returns the product links found inside the
/// element with id "Id_prodItemList".
/// </summary>
/// <param name="url">Address of the listing page to download.</param>
/// <returns>
/// The matched links, with <c>SanfoUrl</c> prepended to those starting with "/";
/// an empty list when the page could not be downloaded.
/// </returns>
private static IEnumerable<string> GetUrlsFromHtml(string url)
{
    string pageHtml = HtmlCls.GetHtmlByUrl(url);
    if (string.IsNullOrEmpty(pageHtml))
    {
        return new List<string>();
    }

    const string linkPattern = "<div[^>]*class=['\"]proPic['\"][^>]*><a[^>]*href=['\"]([^'\"]+)['\"][^>]*>";
    var itemListHtml = HtmlCls.GetHtmlById(pageHtml, "Id_prodItemList");
    List<string> found = RegexHelper.Matches(itemListHtml, linkPattern);

    // Root-relative links are made absolute; everything else is kept as matched.
    return (from link in found
            select link.StartsWith("/") ? SanfoUrl + link : link).ToList();
}
/// <summary>
/// Determines the stock code for the Chengdu area from a product page.
/// </summary>
/// <param name="docHtml">Html of the product page.</param>
/// <returns>-1: unknown; 0: out of stock; 1: in stock.</returns>
public static int GetCdStockCode(string docHtml)
{
    int code = -1;
    try
    {
        docHtml = RegexHelper.ClearBr(docHtml);

        string stockHtml;
        string statePattern;

        // Some pages reference a stock script directly, e.g.
        //   <script type="text/javascript" src="http://price.360buy.com/ows/stock/pshow-0F5D...987.html"></script>
        string stockUrl = RegexHelper.Match(docHtml, "(http://price.360buy.com/ows/stock/pshow-[a-zA-Z0-9]*.html)");
        if (!string.IsNullOrEmpty(stockUrl))
        {
            // That script looks like:
            //   var stockdata = [{"Wid":183192,"Rid":6,"Stock":34,...},{"Wid":183192,"Rid":4,"Stock":33,...}];
            // and the page js maps the ids as:
            //   orgname     = { 6: Beijing, 3: Shanghai, 10: Guangzhou, 4: Chengdu, 5: Wuhan, 7: Nanjing, 8: Jinan, 9: Shenyang }
            //   stockstatus = { 33: in stock, 34: out of stock, 36: pre-order, 39: in transit, 0: counting }
            // Only the Chengdu warehouse (Rid 4) is inspected.
            stockHtml = HtmlCls.GetHtmlByUrl(stockUrl, SiteEncoding);
            statePattern = "\"Rid\":4,\"Stock\":([^,]+),";
        }
        else
        {
            // No stock script on the page: query the province level stock
            // service (provinceid 22) with the sku id taken from the page.
            string skUid = RegexHelper.Match(docHtml, "wareinfo.*sid[^\"]*\"([0-9a-zA-Z]*)\"");
            string provinceUrl =
                "http://price.360buy.com/stocksoa/StockHandler.ashx?callback=getProvinceStockCallback&type=provincestock&skuid=" +
                skUid + "&provinceid=22";
            stockHtml = HtmlCls.GetHtmlByUrl(provinceUrl, SiteEncoding);
            statePattern = "\"StockState\":(\\w+),";
        }

        // An empty response leaves the code at -1 (unknown).
        if (!string.IsNullOrEmpty(stockHtml))
        {
            string stockCode = RegexHelper.Match(stockHtml, statePattern);
            if (stockCode == "33")
            {
                code = 1;
            }
            else
            {
                code = 0;
            }
        }
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
    }

    return code;
}
/// <summary>
/// Fetches the product description that a product page loads from the address
/// stored in its "apiItemDesc" setting (the page shows it in the element with
/// id J_DivItemDesc).
/// </summary>
/// <param name="link">Address of the product page.</param>
/// <returns>
/// The description payload with the <c>var desc='</c> prefix and trailing quote
/// removed; an empty string when the page, the description address or the
/// description itself is unavailable.
/// </returns>
public static string GetProDesc(string link)
{
    var docHtml = HtmlCls.GetHtmlByUrl(link);
    if (string.IsNullOrEmpty(docHtml))
    {
        return "";
    }

    var desurl = RegexHelper.Match(docHtml, "\"apiItemDesc\":\"([^\"]+?)\"");
    if (string.IsNullOrEmpty(desurl))
    {
        return "";
    }

    // The address is embedded in a script literal; drop the backslashes in it.
    desurl = desurl.Replace("\\", "");

    var desc = HtmlCls.GetHtmlByUrl(desurl);
    if (string.IsNullOrEmpty(desc))
    {
        // A failed download must not surface as a NullReferenceException.
        return "";
    }

    return desc.Replace("var desc='", "").TrimEnd('\'');
}
/// <summary>
/// Collects product links from a listing page, following the "p{n}" paging
/// suffix until enough links have been gathered.
/// </summary>
/// <param name="listUrl">Address of the listing page.</param>
/// <param name="deepth">Scan depth: the maximum number of links to return.</param>
/// <returns>
/// At most <paramref name="deepth"/> links in page order; an empty list when the
/// address is empty or the first page cannot be downloaded. Exceptions are
/// logged and whatever was collected so far is returned.
/// </returns>
public static IEnumerable<string> GetUrlListFromList(string listUrl, int deepth)
{
    var listArea = new List<string>();
    try
    {
        if (string.IsNullOrEmpty(listUrl))
        {
            return listArea;
        }

        listUrl = listUrl.TrimEnd('/');
        var docHtml = HtmlCls.GetHtmlByUrl(listUrl, Encoding.UTF8);
        // The downloaded html is what has to be checked here, not the address.
        if (string.IsNullOrEmpty(docHtml))
        {
            return listArea;
        }

        docHtml = RegexHelper.ClearTrn(docHtml);
        const string prolink = "<div[^>]*class=\"globalProductName\">\\s*<a[^>]*href=['|\"]([^'\"]*)['|\"][^>]*>";
        var page = 1;
        listArea = RegexHelper.Matches(docHtml, prolink);

        // Paging: keep requesting the next page while more links are needed.
        while (listArea.Count < deepth)
        {
            // Continue from the page number already present in the address, if any.
            var pstr = RegexHelper.Match(listUrl, @"p(\d+)?$");
            if (!string.IsNullOrEmpty(pstr))
            {
                page = Convert.ToInt32(pstr);
            }

            page++;
            listUrl = Regex.Replace(listUrl, @"p(\d+)$", "") + "p" + page;

            docHtml = HtmlCls.GetHtmlByUrl(listUrl, Encoding.UTF8);
            if (string.IsNullOrEmpty(docHtml))
            {
                break;
            }

            // Normalise follow-up pages the same way as the first one.
            var pageLinks = RegexHelper.Matches(RegexHelper.ClearTrn(docHtml), prolink);

            // A page without product links means the listing is exhausted; without
            // this stop the loop would request pages forever.
            if (pageLinks == null || pageLinks.Count == 0)
            {
                break;
            }

            listArea.AddRange(pageLinks);
        }

        listArea = listArea.Take(deepth).ToList();
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(Err, ex);
    }

    return listArea;
}
// Retired helper, kept commented out for reference: fetched the price
// information ([market_price], [price]) for a product id of the 002 site.
//public static JsonCls.JsonObject GetPriceInfo(string rNum)
//{
//    const string baseUrl = "http://www.redbaby.com.cn/catalog/category/getPriceInfo?ids={0}";
//    string url = String.Format(baseUrl, rNum);
//    string info = HtmlCls.GetHtmlByUrl(url, Encoding.UTF8);
//    if (!string.IsNullOrEmpty(info) && info != "[]")
//    {
//        JsonCls.JsonObject json = JsonCls.JsonConvert.DeserializeObject(info);
//        // re-initialise
//        JsonCls.JsonConvert.SetJson(new JsonCls.JsonObject());
//        return (JsonCls.JsonObject)json[rNum];
//    }
//    return null;
//}

/// <summary>
/// Checks whether the product is reported as in stock for Sichuan.
/// </summary>
/// <param name="proId">Product id on the 002 site.</param>
/// <returns>
/// <c>true</c> when the stock response contains both "有货" and "四川";
/// <c>false</c> otherwise, including when no response was received.
/// </returns>
public static bool CheckScStock(string proId)
{
    const string baseUrl = "http://www.redbaby.com.cn/catalog/product/getStockInfo?id={0}";
    string url = String.Format(baseUrl, proId);
    string info = HtmlCls.GetHtmlByUrl(url, Encoding.UTF8);

    // Check before transcoding: Encoding.GetBytes throws on a null string.
    if (string.IsNullOrEmpty(info))
    {
        return false;
    }

    // NOTE(review): re-reads the system-default-encoded bytes of the text as
    // UTF-8; presumably compensates for how the response is decoded - confirm.
    info = Encoding.UTF8.GetString(Encoding.Default.GetBytes(info));

    return info.IndexOf("有货", StringComparison.Ordinal) > -1
           && info.IndexOf("四川", StringComparison.Ordinal) > -1;
}
/// <summary>
/// Downloads a Tmall listing page and returns the links of the product images
/// found inside the element with id "J_itemList".
/// </summary>
/// <param name="url">Address of the listing page to download.</param>
/// <param name="next">
/// Receives the URL-decoded result of matching the "next page" anchor
/// (class "ui-page-s-next"); stays an empty string when the page could not be
/// downloaded.
/// </param>
/// <returns>
/// The URL-decoded links, prefixed with <c>BaseUrl</c> when the raw match starts
/// with "/"; an empty list when the page could not be downloaded.
/// </returns>
private static IEnumerable<string> GetUrlsFromHtml(string url, out string next)
{
    next = "";

    // Tmall renders different markup depending on the cookie, so a fixed one is sent.
    const string cookie = "x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; cna=qMo3B45XYmoCAct2enaIrZoT; t=9bfd6b376a1f1e450056f0e1b1c54240; tracknick=luoyong87610; mpp=t%3D0%26m%3D%26h%3D0%26l%3D0; uc1=x; cookie2=8eb29ff22cbe3bddcad34d264d01806f; passtime=1341280687588; isFirstOpen=false; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0";

    string pageHtml = HtmlCls.GetHtmlByUrl(url, Encoding.Default, cookie);
    if (string.IsNullOrEmpty(pageHtml))
    {
        return new List<string>();
    }

    pageHtml = RegexHelper.ClearBr(pageHtml);

    const string nextPattern = "<a[^>]*href=['\"]([^'\"\\s]+)['\"][^>]*class=['\"]ui-page-s-next['\"][^>]*>";
    next = Utils.UrlDecode(RegexHelper.Match(pageHtml, nextPattern));

    // Links are taken from the anchors carrying the "product-Img" class.
    const string linkPattern = "<a[^>]*href=['\"]([^'\"\\s]+?)['\"][^>]*class=['\"]product-Img['\"][^>]*>";
    var itemListHtml = HtmlCls.GetHtmlById(pageHtml, "J_itemList");
    List<string> rawLinks = RegexHelper.Matches(itemListHtml, linkPattern);

    return (from raw in rawLinks
            select (raw.StartsWith("/") ? BaseUrl : "") + Utils.UrlDecode(raw)).ToList();
}
/// <summary>
/// Determines the stock code of the current product page.
/// </summary>
/// <returns>
/// 1 when the product is reported in stock, 0 when it is not, and -1 when no
/// stock response was received or an exception was logged.
/// </returns>
public override int GetStockCode()
{
    int code = -1;
    try
    {
        GetHtml(SiteEncoding);

        // Some pages reference a stock script directly, e.g.
        //   <script type="text/javascript" src="http://price.360buy.com/ows/stock/pshow-0F5D...987.html"></script>
        string stockUrl = RegexHelper.Match(DocHtml, "(http://price.360buy.com/ows/stock/pshow-[a-zA-Z0-9]*.html)");
        if (!string.IsNullOrEmpty(stockUrl))
        {
            // That script looks like:
            //   var stockdata = [{"Wid":183192,"Rid":6,"Stock":34,...},{"Wid":183192,"Rid":4,"Stock":33,...}];
            // and the page js maps the ids as:
            //   orgname     = { 6: Beijing, 3: Shanghai, 10: Guangzhou, 4: Chengdu, 5: Wuhan, 7: Nanjing, 8: Jinan, 9: Shenyang }
            //   stockstatus = { 33: in stock, 34: out of stock, 36: pre-order, 39: in transit, 0: counting }
            string warehouseJs = HtmlCls.GetHtmlByUrl(stockUrl, SiteEncoding);
            if (!string.IsNullOrEmpty(warehouseJs))
            {
                // Only the Chengdu warehouse (Rid 4) is inspected.
                string warehouseState = RegexHelper.Match(warehouseJs, "\"Rid\":4,\"Stock\":([^,]+),");
                code = warehouseState == "33" ? 1 : 0;
            }

            return code;
        }

        // No stock script on the page: ask a stock service using the sku id and
        // the "type" value found in the page.
        string skUid = RegexHelper.Match(DocHtml, "[\"']?((skuidkey)|(sid))[\"']?:\\s*[\"']([0-9a-zA-Z]+)[\"']", 4);
        var type = RegexHelper.Match(DocHtml, "type:\\s*(\\d+)");
        bool cityLevel = type == "1";

        // Type "1" pages use the city level service, all others the province level one.
        string sUrl = cityLevel
            ? "http://price.360buy.com/stocksoa/StockHandler.ashx?callback=getProvinceStockCallback&type=ststock&skuid=" + skUid + "&provinceid=22&cityid=1930&areaid=1945"
            : "http://st.3.cn/gsi.html?callback=gSC&type=provincestock&skuid=" + skUid + "&provinceid=22";

        string stockHtml = HtmlCls.GetHtmlByUrl(sUrl, SiteEncoding);
        if (string.IsNullOrEmpty(stockHtml))
        {
            return code;
        }

        // An explicit "有货" state name settles it without looking at the code.
        var stockStr = RegexHelper.Match(stockHtml, "\"StockStateName\":\"([^\"]+)\"", 1);
        if (stockStr == "有货")
        {
            return 1;
        }

        string statePattern = cityLevel ? "\"S\":\"1-(\\d+)-1-0-0\"" : "\"StockState\":(\\w+),";
        var scode = RegexHelper.Match(stockHtml, statePattern);
        if (scode == "33")
        {
            code = 1;
        }
        else
        {
            code = 0;
        }
    }
    catch (Exception ex)
    {
        FileHelper.WriteException(ex);
    }

    return code;
}