/// <summary>
/// Saves a spider error record through a connection obtained from <c>_dbFactory</c>.
/// An exception thrown by the save call is logged under the "DBError" category and is not rethrown.
/// </summary>
/// <param name="error">The error record passed to the connection's Save call.</param>
public void SaveSpiderError(SpiderError error)
{
    using (var connection = _dbFactory.OpenDbConnection())
    {
        try
        {
            connection.Save(error);
        }
        catch (Exception saveFailure)
        {
            // Best-effort persistence: record the failure and let the caller continue.
            LogServer.WriteLog(saveFailure, "DBError");
        }
    }
}
/// <summary>
/// Tells whether a list page contains the product list marker <c>id="plist"</c>.
/// </summary>
/// <param name="pageHtml">HTML of the list page to inspect.</param>
/// <returns>
/// <c>true</c> when the marker is present; otherwise <c>false</c>, after a new
/// <c>SpiderError</c> (Lvevl 5) describing the page has been stored in <c>ErrorInfo</c>.
/// </returns>
public bool HasProducts(string pageHtml)
{
    bool containsProductList = pageHtml.Contains("id=\"plist\"");
    if (!containsProductList)
    {
        // Record the failing page together with the site it belongs to.
        ErrorInfo = new SpiderError();
        ErrorInfo.Lvevl = 5;
        ErrorInfo.ErrType = "列表也没有产品";
        ErrorInfo.PageHtml = pageHtml;
        ErrorInfo.SiteId = Baseinfo.SiteId;
        ErrorInfo.SiteName = Baseinfo.SiteName;
    }

    return containsProductList;
}
/// <summary>
/// Reads the comment count from a single product's HTML fragment using <c>Reginfo.CommentCountReg</c>.
/// </summary>
/// <param name="singleHtml">HTML fragment of one product entry.</param>
/// <returns>
/// The extracted count, or 0 when the pattern does not match; in that case a
/// <c>SpiderError</c> (Lvevl 4) is stored in <c>ErrorInfo</c>.
/// </returns>
public int GetComments(string singleHtml)
{
    if (regIsMatch(singleHtml, Reginfo.CommentCountReg))
    {
        return RegGroupsX<int>(singleHtml, Reginfo.CommentCountReg);
    }

    // The error record needs a site id, so load site 1 when no site info is set yet.
    if (Baseinfo == null)
    {
        Baseinfo = new SiteInfoDB().SiteById(1);
    }

    var failure = new SpiderError
    {
        Lvevl = 4,
        UrlPath = "",
        SiteId = Baseinfo.SiteId,
        ErrType = "获取评论错误",
        SingleHtml = singleHtml
    };
    ErrorInfo = failure;
    return 0;
}
/// <summary>
/// Extracts the product title from a single product's HTML fragment using <c>Reginfo.TitleReg</c>.
/// </summary>
/// <param name="singleHtml">HTML fragment of one product entry.</param>
/// <returns>
/// The title when <c>ValidItemName</c> accepts it; otherwise an empty string, after a
/// <c>SpiderError</c> (Lvevl 4) has been stored in <c>ErrorInfo</c>.
/// </returns>
public string GetSpName(string singleHtml)
{
    var productTitle = RegGroupsX<string>(singleHtml, Reginfo.TitleReg);
    if (ValidItemName(productTitle))
    {
        return productTitle;
    }

    // Title rejected: keep the offending fragment for later diagnosis.
    ErrorInfo = new SpiderError();
    ErrorInfo.Lvevl = 4;
    ErrorInfo.SingleHtml = singleHtml;
    ErrorInfo.SiteId = Baseinfo.SiteId;
    ErrorInfo.ErrType = "产品标题错误";
    return "";
}
/// <summary>
/// Extracts the product URL from a single product's HTML fragment using <c>Reginfo.UrlReg</c>.
/// </summary>
/// <param name="singleHtml">HTML fragment of one product entry.</param>
/// <returns>
/// The URL when <c>ValidItemurl</c> accepts it; otherwise an empty string, after a
/// <c>SpiderError</c> (Lvevl 4) has been stored in <c>ErrorInfo</c>.
/// </returns>
public string GetSpUrl(string singleHtml)
{
    var productUrl = RegGroupsX<string>(singleHtml, Reginfo.UrlReg);
    if (ValidItemurl(productUrl))
    {
        return productUrl;
    }

    // URL rejected: keep the offending fragment for later diagnosis.
    ErrorInfo = new SpiderError();
    ErrorInfo.Lvevl = 4;
    ErrorInfo.SingleHtml = singleHtml;
    ErrorInfo.SiteId = Baseinfo.SiteId;
    ErrorInfo.ErrType = "产品url错误";
    return "";
}
/// <summary>
/// Builds a JSON-like array of <c>{"name":"value"}</c> objects from the
/// <c>&lt;ul class="detail-list"&gt;</c> block of a product detail page.
/// </summary>
/// <param name="detial">HTML of the product detail page.</param>
/// <returns>
/// An empty string when the detail list block is missing or empty, or when it contains no
/// <c>&lt;li&gt;</c> items (the latter also stores a <c>SpiderError</c> in <c>ErrorInfo</c>);
/// otherwise the array text. Items whose text does not split into exactly two parts on ':'
/// are skipped, so the result may be <c>[]</c>.
/// </returns>
public string GetSkuDes(string detial)
{
    string skudetial = RegGroupsX<string>(detial, "<ul class=\"detail-list\">(?<x>.*?)</ul>");
    if (string.IsNullOrEmpty(skudetial))
    {
        return "";
    }

    var list = RegGroupCollection(skudetial, "<li( title=\".*?\"|)>(?<x>.*?)</li>");
    if (list == null || list.Count == 0)
    {
        ErrorInfo = new SpiderError
        {
            Lvevl = 4,
            UrlPath = "",
            SiteId = Baseinfo.SiteId,
            ErrType = "获取sku参数错误",
            SingleHtml = detial
        };
        return "";
    }

    const string fomat = "\"{0}\":\"{1}\"";
    StringBuilder res = new StringBuilder("[");
    bool hasEntry = false;
    foreach (Match li in list)
    {
        // Double quotes in the text would break the quoted output, so swap them for single quotes.
        var txt = WordCenter.FilterHtml(li.Groups["x"].Value).Replace('"', '\'');
        string[] templist = txt.Split(':');
        if (templist.Length != 2)
        {
            continue;
        }

        // Emit the separator before every entry except the first. The previous code always
        // stripped the last character, which removed the opening '[' when nothing was appended.
        if (hasEntry)
        {
            res.Append(',');
        }

        res.Append("{");
        res.AppendFormat(fomat, templist[0], templist[1]);
        res.Append("}");
        hasEntry = true;
    }

    res.Append("]");
    return res.ToString();
}
/// <summary>
/// Extracts the small picture value from a single product's HTML fragment using <c>Reginfo.PicReg</c>.
/// </summary>
/// <param name="singleHtml">HTML fragment of one product entry.</param>
/// <returns>
/// The extracted value when the pattern matches; otherwise an empty string, after a
/// <c>SpiderError</c> (Lvevl 4) has been stored in <c>ErrorInfo</c>.
/// </returns>
public string GetSmallPic(string singleHtml)
{
    bool pictureFound = regIsMatch(singleHtml, Reginfo.PicReg);
    if (pictureFound)
    {
        return RegGroupsX<string>(singleHtml, Reginfo.PicReg);
    }

    var failure = new SpiderError
    {
        Lvevl = 4,
        UrlPath = "",
        SiteId = Baseinfo.SiteId,
        ErrType = "获取小图错误",
        SingleHtml = singleHtml
    };
    ErrorInfo = failure;
    return "";
}
/// <summary>
/// Builds the item SKU key <c>"&lt;SiteId&gt;|&lt;id&gt;"</c> from a product URL, where the id is
/// extracted with <c>Reginfo.SkuReg</c>.
/// </summary>
/// <param name="url">Product URL to extract the id from.</param>
/// <returns>
/// The SKU key; or an empty string when <c>ValidItemurl</c> rejects the URL or no id could be
/// extracted, after a <c>SpiderError</c> (Lvevl 4) has been stored in <c>ErrorInfo</c>.
/// </returns>
public string GetItemSku(string url)
{
    var tempid = RegGroupsX<string>(url, Reginfo.SkuReg);

    // The original only validated the URL, so a failed id extraction produced a key such as "1|".
    // Treat a missing id as the same "product id error" the message already describes.
    if (!ValidItemurl(url) || string.IsNullOrEmpty(tempid))
    {
        ErrorInfo = new SpiderError();
        ErrorInfo.Lvevl = 4;
        ErrorInfo.UrlPath = url;
        ErrorInfo.SiteId = Baseinfo.SiteId;
        ErrorInfo.ErrType = "产品id错误";
        return "";
    }

    tempid = Baseinfo.SiteId + "|" + tempid;
    return tempid;
}
/// <summary>
/// Extracts the product price from a single product's HTML fragment using <c>Reginfo.PriceReg</c>.
/// </summary>
/// <param name="singleHtml">HTML fragment of one product entry.</param>
/// <param name="skuid">Not used by this implementation.</param>
/// <returns>
/// The extracted price; or 0 when the extracted value is 0, in which case a
/// <c>SpiderError</c> (Lvevl 4) is stored in <c>ErrorInfo</c>.
/// </returns>
public decimal GetSpPrice(string singleHtml, string skuid)
{
    var price = RegGroupsX<decimal>(singleHtml, Reginfo.PriceReg);
    if (price != 0)
    {
        return price;
    }

    // A zero price is treated as an extraction failure.
    ErrorInfo = new SpiderError();
    ErrorInfo.Lvevl = 4;
    ErrorInfo.SingleHtml = singleHtml;
    ErrorInfo.SiteId = Baseinfo.SiteId;
    ErrorInfo.ErrType = "产品价格错误";
    return 0;
}
/// <summary>
/// Builds a JSON-like array of <c>{"name":"value"}</c> objects from the specification block of a
/// product detail page, reading each <c>&lt;dd title="..."&gt;</c> title as a "name:value" pair.
/// </summary>
/// <param name="detial">HTML of the product detail page.</param>
/// <returns>
/// An empty string when the specification block is not found or contains no
/// <c>&lt;dd&gt;</c> titles (a <c>SpiderError</c> is stored in <c>ErrorInfo</c> in both cases);
/// otherwise the array text. Titles that do not split into exactly two parts on ':' are
/// skipped, so the result may be <c>[]</c>.
/// </returns>
public string GetSkuDes(string detial)
{
    string skudetial = RegGroupsX<string>(detial, "<dt>规格参数\r\n<a id=\"medica_record\"(?<x>.*?)</dl>");
    if (skudetial == null)
    {
        ErrorInfo = new SpiderError
        {
            Lvevl = 4,
            UrlPath = "",
            SiteId = Baseinfo.SiteId,
            ErrType = "获取sku参数错误",
            SingleHtml = detial
        };
        return "";
    }

    var list = RegGroupCollection(skudetial, "<dd title=\"(?<x>.*?)\" >");

    // Guard against a null collection as well, matching the sibling GetSkuDes implementations.
    if (list == null || list.Count == 0)
    {
        ErrorInfo = new SpiderError
        {
            Lvevl = 4,
            UrlPath = "",
            SiteId = Baseinfo.SiteId,
            ErrType = "获取sku参数错误",
            SingleHtml = detial
        };
        return "";
    }

    const string fomat = "\"{0}\":\"{1}\"";
    StringBuilder res = new StringBuilder("[");
    bool hasEntry = false;
    foreach (Match item in list)
    {
        string title = item.Groups["x"].Value;
        string[] templist = title.Split(':');
        if (templist.Length != 2)
        {
            continue;
        }

        // Emit the separator before every entry except the first. The previous code always
        // stripped the last character, which removed the opening '[' when nothing was appended.
        if (hasEntry)
        {
            res.Append(',');
        }

        res.Append("{");
        res.AppendFormat(fomat, templist[0], templist[1]);
        res.Append("}");
        hasEntry = true;
    }

    res.Append("]");
    return res.ToString();
}
/// <summary>
/// Extracts the small picture value from the first non-empty <c>&lt;sImg&gt;</c> element of a
/// product fragment and appends the <c>_360.jpg</c> suffix.
/// </summary>
/// <param name="singleHtml">Fragment of one product entry containing <c>&lt;sImg&gt;</c> elements.</param>
/// <returns>
/// The picture value with <c>_360.jpg</c> appended; or an empty string when no non-empty
/// <c>&lt;sImg&gt;</c> value is found, after a <c>SpiderError</c> (Lvevl 4) has been stored in
/// <c>ErrorInfo</c>.
/// </returns>
public string GetSmallPic(string singleHtml)
{
    var picture = RegGroupsX<string>(singleHtml, "<sImg>(?<x>.*?)</sImg>");
    if (!string.IsNullOrEmpty(picture))
    {
        return picture + "_360.jpg";
    }

    // The first element may be empty; drop empty elements and try once more.
    var withoutEmptyTags = singleHtml.Replace("<sImg></sImg>", "");
    picture = RegGroupsX<string>(withoutEmptyTags, "<sImg>(?<x>.*?)</sImg>");
    if (!string.IsNullOrEmpty(picture))
    {
        return picture + "_360.jpg";
    }

    ErrorInfo = new SpiderError
    {
        Lvevl = 4,
        UrlPath = "",
        SiteId = Baseinfo.SiteId,
        ErrType = "获取小图错误",
        SingleHtml = singleHtml
    };
    return "";
}
/// <summary>
/// Builds a JSON-like array of <c>{"name":"value"}</c> objects from the
/// <c>&lt;ul class="specbox"&gt;</c> block of a product detail page. For each <c>&lt;li&gt;</c>,
/// the name comes from <c>&lt;span class="specinfo"&gt;</c> and the value from a plain
/// <c>&lt;span&gt;</c>.
/// </summary>
/// <param name="detial">HTML of the product detail page.</param>
/// <returns>
/// An empty string when the spec block is missing or empty, or when it contains no
/// <c>&lt;li&gt;</c> items (the latter also stores a <c>SpiderError</c> in <c>ErrorInfo</c>);
/// otherwise the array text. Items lacking a name or a value are skipped, so the result may
/// be <c>[]</c>.
/// </returns>
public string GetSkuDes(string detial)
{
    string skudetial = RegGroupsX<string>(detial, "<ul class=\"specbox\">(?<x>.*?)</ul>");
    if (string.IsNullOrEmpty(skudetial))
    {
        return "";
    }

    var list = RegGroupCollection(skudetial, "<li>(?<x>.*?)</li>");
    if (list == null || list.Count == 0)
    {
        ErrorInfo = new SpiderError
        {
            Lvevl = 4,
            UrlPath = "",
            SiteId = Baseinfo.SiteId,
            ErrType = "获取sku参数错误",
            SingleHtml = detial
        };
        return "";
    }

    const string fomat = "\"{0}\":\"{1}\"";
    StringBuilder res = new StringBuilder("[");
    bool hasEntry = false;
    foreach (Match li in list)
    {
        var txt = li.Groups["x"].Value;
        string tempkey = RegGroupsX<string>(txt, "<span class=\"specinfo\">(?<x>.*?)</span>");
        string tempval = RegGroupsX<string>(txt, "<span>(?<x>.*?)</span>");
        if (string.IsNullOrEmpty(tempkey) || string.IsNullOrEmpty(tempval))
        {
            continue;
        }

        // Emit the separator before every entry except the first. The previous code always
        // stripped the last character, which removed the opening '[' when nothing was appended.
        if (hasEntry)
        {
            res.Append(',');
        }

        res.Append("{");
        res.AppendFormat(fomat, tempkey, tempval);
        res.Append("}");
        hasEntry = true;
    }

    res.Append("]");
    return res.ToString();
}