예제 #1
0
 /// <summary>
 /// Saves a spider error record using a connection opened from the DB factory.
 /// </summary>
 /// <remarks>
 /// An exception thrown by the save call is written to the log under the
 /// "DBError" category and is not rethrown. Exceptions raised while opening
 /// the connection are not caught here.
 /// </remarks>
 /// <param name="error">The error record to save.</param>
 public void SaveSpiderError(SpiderError error)
 {
     using (var connection = _dbFactory.OpenDbConnection())
     {
         try
         {
             connection.Save(error);
         }
         catch (Exception saveFailure)
         {
             // Best effort: a failed save is logged, never propagated to the caller.
             LogServer.WriteLog(saveFailure, "DBError");
         }
     }
 }
예제 #2
0
        /// <summary>
        /// Checks whether a page's HTML contains the <c>id="plist"</c> marker.
        /// </summary>
        /// <param name="pageHtml">Raw HTML of the page to inspect.</param>
        /// <returns>
        /// <c>true</c> when the marker is present; otherwise <c>false</c>, after
        /// recording a level-5 error in <c>ErrorInfo</c>.
        /// </returns>
        public bool HasProducts(string pageHtml)
        {
            bool markerFound = pageHtml.Contains("id=\"plist\"");

            if (!markerFound)
            {
                // UrlPath is not filled in: this method has no page URL available.
                ErrorInfo = new SpiderError();
                ErrorInfo.Lvevl = 5;
                ErrorInfo.ErrType = "列表也没有产品";
                ErrorInfo.PageHtml = pageHtml;
                ErrorInfo.SiteId = Baseinfo.SiteId;
                ErrorInfo.SiteName = Baseinfo.SiteName;
            }

            return markerFound;
        }
예제 #3
0
 /// <summary>
 /// Extracts the comment count from a single product's HTML fragment.
 /// </summary>
 /// <param name="singleHtml">HTML fragment to search.</param>
 /// <returns>
 /// The value captured by <c>Reginfo.CommentCountReg</c>, or 0 when the
 /// pattern does not match (in which case <c>ErrorInfo</c> is set).
 /// </returns>
 public int GetComments(string singleHtml)
 {
     if (regIsMatch(singleHtml, Reginfo.CommentCountReg))
     {
         return RegGroupsX<int>(singleHtml, Reginfo.CommentCountReg);
     }

     // No site info loaded yet: load the site with id 1 so the error can carry a SiteId.
     if (Baseinfo == null)
     {
         Baseinfo = new SiteInfoDB().SiteById(1);
     }

     // Build the record completely before publishing it through ErrorInfo.
     var failure = new SpiderError();
     failure.Lvevl = 4;
     failure.UrlPath = "";
     failure.SiteId = Baseinfo.SiteId;
     failure.ErrType = "获取评论错误";
     failure.SingleHtml = singleHtml;
     ErrorInfo = failure;

     return 0;
 }
예제 #4
0
 /// <summary>
 /// Extracts the product title from a single product's HTML fragment.
 /// </summary>
 /// <param name="singleHtml">HTML fragment to search.</param>
 /// <returns>
 /// The text captured by <c>Reginfo.TitleReg</c> when it passes
 /// <c>ValidItemName</c>; otherwise an empty string, with <c>ErrorInfo</c> set.
 /// </returns>
 public string GetSpName(string singleHtml)
 {
     string productName = RegGroupsX<string>(singleHtml, Reginfo.TitleReg);

     if (ValidItemName(productName))
     {
         return productName;
     }

     // Title rejected by the validator: record a level-4 error for this fragment.
     ErrorInfo = new SpiderError();
     ErrorInfo.Lvevl = 4;
     ErrorInfo.SingleHtml = singleHtml;
     ErrorInfo.SiteId = Baseinfo.SiteId;
     ErrorInfo.ErrType = "产品标题错误";
     return "";
 }
예제 #5
0
 /// <summary>
 /// Extracts the product URL from a single product's HTML fragment.
 /// </summary>
 /// <param name="singleHtml">HTML fragment to search.</param>
 /// <returns>
 /// The text captured by <c>Reginfo.UrlReg</c> when it passes
 /// <c>ValidItemurl</c>; otherwise an empty string, with <c>ErrorInfo</c> set.
 /// </returns>
 public string GetSpUrl(string singleHtml)
 {
     string productUrl = RegGroupsX<string>(singleHtml, Reginfo.UrlReg);

     if (ValidItemurl(productUrl))
     {
         return productUrl;
     }

     // URL rejected by the validator: record a level-4 error for this fragment.
     ErrorInfo = new SpiderError();
     ErrorInfo.Lvevl = 4;
     ErrorInfo.SingleHtml = singleHtml;
     ErrorInfo.SiteId = Baseinfo.SiteId;
     ErrorInfo.ErrType = "产品url错误";
     return "";
 }
예제 #6
0
        /// <summary>
        /// Builds a JSON-like array of SKU attributes from the
        /// <c>&lt;ul class="detail-list"&gt;</c> block of a product detail page.
        /// </summary>
        /// <param name="detial">HTML of the product detail page.</param>
        /// <returns>
        /// A string of the form <c>[{"name":"value"},...]</c>. Returns <c>[]</c>
        /// when no list item splits into exactly one name and one value.
        /// Returns an empty string when the detail-list block is not found, or
        /// when it contains no list items (the latter also sets <c>ErrorInfo</c>).
        /// </returns>
        public string GetSkuDes(string detial)
        {
            string skudetial = RegGroupsX<string>(detial,
               "<ul class=\"detail-list\">(?<x>.*?)</ul>");
            if (string.IsNullOrEmpty(skudetial))
            {
                return "";
            }

            var list = RegGroupCollection(skudetial, "<li( title=\".*?\"|)>(?<x>.*?)</li>");
            if (list == null || list.Count == 0)
            {
                ErrorInfo = new SpiderError
                {
                    Lvevl = 4,
                    UrlPath = "",
                    SiteId = Baseinfo.SiteId,
                    ErrType = "获取sku参数错误",
                    SingleHtml = detial
                };
                return "";
            }

            const string fomat = "\"{0}\":\"{1}\"";
            StringBuilder res = new StringBuilder("[");
            foreach (Match li in list)
            {
                // Double quotes are swapped for single quotes, presumably so the
                // text cannot terminate the quoted value in the output.
                var txt = WordCenter.FilterHtml(li.Groups["x"].Value).Replace('"', '\'');
                string[] templist = txt.Split(':');

                // Only items that are exactly "name:value" are emitted.
                if (templist.Length != 2)
                {
                    continue;
                }

                // Write the separator before every entry except the first. The
                // original stripped the last character unconditionally, which
                // removed the opening '[' when no entry had been written.
                if (res.Length > 1)
                {
                    res.Append(',');
                }
                res.Append('{');
                res.AppendFormat(fomat, templist[0], templist[1]);
                res.Append('}');
            }
            res.Append(']');

            return res.ToString();
        }
예제 #7
0
 /// <summary>
 /// Extracts the small picture value from a single product's HTML fragment.
 /// </summary>
 /// <param name="singleHtml">HTML fragment to search.</param>
 /// <returns>
 /// The text captured by <c>Reginfo.PicReg</c>, or an empty string when the
 /// pattern does not match (in which case <c>ErrorInfo</c> is set).
 /// </returns>
 public string GetSmallPic(string singleHtml)
 {
     if (regIsMatch(singleHtml, Reginfo.PicReg))
     {
         return RegGroupsX<string>(singleHtml, Reginfo.PicReg);
     }

     // Build the record completely before publishing it through ErrorInfo.
     var failure = new SpiderError();
     failure.Lvevl = 4;
     failure.UrlPath = "";
     failure.SiteId = Baseinfo.SiteId;
     failure.ErrType = "获取小图错误";
     failure.SingleHtml = singleHtml;
     ErrorInfo = failure;

     return "";
 }
예제 #8
0
 /// <summary>
 /// Builds the item SKU identifier for a product URL, in the form
 /// <c>SiteId|capturedId</c>.
 /// </summary>
 /// <param name="url">Product URL to extract the id from.</param>
 /// <returns>
 /// The site id and the text captured by <c>Reginfo.SkuReg</c>, joined by
 /// <c>|</c>. Returns an empty string, with <c>ErrorInfo</c> set, when the URL
 /// fails <c>ValidItemurl</c> or when no id could be captured from it.
 /// </returns>
 public string GetItemSku(string url)
 {
     var tempid = RegGroupsX<string>(url, Reginfo.SkuReg);

     // An empty capture is treated as an error as well: otherwise the caller
     // would receive the malformed identifier "SiteId|" with no id part.
     if (!ValidItemurl(url) || string.IsNullOrEmpty(tempid))
     {
         ErrorInfo = new SpiderError();
         ErrorInfo.Lvevl = 4;
         ErrorInfo.UrlPath = url;
         ErrorInfo.SiteId = Baseinfo.SiteId;
         ErrorInfo.ErrType = "产品id错误";
         return "";
     }

     return Baseinfo.SiteId + "|" + tempid;
 }
예제 #9
0
 /// <summary>
 /// Extracts the product price from a single product's HTML fragment.
 /// </summary>
 /// <param name="singleHtml">HTML fragment to search.</param>
 /// <param name="skuid">Item SKU id; not used by this implementation.</param>
 /// <returns>
 /// The value captured by <c>Reginfo.PriceReg</c>, or 0 when that value is
 /// zero (in which case <c>ErrorInfo</c> is set).
 /// </returns>
 public decimal GetSpPrice(string singleHtml, string skuid)
 {
     decimal price = RegGroupsX<decimal>(singleHtml, Reginfo.PriceReg);

     if (price != 0)
     {
         return price;
     }

     // A zero price is treated as an extraction failure.
     ErrorInfo = new SpiderError();
     ErrorInfo.Lvevl = 4;
     ErrorInfo.SingleHtml = singleHtml;
     ErrorInfo.SiteId = Baseinfo.SiteId;
     ErrorInfo.ErrType = "产品价格错误";
     return 0;
 }
예제 #10
0
 /// <summary>
 /// Builds a JSON-like array of SKU attributes from the specification block
 /// of a product detail page, reading each <c>&lt;dd title="name:value" &gt;</c> entry.
 /// </summary>
 /// <param name="detial">HTML of the product detail page.</param>
 /// <returns>
 /// A string of the form <c>[{"name":"value"},...]</c>. Returns <c>[]</c> when
 /// no title splits into exactly one name and one value. Returns an empty
 /// string, with <c>ErrorInfo</c> set, when the specification block or its
 /// entries cannot be found.
 /// </returns>
 public string GetSkuDes(string detial)
 {
     string skudetial = RegGroupsX<string>(detial, "<dt>规格参数\r\n<a id=\"medica_record\"(?<x>.*?)</dl>");

     // Only look for entries when the specification block itself was found.
     var list = skudetial == null
         ? null
         : RegGroupCollection(skudetial, "<dd title=\"(?<x>.*?)\" >");

     // A missing block and a block with no entries report the same error;
     // a null collection is handled here instead of failing on list.Count.
     if (list == null || list.Count == 0)
     {
         ErrorInfo = new SpiderError
         {
             Lvevl = 4,
             UrlPath = "",
             SiteId = Baseinfo.SiteId,
             ErrType = "获取sku参数错误",
             SingleHtml = detial
         };
         return "";
     }

     const string fomat = "\"{0}\":\"{1}\"";
     StringBuilder res = new StringBuilder("[");
     foreach (Match item in list)
     {
         string title = item.Groups["x"].Value;
         string[] templist = title.Split(':');

         // Only titles that are exactly "name:value" are emitted.
         if (templist.Length != 2)
         {
             continue;
         }

         // Write the separator before every entry except the first. The
         // original stripped the last character unconditionally, which
         // removed the opening '[' when no entry had been written.
         if (res.Length > 1)
         {
             res.Append(',');
         }
         res.Append('{');
         res.AppendFormat(fomat, templist[0], templist[1]);
         res.Append('}');
     }
     res.Append(']');

     return res.ToString();
 }
예제 #11
0
 /// <summary>
 /// Extracts the small picture value from the first <c>&lt;sImg&gt;</c> element
 /// of a product fragment and appends the <c>_360.jpg</c> suffix.
 /// </summary>
 /// <remarks>
 /// Uses a hard-coded pattern rather than <c>Reginfo.PicReg</c>. When the
 /// first attempt captures nothing, empty <c>&lt;sImg&gt;&lt;/sImg&gt;</c>
 /// elements are removed from the input and the search is repeated once.
 /// </remarks>
 /// <param name="singleHtml">Fragment to search.</param>
 /// <returns>
 /// The captured value followed by <c>_360.jpg</c>, or an empty string when
 /// both attempts capture nothing (in which case <c>ErrorInfo</c> is set).
 /// </returns>
 public string GetSmallPic(string singleHtml)
 {
     const string imagePattern = "<sImg>(?<x>.*?)</sImg>";

     var img = RegGroupsX<string>(singleHtml, imagePattern);
     if (string.IsNullOrEmpty(img))
     {
         // Retry with the empty elements stripped out.
         img = RegGroupsX<string>(singleHtml.Replace("<sImg></sImg>", ""), imagePattern);
     }

     if (!string.IsNullOrEmpty(img))
     {
         return img + "_360.jpg";
     }

     // Build the record completely before publishing it through ErrorInfo.
     var failure = new SpiderError();
     failure.Lvevl = 4;
     failure.UrlPath = "";
     failure.SiteId = Baseinfo.SiteId;
     failure.ErrType = "获取小图错误";
     failure.SingleHtml = singleHtml;
     ErrorInfo = failure;

     return "";
 }
예제 #12
0
        /// <summary>
        /// Builds a JSON-like array of SKU attributes from the
        /// <c>&lt;ul class="specbox"&gt;</c> block of a product detail page.
        /// </summary>
        /// <param name="detial">HTML of the product detail page.</param>
        /// <returns>
        /// A string of the form <c>[{"name":"value"},...]</c>. Returns <c>[]</c>
        /// when no list item yields both a name and a value. Returns an empty
        /// string when the specbox block is not found, or when it contains no
        /// list items (the latter also sets <c>ErrorInfo</c>).
        /// </returns>
        public string GetSkuDes(string detial)
        {
            string skudetial = RegGroupsX<string>(detial,
               "<ul class=\"specbox\">(?<x>.*?)</ul>");
            if (string.IsNullOrEmpty(skudetial))
            {
                return "";
            }

            var list = RegGroupCollection(skudetial, "<li>(?<x>.*?)</li>");
            if (list == null || list.Count == 0)
            {
                ErrorInfo = new SpiderError
                {
                    Lvevl = 4,
                    UrlPath = "",
                    SiteId = Baseinfo.SiteId,
                    ErrType = "获取sku参数错误",
                    SingleHtml = detial
                };
                return "";
            }

            const string fomat = "\"{0}\":\"{1}\"";
            StringBuilder res = new StringBuilder("[");
            foreach (Match li in list)
            {
                var txt = li.Groups["x"].Value;

                // The name comes from the "specinfo" span, the value from a plain span.
                string tempkey = RegGroupsX<string>(txt, "<span class=\"specinfo\">(?<x>.*?)</span>");
                string tempval = RegGroupsX<string>(txt, "<span>(?<x>.*?)</span>");
                if (string.IsNullOrEmpty(tempkey) || string.IsNullOrEmpty(tempval))
                {
                    continue;
                }

                // Write the separator before every entry except the first. The
                // original stripped the last character unconditionally, which
                // removed the opening '[' when no entry had been written.
                if (res.Length > 1)
                {
                    res.Append(',');
                }
                res.Append('{');
                res.AppendFormat(fomat, tempkey, tempval);
                res.Append('}');
            }
            res.Append(']');

            return res.ToString();
        }