/// <summary>
/// Refreshes prices for Tmall (<c>天猫</c>) and Taobao (<c>淘宝</c>) items whose
/// <c>LastListenTime</c> is missing or earlier than midnight today.
/// For each item a PhantomJS process is started with the <c>tmallsavehtml.js</c> script,
/// the item's <c>ListenUrl</c> and an output path under <c>phantomjs\htmltmall</c>; the saved
/// <c>{Id}.html</c> file is then scraped with a regular expression for price and image URL.
/// When both are found, a price record is added and the item is updated. All changes are
/// committed once after the loop.
/// </summary>
/// <param name="context">Job execution context; not read by this method.</param>
/// <exception cref="InvalidOperationException">The PhantomJS process could not be started.</exception>
public virtual void Execute(IJobExecutionContext context)
{
    var dateTimeMin = DateTime.Parse(DateTime.Now.ToString("yyyy-MM-dd")); // midnight today
    var list = _ppismItemRepository.GetAll(x => (x.ItemSource == PPism.Model.Enum.DictPPItemSource.天猫 || x.ItemSource == PPism.Model.Enum.DictPPItemSource.淘宝) && (!x.LastListenTime.HasValue || x.LastListenTime.Value < dateTimeMin)).ToList();

    foreach (var item in list)
    {
        // No catch block: any exception propagates with its original stack trace,
        // and the using statement still disposes the process handle.
        using (var p = new Process())
        {
            var environment = Environment.CurrentDirectory;
            p.StartInfo.FileName = environment + "\\phantomjs\\bin\\phantomjs.exe";
            p.StartInfo.WorkingDirectory = environment + "\\phantomjs\\bin\\";

            // The script path is quoted so a working directory containing spaces
            // does not split it into several arguments.
            // NOTE(review): the output path is passed without ".html"; the script
            // presumably appends it, since "{Id}.html" is what is read back below - confirm.
            string strArg = @"""{0}\phantomjs\bin\tmallsavehtml.js"" ""{1}"" ""{0}\{2}""";
            p.StartInfo.Arguments = string.Format(strArg, environment, item.ListenUrl, "\\phantomjs\\htmltmall\\" + item.Id.ToString());
            p.StartInfo.CreateNoWindow = true;
            p.StartInfo.WindowStyle = ProcessWindowStyle.Hidden;

            if (!p.Start())
            {
                throw new InvalidOperationException("无法Headless浏览器.");
            }

            // A file last written more than an hour ago is treated as left over from an earlier run.
            var freshSince = DateTime.Now.AddHours(-1);
            string fileUrl = string.Format(@"{0}\phantomjs\htmltmall\{1}.html", environment, item.Id.ToString());

            Thread.Sleep(2 * 1000);
            var fileInfo = new FileInfo(fileUrl);
            if (!fileInfo.Exists || fileInfo.LastWriteTime < freshSince)
            {
                // Not written yet: wait another 5 seconds, then look again.
                Thread.Sleep(5 * 1000);
                fileInfo.Refresh(); // FileInfo caches its state; re-read it from disk

                if (!fileInfo.Exists || fileInfo.LastWriteTime < freshSince)
                {
                    // Still missing or stale: skip rather than record an old price as current.
                    // The item is left untouched here.
                    continue;
                }
            }

            var htmldocument = new HtmlAgilityPack.HtmlDocument();
            htmldocument.Load(fileUrl);
            var strHtml = htmldocument.DocumentNode.InnerHtml.ToString();

            // Taobao and Tmall pages use different markup, so each needs its own pattern.
            string reg = @"id=""J_PromoPrice""[\s\S]+?class=""tm-price"">(?<price>[^<]+)[\s\S]+?<img\s+id=""J_ImgBooth""[\s\S]+?src=""(?<imgUrl>[^""]+)";
            if (item.ItemSource == PPism.Model.Enum.DictPPItemSource.淘宝)
            {
                reg = @"id=""J_PromoPriceNum""[\s\S]+?class=""tb-rmb-num"">(?<price>[^<]+)[\s\S]+?<img\s+id=""J_ImgBooth""[\s\S]+?src=""(?<imgUrl>[^""]+)";
            }

            var groups = Regex.Match(strHtml, reg).Groups;
            var price = groups["price"].Value.Trim().ToDecimal(0);
            var imgUrl = groups["imgUrl"].Value.Trim();

            // Only record a result when both a positive price and an image URL were extracted.
            if (price > 0 && !string.IsNullOrEmpty(imgUrl))
            {
                var ppismItemBll = new PPismJob.Common.PPismItemBll();
                var priceItem = ppismItemBll.GetPriceItem(item, price, imgUrl);
                _priceItemRepository.Add(priceItem);
                // NOTE(review): GetPriceItem presumably also modifies item, hence the Update - confirm.
                _ppismItemRepository.Update(item);
            }
        }
    }

    _repositoryContext.Commit();
}
/// <summary>
/// Refreshes prices for <c>一号店</c> items whose <c>LastListenTime</c> is missing or earlier
/// than midnight today. If <c>CheckIsMUrl</c> rejects the item's <c>ListenUrl</c>, the page at
/// that URL is loaded and the mobile URL is taken from its <c>name="h5"</c> content attribute.
/// The mobile page is then fetched and scraped for image URL and price. When both are found,
/// a price record is added and the item is updated. All changes are committed once after the loop.
/// </summary>
/// <param name="context">Job execution context; not read by this method.</param>
public virtual void Execute(IJobExecutionContext context)
{
    const string MobileLinkPattern = @"name=""h5""\scontent='(?<mUrl>[^']+)'";
    const string DetailPattern = @"class=""swipeSlide_detail"">[\s\S]+?<img[\s]src=""(?<imgUrl>[^""]+)[\s\S]+?id=""current_price""[\s\S]+?class=""pd_product-price-num"">(?<price>[^<]+)";

    // Midnight today.
    var dateTimeMin = DateTime.Parse(DateTime.Now.ToString("yyyy-MM-dd"));
    var pending = _ppismItemRepository
        .GetAll(x => (x.ItemSource == PPism.Model.Enum.DictPPItemSource.一号店) && (!x.LastListenTime.HasValue || x.LastListenTime.Value < dateTimeMin))
        .ToList();
    var itemLogic = new PPismJob.Common.PPismItemBll();

    foreach (var current in pending)
    {
        string mobileUrl = current.ListenUrl;
        if (!itemLogic.CheckIsMUrl(current.ListenUrl))
        {
            // Not accepted as a mobile URL: load the page and read the mobile link it advertises.
            var loader = new HtmlAgilityPack.HtmlWeb();
            var pageHtml = loader.Load(current.ListenUrl).DocumentNode.InnerHtml.ToString();
            var linkMatch = Regex.Match(pageHtml, MobileLinkPattern);
            mobileUrl = linkMatch.Groups["mUrl"].Value.Trim();
        }

        if (string.IsNullOrEmpty(mobileUrl))
        {
            continue;
        }

        using (var request = new xNet.Net.HttpRequest())
        {
            request.UserAgent = xNet.Net.HttpHelper.FirefoxUserAgent();
            //http://p.3.cn/prices/get?callback=cnp&type=1&area=1_72_2799&pdtk=&pduid=2002986638&pdpin=&pdbp=0&skuid=J_540462
            request.CharacterSet = System.Text.Encoding.GetEncoding("utf-8");

            var mobileHtml = request.Get(mobileUrl).ToString();
            var captured = Regex.Match(mobileHtml, DetailPattern).Groups;
            var parsedPrice = captured["price"].Value.Trim().ToDecimal(0);
            var pictureUrl = captured["imgUrl"].Value.Trim();

            // Skip the item unless both a positive price and an image URL were extracted.
            if (parsedPrice <= 0 || string.IsNullOrEmpty(pictureUrl))
            {
                continue;
            }

            var record = itemLogic.GetPriceItem(current, parsedPrice, pictureUrl);
            _priceItemRepository.Add(record);
            _ppismItemRepository.Update(current);
        }
    }

    _repositoryContext.Commit();
}
/// <summary>
/// Refreshes prices for <c>京东</c> items whose <c>LastListenTime</c> is missing or earlier than
/// midnight today. The page at the item's <c>ListenUrl</c> is fetched and the html5 URL is read
/// from its <c>format=html5; url=</c> content attribute. That URL, prefixed with <c>http:</c>,
/// is then fetched and scraped for image URL and price. When both are found, a price record is
/// added and the item is updated. All changes are committed once after the loop.
/// </summary>
/// <param name="context">Job execution context; not read by this method.</param>
public virtual void Execute(IJobExecutionContext context)
{
    const string MobileLinkPattern = @"content=""format\s*=\s*html5;\s*url\s*=(?<mUrl>[^""]+)";
    const string DetailPattern = @"id=""goods-img-box""[\s\S]+?<img[\s\S]+?src=""(?<imgUrl>[^""]+)[\s\S]+?prod-price"">\s*?<span>[\s\S]*?</span>(?<price>[^<]+)";

    // Midnight today.
    var dateTimeMin = DateTime.Parse(DateTime.Now.ToString("yyyy-MM-dd"));
    var pending = _ppismItemRepository
        .GetAll(x => x.ItemSource == PPism.Model.Enum.DictPPItemSource.京东 && (!x.LastListenTime.HasValue || x.LastListenTime.Value < dateTimeMin))
        .ToList();

    foreach (var current in pending)
    {
        using (var request = new xNet.Net.HttpRequest())
        {
            request.UserAgent = xNet.Net.HttpHelper.FirefoxUserAgent();
            //http://p.3.cn/prices/get?callback=cnp&type=1&area=1_72_2799&pdtk=&pduid=2002986638&pdpin=&pdbp=0&skuid=J_540462
            var desktopHtml = request.Get(current.ListenUrl).ToString();
            var linkMatch = Regex.Match(desktopHtml, MobileLinkPattern);
            var mobilePath = linkMatch.Groups["mUrl"].Value.Trim();

            if (string.IsNullOrEmpty(mobilePath))
            {
                continue;
            }

            // The character set is assigned only before the second request, as in the original flow.
            request.CharacterSet = System.Text.Encoding.GetEncoding("utf-8");
            var mobileHtml = request.Get("http:" + mobilePath).ToString();

            var captured = Regex.Match(mobileHtml, DetailPattern).Groups;
            var parsedPrice = captured["price"].Value.Trim().ToDecimal(0);
            var pictureUrl = captured["imgUrl"].Value.Trim();

            // Skip the item unless both a positive price and an image URL were extracted.
            if (parsedPrice <= 0 || string.IsNullOrEmpty(pictureUrl))
            {
                continue;
            }

            var itemLogic = new PPismJob.Common.PPismItemBll();
            var record = itemLogic.GetPriceItem(current, parsedPrice, pictureUrl);
            _priceItemRepository.Add(record);
            _ppismItemRepository.Update(current);
        }
    }

    _repositoryContext.Commit();
}