/// <summary>
/// Builds a hard-coded <c>BidSourceConfig</c> for www.chinabidding.com, downloads the
/// bids it describes through <c>BidWebsiteSpider.DownLoadBids</c>, and inserts every
/// returned bid through <c>BidBLL</c>.
/// </summary>
protected override void Run()
{
    BidWebsiteSpider spider = new BidWebsiteSpider();

    // Source description: where the list page lives, which links count as detail
    // pages, and where title / content / publish date sit inside a detail page.
    var sourceConfig = new BidSourceConfig();
    sourceConfig.ListUrl = "http://www.chinabidding.com/zbzx.jhtml?method=outlineOne&type=biddingProjectGG&channelId=205";
    sourceConfig.DetailUrlPattern = @"http://www.chinabidding.com/zbzx-detail-\d+.html";
    sourceConfig.TitleXpath = "/html/body/div/div[2]/div[2]/div[1]/div/h2";
    sourceConfig.ContentXpath = "/html/body/div/div[2]/div[2]/div[1]/div/div[2]";
    sourceConfig.PubishDateXpath = "/html/body/div/div[2]/div[2]/div[1]/div/div[1]";
    // Group 1 captures a dotted date of the form yyyy.MM.dd.
    sourceConfig.PubishDatePattern = @"(\d{4}\.\d{2}\.\d{2})";

    // Persisting the config itself is currently disabled:
    //BidSourceConfigBLL configService = new BidSourceConfigBLL();
    //configService.Insert(config);

    var downloadedBids = spider.DownLoadBids(sourceConfig);

    BidBLL bidStore = new BidBLL();
    foreach (var downloadedBid in downloadedBids)
    {
        bidStore.Insert(downloadedBid);
    }
}
/// <summary>
/// Stores the posted bid source configuration when its <c>ListUrl</c> is not blank,
/// then renders the view with the same entity.
/// </summary>
/// <param name="entity">The bid source configuration bound from the request.</param>
/// <returns>The view for <paramref name="entity"/>, whether or not it was inserted.</returns>
public ActionResult Create(BidSourceConfig entity)
{
    // A blank list URL means there is nothing to save; the view is still rendered.
    bool hasListUrl = !string.IsNullOrWhiteSpace(entity.ListUrl);
    if (hasListUrl)
    {
        bidSourceConfigBLL.Insert(entity, this);
    }

    return View(entity);
}
/// <summary>
/// Fetches the list page named by <paramref name="config"/>, visits every detail URL
/// found on it, and extracts title, publish date and content from each detail page.
/// </summary>
/// <param name="config">
/// Source configuration. <c>DetailUrlPattern</c> may hold several patterns separated
/// by <c>-/-</c>; the XPath properties locate title, publish date and content; group 1
/// of <c>PubishDatePattern</c> is passed to <c>GetDateTime</c>.
/// </param>
/// <returns>
/// The bids that could be extracted; never null. A detail page is skipped when it fails
/// to load, has blank text, or its title, publish-date or content node cannot be read.
/// A publish date that does not match the pattern is recorded as a problem but the bid
/// is still returned with its default publish date.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
/// <remarks>
/// At most one <c>ExceptionBidSourceConfig</c> row is inserted per call: only the first
/// problem encountered is recorded.
/// </remarks>
public List<Bid> DownLoadBids(BidSourceConfig config)
{
    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    string listUrl = config.ListUrl;
    List<Bid> bidList = new List<Bid>();

    // NOTE(review): the value is never read. The statement is kept because constructing
    // Url may validate listUrl and throw for bad input — confirm, then remove if not.
    string domain = new Url(listUrl).DomainUrl;

    string[] detailPatterns = config.DetailUrlPattern.Split(new string[] { "-/-" }, StringSplitOptions.RemoveEmptyEntries);
    var list = GetDetailUrlListByUrl(listUrl, detailPatterns);
    ExceptionBidSourceConfigBLL ebsc = new ExceptionBidSourceConfigBLL();
    bool isRecordException = false;

    if (list == null || list.Count <= 0)
    {
        RecordSourceConfigProblem(ebsc, config, "无法获取列表", ref isRecordException);
        return bidList;
    }

    foreach (var item in list)
    {
        try
        {
            Console.WriteLine(item);
            HtmlDocument doc = new WebPageLoader().GetPage(item);
            var text = doc.DocumentNode.InnerText;
            if (string.IsNullOrWhiteSpace(text))
            {
                continue;
            }

            var bid = Bid.GetDefaultEntity();
            bid.BidSourceUrl = item;

            string title;
            if (!TryReadNodeText(doc, config.TitleXpath, false, out title))
            {
                RecordSourceConfigProblem(ebsc, config, "BidTitle,根据xPath获取时失败!", ref isRecordException);
                continue;
            }
            bid.BidTitle = title;

            string strPubTime;
            if (!TryReadNodeText(doc, config.PubishDateXpath, false, out strPubTime))
            {
                RecordSourceConfigProblem(ebsc, config, "BidPublishDate,根据xPath获取时失败!", ref isRecordException);
                continue;
            }

            var m = Regex.Match(strPubTime, config.PubishDatePattern);
            if (m.Success)
            {
                bid.BidPublishDate = GetDateTime(m.Groups[1].Value);
            }
            else
            {
                // An unparseable date is recorded but does not discard the bid.
                RecordSourceConfigProblem(ebsc, config, "BidPublishDate,转换失败!", ref isRecordException);
            }

            string content;
            if (!TryReadNodeText(doc, config.ContentXpath, true, out content))
            {
                RecordSourceConfigProblem(ebsc, config, "BidContent,根据xPath获取时失败!", ref isRecordException);
                continue;
            }
            bid.BidContent = content;

            bidList.Add(bid);
        }
        catch (Exception ex)
        {
            // Best effort: one broken page (load failure, bad regex, date conversion,
            // logging failure) must not abort the whole crawl, but it should be visible.
            Console.WriteLine("Skipped " + item + ": " + ex);
        }
    }

    return bidList;
}

/// <summary>
/// Reads the inner text (or inner HTML when <paramref name="asHtml"/> is true) of the
/// single node selected by <paramref name="xpath"/>. Returns false when no node matches
/// or the XPath cannot be evaluated, instead of relying on a NullReferenceException.
/// </summary>
private static bool TryReadNodeText(HtmlDocument doc, string xpath, bool asHtml, out string value)
{
    value = null;
    try
    {
        var node = doc.DocumentNode.SelectSingleNode(xpath);
        if (node == null)
        {
            return false;
        }

        value = asHtml ? node.InnerHtml : node.InnerText;
        return true;
    }
    catch (Exception ex)
    {
        // A null or malformed XPath makes SelectSingleNode throw; treat it like "not found".
        Console.WriteLine("XPath '" + xpath + "' could not be evaluated: " + ex.Message);
        return false;
    }
}

/// <summary>
/// Inserts one problem record for <paramref name="config"/> unless one was already
/// recorded during this call; sets <paramref name="isRecordException"/> after a successful insert.
/// </summary>
private static void RecordSourceConfigProblem(ExceptionBidSourceConfigBLL ebsc, BidSourceConfig config, string msg, ref bool isRecordException)
{
    if (isRecordException)
    {
        return;
    }

    ebsc.Insert(new ExceptionBidSourceConfig { Config_BscID = config.BscID, Msg = msg, LogDate = DateTime.Now });
    isRecordException = true;
}