/// <summary>
/// Consumes <c>Program.dbList</c> and executes every queued database command in parallel.
/// </summary>
/// <remarks>
/// <c>Program.clsDBSignal</c> is decremented exactly once for every item taken from the queue,
/// whether the command succeeded or failed. A failed command is handed to
/// <c>UrlContorl.SaveUrl</c> together with the exception text and written to the log.
/// </remarks>
public void ExecPageDBData()
{
    DbHelp.Utilities util = new DbHelp.Utilities();
    Log.ClsLog clsLog = new Log.ClsLog();
    Common.UrlContorl urlContorl = new Common.UrlContorl();

    // NOTE(review): assumes Program.dbList is a BlockingCollection<DBEntity> - confirm.
    // Once such a queue is marked complete and drained, GetConsumingEnumerable() returns an
    // empty sequence immediately, so an unconditional loop would spin at full CPU forever.
    while (!Program.dbList.IsCompleted)
    {
        Parallel.ForEach<DBEntity>(Program.dbList.GetConsumingEnumerable(), dbEntity =>
        {
            try
            {
                // Execute the insert statement carried by the queued command.
                util.ExecNonQuery(dbEntity.myCom, "");
            }
            catch (Exception ex)
            {
                urlContorl.SaveUrl(dbEntity.pageContentEntity, ex.ToString());
                clsLog.AddLog(DateTime.Now.ToString(), "执行数据失败" + ex.ToString());
                clsLog.AddLog(DateTime.Now.ToString(), dbEntity.pageContentEntity.SType + ";" + dbEntity.pageContentEntity.PID + ";" + dbEntity.pageContentEntity.Url);
            }
            finally
            {
                // Keep the pending-work counter in sync even when the failure handling itself throws.
                Interlocked.Decrement(ref Program.clsDBSignal);
            }
        });
    }
}
/// <summary>
/// Consumes <c>Program.pageContentList</c> and dispatches each downloaded page to the
/// analyser that matches its <c>SType</c>.
/// </summary>
/// <remarks>
/// A page is analysed when it carries non-blank text (<c>PContent</c>) or an image
/// (<c>MContent</c>). A page with neither is recorded through <c>UrlContorl.SaveUrl</c> and logged.
/// <c>Program.clsContentSignal</c> is decremented exactly once per consumed page.
/// </remarks>
public void AnalyseData()
{
    Log.ClsLog clsLog = new Log.ClsLog();
    Common.UrlContorl urlContorl = new Common.UrlContorl();
    MainContorl mainContorl = new MainContorl();

    foreach (PageContentEntity pageContentEntity in Program.pageContentList.GetConsumingEnumerable())
    {
        try
        {
            // Null-safe on purpose: an image-only page may have no text at all, and that
            // must not be reported as an analysis failure.
            bool hasText = !string.IsNullOrWhiteSpace(pageContentEntity.PContent);

            if (hasText || pageContentEntity.MContent != null)
            {
                // Route the page to its analyser; unknown source types are ignored.
                switch (pageContentEntity.SType)
                {
                    case "Portal":
                        mainContorl.HousePortalAnalysis(pageContentEntity);
                        break;
                    default:
                        break;
                }
            }
            else
            {
                // Nothing was downloaded for this page: record it as a failed fetch.
                urlContorl.SaveUrl(pageContentEntity, "页面数据为空");
                clsLog.AddLog(DateTime.Now.ToString(), "抓取数据失败");
                clsLog.AddLog(DateTime.Now.ToString(), pageContentEntity.SType + ";" + pageContentEntity.Url);
            }
        }
        catch (Exception ex)
        {
            clsLog.AddLog(DateTime.Now.ToString(), "分析数据失败" + ex.ToString());
            clsLog.AddLog(DateTime.Now.ToString(), pageContentEntity.SType + ";" + pageContentEntity.PID + ";" + pageContentEntity.Url);
            urlContorl.SaveUrl(pageContentEntity, ex.ToString());
        }
        finally
        {
            // Keep the pending-page counter in sync even when the failure handling itself throws.
            Interlocked.Decrement(ref Program.clsContentSignal);
        }
    }
}
/// <summary>
/// Consumes <c>Program.pageUrlList</c>, downloads each URL and queues the result for
/// analysis through <c>ClsPageContent.AddPageContent</c>.
/// </summary>
/// <remarks>
/// Runs synchronously on the calling thread. A failed download is logged and handed to
/// <c>UrlContorl.SaveUrl</c>. <c>Program.clsUrlSignal</c> is decremented exactly once per
/// consumed URL, and the method sleeps between URLs for an interval that grows with
/// <c>Program.CurrSpiderTimes</c>.
/// </remarks>
public void SpiderData()
{
    Log.ClsLog clsLog = new Log.ClsLog();
    Page.ClsPageContent clsPageContent = new Page.ClsPageContent();
    UrlContorl urlContorl = new UrlContorl();
    CookieContainer cookies = new CookieContainer();
    Common.NetContorl netContorl = new Common.NetContorl();
    HttpClient _client = new HttpClient("", 0, false);
    string PContent = "";

    foreach (PageUrlEntity pageUrlEntity in Program.pageUrlList.GetConsumingEnumerable())
    {
        // Back off once when the analysis queue already holds the configured maximum number of pages.
        if (Program.clsContentSignal >= Program.sysPara.MaxPage)
        {
            Thread.Sleep(Program.sysPara.SpiderSleepTime);
        }

        if (Program.sysPara.IsProxy == "true")
        {
            // Poll until a proxy is available, then build a client for its "host:port" address.
            ProxyEntity proxy = netContorl.GetProxyEntity_URL();
            while (proxy == null)
            {
                Thread.Sleep(Program.sysPara.NetSleepTime);
                proxy = netContorl.GetProxyEntity_URL();
            }

            string[] hostAndPort = proxy.proxyAddess.Split(':');
            _client = new HttpClient(hostAndPort[0], Convert.ToInt32(hostAndPort[1]), false);
        }

        try
        {
            try
            {
                PContent = "";

                if (pageUrlEntity.UrlType == "MGET")
                {
                    PContent = _client.GetResponse(pageUrlEntity.SourUrl, pageUrlEntity.Url, pageUrlEntity.UrlType, pageUrlEntity.UrlPara, pageUrlEntity.EnCode);
                    pageUrlEntity.CookieContent = cookies;
                    pageUrlEntity.PContent = "";

                    // NOTE(review): nothing in this method ever assigns the bitmap, so an
                    // "MGET" request always ends in the exception below - confirm how the
                    // image is supposed to be obtained.
                    Bitmap MContent = null;
                    if (MContent == null)
                    {
                        throw new Exception("空图片");
                    }

                    clsPageContent.AddPageContent(pageUrlEntity, MContent);
                }
                else
                {
                    PContent = _client.GetResponse(pageUrlEntity.SourUrl, pageUrlEntity.Url, pageUrlEntity.UrlType, pageUrlEntity.UrlPara, pageUrlEntity.EnCode);
                    pageUrlEntity.CookieContent = cookies;
                    pageUrlEntity.PContent = PContent;

                    // A null response is reported as an empty page rather than as a NullReferenceException.
                    if (string.IsNullOrEmpty(PContent))
                    {
                        throw new Exception("空页面");
                    }

                    if (PContent.Contains("超时"))
                    {
                        throw new Exception("操作超时");
                    }

                    clsPageContent.AddPageContent(pageUrlEntity, PContent);
                }
            }
            catch (Exception ex)
            {
                clsLog.AddLog(DateTime.Now.ToString(), "抓取数据失败" + ex.ToString());
                clsLog.AddLog(DateTime.Now.ToString(), pageUrlEntity.SType + ";" + pageUrlEntity.Url + ";");
                urlContorl.SaveUrl(pageUrlEntity, PContent, ex.ToString());
            }
        }
        catch (Exception ex)
        {
            // Reached only when the failure handling above throws; the report is attempted once more.
            clsLog.AddLog(DateTime.Now.ToString(), "抓取失败" + ex.ToString());
            clsLog.AddLog(DateTime.Now.ToString(), pageUrlEntity.SType + ";" + pageUrlEntity.Url + ";");
            urlContorl.SaveUrl(pageUrlEntity, PContent, ex.ToString());
        }
        finally
        {
            // Keep the pending-URL counter in sync even when the failure handling itself throws.
            Interlocked.Decrement(ref Program.clsUrlSignal);
        }

        // Throttle before the next URL. Thread.Sleep takes milliseconds; Thread.SpinWait would
        // treat the same number as a busy-loop iteration count and return almost immediately.
        Thread.Sleep(Program.sysPara.BegSpiderIntervalTime + Program.sysPara.IntervalSpiderIntervalTime * (Program.CurrSpiderTimes - 1));
    }
}
/// <summary>
/// Consumes <c>Program.pageUrlList</c>, downloads each URL and queues the result for
/// analysis through <c>ClsPageContent.AddPageContent</c>, retrying a failed download once.
/// </summary>
/// <remarks>
/// Progress and failures are written to the log and published through
/// <c>Program.helper.OntxtviewCompleted</c>. After every URL <c>Program.clsUrlSignal</c> is
/// decremented, <c>Program.helper.OnAllItemAnalyzeCompleted</c> is raised, and the method
/// sleeps for an interval that grows with <c>Program.CurrSpiderTimes</c>.
/// </remarks>
public void SpiderData()
{
    Log.ClsLog clsLog = new Log.ClsLog();
    Page.ClsPageContent clsPageContent = new Page.ClsPageContent();
    CookieContainer cookies = new CookieContainer();
    Common.NetContorl netContorl = new Common.NetContorl();
    HttpClient _client = new HttpClient("", 0, false, new CookieContainer());

    // NOTE(review): exactly one retry is performed per failed URL. If several retries were
    // intended, the retry step below needs a loop - confirm.
    const int retryNumber = 1;

    foreach (PageUrlEntity pageUrlEntity in Program.pageUrlList.GetConsumingEnumerable())
    {
        // A URL that carries its own cookies gets a direct client bound to them.
        bool hasCookieContainer = pageUrlEntity.CookieContent != null;
        if (hasCookieContainer)
        {
            _client = new HttpClient("", 0, false, pageUrlEntity.CookieContent);
        }

        // Back off once when the analysis queue already holds the configured maximum number of pages.
        if (Program.clsContentSignal >= Program.sysPara.MaxPage)
        {
            Thread.Sleep(Program.sysPara.SpiderSleepTime);
        }

        if (Program.sysPara.IsProxy == "true")
        {
            ProxyEntity proxy = WaitForProxy(netContorl);

            // The null guard matters: CookieContent may be null (see the check above), and this
            // code runs outside any try block, so a NullReferenceException here would end the loop.
            if (hasCookieContainer && pageUrlEntity.CookieContent.Count > 0)
            {
                _client = new HttpClient("", 0, false, pageUrlEntity.CookieContent);
            }
            else
            {
                _client = CreateProxyClient(proxy, null);
            }
        }

        try
        {
            try
            {
                FetchAndQueuePage(_client, pageUrlEntity, clsPageContent, cookies);
            }
            catch (Exception ex)
            {
                clsLog.AddLog(DateTime.Now.ToString(), "抓取数据失败" + ex.ToString());
                clsLog.AddLog(DateTime.Now.ToString(), pageUrlEntity.SType + ";" + pageUrlEntity.Url + ";");
                Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = "抓取数据失败" + ex.ToString() });
                Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = pageUrlEntity.SType + ";" + pageUrlEntity.Url + ";" });

                // Retry the failed page, through a fresh proxy when proxies are enabled.
                Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = "错误页面重抓" });
                if (Program.sysPara.IsProxy == "true")
                {
                    _client = CreateProxyClient(WaitForProxy(netContorl), pageUrlEntity.CookieContent);
                }

                Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = "第" + retryNumber + "次重抓" });
                FetchAndQueuePage(_client, pageUrlEntity, clsPageContent, cookies);
                Thread.Sleep(Program.sysPara.BegSpiderIntervalTime + Program.sysPara.IntervalSpiderIntervalTime * (Program.CurrSpiderTimes - 1));
            }
        }
        catch (Exception ex)
        {
            // Reached when the retry fails or when the failure reporting above throws.
            clsLog.AddLog(DateTime.Now.ToString(), "抓取失败" + ex.ToString());
            clsLog.AddLog(DateTime.Now.ToString(), pageUrlEntity.SType + ";" + pageUrlEntity.Url + ";");
            Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = "抓取数据失败" + ex.ToString() });
            Program.helper.OntxtviewCompleted(this, new EventControllerArgs() { IsSuccess = true, Msg = pageUrlEntity.SType + ";" + pageUrlEntity.Url + ";" });
        }
        finally
        {
            // Keep the pending-URL counter in sync even when the failure handling itself throws.
            Interlocked.Decrement(ref Program.clsUrlSignal);
        }

        Program.helper.OnAllItemAnalyzeCompleted(this, new EventControllerArgs() { IsSuccess = true });

        // Throttle before the next URL.
        Thread.Sleep(Program.sysPara.BegSpiderIntervalTime + Program.sysPara.IntervalSpiderIntervalTime * (Program.CurrSpiderTimes - 1));
    }
}

/// <summary>
/// Polls <c>GetProxyEntity_URL2</c> until it returns a proxy, sleeping
/// <c>Program.sysPara.NetSleepTime</c> between attempts, and strips a leading
/// <c>http://</c> from the proxy's address (the returned entity is modified in place).
/// </summary>
private static ProxyEntity WaitForProxy(Common.NetContorl netContorl)
{
    ProxyEntity proxy = netContorl.GetProxyEntity_URL2();
    while (proxy == null)
    {
        Thread.Sleep(Program.sysPara.NetSleepTime);
        proxy = netContorl.GetProxyEntity_URL2();
    }

    proxy.proxyAddess = proxy.proxyAddess.Replace("http://", "");
    return proxy;
}

/// <summary>
/// Builds a client for the proxy's <c>host:port</c> address using the given cookie container (may be null).
/// </summary>
private static HttpClient CreateProxyClient(ProxyEntity proxy, CookieContainer cookieContainer)
{
    string[] hostAndPort = proxy.proxyAddess.Split(':');
    return new HttpClient(hostAndPort[0], Convert.ToInt32(hostAndPort[1]), true, cookieContainer);
}

/// <summary>
/// Downloads one URL with <paramref name="client"/>, stores the outcome on
/// <paramref name="pageUrlEntity"/> and queues the page through <paramref name="clsPageContent"/>.
/// </summary>
/// <exception cref="Exception">
/// The response is null or empty, the response text contains "超时", or the request is an
/// "MGET" request (see the note in the body).
/// </exception>
private static void FetchAndQueuePage(HttpClient client, PageUrlEntity pageUrlEntity, Page.ClsPageContent clsPageContent, CookieContainer cookies)
{
    // Decide the request kind before the call so the branch taken cannot depend on the response.
    bool isImageRequest = pageUrlEntity.UrlType == "MGET";

    string pageText = client.GetResponse(pageUrlEntity.SourUrl, pageUrlEntity.Url, pageUrlEntity.UrlType, pageUrlEntity.UrlPara, pageUrlEntity.EnCode);
    pageUrlEntity.CookieContent = cookies;

    if (isImageRequest)
    {
        pageUrlEntity.PContent = "";

        // NOTE(review): nothing ever assigns the bitmap, so an "MGET" request always ends in
        // the exception below - confirm how the image is supposed to be obtained.
        Bitmap image = null;
        if (image == null)
        {
            throw new Exception("空图片");
        }

        clsPageContent.AddPageContent(pageUrlEntity, image);
        return;
    }

    pageUrlEntity.PContent = pageText;

    if (string.IsNullOrEmpty(pageText))
    {
        throw new Exception("空页面");
    }

    if (pageText.Contains("超时"))
    {
        throw new Exception("操作超时");
    }

    clsPageContent.AddPageContent(pageUrlEntity, pageText);
}