/// <summary>
/// Routes a crawl result to the line registered under <c>pResult.LineID</c>.
/// </summary>
/// <param name="pResult">The crawl result to route; its <c>LineID</c> selects the target line.</param>
internal void RecieveCrawlResult(CrawlResult pResult)
{
    PlotWaterLine sTargetLine;
    bool sLineKnown;

    // Only the dictionary lookup runs under the lock; the forward call happens outside it.
    lock (mLocker)
    {
        sLineKnown = mLineDic.TryGetValue(pResult.LineID, out sTargetLine);
    }

    if (!sLineKnown)
    {
        // No line registered for this ID: the result is discarded.
        return;
    }

    sTargetLine.RecieveCrawlResult(pResult);
}
/// <summary>
/// Thread-pool crawl work item: fetches every URL of a crawl task, records exactly one
/// result detail per task detail, releases the task's bookkeeping entries and sends the
/// collected result back through <c>WCFServer.SendingCrawlResult</c>.
/// </summary>
/// <param name="obj">The work item state; must be a <c>CrawlTask</c>.</param>
/// <exception cref="ArgumentException">
/// <paramref name="obj"/> is null or is not a <c>CrawlTask</c>.
/// </exception>
public static void ExeTask(object obj)
{
    CrawlTask sCrawlTask = obj as CrawlTask;
    if (sCrawlTask == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException further down.
        throw new ArgumentException("ExeTask expects a non-null CrawlTask as its state object.", "obj");
    }

    CrawlResult sCrawlResult = new CrawlResult(sCrawlTask.ID, sCrawlTask.PlotKey, sCrawlTask.LineID);

    // One generator per task run. The parameterless Random constructor is time-seeded on
    // .NET Framework, so tasks started in the same clock tick would otherwise draw the
    // same delays; a Guid-derived seed keeps them apart.
    Random sDelayRandom = new Random(Guid.NewGuid().GetHashCode());

    foreach (var sDetail in sCrawlTask.List)
    {
        bool sFetched;
        try
        {
            item.URL = sDetail.Url;
            item.Method = "get";
            result = httpHelper.GetHtml(item);
            sCrawlResult.List.Add(new CrawlResultDetail
            {
                Result = true,
                ID = sDetail.ID,
                Ext = "html",
                Content = result.Html,
                Info = null
            });
            sFetched = true;
        }
        catch (Exception ee)
        {
            sCrawlResult.List.Add(new CrawlResultDetail
            {
                Result = false,
                ID = sDetail.ID,
                Ext = "Error",
                Content = null,
                Info = ee.Message
            });
            sFetched = false;
        }

        if (!sFetched)
        {
            // As before, a failed fetch is not followed by a delay.
            continue;
        }

        // The delay is kept outside the try block so that a failure while waiting can never
        // add a second (failing) detail for an item that already has a successful one.
        // Snapshot the shared bounds so the check and the draw see the same values.
        int sDelayMin = DelayMin;
        int sDelayMax = DelayMax;
        if (sDelayMin >= sDelayMax)
        {
            sDelayMax = sDelayMin + 5000;
            DelayMax = sDelayMax;
        }

        // Thread.Sleep rejects negative timeouts (other than -1, which waits forever); clamp to zero.
        Thread.Sleep(Math.Max(0, sDelayRandom.Next(sDelayMin, sDelayMax)));
    }

    lock (mLocker)
    {
        mTaskPool.Remove(sCrawlTask.ID);

        // Host status bookkeeping (original note: used by the UI). Only the in-flight
        // task counter is adjusted here.
        HostStatus sHostStatus;
        if (mHostDic.TryGetValue(sCrawlTask.Host, out sHostStatus))
        {
            sHostStatus.TaskCount--;
        }
    }

    // Send the finished task back to the data center.
    WCFServer.SendingCrawlResult(sCrawlResult, sCrawlTask.Authority);
}
/// <summary>
/// Hands a crawl result to the open plot registered under <c>pCrawlResult.PlotKey</c>.
/// Results whose plot key is not present in the plot pool are ignored.
/// </summary>
/// <param name="pCrawlResult">The crawl result to dispatch.</param>
internal static void ReceiveCrawlResult(CrawlResult pCrawlResult)
{
    OpenPlot sOwningPlot;
    bool sPlotFound;

    // The pool is only consulted under the lock; the plot itself is called after releasing it.
    lock (mLocker)
    {
        sPlotFound = mPlotPool.TryGetValue(pCrawlResult.PlotKey, out sOwningPlot);
    }

    if (sPlotFound)
    {
        sOwningPlot.RecieveCrawlResult(pCrawlResult);
    }
}
/// <summary>
/// Accepts the result of a running crawl task: unregisters the task, stores every result
/// detail, marks the task details as finished in the database and queues the merged
/// task/result pairs via <c>AddCrawlDatasWaitDrill</c>.
/// </summary>
/// <param name="pResult">
/// The crawl result. It is ignored when its <c>TaskID</c> is not a running task or when it
/// does not carry exactly one detail per task detail.
/// </param>
internal void RecieveCrawlResult(CrawlResult pResult)
{
    CrawlTask sRunningTask;

    lock (mLocker)
    {
        bool sAccepted = mRunningTaskDic.TryGetValue(pResult.TaskID, out sRunningTask)
            && sRunningTask.List.Count == pResult.List.Count;
        if (!sAccepted)
        {
            // Unregistered task (or a result whose size does not match the task).
            return;
        }

        // The task counts as completed: drop it and all of its details from the running sets.
        mRunningTaskDic.Remove(pResult.TaskID);
        foreach (var sDetail in sRunningTask.List)
        {
            mRunningTaskDetailDic.Remove(sDetail.Key);
        }
    }

    // Time of the most recent result.
    mLastProduceDt = DateTime.Now;

    // Task details merged with their results, one object per pair.
    List<CrawlOriData> sMergedData = new List<CrawlOriData>();
    // IDs of the task details completed by this package.
    List<int> sFinishedIds = new List<int>();

    int sDetailCount = sRunningTask.List.Count;
    for (int sIndex = 0; sIndex < sDetailCount; sIndex++)
    {
        CrawlTaskDetail sTaskDetail = sRunningTask.List[sIndex];
        CrawlResultDetail sResultDetail = pResult.List[sIndex];

        sFinishedIds.Add(sTaskDetail.ID);

        // Third constructor argument: 0 when Chaset is "utf-8" (case-insensitive), otherwise 1.
        byte sCharsetFlag = "utf-8".Equals(Chaset, StringComparison.OrdinalIgnoreCase) ? (byte)0 : (byte)1;
        CrawlOriData sMerged = new CrawlOriData(sTaskDetail, sResultDetail, sCharsetFlag);

        // Store the raw result.
        crawlDbAdapter.InsertCrawlResult(sResultDetail);

        sMergedData.Add(sMerged);
    }

    // Flag the finished task details in the database, after all results have been stored.
    foreach (int sFinishedId in sFinishedIds)
    {
        crawlDbAdapter.UpdateCTask(sFinishedId);
    }

    AddCrawlDatasWaitDrill(sMergedData);
}
/// <summary>
/// Passes a crawl result to <c>LwbProcess</c> under the <c>CrawlCmd.发送爬行任务</c> command
/// (original note: the crawler sends the fetched HTML back to the service center).
/// </summary>
/// <param name="pCrawlResult">The crawl result to send.</param>
/// <param name="pAuthority">Authority value forwarded unchanged to <c>LwbProcess</c>.</param>
public static void SendingCrawlResult(CrawlResult pCrawlResult, string pAuthority)
{
    int sCommandCode = (int)CrawlCmd.发送爬行任务;
    LwbProcess(pCrawlResult, sCommandCode, pAuthority);
}