示例#1
0
 /// <summary>
 /// Hands a crawl result to the <c>PlotWaterLine</c> stored in <c>mLineDic</c> under
 /// the result's line ID. A result whose line ID is not present is dropped.
 /// </summary>
 /// <param name="pResult">Crawl result to dispatch; its <c>LineID</c> selects the target line.</param>
 internal void RecieveCrawlResult(CrawlResult pResult)
 {
     PlotWaterLine sTargetLine;
     bool sLineFound;

     // Only the dictionary lookup happens under the lock; the hand-off runs outside it.
     lock (mLocker)
     {
         sLineFound = mLineDic.TryGetValue(pResult.LineID, out sTargetLine);
     }

     if (!sLineFound)
     {
         // Unknown line: discard the result.
         return;
     }

     sTargetLine.RecieveCrawlResult(pResult);
 }
示例#2
0
        /// <summary>
        /// Thread-pool crawl job: fetches every URL of the given <c>CrawlTask</c>, records one
        /// <c>CrawlResultDetail</c> per URL (success or failure), removes the task from the
        /// local bookkeeping and sends the collected result back through <c>WCFServer</c>.
        /// </summary>
        /// <param name="obj">The task to execute; must be a non-null <c>CrawlTask</c>.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="obj"/> is null or is not a <c>CrawlTask</c>.
        /// </exception>
        public static void ExeTask(object obj)
        {
            CrawlTask sCrawlTask = obj as CrawlTask;
            if (sCrawlTask == null)
            {
                // Fail with a clear message instead of a NullReferenceException further down.
                throw new ArgumentException("ExeTask expects a non-null CrawlTask.", "obj");
            }

            CrawlResult sCrawlResult = new CrawlResult(sCrawlTask.ID, sCrawlTask.PlotKey, sCrawlTask.LineID);

            // One generator per task with a seed that is not time-based, so tasks started in the
            // same clock tick on different pool threads do not produce identical delay sequences.
            Random sDelayRandom = new Random(Guid.NewGuid().GetHashCode());

            // NOTE(review): item, result and httpHelper are declared outside this method; if they
            // are shared static fields, concurrent tasks will race on them - confirm.
            sCrawlTask.List.ForEach(t =>
            {
                try
                {
                    item.URL = t.Url;
                    item.Method = "get";

                    result = httpHelper.GetHtml(item);

                    sCrawlResult.List.Add(new CrawlResultDetail
                    {
                        Result = true,
                        ID = t.ID,
                        Ext = "html",
                        Content = result.Html,
                        Info = null
                    });

                    // Snapshot the shared delay bounds so the range check and the Next() call see
                    // the same values even if another thread changes them in between.
                    int sDelayMin = DelayMin;
                    int sDelayMax = DelayMax;
                    if (sDelayMin >= sDelayMax)
                    {
                        // Repair an empty/inverted range, and publish the repaired upper bound
                        // exactly as the original code did.
                        sDelayMax = sDelayMin + 5000;
                        DelayMax = sDelayMax;
                    }

                    // Pause between requests (only after a successful fetch).
                    Thread.Sleep(sDelayRandom.Next(sDelayMin, sDelayMax));
                }
                catch (Exception ee)
                {
                    // Record the failure for this URL and continue with the next one.
                    sCrawlResult.List.Add(new CrawlResultDetail
                    {
                        Result = false,
                        ID = t.ID,
                        Ext = "Error",
                        Content = null,
                        Info = ee.Message
                    });
                }
            });

            lock (mLocker)
            {
                mTaskPool.Remove(sCrawlTask.ID);

                // Host status bookkeeping (original comment: "UI design").
                HostStatus sHostStatus;
                if (mHostDic.TryGetValue(sCrawlTask.Host, out sHostStatus))
                {
                    sHostStatus.TaskCount--;
                }
            }

            // Send the task result back to the data center.
            WCFServer.SendingCrawlResult(sCrawlResult, sCrawlTask.Authority);
        }
示例#3
0
 /// <summary>
 /// Forwards a crawl result to the <c>OpenPlot</c> stored in <c>mPlotPool</c> under the
 /// result's plot key. Nothing happens when no plot is stored under that key.
 /// </summary>
 /// <param name="pCrawlResult">Crawl result to forward; its <c>PlotKey</c> selects the plot.</param>
 internal static void ReceiveCrawlResult(CrawlResult pCrawlResult)
 {
     OpenPlot sTargetPlot;
     bool sPlotKnown;

     // The pool is only touched while holding the lock.
     lock (mLocker)
     {
         sPlotKnown = mPlotPool.TryGetValue(pCrawlResult.PlotKey, out sTargetPlot);
     }

     // The plot itself processes the result outside the lock.
     if (sPlotKnown)
     {
         sTargetPlot.RecieveCrawlResult(pCrawlResult);
     }
 }
示例#4
0
        /// <summary>
        /// Completes a running crawl task with the result that came back for it.
        /// The result is ignored when its task ID is not in <c>mRunningTaskDic</c> or when the
        /// number of result details differs from the number of task details. Otherwise the task
        /// is unregistered, each result detail is stored, each task detail is marked as finished
        /// in the database, and the merged data is passed to <c>AddCrawlDatasWaitDrill</c>.
        /// </summary>
        /// <param name="pResult">Result package for one crawl task.</param>
        internal void RecieveCrawlResult(CrawlResult pResult)
        {
            CrawlTask sTask;
            lock (mLocker)
            {
                bool sRegistered = mRunningTaskDic.TryGetValue(pResult.TaskID, out sTask);
                if (!sRegistered || sTask.List.Count != pResult.List.Count)
                {
                    // Unregistered task, or a result that does not line up with the task.
                    return;
                }

                // The task counts as finished: drop it and all of its details from the running sets.
                mRunningTaskDic.Remove(pResult.TaskID);
                foreach (CrawlTaskDetail sRunningDetail in sTask.List)
                {
                    mRunningTaskDetailDic.Remove(sRunningDetail.Key);
                }
            }

            // Remember when a result was last produced.
            mLastProduceDt = DateTime.Now;

            // Task details and result details are paired by index.
            List<CrawlOriData> sMergedData = new List<CrawlOriData>();
            // IDs of the task details contained in this package.
            List<int> sFinishedIds = new List<int>();

            for (int sIndex = 0; sIndex < sTask.List.Count; sIndex++)
            {
                CrawlResultDetail sResultDetail = pResult.List[sIndex];
                CrawlTaskDetail sTaskDetail = sTask.List[sIndex];

                sFinishedIds.Add(sTaskDetail.ID);

                // 0 when Chaset is "utf-8" (case-insensitive), 1 otherwise.
                byte sCharsetFlag = (byte)("utf-8".Equals(Chaset, StringComparison.OrdinalIgnoreCase) ? 0 : 1);
                CrawlOriData sMerged = new CrawlOriData(sTaskDetail, sResultDetail, sCharsetFlag);

                // Store the raw result.
                crawlDbAdapter.InsertCrawlResult(sResultDetail);

                sMergedData.Add(sMerged);
            }

            // Flag every detail as completed in the database, after all raw results are stored.
            foreach (int sFinishedId in sFinishedIds)
            {
                crawlDbAdapter.UpdateCTask(sFinishedId);
            }

            AddCrawlDatasWaitDrill(sMergedData);
        }
示例#5
0
 /// <summary>
 /// Sends the HTML fetched by the crawler back to the service center.
 /// </summary>
 /// <param name="pCrawlResult">Crawl result to send.</param>
 /// <param name="pAuthority">Authority value, passed through unchanged to <c>LwbProcess</c>.</param>
 public static void SendingCrawlResult(CrawlResult pCrawlResult, string pAuthority)
 {
     // Command code of the crawl-task message, as an integer constant.
     const int sCommandCode = (int)CrawlCmd.发送爬行任务;

     LwbProcess(pCrawlResult, sCommandCode, pAuthority);
 }