コード例 #1
0
ファイル: CrawlerManager.cs プロジェクト: daywrite/Crawler
        /// <summary>
        /// Thread-pool work item that executes one crawl task: requests every entry of the task's
        /// list through <c>httpHelper.GetHtml</c> (method "get"), records one
        /// <c>CrawlResultDetail</c> per entry (success or failure), releases the task's slot in the
        /// task pool, and finally sends the collected result via <c>WCFServer.SendingCrawlResult</c>.
        /// </summary>
        /// <param name="obj">
        /// The <c>CrawlTask</c> to run, typed as <see cref="object"/> so the method can be used
        /// as a <c>WaitCallback</c>.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="obj"/> is not a <c>CrawlTask</c>.</exception>
        public static void ExeTask(object obj)
        {
            // Fail with a diagnosable exception instead of a NullReferenceException a few lines
            // further down; nothing has been registered for cleanup yet at this point.
            if (obj == null)
            {
                throw new ArgumentNullException("obj");
            }

            CrawlTask sCrawlTask = obj as CrawlTask;
            if (sCrawlTask == null)
            {
                throw new ArgumentException("ExeTask expects a CrawlTask instance.", "obj");
            }

            CrawlResult sCrawlResult = new CrawlResult(sCrawlTask.ID, sCrawlTask.PlotKey, sCrawlTask.LineID);

            // One generator per task, seeded from a GUID: a parameterless Random created per page
            // can be clock-seeded, so tasks started at the same moment would sleep in lockstep.
            Random sJitter = new Random(Guid.NewGuid().GetHashCode());

            // A task without a page list is treated as empty so that the bookkeeping below still
            // runs and the task's slot is released.
            if (sCrawlTask.List != null)
            {
                foreach (var t in sCrawlTask.List)
                {
                    try
                    {
                        // item, result and httpHelper are declared outside this method.
                        // NOTE(review): if they are shared static members, concurrent tasks
                        // race on them - confirm against the rest of the class.
                        item.URL    = t.Url;
                        item.Method = "get";

                        result = httpHelper.GetHtml(item);

                        sCrawlResult.List.Add(new CrawlResultDetail
                        {
                            Result  = true,
                            ID      = t.ID,
                            Ext     = "html",
                            Content = result.Html,
                            Info    = null
                        });

                        // Snapshot the shared delay bounds once, so a concurrent change cannot
                        // slip in between the range check and Random.Next and make it throw
                        // after the success record has already been added.
                        int sDelayMin = DelayMin;
                        int sDelayMax = DelayMax;
                        if (sDelayMin >= sDelayMax)
                        {
                            sDelayMax = sDelayMin + 5000;
                            // Repair the shared setting as well, as the original code did.
                            DelayMax = sDelayMax;
                        }

                        // Random pause between pages; only taken after a successful fetch.
                        Thread.Sleep(sJitter.Next(sDelayMin, sDelayMax));
                    }
                    catch (Exception ee)
                    {
                        // A failed page is reported in the result rather than aborting the task.
                        sCrawlResult.List.Add(new CrawlResultDetail
                        {
                            Result  = false,
                            ID      = t.ID,
                            Ext     = "Error",
                            Content = null,
                            Info    = ee.Message
                        });
                    }
                }
            }

            lock (mLocker)
            {
                mTaskPool.Remove(sCrawlTask.ID);

                // Per-host status bookkeeping: one fewer task running against this host.
                HostStatus sHostStatus;
                if (mHostDic.TryGetValue(sCrawlTask.Host, out sHostStatus))
                {
                    sHostStatus.TaskCount--;
                }
            }

            // Send the finished task's result back to the data center.
            WCFServer.SendingCrawlResult(sCrawlResult, sCrawlTask.Authority);
        }
コード例 #2
0
ファイル: CrawlerManager.cs プロジェクト: daywrite/Crawler
        /// <summary>
        /// Fetches a batch of crawl tasks through <c>WCFServer.GetCrawlTask</c>, registers each one
        /// in the task pool and per-host status table, and queues it on the thread pool to be run
        /// by <c>ExeTask</c>. Only one fetch runs at a time; overlapping calls return immediately.
        /// </summary>
        /// <returns>
        /// A <c>Success</c> result with a status message when a fetch is already in progress, when
        /// no task slot is free, or after the batch has been queued; the server's own result when
        /// its <c>Data</c> is not a task list; an <c>Error</c> result carrying the exception
        /// message when anything in the fetch throws.
        /// </returns>
        public static LwbResult DbAdapter()
        {
            // Test and clear the "may fetch" flag in one critical section; as two separate
            // unsynchronized steps, two callers could both pass the check and fetch concurrently.
            lock (mLocker)
            {
                if (!mCanAttemper)
                {
                    return new LwbResult(LwbResultType.Success, "爬虫正在获取任务中,请勿累死爬虫");
                }

                mCanAttemper = false;
            }

            try
            {
                // Never request more than this many tasks per fetch.
                const int sBatchLimit = 5;

                int sMax;
                List<string> sList = new List<string>();

                // Read the free capacity and the busy hosts under a single lock so both come
                // from the same snapshot of the pool.
                lock (mLocker)
                {
                    sMax = Math.Min(sBatchLimit, MaxThreads - mTaskPool.Count);

                    if (sMax > 0)
                    {
                        foreach (KeyValuePair<string, HostStatus> sKp in mHostDic)
                        {
                            if (sKp.Value.Busy)
                            {
                                sList.Add(sKp.Key);
                            }
                        }
                    }
                }

                // "<= 0" rather than "== 0": the pool can hold more tasks than MaxThreads, and a
                // negative task count must never be sent to the server.
                if (sMax <= 0)
                {
                    return new LwbResult(LwbResultType.Success, "爬虫已经正在干将近" + MaxThreads + "个任务,让他歇会吧");
                }

                // Ask the remote server for tasks, passing the list of hosts currently marked busy.
                LwbResult sLwbResult = WCFServer.GetCrawlTask(new Input获取生产线任务列表
                {
                    RuningTaskHost = sList,
                    TaskMax        = sMax
                });

                List<CrawlTask> sCrawlTaskList = sLwbResult.Data as List<CrawlTask>;

                // No task list in the reply: hand the server's result straight back to the caller.
                if (sCrawlTaskList == null)
                {
                    return sLwbResult;
                }

                foreach (CrawlTask sTask in sCrawlTaskList)
                {
                    lock (mLocker)
                    {
                        // Register the task in the pool before it starts running.
                        mTaskPool[sTask.ID] = sTask;

                        HostStatus sHostStatus;
                        if (!mHostDic.TryGetValue(sTask.Host, out sHostStatus))
                        {
                            sHostStatus          = new HostStatus(sTask.Host);
                            mHostDic[sTask.Host] = sHostStatus;
                        }

                        sHostStatus.TaskCount++;
                    }

                    // Queue outside the lock: ExeTask takes the same lock when it finishes.
                    ThreadPool.QueueUserWorkItem(new WaitCallback(ExeTask), sTask);
                }

                mLastAddTaskDt = DateTime.Now;

                return new LwbResult(LwbResultType.Success, "爬虫获取任务完毕,很开心");
            }
            catch (Exception ee)
            {
                return new LwbResult(LwbResultType.Error, "爬虫在获取任务中生病了" + ee.Message);
            }
            finally
            {
                // Re-open the gate under the same lock that guards the test-and-set above.
                lock (mLocker)
                {
                    mCanAttemper = true;
                }
            }
        }