Ejemplo n.º 1
0
 /// <summary>
 /// Creates a context that bundles the collaborators used together by the scheduler pipeline.
 /// </summary>
 /// <param name="scheduler">Scheduler stored in <c>Scheduler</c>.</param>
 /// <param name="downloaderFactory">Factory stored in <c>DownloaderFactory</c>.</param>
 /// <param name="resultItemPipeline">Pipeline stored in <c>ResultPipeline</c>.</param>
 /// <param name="pageAnalyzerFactory">Factory stored in <c>PageAnalyzerFactory</c>.</param>
 public SchedulerContext(
     IMonitorableScheduler scheduler,
     IDownloaderFactory downloaderFactory,
     IResultItemPipeline resultItemPipeline,
     IPageAnalyzerFactory pageAnalyzerFactory)
 {
     // Plain capture of the supplied collaborators; no validation is performed here.
     this.Scheduler = scheduler;
     this.DownloaderFactory = downloaderFactory;
     this.ResultPipeline = resultItemPipeline;
     this.PageAnalyzerFactory = pageAnalyzerFactory;
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a broker from the given scheduler, downloaders, result pipeline and page-analyzer type map.
        /// </summary>
        /// <param name="maxDownloadThreadNumber">Value stored as the maximum number of download threads.</param>
        /// <param name="scheduler">Scheduler the broker works against.</param>
        /// <param name="downloaders">Downloaders to register; each <c>Topic</c> must be unique.</param>
        /// <param name="resultPipeline">Pipeline stored for later use by the broker.</param>
        /// <param name="pageAnalyzerTypes">Key/type pairs copied into an internal concurrent dictionary; a repeated key keeps its first value.</param>
        /// <exception cref="ArgumentException">Two or more downloaders share the same <c>Topic</c>.</exception>
        public ConsumerBroker(int maxDownloadThreadNumber, IMonitorableScheduler scheduler, IEnumerable<IDownloader> downloaders, ResultPipeline resultPipeline, IEnumerable<KeyValuePair<string, Type>> pageAnalyzerTypes)
        {
            _maxDownloadThreadNumber = maxDownloadThreadNumber;
            _scheduler = scheduler;

            // Take a private copy so later changes to the caller's sequence do not affect the broker.
            _downloaders = new List<IDownloader>(downloaders);

            // A topic identifies exactly one downloader, so any group with more than one member is an error.
            bool hasDuplicateTopic = _downloaders
                .GroupBy(downloader => downloader.Topic)
                .Any(topicGroup => topicGroup.Count() > 1);
            if (hasDuplicateTopic)
            {
                throw new ArgumentException("downloader.Topic不能重复", nameof(downloaders));
            }

            _resultPipeline = resultPipeline;

            _pageAnalyzerTypes = new ConcurrentDictionary<string, Type>();
            foreach (KeyValuePair<string, Type> entry in pageAnalyzerTypes.ToList())
            {
                // TryAdd ignores keys that are already present, so the first occurrence wins.
                _pageAnalyzerTypes.TryAdd(entry.Key, entry.Value);
            }

            _messagePullAutoResetEvent = new AutoResetEvent(false);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs the crawl: initializes components, drives <c>ThreadNum</c> worker loops that poll the
        /// scheduler and process requests, then destroys the pipelines and raises the closing notifications.
        /// </summary>
        /// <remarks>
        /// The call blocks inside <c>Parallel.For</c> until every worker loop has ended, which happens
        /// once <c>Stat</c> is no longer <c>Status.Running</c>.
        /// </remarks>
        public void Run()
        {
            // Defined elsewhere; presumably rejects a second concurrent run — confirm against its implementation.
            CheckIfRunning();

            Stat     = Status.Running;
            IsExited = false;

#if !NET_CORE
            // Enable multi-threading support: raise the HTTP connection limit to 1000 so concurrent downloads are not throttled.
            System.Net.ServicePointManager.DefaultConnectionLimit = 1000;
#endif

            InitComponent();

            // Hard cast: throws InvalidCastException if the configured scheduler is not monitorable.
            IMonitorableScheduler monitor = (IMonitorableScheduler)Scheduler;

            // Only stamp the start time once, so a later run keeps the original value.
            if (StartTime == DateTime.MinValue)
            {
                StartTime = DateTime.Now;
            }

            // One iteration per worker; each iteration is a long-lived polling loop rather than a unit of work.
            Parallel.For(0, ThreadNum, new ParallelOptions
            {
                MaxDegreeOfParallelism = ThreadNum
            }, i =>
            {
                // Passed by ref to WaitNewUrl while the scheduler is empty; reset whenever a request arrives.
                int waitCount  = 0;
                // Despite the name: false until this worker has completed its first request.
                bool firstTask = false;

                // Each worker uses its own downloader clone.
                var downloader = Downloader.Clone();

                while (Stat == Status.Running)
                {
                    Request request = Scheduler.Poll(this);

                    if (request == null)
                    {
                        // Nothing queued: once the wait limit is exceeded and exit-on-complete is set,
                        // mark the run finished, which also ends the other workers' loops.
                        if (waitCount > _waitCountLimit && IsExitWhenComplete)
                        {
                            Stat = Status.Finished;
                            break;
                        }

                        // wait until new url added
                        WaitNewUrl(ref waitCount);
                    }
                    else
                    {
                        Log.WriteLine($"Left: {monitor.GetLeftRequestsCount(this)} Total: {monitor.GetTotalRequestsCount(this)} Thread: {ThreadNum}");

                        waitCount = 0;

                        try
                        {
                            ProcessRequest(request, downloader);
                            // Pause for the site's configured sleep time before reporting success.
                            Thread.Sleep(Site.SleepTime);
#if TEST
                            // TEST builds only: time the OnSuccess callback.
                            System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
                            sw.Reset();
                            sw.Start();
#endif

                            OnSuccess(request);
#if TEST
                            sw.Stop();
                            Console.WriteLine("OnSuccess:" + (sw.ElapsedMilliseconds).ToString());
#endif
                        }
                        catch (Exception e)
                        {
                            // A failed request is reported and logged; the worker keeps running.
                            OnError(request);
                            Logger.Error("采集失败: " + request.Url + ".", e);
                        }
                        finally
                        {
#if !NET_CORE
                            // Hand the proxy used by this request back to the pool together with the status code.
                            // NOTE(review): the (int) unboxing throws if the StatusCode extra is null — confirm it is
                            // always set whenever the Proxy extra is set.
                            if (Site.HttpProxyPoolEnable && request.GetExtra(Request.Proxy) != null)
                            {
                                Site.ReturnHttpProxyToPool((HttpHost)request.GetExtra(Request.Proxy), (int)request.GetExtra(Request.StatusCode));
                            }
#endif
                            // Counted in finally, so both successful and failed requests are included.
                            FinishedPageCount.Inc();
                        }

                        // One-time 3 second pause after this worker's first request.
                        // NOTE(review): the reason for this delay is not evident from the code — confirm intent.
                        if (!firstTask)
                        {
                            Thread.Sleep(3000);
                            firstTask = true;
                        }
                    }
                }
            });

            // All workers have ended at this point.
            FinishedTime = DateTime.Now;

            foreach (IPipeline pipeline in Pipelines)
            {
                SafeDestroy(pipeline);
            }

            // OnClose is only invoked for a run that finished on its own, not for a stopped or exited one.
            if (Stat == Status.Finished)
            {
                OnClose();

                Logger.Info($"任务 {Identity} 结束.");
            }

            if (Stat == Status.Stopped)
            {
                Logger.Info("任务 " + Identity + " 停止成功!");
            }

            SpiderClosingEvent?.Invoke();

            Log.WaitForExit();

            if (Stat == Status.Exited)
            {
                Logger.Info("任务 " + Identity + " 退出成功!");
            }

            // Set last, after all teardown above has completed.
            IsExited = true;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Performs the one-time setup needed before crawling: registers the Ctrl+C handler, initializes
        /// the scheduler, applies defaults for pipelines and the thread pool, pushes the start requests
        /// to the scheduler, and optionally starts the console status monitor.
        /// </summary>
        /// <remarks>
        /// Calls after the first successful one only log a message and return.
        /// </remarks>
        /// <exception cref="InvalidOperationException">No <c>Downloader</c> has been configured.</exception>
        public void InitComponent()
        {
            if (_init)
            {
#if NET_CORE
                Logger.Info($"Component already init.", true);
#else
                Logger.Info("Component already init.");
#endif

                return;
            }

            // Fail fast, before any side effects, instead of hitting a NullReferenceException below.
            // No default downloader is created here.
            if (Downloader == null)
            {
                throw new InvalidOperationException("Downloader must be set before components are initialized.");
            }

            Console.CancelKeyPress += ConsoleCancelKeyPress;

            Scheduler.Init(this);

            Downloader.ThreadNum = ThreadNum;

            // Fall back to file output when the caller configured no pipeline.
            if (Pipelines.Count == 0)
            {
                Pipelines.Add(new FilePipeline());
            }
            if (ThreadPool == null)
            {
                ThreadPool = new CountableThreadPool(ThreadNum);
            }

            if (StartRequests != null)
            {
                if (StartRequests.Count > 0)
                {
                    // Each request is cloned, so the scheduler never shares instances with StartRequests.
                    Parallel.ForEach(StartRequests, new ParallelOptions()
                    {
                        MaxDegreeOfParallelism = 100
                    }, request =>
                    {
                        Scheduler.Push((Request)request.Clone(), this);
                    });

                    ClearStartRequests();

#if NET_CORE
                    Logger.Info("Push Request to Scheduler success.", true);
#else
                    Logger.Info("Push Request to Scheduler success.");
#endif
                }
                else
                {
                    // Both build flavours now report that nothing was pushed.
#if NET_CORE
                    Logger.Info("Push Zero Request to Scheduler.", true);
#else
                    Logger.Info("Push Zero Request to Scheduler.");
#endif
                }
            }

            // Only start the monitor when it has something to do: status output is enabled and
            // the scheduler can report counts.
            IMonitorableScheduler monitor = ShowConsoleStatus ? Scheduler as IMonitorableScheduler : null;
            if (monitor != null)
            {
                // The loop never returns, so ask for a dedicated thread rather than occupying a pool worker.
                Task.Factory.StartNew(() => RunConsoleStatusLoop(monitor), TaskCreationOptions.LongRunning);
            }

            _init = true;
        }

        /// <summary>
        /// Prints the queue and thread statistics to the console every two seconds while the spider is running.
        /// </summary>
        /// <param name="monitor">Scheduler view used to read the left and total request counts.</param>
        private void RunConsoleStatusLoop(IMonitorableScheduler monitor)
        {
            while (true)
            {
                try
                {
                    if (Stat == Status.Running && !_waitingToExit)
                    {
                        Console.WriteLine(
                            $"Left: {monitor.GetLeftRequestsCount(this)} Total: {monitor.GetTotalRequestsCount(this)} AliveThread: {ThreadPool.ThreadAlive} ThreadNum: {ThreadPool.ThreadNum}");
                    }
                }
                catch
                {
                    // Status output is best effort; a failed read must not stop the monitor.
                }
                Thread.Sleep(2000);
            }
        }