Ejemplo n.º 1
0
        /// <summary>
        /// Prepares a spider from <paramref name="args"/>, registers its control channel, starts it and
        /// blocks until its status is no longer <c>Stopped</c>, <c>Running</c> or <c>Init</c>. The spider
        /// is then disposed and <c>DoValidate</c> is called.
        /// </summary>
        /// <param name="args">Arguments passed unchanged to <c>PrepareSpider</c>.</param>
        public virtual void Run(params string[] args)
        {
            try
            {
                Core.Spider spider = PrepareSpider(args);

                if (spider == null)
                {
                    // Nothing to run; the outer finally still calls Log.WaitForExit().
                    return;
                }

                try
                {
                    RegisterControl(spider);

                    spider.Start();

                    // Poll once per second. Stopped is part of the wait set, presumably because a
                    // stopped spider can be started again through the control channel — confirm.
                    while (spider.StatusCode == Status.Stopped || spider.StatusCode == Status.Running || spider.StatusCode == Status.Init)
                    {
                        Thread.Sleep(1000);
                    }
                }
                finally
                {
                    // Dispose on every path, including when registration, Start() or the wait loop throws.
                    spider.Dispose();
                }

                // Validation runs only after a normal finish, and after the spider has been disposed.
                DoValidate();
            }
            finally
            {
                Log.WaitForExit();
            }
        }
Ejemplo n.º 2
0
        //[Ignore]
        //[TestMethod]
        //public void TestWaitAndNotify()
        //{
        //	for (int i = 0; i < 10000; i++)
        //	{
        //		Console.WriteLine("round " + i);
        //		TestRound();
        //	}
        //}

        /// <summary>
        /// Builds a spider from a <c>Site</c> with <c>SleepTime</c> 0, a <c>TestPageProcessor</c> and a
        /// <c>TestScheduler</c>, sets its thread number to 10 and runs it.
        /// </summary>
        private void TestRound()
        {
            Site site = new Site();
            site.SleepTime = 0;

            TestPageProcessor processor = new TestPageProcessor();
            TestScheduler scheduler = new TestScheduler();

            Core.Spider spider = Core.Spider
                .Create(site, processor, scheduler)
                .SetThreadNum(10);

            spider.Run();
        }
Ejemplo n.º 3
0
            /// <summary>
            /// Creates a listener bound to <paramref name="spider"/>. It copies the spider's user id and
            /// task group, and builds the error-request collection name and the Mongo database name from
            /// their <c>Encrypt.Md5Encrypt</c> values. When <c>spider.SaveStatus</c> is set and
            /// <c>StatusServer</c> is non-empty, it subscribes to the spider's fail, success and closing
            /// events and starts a background loop that calls <c>PostStatus</c>.
            /// </summary>
            /// <param name="spider">The spider whose events and status are monitored.</param>
            public MonitorSpiderListener(Core.Spider spider)
            {
                _spider = spider;

                _userId    = spider.UserId;
                _taskGroup = spider.TaskGroup;
                _errorRequestCollection = Encrypt.Md5Encrypt(_taskGroup) + "_error_request";
                _mongoDatabaseName      = "db_" + Encrypt.Md5Encrypt(_userId);

                if (spider.SaveStatus && !string.IsNullOrEmpty(StatusServer))
                {
                    spider.RequestedFailEvent    += OnError;
                    spider.RequestedSuccessEvent += OnSuccess;
                    spider.SpiderClosingEvent    += OnClose;

                    // The loop below never returns and spends its time blocked in Thread.Sleep, so it is
                    // started as LongRunning instead of being queued as an ordinary thread-pool task.
                    Task.Factory.StartNew(() =>
                    {
                        while (true)
                        {
                            try
                            {
                                PostStatus();
                            }
                            catch (Exception)
                            {
                                // Best effort: a failed status post is ignored and retried on the next round.
                            }

                            Thread.Sleep(5000);
                        }
                    }, TaskCreationOptions.LongRunning);
                }
            }
Ejemplo n.º 4
0
        /// <summary>
        /// Prepares the spider from <paramref name="args"/>, registers its control channel, starts it
        /// and blocks while its status is <c>Running</c> or <c>Init</c>. The spider is then disposed,
        /// <c>AfterSpiderFinished</c> is raised and <c>DoValidate</c> is called.
        /// </summary>
        /// <param name="args">Arguments passed unchanged to <c>PrepareSpider</c>.</param>
        public virtual void Run(params string[] args)
        {
            try
            {
                spider = PrepareSpider(args);

                if (spider == null)
                {
                    // Nothing to run; the outer finally still disposes the default monitor.
                    return;
                }

                try
                {
                    RegisterControl(spider);

                    spider.Start();

                    // Poll once per second until the spider leaves the Running/Init states.
                    while (spider.StatusCode == Status.Running || spider.StatusCode == Status.Init)
                    {
                        Thread.Sleep(1000);
                    }
                }
                finally
                {
                    // Dispose on every path, including when registration, Start() or the wait loop throws.
                    spider.Dispose();
                }

                // Post-run hooks run only after a normal finish, and after the spider has been disposed.
                AfterSpiderFinished?.Invoke();

                DoValidate();
            }
            finally
            {
                SpiderMonitor.Default.Dispose();
            }
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Stores the spider and its status source, reads the "redishost" and "redishostpass" application
 /// settings, and creates the Redis pool only when a host is configured.
 /// </summary>
 public RedisStatusUpdater(Core.Spider spider, ISpiderStatus spiderStatus)
 {
     _spider = spider;
     _spiderStatus = spiderStatus;

     string redisHost = ConfigurationManager.AppSettings["redishost"];
     _password = ConfigurationManager.AppSettings["redishostpass"];

     // Without a configured host there is nothing to connect to; _pool stays unassigned.
     if (string.IsNullOrEmpty(redisHost))
     {
         return;
     }

     _pool = new RedisManagerPool(redisHost);
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Creates an <c>OoSpider</c> for http://www.36kr.com/ with 20 threads, starts it, and then
        /// registers it with the <c>SpiderMonitor</c> singleton.
        /// </summary>
        public static void Run()
        {
            Site krSite = new Site();
            krSite.AddStartUrl("http://www.36kr.com/");

            Core.Spider krSpider = OoSpider
                .Create(krSite, new CollectorPageModelToDbPipeline(), typeof(Kr36NewsModel))
                .SetThreadNum(20);
            krSpider.Start();

            // Registration happens after Start(), in the same order as before.
            SpiderMonitor.Instance.Register(krSpider);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Keeps references to the spider and its status source, loads the Redis host and password from
        /// the "redishost" / "redishostpass" application settings, and builds the connection pool when a
        /// host value is present.
        /// </summary>
        public RedisStatusUpdater(Core.Spider spider, ISpiderStatus spiderStatus)
        {
            _spider = spider;
            _spiderStatus = spiderStatus;

            string configuredHost = ConfigurationManager.AppSettings["redishost"];
            _password = ConfigurationManager.AppSettings["redishostpass"];

            bool hostMissing = string.IsNullOrEmpty(configuredHost);
            if (!hostMissing)
            {
                _pool = new RedisManagerPool(configuredHost);
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Attaches the optional monitoring front ends for <paramref name="spider"/>: a modal
        /// <c>Form1</c> when <c>spider.ShowControl</c> is set, and a <c>RedisStatusUpdater</c> when
        /// <c>spider.SaveStatusInRedis</c> is set.
        /// </summary>
        /// <param name="spider">The spider whose flags decide what is attached.</param>
        /// <param name="spiderStatus">Status source handed to the form and the Redis updater.</param>
        /// <param name="monitorSpiderListener">Listener whose <c>ClosingEvent</c> triggers a status update.</param>
        private void Register(Core.Spider spider, ISpiderStatus spiderStatus, MonitorSpiderListener monitorSpiderListener)
        {
            if (spider.ShowControl)
            {
                // A form shown with ShowDialog() is not disposed when it closes, so release it explicitly.
                // NOTE(review): assumes Form1 is a Windows Forms Form (IDisposable) — confirm.
                using (Form1 form1 = new Form1(spiderStatus))
                {
                    // The call is modal: the code below runs only after the dialog has been closed.
                    form1.ShowDialog();
                }
            }

            if (spider.SaveStatusInRedis)
            {
                RedisStatusUpdater statusUpdater = new RedisStatusUpdater(spider, spiderStatus);
                monitorSpiderListener.ClosingEvent += statusUpdater.UpdateStatus;
                statusUpdater.Run();
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Downloads one page with an <c>HttpClientDownloader</c> and prints its content, then starts,
        /// stops and restarts a single-threaded spider with a 10 second pause after each step.
        /// </summary>
        public void TestStartAndStop()
        {
            const int pauseMilliseconds = 10000;

            HttpClientDownloader downloader = new HttpClientDownloader();

            Site site = new Site();
            site.EncodingName = "UTF-8";
            SimplePageProcessor processor = new SimplePageProcessor("http://www.oschina.net/", "http://www.oschina.net/*");

            Core.Spider spider = Core.Spider
                .Create(site, processor)
                .AddPipeline(new TestPipeline())
                .SetThreadNum(1);

            Request request = new Request("http://www.baidu.com/", 2, new Dictionary <string, dynamic>());
            Page p = downloader.Download(request, spider);
            Console.WriteLine(p.Content);

            spider.Start();
            Thread.Sleep(pauseMilliseconds);

            spider.Stop();
            Thread.Sleep(pauseMilliseconds);

            spider.Start();
            Thread.Sleep(pauseMilliseconds);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Queues list pages 1 to 4 of the youku catalogue as start URLs, starts a single-threaded
        /// spider over them, and then waits on <c>Console.Read()</c>.
        /// </summary>
        public static void Run()
        {
            var site = new Site();
            site.EncodingName = "UTF-8";

            int page = 1;
            while (page <= 4)
            {
                site.AddStartUrl("http://" + $"www.youku.com/v_olist/c_97_g__a__sg__mt__lg__q__s_1_r_0_u_0_pt_0_av_0_ag_0_sg__pr__h__d_1_p_{page}.html");
                page++;
            }

            MyPageProcessor processor = new MyPageProcessor();
            QueueDuplicateRemovedScheduler scheduler = new QueueDuplicateRemovedScheduler();

            Core.Spider spider = Core.Spider
                .Create(site, processor, scheduler)
                .AddPipeline(new MyPipeline())
                .SetThreadNum(1);

            //spider.SetDownloader(downloader);
            spider.Start();

            Console.Read();
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Prepares a spider from <paramref name="args"/> and runs it when one was created, then calls
        /// <c>RunAfterSpiderFinished</c>, and calls <c>DoValidate</c> when <c>_validateReportTo</c> is
        /// set. The spider, if any, is disposed on every path.
        /// </summary>
        /// <param name="args">Arguments passed unchanged to <c>PrepareSpider</c>.</param>
        public void Run(params string[] args)
        {
            Core.Spider spider = null;
            try
            {
                spider = PrepareSpider(args);
                if (spider != null)
                {
                    spider.Run();
                }

                // Runs even when no spider was prepared.
                RunAfterSpiderFinished();

                if (string.IsNullOrEmpty(_validateReportTo))
                {
                    // No report target configured, so validation is skipped; finally still runs.
                    return;
                }

                DoValidate();
            }
            finally
            {
                if (spider != null)
                {
                    spider.Dispose();
                }
            }
        }
Ejemplo n.º 12
0
            /// <summary>
            /// Creates a listener bound to <paramref name="spider"/>. When <c>spider.SaveStatusToRedis</c>
            /// is set, a background task is started. On builds without <c>NET_CORE</c>, and only when the
            /// spider's scheduler is a <c>RedisScheduler</c>, that task calls <c>UpdateStatus</c> on Redis
            /// database 0 every three seconds until <c>Closed</c> is observed.
            /// </summary>
            /// <param name="spider">The spider being monitored.</param>
            public MonitorSpiderListener(Core.Spider spider)
            {
                _spider = spider;

                if (!spider.SaveStatusToRedis)
                {
                    return;
                }

                Task.Factory.StartNew(() =>
                {
#if !NET_CORE
                    RedisScheduler redisScheduler = spider.Scheduler as RedisScheduler;
                    if (redisScheduler == null)
                    {
                        // Status is only published through a Redis-backed scheduler.
                        return;
                    }

                    ConnectionMultiplexer multiplexer = redisScheduler.Redis;
                    IDatabase statusDb = multiplexer.GetDatabase(0);

                    for (;;)
                    {
                        try
                        {
                            // Read the flag before updating, so a close seen here gets one final update.
                            bool finalRound = Closed;

                            UpdateStatus(statusDb);

                            if (finalRound)
                            {
                                break;
                            }
                        }
                        catch (Exception)
                        {
                            // ignored
                        }

                        Thread.Sleep(3000);
                    }
#endif
                });
            }
Ejemplo n.º 13
0
        /// <summary>
        /// Builds a script-driven page processor from <paramref name="param"/>, configures its site,
        /// evaluates the embedded define.js followed by the user's script file in a Jurassic engine,
        /// then queues the requested URLs on the spider and runs it.
        /// </summary>
        /// <param name="param">Options supplying language, script file, thread count, sleep time and URLs.</param>
        private static void StartSpider(Options param)
        {
            ScriptProcessor pageProcessor = ScriptProcessorBuilder.Custom()
                .Language(param.Lang)
                .ScriptFromFile(param.File)
                .Thread(param.Thread)
                .Build();

            pageProcessor.Site.SleepTime = param.Sleep;
            pageProcessor.Site.RetryTimes = 3;
            pageProcessor.Site.AcceptStatCode = new HashSet <int> { 200, 404, 403, 500, 502 };

            Core.Spider spider = Core.Spider.Create(pageProcessor).SetThreadNum(param.Thread);
            spider.ClearPipeline();

            // Copy the embedded define.js line by line; each line is re-terminated by AppendLine.
            StringBuilder prelude = new StringBuilder();
            var defineJs = typeof(ScriptConsole).Assembly.GetManifestResourceStream("Java2Dotnet.Spider.Scripts.Resource.js.define.js");
            using (StreamReader reader = new StreamReader(defineJs))
            {
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    prelude.AppendLine(line);
                }
            }

            string script = prelude.ToString() + Environment.NewLine + File.ReadAllText(param.File);

            Jurassic.ScriptEngine engine = new Jurassic.ScriptEngine();
            engine.EnableExposedClrTypes = true;
            //engine.SetGlobalValue("page", new Page());
            engine.SetGlobalValue("config", new Site());

            engine.Evaluate(script);

            foreach (string startUrl in param.Urls)
            {
                spider.AddUrl(startUrl);
            }

            spider.Run();
        }
Ejemplo n.º 14
0
        /// <summary>
        /// When the spider's scheduler is a <c>Scheduler.RedisScheduler</c>, subscribes to the channel
        /// named after <c>spider.Identity</c> and maps the messages "stop", "start" and "exit" to the
        /// matching spider calls. Subscription failures are ignored.
        /// </summary>
        /// <param name="spider">The spider to control.</param>
        private void RegisterControl(Core.Spider spider)
        {
            Scheduler.RedisScheduler controlScheduler = spider.Scheduler as Scheduler.RedisScheduler;
            if (controlScheduler == null)
            {
                // No Redis-backed scheduler, so no control channel.
                return;
            }

            try
            {
                controlScheduler.Redis.Subscribe($"{spider.Identity}", (channel, message) =>
                {
                    switch (message)
                    {
                        case "stop":
                            spider.Stop();
                            break;

                        case "start":
                            spider.Start();
                            break;

                        case "exit":
                            spider.Exit();
                            break;
                    }
                });
            }
            catch
            {
                // ignored
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        /// When <c>Redis</c> is available, subscribes to the channel named after
        /// <c>spider.Identity</c> and maps the messages "stop", "start" and "exit" to the matching
        /// spider calls. Subscription failures are ignored.
        /// </summary>
        /// <param name="spider">The spider to control.</param>
        private void RegisterControl(Core.Spider spider)
        {
            if (Redis == null)
            {
                // No Redis connection, so no control channel.
                return;
            }

            try
            {
                Redis.GetSubscriber().Subscribe($"{spider.Identity}", (channel, message) =>
                {
                    switch (message)
                    {
                        case "stop":
                            spider.Stop();
                            break;

                        case "start":
                            spider.Start();
                            break;

                        case "exit":
                            spider.Exit();
                            break;
                    }
                });
            }
            catch
            {
                // ignored
            }
        }
Ejemplo n.º 16
0
            /// <summary>
            /// Creates a listener bound to <paramref name="spider"/>. It copies the spider's user id and
            /// task group, and builds the error-request collection name and the Mongo database name from
            /// their <c>Encrypt.Md5Encrypt</c> values. When <c>spider.SaveStatus</c> is set and
            /// <c>StatusServer</c> is non-empty, it subscribes to the spider's fail, success and closing
            /// events and starts a background loop that calls <c>PostStatus</c>.
            /// </summary>
            /// <param name="spider">The spider whose events and status are monitored.</param>
            public MonitorSpiderListener(Core.Spider spider)
            {
                _spider = spider;

                _userId = spider.UserId;
                _taskGroup = spider.TaskGroup;
                _errorRequestCollection = Encrypt.Md5Encrypt(_taskGroup) + "_error_request";
                _mongoDatabaseName = "db_" + Encrypt.Md5Encrypt(_userId);

                if (spider.SaveStatus && !string.IsNullOrEmpty(StatusServer))
                {
                    spider.RequestedFailEvent += OnError;
                    spider.RequestedSuccessEvent += OnSuccess;
                    spider.SpiderClosingEvent += OnClose;

                    // The loop below never returns and spends its time blocked in Thread.Sleep, so it is
                    // started as LongRunning instead of being queued as an ordinary thread-pool task.
                    Task.Factory.StartNew(() =>
                    {
                        while (true)
                        {
                            try
                            {
                                PostStatus();
                            }
                            catch (Exception)
                            {
                                // Best effort: a failed status post is ignored and retried on the next round.
                            }

                            Thread.Sleep(5000);
                        }
                    }, TaskCreationOptions.LongRunning);
                }
            }
Ejemplo n.º 17
0
 /// <summary>
 /// Creates a status view over a spider and the monitor listener attached to it.
 /// </summary>
 /// <param name="spider">Stored in <c>Spider</c>.</param>
 /// <param name="monitorSpiderListener">Stored in <c>MonitorSpiderListener</c>.</param>
 public SpiderStatus(Core.Spider spider, SpiderMonitor.MonitorSpiderListener monitorSpiderListener)
 {
     this.Spider = spider;
     this.MonitorSpiderListener = monitorSpiderListener;
 }
Ejemplo n.º 18
0
 /// <summary>
 /// Wraps a spider and its monitor listener in a new <c>SpiderStatus</c>.
 /// </summary>
 /// <param name="spider">Spider passed to the <c>SpiderStatus</c> constructor.</param>
 /// <param name="monitorSpiderListener">Listener passed to the <c>SpiderStatus</c> constructor.</param>
 /// <returns>The newly created status object.</returns>
 private ISpiderStatus GetSpiderStatus(Core.Spider spider, MonitorSpiderListener monitorSpiderListener)
 {
     ISpiderStatus status = new SpiderStatus(spider, monitorSpiderListener);
     return status;
 }
Ejemplo n.º 19
0
 /// <summary>
 /// Initializes the status object with the spider it reports on and the listener attached to that spider.
 /// </summary>
 /// <param name="spider">Assigned to <c>Spider</c>.</param>
 /// <param name="monitorSpiderListener">Assigned to <c>MonitorSpiderListener</c>.</param>
 public SpiderStatus(
     Core.Spider spider,
     SpiderMonitor.MonitorSpiderListener monitorSpiderListener)
 {
     this.Spider = spider;
     this.MonitorSpiderListener = monitorSpiderListener;
 }
Ejemplo n.º 20
0
        /// <summary>
        /// Prepares the spider from <paramref name="args"/>, subscribes to its control channel when the
        /// scheduler is a <c>Scheduler.RedisScheduler</c>, starts it and blocks until its status is no
        /// longer <c>Stopped</c>, <c>Running</c> or <c>Init</c>. The spider is then disposed, and
        /// <c>RunAfterSpiderFinished</c> and <c>DoValidate</c> are called.
        /// </summary>
        /// <param name="args">Arguments passed unchanged to <c>PrepareSpider</c>.</param>
        public virtual void Run(params string[] args)
        {
            try
            {
                spider = PrepareSpider(args);
                if (spider == null)
                {
                    // Nothing to run; the outer finally still calls Log.WaitForExit().
                    return;
                }

                try
                {
                    var redisScheduler = spider.Scheduler as Scheduler.RedisScheduler;
                    if (redisScheduler != null)
                    {
                        try
                        {
                            // Control channel: messages on the spider's identity channel stop, start or exit it.
                            redisScheduler.Redis.Subscribe($"{spider.Identity}", (c, m) =>
                            {
                                switch (m)
                                {
                                case "stop":
                                    {
                                        spider.Stop();
                                        break;
                                    }

                                case "start":
                                    {
                                        spider.Start();
                                        break;
                                    }

                                case "exit":
                                    {
                                        spider.Exit();
                                        break;
                                    }
                                }
                            });
                        }
                        catch
                        {
                            // Best effort: the spider still runs when the subscription fails.
                        }
                    }

                    spider.Start();

                    // Poll once per second. Stopped is part of the wait set, presumably because a
                    // stopped spider can be started again through the control channel — confirm.
                    while (spider.StatusCode == Status.Stopped || spider.StatusCode == Status.Running || spider.StatusCode == Status.Init)
                    {
                        Thread.Sleep(1000);
                    }
                }
                finally
                {
                    // Dispose on every path, including when Start() or the wait loop throws.
                    spider.Dispose();
                }

                // Post-run hooks run only after a normal finish, and after the spider has been disposed.
                RunAfterSpiderFinished();

                DoValidate();
            }
            finally
            {
                Log.WaitForExit();
            }
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Prepares the spider from <paramref name="args"/>, registers its control channel, starts it
        /// and blocks until its status is no longer <c>Stopped</c>, <c>Running</c> or <c>Init</c>. The
        /// spider is then disposed, <c>AfterSpiderFinished</c> is raised and <c>DoValidate</c> is called.
        /// </summary>
        /// <param name="args">Arguments passed unchanged to <c>PrepareSpider</c>.</param>
        public virtual void Run(params string[] args)
        {
            try
            {
                spider = PrepareSpider(args);

                if (spider == null)
                {
                    // Nothing to run; the outer finally still calls Log.WaitForExit().
                    return;
                }

                try
                {
                    RegisterControl(spider);

                    spider.Start();

                    // Poll once per second. Stopped is part of the wait set, presumably because a
                    // stopped spider can be started again through the control channel — confirm.
                    while (spider.StatusCode == Status.Stopped || spider.StatusCode == Status.Running || spider.StatusCode == Status.Init)
                    {
                        Thread.Sleep(1000);
                    }
                }
                finally
                {
                    // Dispose on every path, including when registration, Start() or the wait loop throws.
                    spider.Dispose();
                }

                // Post-run hooks run only after a normal finish, and after the spider has been disposed.
                AfterSpiderFinished?.Invoke();

                DoValidate();
            }
            finally
            {
                Log.WaitForExit();
            }
        }