/// <summary>
/// Prepares a spider from the supplied arguments, starts it, and blocks the calling
/// thread until the spider's status is no longer Stopped, Running or Init. The spider
/// is then disposed and <c>DoValidate</c> is called.
/// </summary>
/// <param name="args">Arguments forwarded unchanged to <c>PrepareSpider</c>.</param>
public virtual void Run(params string[] args)
{
    try
    {
        Core.Spider spider = PrepareSpider(args);
        if (spider == null)
        {
            // Nothing to run; the outer finally still calls Log.WaitForExit().
            return;
        }

        try
        {
            RegisterControl(spider);
            spider.Start();

            // Stopped counts as "keep waiting" here.
            // NOTE(review): presumably because a stopped spider can be started again
            // through whatever RegisterControl wires up - confirm.
            while (spider.StatusCode == Status.Stopped
                   || spider.StatusCode == Status.Running
                   || spider.StatusCode == Status.Init)
            {
                Thread.Sleep(1000);
            }
        }
        finally
        {
            // Dispose on every path, including when registration, start-up or the
            // polling loop throws; previously the spider leaked on an exception.
            spider.Dispose();
        }

        // Validation runs only after a normal finish, and after the spider is disposed,
        // which matches the original ordering.
        DoValidate();
    }
    finally
    {
        Log.WaitForExit();
    }
}
//[Ignore]
//[TestMethod]
//public void TestWaitAndNotify()
//{
//    for (int i = 0; i < 10000; i++)
//    {
//        Console.WriteLine("round " + i);
//        TestRound();
//    }
//}

/// <summary>
/// Creates a spider over a site whose SleepTime is 0, using a TestPageProcessor and a
/// TestScheduler, sets its thread count to 10 and runs it.
/// </summary>
private void TestRound()
{
    var site = new Site { SleepTime = 0 };
    var created = Core.Spider.Create(site, new TestPageProcessor(), new TestScheduler());

    Core.Spider spider = created.SetThreadNum(10);
    spider.Run();
}
/// <summary>
/// Captures the spider and the identifiers derived from it. When the spider has
/// SaveStatus set and <c>StatusServer</c> is non-empty, it also subscribes to the
/// spider's request/closing events and starts a background task that calls
/// <c>PostStatus</c> every five seconds.
/// </summary>
/// <param name="spider">The spider being monitored.</param>
public MonitorSpiderListener(Core.Spider spider)
{
    _spider = spider;
    _userId = spider.UserId;
    _taskGroup = spider.TaskGroup;

    // Collection and database names are built from MD5-encrypted identifiers.
    _errorRequestCollection = Encrypt.Md5Encrypt(_taskGroup) + "_error_request";
    _mongoDatabaseName = "db_" + Encrypt.Md5Encrypt(_userId);

    // Status reporting is opt-in and needs a configured status server.
    if (!spider.SaveStatus || string.IsNullOrEmpty(StatusServer))
    {
        return;
    }

    spider.RequestedFailEvent += OnError;
    spider.RequestedSuccessEvent += OnSuccess;
    spider.SpiderClosingEvent += OnClose;

    Task.Factory.StartNew(() =>
    {
        // This loop has no exit condition; it keeps posting for as long as the task lives.
        for (;;)
        {
            try
            {
                PostStatus();
            }
            catch (Exception)
            {
                // ignored: a failed post is simply retried on the next tick
            }

            Thread.Sleep(5000);
        }
    });
}
/// <summary>
/// Prepares the spider from the supplied arguments, starts it, and blocks the calling
/// thread while the spider's status is Running or Init. Afterwards the spider is
/// disposed, <c>AfterSpiderFinished</c> is invoked if it has subscribers, and
/// <c>DoValidate</c> is called.
/// </summary>
/// <param name="args">Arguments forwarded unchanged to <c>PrepareSpider</c>.</param>
public virtual void Run(params string[] args)
{
    try
    {
        spider = PrepareSpider(args);
        if (spider == null)
        {
            // Nothing to run; the outer finally still disposes the default monitor.
            return;
        }

        try
        {
            RegisterControl(spider);
            spider.Start();

            // Poll once per second until the spider leaves the Running/Init states.
            while (spider.StatusCode == Status.Running || spider.StatusCode == Status.Init)
            {
                Thread.Sleep(1000);
            }
        }
        finally
        {
            // Dispose on every path, including when registration, start-up or the
            // polling loop throws; previously the spider leaked on an exception.
            spider.Dispose();
        }

        // Post-run hooks run only after a normal finish, and after the spider is
        // disposed, which matches the original ordering.
        AfterSpiderFinished?.Invoke();
        DoValidate();
    }
    finally
    {
        SpiderMonitor.Default.Dispose();
    }
}
/// <summary>
/// Stores the spider and its status object, reads the "redishost" and "redishostpass"
/// application settings, and creates a <c>RedisManagerPool</c> when a host is configured.
/// </summary>
/// <param name="spider">The spider whose status is updated.</param>
/// <param name="spiderStatus">The status view of the spider.</param>
public RedisStatusUpdater(Core.Spider spider, ISpiderStatus spiderStatus)
{
    _spider = spider;
    _spiderStatus = spiderStatus;

    var settings = ConfigurationManager.AppSettings;
    string redisHost = settings["redishost"];
    _password = settings["redishostpass"];

    // Without a configured host no pool is created and _pool keeps its prior value.
    if (string.IsNullOrEmpty(redisHost))
    {
        return;
    }

    _pool = new RedisManagerPool(redisHost);
}
/// <summary>
/// Builds a 20-thread OoSpider for http://www.36kr.com/ that maps pages to
/// <c>Kr36NewsModel</c> through a <c>CollectorPageModelToDbPipeline</c>, starts it,
/// and then registers it with the <c>SpiderMonitor</c> singleton.
/// </summary>
public static void Run()
{
    var site = new Site();
    site.AddStartUrl("http://www.36kr.com/");

    var pipeline = new CollectorPageModelToDbPipeline();
    Core.Spider spider = OoSpider.Create(site, pipeline, typeof(Kr36NewsModel)).SetThreadNum(20);

    // The spider is started first and registered with the monitor afterwards.
    spider.Start();
    SpiderMonitor.Instance.Register(spider);
}
/// <summary>
/// Wires optional monitoring front-ends for a spider: a modal control form when
/// <c>spider.ShowControl</c> is set, and a Redis status updater when
/// <c>spider.SaveStatusInRedis</c> is set.
/// </summary>
/// <param name="spider">The spider whose flags select which front-ends are wired.</param>
/// <param name="spiderStatus">Status object handed to the form and to the updater.</param>
/// <param name="monitorSpiderListener">Listener whose ClosingEvent triggers a final status update.</param>
private void Register(Core.Spider spider, ISpiderStatus spiderStatus, MonitorSpiderListener monitorSpiderListener)
{
    if (spider.ShowControl)
    {
        // A form shown with ShowDialog() is not disposed when it closes, so it has to
        // be disposed explicitly; the using block releases it once the dialog returns.
        using (Form1 form1 = new Form1(spiderStatus))
        {
            form1.ShowDialog();
        }
    }

    if (spider.SaveStatusInRedis)
    {
        RedisStatusUpdater statusUpdater = new RedisStatusUpdater(spider, spiderStatus);

        // Subscribe before running the updater, as the original did.
        monitorSpiderListener.ClosingEvent += statusUpdater.UpdateStatus;
        statusUpdater.Run();
    }
}
/// <summary>
/// Downloads http://www.baidu.com/ once through an HttpClientDownloader and prints the
/// content, then starts, stops and restarts a single-threaded spider with ten-second
/// pauses between each step.
/// </summary>
public void TestStartAndStop()
{
    const int pauseMilliseconds = 10000;

    var downloader = new HttpClientDownloader();

    var site = new Site() { EncodingName = "UTF-8" };
    var processor = new SimplePageProcessor("http://www.oschina.net/", "http://www.oschina.net/*");
    Core.Spider spider = Core.Spider.Create(site, processor)
        .AddPipeline(new TestPipeline())
        .SetThreadNum(1);

    // One-off download through the downloader before the spider itself is started.
    var request = new Request("http://www.baidu.com/", 2, new Dictionary<string, dynamic>());
    Page page = downloader.Download(request, spider);
    Console.WriteLine(page.Content);

    // Start -> stop -> start again, pausing after every transition.
    spider.Start();
    Thread.Sleep(pauseMilliseconds);

    spider.Stop();
    Thread.Sleep(pauseMilliseconds);

    spider.Start();
    Thread.Sleep(pauseMilliseconds);
}
/// <summary>
/// Queues four youku.com listing pages (p_1 through p_4) as start URLs, starts a
/// single-threaded spider over them, and then blocks on <c>Console.Read()</c>.
/// </summary>
public static void Run()
{
    var site = new Site() { EncodingName = "UTF-8" };

    for (int pageNumber = 1; pageNumber <= 4; pageNumber++)
    {
        string startUrl = "http://" + $"www.youku.com/v_olist/c_97_g__a__sg__mt__lg__q__s_1_r_0_u_0_pt_0_av_0_ag_0_sg__pr__h__d_1_p_{pageNumber}.html";
        site.AddStartUrl(startUrl);
    }

    var processor = new MyPageProcessor();
    var scheduler = new QueueDuplicateRemovedScheduler();
    Core.Spider spider = Core.Spider.Create(site, processor, scheduler)
        .AddPipeline(new MyPipeline())
        .SetThreadNum(1);

    //spider.SetDownloader(downloader);
    spider.Start();

    // Blocks until a character can be read from standard input.
    Console.Read();
}
/// <summary>
/// Prepares a spider from the arguments and runs it if one was produced, then calls
/// <c>RunAfterSpiderFinished</c>, and calls <c>DoValidate</c> when
/// <c>_validateReportTo</c> is non-empty. The spider, if any, is disposed on every path.
/// </summary>
/// <param name="args">Arguments forwarded unchanged to <c>PrepareSpider</c>.</param>
public void Run(params string[] args)
{
    Core.Spider spider = null;
    try
    {
        spider = PrepareSpider(args);
        if (spider != null)
        {
            spider.Run();
        }

        // The post-run hook is called even when no spider was prepared.
        RunAfterSpiderFinished();

        bool hasReportTarget = !string.IsNullOrEmpty(_validateReportTo);
        if (hasReportTarget)
        {
            DoValidate();
        }
    }
    finally
    {
        if (spider != null)
        {
            spider.Dispose();
        }
    }
}
/// <summary>
/// Stores the spider and, when <c>spider.SaveStatusToRedis</c> is set, starts a
/// background task. Outside NET_CORE builds that task calls <c>UpdateStatus</c>
/// against Redis database 0 every three seconds, provided the spider's scheduler is a
/// <c>RedisScheduler</c>. It stops after the first successful update made while
/// <c>Closed</c> is true.
/// </summary>
/// <param name="spider">The spider being monitored.</param>
public MonitorSpiderListener(Core.Spider spider)
{
    _spider = spider;

    if (!spider.SaveStatusToRedis)
    {
        return;
    }

    Task.Factory.StartNew(() =>
    {
#if !NET_CORE
        var redisScheduler = spider.Scheduler as RedisScheduler;
        if (redisScheduler == null)
        {
            // Only Redis-backed schedulers expose a connection to write status to.
            return;
        }

        ConnectionMultiplexer connection = redisScheduler.Redis;
        IDatabase database = connection.GetDatabase(0);

        while (true)
        {
            try
            {
                // Read the flag before updating. If the update throws, the loop does
                // not break and the final update is retried after the sleep.
                bool closed = Closed;
                UpdateStatus(database);
                if (closed)
                {
                    break;
                }
            }
            catch (Exception)
            {
                // ignored
            }

            Thread.Sleep(3000);
        }
#endif
    });
}
/// <summary>
/// Builds a script page processor from the options, creates a spider for it with its
/// pipelines cleared, evaluates the embedded define.js followed by the user's script
/// file in a Jurassic engine, queues the option URLs on the spider, and runs it.
/// </summary>
/// <param name="param">Options supplying Lang, File, Thread, Sleep and Urls.</param>
private static void StartSpider(Options param)
{
    ScriptProcessor processor = ScriptProcessorBuilder.Custom()
        .Language(param.Lang)
        .ScriptFromFile(param.File)
        .Thread(param.Thread)
        .Build();

    processor.Site.SleepTime = param.Sleep;
    processor.Site.RetryTimes = 3;
    processor.Site.AcceptStatCode = new HashSet<int> { 200, 404, 403, 500, 502 };

    Core.Spider spider = Core.Spider.Create(processor).SetThreadNum(param.Thread);
    spider.ClearPipeline();

    // Copy the embedded prelude line by line. AppendLine terminates every line with
    // Environment.NewLine, whatever the resource's own line endings are.
    var prelude = new StringBuilder();
    var resource = typeof(ScriptConsole).Assembly.GetManifestResourceStream("Java2Dotnet.Spider.Scripts.Resource.js.define.js");
    using (var reader = new StreamReader(resource))
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            prelude.AppendLine(line);
        }
    }

    string userScript = File.ReadAllText(param.File);
    string script = string.Concat(prelude.ToString(), Environment.NewLine, userScript);

    var engine = new Jurassic.ScriptEngine { EnableExposedClrTypes = true };
    //engine.SetGlobalValue("page", new Page());
    engine.SetGlobalValue("config", new Site());
    engine.Evaluate(script);

    foreach (string startUrl in param.Urls)
    {
        spider.AddUrl(startUrl);
    }

    spider.Run();
}
/// <summary>
/// When the spider's scheduler is a <c>RedisScheduler</c>, subscribes to the channel
/// named after the spider's Identity and maps the messages "stop", "start" and "exit"
/// to <c>Stop</c>, <c>Start</c> and <c>Exit</c> on the spider. Any exception raised
/// while subscribing is swallowed.
/// </summary>
/// <param name="spider">The spider to control.</param>
private void RegisterControl(Core.Spider spider)
{
    var scheduler = spider.Scheduler as Scheduler.RedisScheduler;
    if (scheduler == null)
    {
        return;
    }

    try
    {
        scheduler.Redis.Subscribe($"{spider.Identity}", (channel, command) =>
        {
            // Any other message is ignored.
            switch (command)
            {
                case "stop":
                    spider.Stop();
                    break;

                case "start":
                    spider.Start();
                    break;

                case "exit":
                    spider.Exit();
                    break;
            }
        });
    }
    catch
    {
        // ignored
    }
}
/// <summary>
/// When <c>Redis</c> is available, subscribes through its subscriber to the channel
/// named after the spider's Identity and maps the messages "stop", "start" and "exit"
/// to <c>Stop</c>, <c>Start</c> and <c>Exit</c> on the spider. Any exception raised
/// while subscribing is swallowed.
/// </summary>
/// <param name="spider">The spider to control.</param>
private void RegisterControl(Core.Spider spider)
{
    if (Redis == null)
    {
        return;
    }

    try
    {
        var subscriber = Redis.GetSubscriber();
        subscriber.Subscribe($"{spider.Identity}", (channel, command) =>
        {
            // Any other message is ignored.
            switch (command)
            {
                case "stop":
                    spider.Stop();
                    break;

                case "start":
                    spider.Start();
                    break;

                case "exit":
                    spider.Exit();
                    break;
            }
        });
    }
    catch
    {
        // ignored
    }
}
/// <summary>
/// Creates a status object over a spider and the monitor listener attached to it.
/// </summary>
/// <param name="spider">Assigned to <c>Spider</c>.</param>
/// <param name="monitorSpiderListener">Assigned to <c>MonitorSpiderListener</c>.</param>
public SpiderStatus(Core.Spider spider, SpiderMonitor.MonitorSpiderListener monitorSpiderListener)
{
    this.Spider = spider;
    this.MonitorSpiderListener = monitorSpiderListener;
}
/// <summary>
/// Wraps a spider and its monitor listener in a new <c>SpiderStatus</c>.
/// </summary>
/// <param name="spider">The spider to report on.</param>
/// <param name="monitorSpiderListener">The listener attached to that spider.</param>
/// <returns>A newly created <c>SpiderStatus</c>, returned as <c>ISpiderStatus</c>.</returns>
private ISpiderStatus GetSpiderStatus(Core.Spider spider, MonitorSpiderListener monitorSpiderListener)
{
    ISpiderStatus status = new SpiderStatus(spider, monitorSpiderListener);
    return status;
}
/// <summary>
/// Prepares the spider from the supplied arguments, subscribes to its Redis control
/// channel when the scheduler is a <c>RedisScheduler</c>, starts it, and blocks the
/// calling thread while the spider's status is Stopped, Running or Init. Afterwards
/// the spider is disposed, and <c>RunAfterSpiderFinished</c> and <c>DoValidate</c>
/// are called.
/// </summary>
/// <param name="args">Arguments forwarded unchanged to <c>PrepareSpider</c>.</param>
public virtual void Run(params string[] args)
{
    try
    {
        spider = PrepareSpider(args);
        if (spider == null)
        {
            // Nothing to run; the outer finally still calls Log.WaitForExit().
            return;
        }

        try
        {
            var redisScheduler = spider.Scheduler as Scheduler.RedisScheduler;
            if (redisScheduler != null)
            {
                try
                {
                    // The channel is named after the spider's identity; "stop", "start"
                    // and "exit" are the only messages acted on.
                    redisScheduler.Redis.Subscribe($"{spider.Identity}", (c, m) =>
                    {
                        switch (m)
                        {
                            case "stop":
                                spider.Stop();
                                break;

                            case "start":
                                spider.Start();
                                break;

                            case "exit":
                                spider.Exit();
                                break;
                        }
                    });
                }
                catch
                {
                    // ignored: the control channel is best-effort and must not stop the run
                }
            }

            spider.Start();

            // Stopped counts as "keep waiting" because the "start" message handled
            // above can start the spider again.
            while (spider.StatusCode == Status.Stopped
                   || spider.StatusCode == Status.Running
                   || spider.StatusCode == Status.Init)
            {
                Thread.Sleep(1000);
            }
        }
        finally
        {
            // Dispose on every path, including when subscribing, start-up or the
            // polling loop throws; previously the spider leaked on an exception.
            spider?.Dispose();
        }

        // Post-run hooks run only after a normal finish, and after the spider is
        // disposed, which matches the original ordering.
        RunAfterSpiderFinished();
        DoValidate();
    }
    finally
    {
        Log.WaitForExit();
    }
}
/// <summary>
/// Prepares the spider from the supplied arguments, starts it, and blocks the calling
/// thread while the spider's status is Stopped, Running or Init. Afterwards the spider
/// is disposed, <c>AfterSpiderFinished</c> is invoked if it has subscribers, and
/// <c>DoValidate</c> is called.
/// </summary>
/// <param name="args">Arguments forwarded unchanged to <c>PrepareSpider</c>.</param>
public virtual void Run(params string[] args)
{
    try
    {
        spider = PrepareSpider(args);
        if (spider == null)
        {
            // Nothing to run; the outer finally still calls Log.WaitForExit().
            return;
        }

        try
        {
            RegisterControl(spider);
            spider.Start();

            // Stopped counts as "keep waiting" here.
            // NOTE(review): presumably because a stopped spider can be started again
            // through whatever RegisterControl wires up - confirm.
            while (spider.StatusCode == Status.Stopped
                   || spider.StatusCode == Status.Running
                   || spider.StatusCode == Status.Init)
            {
                Thread.Sleep(1000);
            }
        }
        finally
        {
            // Dispose on every path, including when registration, start-up or the
            // polling loop throws; previously the spider leaked on an exception.
            spider?.Dispose();
        }

        // Post-run hooks run only after a normal finish, and after the spider is
        // disposed, which matches the original ordering.
        AfterSpiderFinished?.Invoke();
        DoValidate();
    }
    finally
    {
        Log.WaitForExit();
    }
}