/// <summary>
        /// Compares two ways of loading 40000 requests into the Redis scheduler: pushing them
        /// one at a time versus collecting them into a set and calling <c>Import</c> once.
        /// The test asserts that the bulk import (including building the set) takes less time.
        /// </summary>
        /// <remarks>
        /// Elapsed time is measured with <see cref="System.Diagnostics.Stopwatch"/> instead of
        /// subtracting <c>DateTime.Now</c> values: the wall clock has coarse resolution and can
        /// jump (time sync, DST), which makes a duration comparison unreliable.
        /// </remarks>
        public void LoadPerformace()
        {
            const int requestCount = 40000;

            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler();
            ISpider spider = new DefaultSpider("test", new Site());

            scheduler.Init(spider);
            // NOTE(review): the scheduler keeps being used after Dispose(); presumably Dispose
            // resets the stored state for this spider rather than closing the scheduler - confirm.
            scheduler.Dispose();

            // Phase 1: one Push call per request.
            var pushTimer = System.Diagnostics.Stopwatch.StartNew();

            for (int i = 0; i < requestCount; i++)
            {
                scheduler.Push(new Request("http://www.a.com/" + i, null));
            }

            pushTimer.Stop();
            double seconds = pushTimer.Elapsed.TotalSeconds;

            scheduler.Dispose();

            // Phase 2: build the whole set first, then hand it over in a single Import call.
            // Building the set is deliberately inside the timed region, as in the original measurement.
            var importTimer = System.Diagnostics.Stopwatch.StartNew();
            HashSet<Request> list = new HashSet<Request>();

            for (int i = 0; i < requestCount; i++)
            {
                list.Add(new Request("http://www.a.com/" + i, null));
            }
            scheduler.Import(list);

            importTimer.Stop();
            double seconds1 = importTimer.Elapsed.TotalSeconds;

            Assert.True(seconds1 < seconds);
            scheduler.Dispose();
        }
        /// <summary>
        /// Pushes four requests into a <c>QueueDuplicateRemovedScheduler</c>, imports that
        /// scheduler's contents into a Redis scheduler, and checks that polling the Redis
        /// scheduler yields the URLs in the reverse of the order they were pushed.
        /// </summary>
        public void Load()
        {
            var sourceScheduler = new QueueDuplicateRemovedScheduler();
            ISpider spider = new DefaultSpider("test", new Site());

            sourceScheduler.Init(spider);

            // Push order matters: the assertions below expect the reverse of this order.
            string[] urls =
            {
                "http://www.a.com/",
                "http://www.b.com/",
                "http://www.c.com/",
                "http://www.d.com/"
            };

            foreach (string url in urls)
            {
                sourceScheduler.Push(new Request(url, null));
            }

            Extension.Scheduler.RedisScheduler redisScheduler = GetRedisScheduler();
            redisScheduler.Init(spider);

            // NOTE(review): the scheduler is used again after Dispose(); presumably this
            // resets previously stored state before the import - confirm.
            redisScheduler.Dispose();

            redisScheduler.Import(sourceScheduler.ToList());

            // Expect d, c, b, a.
            for (int i = urls.Length - 1; i >= 0; i--)
            {
                Assert.Equal(urls[i], redisScheduler.Poll().Url.ToString());
            }

            redisScheduler.Dispose();
        }
        /// <summary>
        /// With <c>DepthFirst</c> enabled, pushes requests /1 through /4 and checks that the
        /// first two polls return /4 and then /3 (most recently pushed first).
        /// </summary>
        public void PushAndPollDepthFirst()
        {
            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler();
            scheduler.DepthFirst = true;
            ISpider spider = new DefaultSpider();

            scheduler.Init(spider);
            // NOTE(review): the scheduler is used again after Dispose(); presumably this
            // resets previously stored state - confirm.
            scheduler.Dispose();

            Request[] pending =
            {
                new Request("http://www.ibm.com/1", null),
                new Request("http://www.ibm.com/2", null),
                new Request("http://www.ibm.com/3", null),
                new Request("http://www.ibm.com/4", null)
            };

            foreach (Request item in pending)
            {
                scheduler.Push(item);
            }

            Request newest = scheduler.Poll();
            Assert.Equal("http://www.ibm.com/4", newest.Url.ToString());

            Request secondNewest = scheduler.Poll();
            Assert.Equal("http://www.ibm.com/3", secondNewest.Url.ToString());

            // Dispose is invoked twice, exactly as in the original test.
            scheduler.Dispose();
            scheduler.Dispose();
        }
        /// <summary>
        /// Connects a Redis scheduler to "localhost", clears it, pushes requests /1 through /4
        /// and checks that the first poll returns /4. The scheduler is cleared again and
        /// disposed at the end.
        /// </summary>
        public void Clear()
        {
            var scheduler = new Extension.Scheduler.RedisScheduler("localhost", "");
            ISpider spider = new DefaultSpider();

            scheduler.Init(spider);
            scheduler.Clear();

            Request[] queued =
            {
                new Request("http://www.ibm.com/1", 1, null),
                new Request("http://www.ibm.com/2", 1, null),
                new Request("http://www.ibm.com/3", 1, null),
                new Request("http://www.ibm.com/4", 1, null)
            };

            foreach (Request item in queued)
            {
                scheduler.Push(item);
            }

            Request polled = scheduler.Poll();
            Assert.Equal("http://www.ibm.com/4", polled.Url.ToString());

            scheduler.Clear();
            scheduler.Dispose();
        }
        /// <summary>
        /// Pushes a single request carrying one extra value, polls it back and checks its URL,
        /// then checks that a second poll returns <c>null</c>.
        /// </summary>
        public void PushAndPoll1()
        {
            const string url = "http://www.ibm.com/developerworks/cn/java/j-javadev2-22/";

            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler();
            ISpider spider = new DefaultSpider();

            scheduler.Init(spider);
            // NOTE(review): the scheduler is used again after Dispose(); presumably this
            // resets previously stored state - confirm.
            scheduler.Dispose();

            Request request = new Request(url, null);
            request.Site = spider.Site;
            request.PutExtra("1", "2");

            scheduler.Push(request);

            Request first = scheduler.Poll();
            Assert.Equal(url, first.Url.ToString());

            // NOTE(review): this reads the extra from the pushed `request`, not from the polled
            // result, so it does not verify that extras survive the round trip - confirm intent.
            Assert.Equal("2", request.GetExtra("1"));

            Request second = scheduler.Poll();
            Assert.Null(second);

            scheduler.Dispose();
        }
// Example #6
// 0
        /// <summary>
        /// With <c>TraverseStrategy.Bfs</c> selected, pushes requests /1 through /4 and checks
        /// that the first two polls return /1 and then /2 (oldest pushed first).
        /// </summary>
        public void PushAndPollBreadthFirst()
        {
            ISpider spider = new DefaultSpider();

            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler(spider.Identity);
            scheduler.TraverseStrategy = TraverseStrategy.Bfs;

            // NOTE(review): the scheduler is used again after Dispose(); presumably this
            // resets previously stored state - confirm.
            scheduler.Dispose();

            Request[] pending =
            {
                new Request("http://www.ibm.com/1", null),
                new Request("http://www.ibm.com/2", null),
                new Request("http://www.ibm.com/3", null),
                new Request("http://www.ibm.com/4", null)
            };

            foreach (Request item in pending)
            {
                scheduler.Push(item);
            }

            Request oldest = scheduler.Poll();
            Assert.Equal("http://www.ibm.com/1", oldest.Url.ToString());

            Request secondOldest = scheduler.Poll();
            Assert.Equal("http://www.ibm.com/2", secondOldest.Url.ToString());

            // Dispose is invoked twice, exactly as in the original test.
            scheduler.Dispose();
            scheduler.Dispose();
        }
 /// <summary>
 /// Configures this spider: a timestamped identity, single-threaded operation, an HTTP
 /// downloader with two download-complete handlers, a Redis scheduler, a MySQL entity
 /// pipeline, the start URL and the entity type to extract.
 /// </summary>
 /// <param name="arguments">Not read by this method.</param>
 protected override void MyInit(params string[] arguments)
 {
     Identity = "HaoBrowser Hao360Spider Buble " + DateTime.Now.ToString("yyyy-MM-dd HHmmss");
     CachedSize = 1;
     ThreadNum = 1;
     SkipWhenResultIsEmpty = true;

     var downloader = new HttpClientDownloader();

     // Presumably narrows the downloaded content to the span between Start and End - confirm
     // against SubContentHandler.
     var subContent = new SubContentHandler
     {
         Start = "sales[\"hotsite_yixing\"] = [",
         End = "}}",
         StartOffset = 27,
         EndOffset = 0
     };

     // Configured to replace the two-character sequence \/ with a plain /.
     var slashReplacement = new ReplaceContentHandler
     {
         NewValue = "/",
         OldValue = "\\/"
     };

     // Handler order is kept: sub-content first, replacement second.
     downloader.DownloadCompleteHandlers = new IDownloadCompleteHandler[]
     {
         subContent,
         slashReplacement
     };
     Downloader = downloader;

     // NOTE(review): both connection strings below embed credentials in source; consider
     // moving them to configuration.
     string redisConnection = "127.0.0.1:6379,serviceName=Scheduler.NET,keepAlive=8,allowAdmin=True,connectTimeout=10000,password=6GS9F2QTkP36GggE0c3XwVwI,abortConnect=True,connectRetry=20";
     string mySqlConnection = "Database='testhao';Data Source= localhost;User ID=root;Password=root@123456;Port=3306";

     Scheduler = new Extension.Scheduler.RedisScheduler(redisConnection);
     AddPipeline(new MySqlEntityPipeline(mySqlConnection));
     AddStartUrl("https://hao.360.cn/");
     AddEntityType(typeof(UpdateHao360Info));
 }
        /// <summary>
        /// Checks the scheduler's counters through a full push/poll cycle. After four pushes
        /// the left and total counts are 4. The error and success counts are each incremented
        /// once and must stay at 1. Each poll lowers the left count by one until it reaches 0,
        /// the total stays at 4, and polling the empty queue changes nothing.
        /// </summary>
        public void Status()
        {
            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler();
            ISpider spider = new DefaultSpider("test", new Site());

            scheduler.Init(spider);

            // NOTE(review): the scheduler is used again after Dispose(); presumably this
            // resets previously stored state - confirm.
            scheduler.Dispose();

            string[] urls =
            {
                "http://www.a.com/",
                "http://www.b.com/",
                "http://www.c.com/",
                "http://www.d.com/"
            };

            foreach (string url in urls)
            {
                scheduler.Push(new Request(url, null));
            }

            // Initial state after four pushes.
            Assert.Equal(0, scheduler.GetErrorRequestsCount());
            Assert.Equal(4, scheduler.GetLeftRequestsCount());
            Assert.Equal(4, scheduler.GetTotalRequestsCount());

            // Bump each counter exactly once.
            scheduler.IncreaseErrorCounter();
            Assert.Equal(1, scheduler.GetErrorRequestsCount());
            Assert.Equal(0, scheduler.GetSuccessRequestsCount());
            scheduler.IncreaseSuccessCounter();
            Assert.Equal(1, scheduler.GetSuccessRequestsCount());

            // Drain the queue: left count goes 3, 2, 1, 0 while the other counters hold.
            for (int expectedLeft = 3; expectedLeft >= 0; expectedLeft--)
            {
                scheduler.Poll();
                Assert.Equal(expectedLeft, scheduler.GetLeftRequestsCount());
                Assert.Equal(1, scheduler.GetSuccessRequestsCount());
                Assert.Equal(1, scheduler.GetErrorRequestsCount());
                Assert.Equal(4, scheduler.GetTotalRequestsCount());
            }

            // Two further polls on the empty queue must not change any counter.
            scheduler.Poll();
            scheduler.Poll();
            Assert.Equal(0, scheduler.GetLeftRequestsCount());
            Assert.Equal(1, scheduler.GetSuccessRequestsCount());
            Assert.Equal(1, scheduler.GetErrorRequestsCount());
            Assert.Equal(4, scheduler.GetTotalRequestsCount());

            scheduler.Dispose();
        }
// Example #9
// 0
        /// <summary>
        /// Checks the scheduler's counter properties through a full push/poll cycle. After four
        /// pushes the left and total counts are 4. The error and success counts are each
        /// incremented once and must stay at 1. Each poll lowers the left count by one until it
        /// reaches 0, the total stays at 4, and polling the empty queue changes nothing.
        /// </summary>
        public void Status()
        {
            ISpider spider = new DefaultSpider("test");

            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler(spider.Identity);

            // NOTE(review): the scheduler is used again after Dispose(); presumably this
            // resets previously stored state - confirm.
            scheduler.Dispose();

            string[] urls =
            {
                "http://www.a.com/",
                "http://www.b.com/",
                "http://www.c.com/",
                "http://www.d.com/"
            };

            foreach (string url in urls)
            {
                scheduler.Push(new Request(url, null));
            }

            // Initial state after four pushes.
            Assert.Equal(0, scheduler.ErrorRequestsCount);
            Assert.Equal(4, scheduler.LeftRequestsCount);
            Assert.Equal(4, scheduler.TotalRequestsCount);

            // Bump each counter exactly once.
            scheduler.IncreaseErrorCount();
            Assert.Equal(1, scheduler.ErrorRequestsCount);
            Assert.Equal(0, scheduler.SuccessRequestsCount);
            scheduler.IncreaseSuccessCount();
            Assert.Equal(1, scheduler.SuccessRequestsCount);

            // Drain the queue: left count goes 3, 2, 1, 0 while the other counters hold.
            for (int expectedLeft = 3; expectedLeft >= 0; expectedLeft--)
            {
                scheduler.Poll();
                Assert.Equal(expectedLeft, scheduler.LeftRequestsCount);
                Assert.Equal(1, scheduler.SuccessRequestsCount);
                Assert.Equal(1, scheduler.ErrorRequestsCount);
                Assert.Equal(4, scheduler.TotalRequestsCount);
            }

            // Two further polls on the empty queue must not change any counter.
            scheduler.Poll();
            scheduler.Poll();
            Assert.Equal(0, scheduler.LeftRequestsCount);
            Assert.Equal(1, scheduler.SuccessRequestsCount);
            Assert.Equal(1, scheduler.ErrorRequestsCount);
            Assert.Equal(4, scheduler.TotalRequestsCount);

            scheduler.Dispose();
        }
// Example #10
// 0
        /// <summary>
        /// Pushes a single request carrying one property, polls it back and checks its URL,
        /// then checks that a second poll returns <c>null</c>.
        /// </summary>
        public void PushAndPoll1()
        {
            const string url = "http://www.ibm.com/developerworks/cn/java/j-javadev2-22/";

            ISpider spider = new DefaultSpider();

            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler(spider.Identity);
            // NOTE(review): the scheduler is used again after Dispose(); presumably this
            // resets previously stored state - confirm.
            scheduler.Dispose();

            Request request = new Request(url, null);
            request.Properties.Add("1", "2");

            scheduler.Push(request);

            Request first = scheduler.Poll();
            Assert.Equal(url, first.Url.ToString());

            // NOTE(review): this reads the property from the pushed `request`, not from the
            // polled result, so it does not verify the round trip - confirm intent.
            Assert.Equal("2", request.Properties["1"]);

            Request second = scheduler.Poll();
            Assert.Null(second);

            scheduler.Dispose();
        }
        /// <summary>
        /// Initialises the scheduler twice with the same spider and checks that the queue, set,
        /// item, error-count and success-count keys are the same after the second initialisation.
        /// </summary>
        public void MultiInit()
        {
            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler();
            ISpider spider = new DefaultSpider();

            // First initialisation: remember every key the scheduler reports.
            scheduler.Init(spider);
            string queueKeyBefore = scheduler.GetQueueKey();
            string setKeyBefore = scheduler.GetSetKey();
            string itemKeyBefore = scheduler.GetItemKey();
            string errorKeyBefore = scheduler.GetErrorCountKey();
            string successKeyBefore = scheduler.GetSuccessCountKey();

            // Second initialisation: each key must match its earlier value.
            scheduler.Init(spider);
            Assert.Equal(queueKeyBefore, scheduler.GetQueueKey());
            Assert.Equal(setKeyBefore, scheduler.GetSetKey());
            Assert.Equal(itemKeyBefore, scheduler.GetItemKey());
            Assert.Equal(errorKeyBefore, scheduler.GetErrorCountKey());
            Assert.Equal(successKeyBefore, scheduler.GetSuccessCountKey());

            // Dispose is invoked twice, exactly as in the original test.
            scheduler.Dispose();
            scheduler.Dispose();
        }
        /// <summary>
        /// Pushes requests /1 through /4 (each tagged with the spider's site) and checks that
        /// the first poll returns /4. Despite its name, this version never calls
        /// <c>Clear()</c>; it only calls <c>Dispose()</c> before and after.
        /// </summary>
        public void Clear()
        {
            ISpider spider = new DefaultSpider();

            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler(spider.Identity);

            // NOTE(review): the scheduler is used again after Dispose(); presumably this
            // resets previously stored state - confirm.
            scheduler.Dispose();

            string[] urls =
            {
                "http://www.ibm.com/1",
                "http://www.ibm.com/2",
                "http://www.ibm.com/3",
                "http://www.ibm.com/4"
            };

            // Build every request up front, then push them in the same order.
            Request[] requests = new Request[urls.Length];
            for (int i = 0; i < urls.Length; i++)
            {
                Request built = new Request(urls[i], null);
                built.Site = spider.Site;
                requests[i] = built;
            }

            foreach (Request item in requests)
            {
                scheduler.Push(item);
            }

            Request polled = scheduler.Poll();
            Assert.Equal("http://www.ibm.com/4", polled.Url.ToString());

            scheduler.Dispose();
        }