/// <summary>
/// Adds 100 pages to a <see cref="RedisScheduler"/>, clears it, and checks that both the
/// scheduler count and the number of "CrawledPage_Test_*" keys on the server are zero.
/// </summary>
public void ClearTest()
        {
            // Arrange: queue pages 0.html .. 99.html.
            var redisScheduler = new RedisScheduler(_state);
            foreach (var pageNumber in Enumerable.Range(0, 100))
            {
                redisScheduler.Add(CreatePageToCrawl("http://www.test.com/", pageNumber + ".html"));
            }

            // Act
            redisScheduler.Clear();

            // Assert: the scheduler itself reports nothing queued...
            Assert.That(redisScheduler.Count, Is.EqualTo(0));

            // ...and a key scan on the first configured endpoint finds no crawled-page markers.
            var firstEndPoint = _state.Connection.GetEndPoints().First();
            var redisServer = _state.Connection.GetServer(firstEndPoint);
            var remainingKeys = redisServer.Keys(_database, pattern: "CrawledPage_Test_*").LongCount();
            Assert.That(remainingKeys, Is.EqualTo(0L));
        }
        /// <summary>
        /// Adds one page to a <see cref="RedisScheduler"/> and inspects Redis directly: the
        /// "PageToCrawl_Test" list must exist with exactly one entry, the per-URL
        /// "CrawledPage_Test_..." key must exist, and the stored JSON must deserialize to a
        /// page with the same Uri.
        /// </summary>
        public void AddTest()
        {
            const string queueKey = "PageToCrawl_Test";
            const string crawledKey = "CrawledPage_Test_http://www.test.com/1.html";

            var redisScheduler = new RedisScheduler(_state);
            var original = CreatePageToCrawl("http://www.test.com", 1 + ".html");
            redisScheduler.Add(original);

            var redis = _state.Connection.GetDatabase();
            Assert.That(redis.KeyExists(queueKey), Is.True);
            Assert.That(redis.KeyExists(crawledKey), Is.True);
            Assert.That(redis.ListLength(queueKey), Is.EqualTo(1L));

            // Round-trip the first (and only) list entry back into a PageToCrawl.
            var serialized = (string) redis.ListGetByIndex(queueKey, 0);
            var roundTripped = JsonConvert.DeserializeObject<PageToCrawl>(serialized);
            Assert.That(original.Uri, Is.EqualTo(roundTripped.Uri));
        }
示例#3
0
        /// <summary>
        /// Configures this spider: ThreadNum of 1, a <see cref="RedisScheduler"/>, an
        /// <see cref="HttpClientDownloader"/>, a <see cref="DbStartUrlBuilder"/> over the
        /// jd.sku_v2_{RunIdOfMonday} table, a <see cref="MySqlEntityPipeline"/>, and the
        /// <see cref="ProductUpdater"/> entity type.
        /// </summary>
        /// <param name="arguments">Not read by this method.</param>
        protected override void MyInit(params string[] arguments)
        {
            // NOTE(review): connection settings are hard-coded here — consider moving them to configuration.
            const string redisConnectString = "127.0.0.1:6379,serviceName=Scheduler.NET,keepAlive=8,allowAdmin=True,connectTimeout=10000,password=,abortConnect=True,connectRetry=20";
            const string mySqlConnectString = "Database='mysql';Data Source=localhost;User ID=root;Password=;Port=3306;SslMode=None;";
            const string checkChatUrlFormat = "http://chat1.jd.com/api/checkChat?my=list&pidList={0}&callback=json";

            ThreadNum = 1;
            Scheduler = new RedisScheduler(redisConnectString);
            Downloader = new HttpClientDownloader();

            // Disabled handler registration, kept for reference:
            //Downloader.AddAfterDownloadCompleteHandler(new SubContentHandler
            //{
            //	Start = "json(",
            //	End = ");",
            //	StartOffset = 5,
            //	EndOffset = 0
            //});

            // Rows that still lack a shop name or shop id; the "sku" column fills {0} in the URL format.
            string pendingSkuQuery = $"SELECT * FROM jd.sku_v2_{DateTimeUtils.RunIdOfMonday} WHERE shopname is null or shopid is null order by sku";
            var skuUrlBuilder = new DbStartUrlBuilder(
                Database.MySql,
                mySqlConnectString,
                pendingSkuQuery,
                new[] { "sku" },
                checkChatUrlFormat);
            AddStartUrlBuilder(skuUrlBuilder);

            AddPipeline(new MySqlEntityPipeline(mySqlConnectString));
            AddEntityType(typeof(ProductUpdater));
        }
示例#4
0
            /// <summary>
            /// Creates the listener for <paramref name="spider"/>. When the spider has
            /// <c>SaveStatusToRedis</c> set, a background task is started; outside NET_CORE builds
            /// that task calls <c>UpdateStatus</c> against Redis database 0, pausing 3000 ms between
            /// attempts, and stops after an update that ran while <c>Closed</c> was true.
            /// </summary>
            /// <param name="spider">The spider whose status is reported.</param>
            public MonitorSpiderListener(Core.Spider spider)
            {
                _spider = spider;

                if (!spider.SaveStatusToRedis)
                {
                    return;
                }

                Task.Factory.StartNew(() =>
                {
#if !NET_CORE
                    var redisScheduler = spider.Scheduler as RedisScheduler;
                    if (redisScheduler == null)
                    {
                        // Nothing to report unless the spider's scheduler is a RedisScheduler.
                        return;
                    }

                    ConnectionMultiplexer multiplexer = redisScheduler.Redis;
                    IDatabase statusDb = multiplexer.GetDatabase(0);

                    for (;;)
                    {
                        bool finalUpdateDone = false;

                        try
                        {
                            // Sample Closed before updating so the last update is still written on shutdown.
                            bool closedNow = Closed;
                            UpdateStatus(statusDb);
                            finalUpdateDone = closedNow;
                        }
                        catch (Exception)
                        {
                            // Deliberately ignored: a failed update is retried on the next iteration.
                        }

                        if (finalUpdateDone)
                        {
                            break;
                        }

                        Thread.Sleep(3000);
                    }
#endif
                });
            }
示例#5
0
        /// <summary>
        /// Pushes a single request into a <see cref="RedisScheduler"/> and verifies that it is
        /// polled back exactly once: the first poll returns the pushed URL, the second returns null.
        /// </summary>
        public void RedisTest()
        {
            RedisScheduler redisScheduler = new RedisScheduler("localhost", "");

            try
            {
                // NOTE(review): spider is not used below; construction is kept in case it has side effects.
                ISpider spider = new DefaultSpider();

                // Start from an empty queue so leftovers from earlier runs cannot affect the polls.
                redisScheduler.Clear();

                Request request = new Request("http://www.ibm.com/developerworks/cn/java/j-javadev2-22/", 1, null);

                request.PutExtra("1", "2");
                redisScheduler.Push(request);
                Request result = redisScheduler.Poll();

                Assert.AreEqual("http://www.ibm.com/developerworks/cn/java/j-javadev2-22/", result.Url.ToString());
                Request result1 = redisScheduler.Poll();

                Assert.IsNull(result1);

                // Clean up while the scheduler is still usable: it must not be touched after Dispose().
                redisScheduler.Clear();
            }
            finally
            {
                // Release the scheduler even when an assertion above fails.
                redisScheduler.Dispose();
            }
        }
示例#6
0
        /// <summary>
        /// Pushes four requests (/1 .. /4) into a <see cref="RedisScheduler"/> and asserts that the
        /// first two polls return /4 and then /3, i.e. the most recently pushed requests come out first.
        /// </summary>
        public void Redis_QueueTest()
        {
            RedisScheduler redisScheduler = new RedisScheduler("localhost", "");

            ISpider spider = new DefaultSpider();
            Request[] queued =
            {
                new Request("http://www.ibm.com/1", 1, null),
                new Request("http://www.ibm.com/2", 1, null),
                new Request("http://www.ibm.com/3", 1, null),
                new Request("http://www.ibm.com/4", 1, null)
            };

            // Push in ascending order: /1 first, /4 last.
            foreach (Request pending in queued)
            {
                redisScheduler.Push(pending);
            }

            Request newest = redisScheduler.Poll();
            Assert.AreEqual("http://www.ibm.com/4", newest.Url.ToString());

            Request secondNewest = redisScheduler.Poll();
            Assert.AreEqual("http://www.ibm.com/3", secondNewest.Url.ToString());

            redisScheduler.Dispose();
        }
示例#7
0
 /// <summary>
 /// Configures this spider: CachedSize 1, ThreadNum 8, a <see cref="RedisScheduler"/>, an
 /// <see cref="HttpClientDownloader"/> with one <see cref="SubContentHandler"/>, a
 /// <see cref="BaseDbPrepareStartUrls"/> over the jd.sku_v2_{RunIdOfMonday} table, a
 /// <see cref="MySqlEntityPipeline"/>, and the <see cref="ProductUpdater"/> entity type.
 /// </summary>
 protected override void MyInit()
 {
     // NOTE(review): Redis and MySQL credentials are hard-coded in the literals below —
     // consider loading them from configuration instead of source.
     CachedSize = 1;
     ThreadNum = 8;
     Scheduler = new RedisScheduler("127.0.0.1:6379,serviceName=Scheduler.NET,keepAlive=8,allowAdmin=True,connectTimeout=10000,password=6GS9F2QTkP36GggE0c3XwVwI,abortConnect=True,connectRetry=20");

     // Downloader with a single download-complete handler bounded by "json(" and ");".
     var httpDownloader = new HttpClientDownloader();
     var subContent = new SubContentHandler();
     subContent.Start = "json(";
     subContent.End = ");";
     subContent.StartOffset = 5;
     subContent.EndOffset = 0;
     httpDownloader.DownloadCompleteHandlers = new IDownloadCompleteHandler[] { subContent };
     Downloader = httpDownloader;

     // Start-URL source: the "sku" column of each selected row fills {0} in the format string.
     var skuSource = new BaseDbPrepareStartUrls();
     skuSource.Source = DataSource.MySql;
     skuSource.ConnectString = "Database='test';Data Source= localhost;User ID=root;Password=1qazZAQ!;Port=3306";
     skuSource.QueryString = $"SELECT * FROM jd.sku_v2_{DateTimeUtils.RunIdOfMonday} WHERE shopname is null or shopid is null order by sku";
     skuSource.Columns = new[]
     {
         new DataColumn { Name = "sku" }
     };
     skuSource.FormateStrings = new List<string>
     {
         "http://chat1.jd.com/api/checkChat?my=list&pidList={0}&callback=json"
     };
     PrepareStartUrls = new PrepareStartUrls[] { skuSource };

     AddPipeline(new MySqlEntityPipeline("Database='taobao';Data Source=localhost ;User ID=root;Password=1qazZAQ!;Port=4306"));
     AddEntityType(typeof(ProductUpdater));
 }
        /// <summary>
        /// Configures this spider from "taobaokeyword.txt": one start URL per keyword line, ThreadNum 5,
        /// a <see cref="RedisScheduler"/>, an <see cref="HttpClientDownloader"/> with a
        /// <see cref="SubContentHandler"/> and an <see cref="IncrementTargetUrlsCreator"/>, a
        /// <see cref="MySqlEntityPipeline"/>, and the <see cref="Item"/> entity type.
        /// </summary>
        protected override void MyInit()
        {
            // NOTE(review): Redis and MySQL credentials are hard-coded in the literals below —
            // consider loading them from configuration instead of source.
            Site keywordSite = new Site();

            using (var keywordFile = new StreamReader(File.OpenRead("taobaokeyword.txt")))
            {
                // Reading stops at end of file or at the first empty line, whichever comes first.
                for (string keyword = keywordFile.ReadLine(); !string.IsNullOrEmpty(keyword); keyword = keywordFile.ReadLine())
                {
                    // NOTE(review): "{1}" is an interpolation hole holding the literal 1, so every URL
                    // carries tab=1 — confirm this is intended.
                    string searchUrl = "https://" + $"s.taobao.com/search?q={keyword}&imgfile=&js=1&stats_click=search_radio_all%3A1&ie=utf8&sort=sale-desc&s=0&tab={1}&fs=1&filter_tianmao=tmall";

                    var extras = new Dictionary<string, object>();
                    extras.Add("keyword", keyword);

                    keywordSite.AddStartUrl(searchUrl, extras);
                }
            }

            Site = keywordSite;
            ThreadNum = 5;
            SkipWhenResultIsEmpty = true;
            Scheduler = new RedisScheduler("127.0.0.1:6379,serviceName = DotnetSpider,keepAlive = 8,allowAdmin = True,connectTimeout = 10000,password = 6GS9F2QTkP36GggE0c3XwVwI,abortConnect = True,connectRetry = 20");

            // Downloader with two download-complete handlers, registered in this order.
            var httpDownloader = new HttpClientDownloader();
            var pageConfigSlice = new SubContentHandler();
            pageConfigSlice.StartOffset = 16;
            pageConfigSlice.EndOffset = 22;
            pageConfigSlice.Start = "g_page_config = {";
            pageConfigSlice.End = "g_srp_loadCss();";
            var nextPageUrls = new IncrementTargetUrlsCreator("&s=0", null, 44);
            httpDownloader.DownloadCompleteHandlers = new IDownloadCompleteHandler[]
            {
                pageConfigSlice,
                nextPageUrls
            };
            Downloader = httpDownloader;

            AddPipeline(new MySqlEntityPipeline("Database = 'mysql'; Data Source = localhost; User ID = root; Password = 1qazZAQ!; Port = 3306"));
            AddEntityType(typeof(Item), new MyDataHanlder());
        }
        /// <summary>
        /// Configures this spider: a <see cref="RedisScheduler"/> from <c>Config.RedisConnectString</c>,
        /// an <see cref="HttpClientDownloader"/> with two after-download handlers, ThreadNum 1, and the
        /// <see cref="Item"/> entity type. Unless "noprepare" is passed, start URLs are also prepared
        /// from the taobao.result_keywords table.
        /// </summary>
        /// <param name="arguments">
        /// Run arguments; if any element equals "noprepare", <c>PrepareStartUrls</c> is left unset.
        /// </param>
        protected override void MyInit(params string[] arguments)
        {
            Scheduler = new RedisScheduler(Config.RedisConnectString);

            var httpDownloader = new HttpClientDownloader();

            // Presumably rewrites escaped slashes ("\/") in the downloaded content to "/" — confirm
            // against ReplaceContentHandler.
            var slashFixer = new ReplaceContentHandler();
            slashFixer.NewValue = "/";
            slashFixer.OldValue = "\\/";
            httpDownloader.AddAfterDownloadCompleteHandler(slashFixer);

            var nextPageUrls = new IncrementTargetUrlsCreator("&s=0", null, 44);
            httpDownloader.AddAfterDownloadCompleteHandler(nextPageUrls);

            Downloader = httpDownloader;
            ThreadNum = 1;
            SkipWhenResultIsEmpty = true;

            bool skipPrepare = arguments.Contains("noprepare");
            if (!skipPrepare)
            {
                // The "bidwordstr" and "tab" columns fill {0} and {1} of the search URL format.
                var keywordSource = new BaseDbPrepareStartUrls();
                keywordSource.BulkInsert = true;
                keywordSource.ConnectString = Config.ConnectString;
                keywordSource.QueryString = "SELECT * FROM taobao.result_keywords limit 10000";
                keywordSource.Columns = new[]
                {
                    new DataColumn("bidwordstr"),
                    new DataColumn("tab")
                };
                keywordSource.FormateStrings = new List<string>
                {
                    "https://s.taobao.com/search?q={0}&imgfile=&js=1&stats_click=search_radio_all%3A1&ie=utf8&sort=sale-desc&s=0&tab={1}"
                };

                PrepareStartUrls = new PrepareStartUrls[] { keywordSource };
            }

            AddEntityType(typeof(Item), new MyDataHanlder());
        }
示例#10
0
        /// <summary>
        /// Pushes one request for a <see cref="TestSpider"/> into a <see cref="RedisScheduler"/> and
        /// verifies it is polled back exactly once. The spider's task is removed through a
        /// <see cref="RedisSchedulerManager"/> both before and after the check.
        /// </summary>
        public void RedisTest()
        {
            const string targetUrl = "http://www.ibm.com/developerworks/cn/java/j-javadev2-22/";

            RedisScheduler redisScheduler = new RedisScheduler("localhost", "");
            ISpider spider = new TestSpider();
            RedisSchedulerManager manager = new RedisSchedulerManager("localhost");

            // Drop any state left under this spider's identity before the run.
            manager.RemoveTask(spider.Identity);

            var pushed = new Request(targetUrl, 1, null);
            pushed.PutExtra("1", "2");
            redisScheduler.Push(pushed, spider);

            // First poll returns the pushed request...
            Request firstPoll = redisScheduler.Poll(spider);
            Assert.AreEqual(targetUrl, firstPoll.Url.ToString());

            // ...and the queue is empty afterwards.
            Request secondPoll = redisScheduler.Poll(spider);
            Assert.IsNull(secondPoll);

            redisScheduler.Dispose();

            manager.RemoveTask(spider.Identity);
        }