Exemplo n.º 1
0
        // Exercises two spiders in sequence: the first is started with RunAsync,
        // sent an exit signal after 500 ms and awaited; the second is run with the
        // blocking Run call. The scheduler's SuccessRequestsCount is asserted for each.
        public void CloseSignal()
        {
            // --- Spider that is interrupted shortly after it starts ---
            var interruptedSite = new Site
            {
                CycleRetryTimes = 5,
                EncodingName = "UTF-8"
            };
            Spider interrupted = Spider.Create(interruptedSite, new TestPageProcessor())
                                       .AddPipeline(new TestPipeline());
            // NOTE(review): presumably keeps scheduler data readable after the run - confirm.
            interrupted.ClearSchedulerAfterCompleted = false;

            int index = 0;
            while (index < 20)
            {
                interrupted.AddStartUrl($"http://www.baidu.com/_t={index}");
                ++index;
            }

            var running = interrupted.RunAsync();
            Thread.Sleep(500);
            interrupted.SendExitSignal();
            running.Wait();

            Assert.Equal(10, interrupted.Scheduler.SuccessRequestsCount);

            // --- Spider that is allowed to run until Run returns ---
            var completeSite = new Site
            {
                CycleRetryTimes = 5,
                EncodingName = "UTF-8"
            };
            Spider complete = Spider.Create(completeSite, new TestPageProcessor())
                                    .AddPipeline(new TestPipeline());
            complete.ClearSchedulerAfterCompleted = false;

            index = 0;
            while (index < 25)
            {
                complete.AddStartUrl($"http://www.baidu.com/_t={index}");
                ++index;
            }

            complete.Run();

            Assert.Equal(25, complete.Scheduler.SuccessRequestsCount);
        }
Exemplo n.º 2
0
        // Runs a single-threaded spider with a DbMonitor attached, then queries the
        // DotnetSpider.Log and DotnetSpider.Status tables for the spider's identity
        // and asserts on the last log message, the status row count and the status value.
        public void DatebaseLogAndStatus()
        {
            Env.Reload();

            // Fresh identity per run so the queries below only match rows from this run.
            string id = Guid.NewGuid().ToString("N");

            using (Spider spider = Spider.Create(new Site {
                EncodingName = "UTF-8", SleepTime = 1000
            },
                                                 id,
                                                 new QueueDuplicateRemovedScheduler(),
                                                 new TestPageProcessor()))
            {
                spider.Monitor = new DbMonitor(spider.TaskId, spider.Identity);
                spider.AddPipeline(new TestPipeline());
                spider.ThreadNum = 1;
                for (int i = 0; i < 5; i++)
                {
                    spider.AddStartUrl("http://www.baidu.com/" + i);
                }
                spider.Run();
            }

            // NOTE(review): presumably gives the monitor time to flush its rows - confirm.
            Thread.Sleep(3000);

            using (var conn = Env.SystemConnectionStringSettings.GetDbConnection())
            {
                // The identity is a locally generated GUID, not external input.
                Assert.StartsWith("Crawl complete, cost", conn.Query <Log>($"SELECT * FROM DotnetSpider.Log where Identity='{id}'").Last().message);
                Assert.Equal(1, conn.Query <CountResult>($"SELECT COUNT(*) as Count FROM DotnetSpider.Status where Identity='{id}'").First().Count);
                Assert.Equal("Finished", conn.Query <statusObj>($"SELECT * FROM DotnetSpider.Status where Identity='{id}'").First().status);
            }
        }
Exemplo n.º 3
0
        // Runs a single-threaded spider with a MySqlMonitor attached, then queries the
        // DotnetSpider.Log and DotnetSpider.Status tables for the spider's identity
        // and asserts on the last log message, the status row count and the status value.
        public void DatebaseLogAndStatus()
        {
            // One definition shared by the monitor and the verification connection,
            // so the two cannot drift apart.
            const string connectionString = "Database='mysql';Data Source=localhost;User ID=root;Port=3306;SslMode=None;";

            // Fresh identity per run so the queries below only match rows from this run.
            string id = Guid.NewGuid().ToString("N");

            using (Spider spider = Spider.Create(new Site {
                EncodingName = "UTF-8", SleepTime = 1000
            },
                                                 id,
                                                 new QueueDuplicateRemovedScheduler(),
                                                 new TestPageProcessor()))
            {
                spider.Monitor = new MySqlMonitor(spider.TaskId, spider.Identity, false, connectionString);
                spider.AddPipeline(new TestPipeline());
                spider.ThreadNum = 1;
                for (int i = 0; i < 5; i++)
                {
                    spider.AddStartUrl("http://www.baidu.com/" + i);
                }
                spider.Run();
            }

            // NOTE(review): presumably gives the monitor time to flush its rows - confirm.
            Thread.Sleep(3000);

            using (var conn = new MySqlConnection(connectionString))
            {
                // The identity is a locally generated GUID, not external input.
                Assert.StartsWith("Crawl complete, cost", conn.Query <Log>($"SELECT * FROM DotnetSpider.Log where Identity='{id}'").Last().message);
                Assert.Equal(1, conn.Query <CountResult>($"SELECT COUNT(*) as Count FROM DotnetSpider.Status where Identity='{id}'").First().Count);
                Assert.Equal("Finished", conn.Query <statusObj>($"SELECT * FROM DotnetSpider.Status where Identity='{id}'").First().status);
            }
        }
Exemplo n.º 4
0
        // Runs a spider that uses a TestDownloader and a MySqlMonitor, then reads the
        // dotnetspider.log and dotnetspider.status tables for the spider's identity and
        // asserts on the final log message, the status row count and the status value.
        public void DatebaseLogAndStatus()
        {
            const string mysqlConnection = "Database='mysql';Data Source=localhost;User ID=root;Port=3306;SslMode=None;";

            LogUtil.Init();

            string id = Guid.NewGuid().ToString("N");

            Env.NodeId = "DEFAULT";

            var site = new Site
            {
                EncodingName = "UTF-8"
            };
            using (Spider crawler = Spider.Create(site, id, new QueueDuplicateRemovedScheduler(), new TestPageProcessor()))
            {
                crawler.Downloader = new TestDownloader();
                crawler.TaskId = "1";
                crawler.Monitor = new MySqlMonitor(crawler.TaskId, crawler.Identity, false, mysqlConnection);
                crawler.AddPipeline(new TestPipeline());

                for (int n = 0; n < 5; n++)
                {
                    Serilog.Log.Logger.Information("add start url" + n, id);
                    crawler.AddStartUrl("http://www.baidu.com/" + n);
                }

                crawler.EmptySleepTime = 1000;
                crawler.Run();
            }

            using (var db = new MySqlConnection(mysqlConnection))
            {
                var logRows = db.Query <Log>($"SELECT * FROM dotnetspider.log where identity='{id}'").ToList();
                var finalLog = logRows[logRows.Count - 1];
                Assert.StartsWith("Crawl complete, cost", finalLog.message);

                var statusCount = db.Query <CountResult>($"SELECT COUNT(*) as Count FROM dotnetspider.status where identity='{id}'").First();
                Assert.Equal(1, statusCount.Count);

                var statusRow = db.Query <statusObj>($"SELECT * FROM dotnetspider.status where identity='{id}'").First();
                Assert.Equal("Finished", statusRow.status);
            }
        }
Exemplo n.º 5
0
        // Stores a connection string under "logAndStatusConnectString", checks it can be
        // read back, runs a single-threaded spider registered with MonitorCenter, and
        // then asserts the row counts in dotnetspider.status and dotnetspider.log for
        // the generated user id / task group / identity triple.
        public void DatebaseLogAndStatus()
        {
            string id = Guid.NewGuid().ToString("N");
            string taskGroup = Guid.NewGuid().ToString("N");
            string userId = Guid.NewGuid().ToString("N");
            string connectString = "Database='test';Data Source=localhost;User ID=root;Password=1qazZAQ!;Port=3306";

            // Round-trip the setting through the configuration store.
            Configuration.SetValue("logAndStatusConnectString", connectString);
            Assert.Equal(connectString, Configuration.GetValue("logAndStatusConnectString"));

            var site = new Site
            {
                EncodingName = "UTF-8",
                MinSleepTime = 1000
            };
            using (Spider crawler = Spider.Create(site, id, userId, taskGroup, new TestPageProcessor(), new QueueDuplicateRemovedScheduler()))
            {
                crawler.AddPipeline(new TestPipeline()).SetThreadNum(1);

                int n = 0;
                while (n < 5)
                {
                    crawler.AddStartUrl("http://www.baidu.com/" + n);
                    n++;
                }

                MonitorCenter.Register(crawler);
                crawler.Run();
            }

            using (MySqlConnection db = new MySqlConnection(connectString))
            {
                var statusRows = db.Query <CountResult>($"SELECT COUNT(*) as Count FROM dotnetspider.status where userid='{userId}' and taskgroup='{taskGroup}' and identity='{id}'").First();
                Assert.Equal(1, statusRows.Count);

                var logRows = db.Query <CountResult>($"SELECT COUNT(*) as Count FROM dotnetspider.log where userid='{userId}' and taskgroup='{taskGroup}' and identity='{id}'").First();
                Assert.Equal(7, logRows.Count);
            }
        }
Exemplo n.º 6
0
        // Entry point: rewrites the cookie file "www.baidu.com.cookies", builds a
        // single-threaded spider whose HttpClientDownloader has a TimerUpdateCookieHandler
        // wrapping a FileCookieInject, queues 10000 start URLs and runs the spider.
        public static void Main(string[] args)
        {
            var cookieFile = "www.baidu.com.cookies";

            // Start from a freshly written cookie file.
            if (File.Exists(cookieFile))
            {
                File.Delete(cookieFile);
            }
            File.WriteAllText(cookieFile, "a=b&c=d");

            var site = new Site
            {
                EncodingName = "UTF-8",
                SleepTime = 1000
            };
            Spider crawler = Spider.Create(site, new TestPageProcessor()).AddPipeline(new TestPipeline());
            crawler.ThreadNum = 1;

            var httpDownloader = new HttpClientDownloader();
            var cookieHandler = new TimerUpdateCookieHandler(5, new FileCookieInject());
            httpDownloader.AddAfterDownloadCompleteHandler(cookieHandler);
            crawler.Downloader = httpDownloader;

            int n = 0;
            while (n < 10000)
            {
                crawler.AddStartUrl("http://www.baidu.com/" + n);
                n++;
            }

            crawler.Run();
        }
Exemplo n.º 7
0
        // Stores a connection string under "connectString", checks it can be read back,
        // runs a single-threaded spider with a DbMonitor attached, and then asserts the
        // row counts in dotnetspider.log and dotnetspider.status for the spider's identity.
        public void DatebaseLogAndStatus()
        {
            // Fresh identity per run so the queries below only match rows from this run.
            string id            = Guid.NewGuid().ToString("N");
            string connectString = "Database='mysql';Data Source=localhost;User ID=root;Password=1qazZAQ!;Port=3306";

            // Round-trip check: compare against the variable rather than a second copy
            // of the literal, so the two cannot drift apart.
            Core.Infrastructure.Config.SetValue("connectString", connectString);
            Assert.AreEqual(connectString, Core.Infrastructure.Config.GetValue("connectString"));

            using (Spider spider = Spider.Create(new Site {
                EncodingName = "UTF-8", SleepTime = 1000
            },
                                                 id,
                                                 new QueueDuplicateRemovedScheduler(),
                                                 new TestPageProcessor()))
            {
                spider.AddPipeline(new TestPipeline());
                spider.ThreadNum = 1;
                for (int i = 0; i < 5; i++)
                {
                    spider.AddStartUrl("http://www.baidu.com/" + i);
                }
                spider.Monitor = new DbMonitor(id);
                spider.Run();
            }

            using (MySqlConnection conn = new MySqlConnection(connectString))
            {
                // The identity is a locally generated GUID, not external input.
                Assert.AreEqual(15, conn.Query <CountResult>($"SELECT COUNT(*) as Count FROM dotnetspider.log where identity='{id}'").First().Count);
                Assert.AreEqual(1, conn.Query <CountResult>($"SELECT COUNT(*) as Count FROM dotnetspider.status where identity='{id}'").First().Count);
            }
        }
Exemplo n.º 8
0
        // Skipped when the TRAVIS environment variable equals "1". Otherwise runs a
        // single-threaded spider over five start URLs with zero sleep times, asserts
        // the run took less than 3000 ms, and asserts that every start URL appears as
        // a line in "FastExit_Result.txt".
        public void FastExit()
        {
            if (Environment.GetEnvironmentVariable("TRAVIS") == "1")
            {
                return;
            }

            var path = "FastExit_Result.txt";
            string[] startUrls =
            {
                "http://item.jd.com/1013286.html?_t=1",
                "http://item.jd.com/1013286.html?_t=2",
                "http://item.jd.com/1013286.html?_t=3",
                "http://item.jd.com/1013286.html?_t=4",
                "http://item.jd.com/1013286.html?_t=5"
            };

            // Remove output left behind by a previous run.
            if (File.Exists(path))
            {
                File.Delete(path);
            }

            // The measured interval includes spider construction and setup.
            Stopwatch timer = Stopwatch.StartNew();

            var site = new Site
            {
                CycleRetryTimes = 5,
                EncodingName = "UTF-8",
                SleepTime = 0
            };
            Spider crawler = Spider.Create(site, new FastExitPageProcessor())
                                   .AddPipeline(new FastExitPipeline());
            crawler.ThreadNum = 1;
            crawler.EmptySleepTime = 0;
            foreach (string url in startUrls)
            {
                crawler.AddStartUrl(url);
            }

            crawler.Run();
            timer.Stop();

            long elapsedMs = timer.ElapsedMilliseconds;
            Assert.True(elapsedMs < 3000);

            var lines = File.ReadAllLines(path);
            foreach (string url in startUrls)
            {
                Assert.Contains(url, lines);
            }
        }
Exemplo n.º 9
0
        // Runs a single-threaded spider against one start URL and asserts that its
        // Status is Status.Finished once the blocking Run call has returned.
        public void RetryWhenResultIsEmpty()
        {
            var site = new Site
            {
                CycleRetryTimes = 5,
                EncodingName = "UTF-8",
                SleepTime = 1000
            };
            Spider crawler = Spider.Create(site, new TestPageProcessor())
                                   .AddPipeline(new TestPipeline());
            crawler.ThreadNum = 1;
            crawler.AddStartUrl("http://taobao.com");

            crawler.Run();

            Assert.Equal(Status.Finished, crawler.Status);
        }
Exemplo n.º 10
0
        // Runs a single-threaded spider with RetryWhenResultIsEmpty enabled against one
        // start URL and asserts that its StatusCode is Status.Finished after Run returns.
        public void TestRetryWhenResultIsEmpty()
        {
            var site = new Site
            {
                CycleRetryTimes = 5,
                EncodingName = "UTF-8",
                MinSleepTime = 1000,
                Timeout = 20000
            };
            Spider crawler = Spider.Create(site, new TestPageProcessor())
                                   .AddPipeline(new TestPipeline())
                                   .SetThreadNum(1);
            crawler.AddStartUrl("http://taobao.com");
            crawler.RetryWhenResultIsEmpty = true;

            crawler.Run();

            Assert.AreEqual(Status.Finished, crawler.StatusCode);
        }
Exemplo n.º 11
0
        // Queues 10000 start URLs on a single-threaded spider, starts it with RunAsync,
        // stops it after five seconds, waits five seconds, starts it again with
        // RunAsync and waits a final five seconds. Contains no assertions.
        public void RunAsyncAndStop()
        {
            var site = new Site
            {
                EncodingName = "UTF-8",
                MinSleepTime = 1000
            };
            Spider crawler = Spider.Create(site, new TestPageProcessor())
                                   .AddPipeline(new TestPipeline())
                                   .SetThreadNum(1);

            int n = 0;
            while (n < 10000)
            {
                crawler.AddStartUrl("http://www.baidu.com/" + n);
                n++;
            }

            // First run, interrupted by Stop.
            crawler.RunAsync();
            Thread.Sleep(5000);
            crawler.Stop();
            Thread.Sleep(5000);

            // Second run on the same spider instance.
            crawler.RunAsync();
            Thread.Sleep(5000);
        }
Exemplo n.º 12
0
        // Queues 10000 start URLs on a single-threaded spider, starts it with RunAsync,
        // and after five seconds calls Pause with a callback that calls Exit on the
        // spider; then waits another five seconds. Contains no assertions.
        public void RunAsyncAndStopThenExit()
        {
            var site = new Site
            {
                EncodingName = "UTF-8",
                SleepTime = 1000
            };
            Spider crawler = Spider.Create(site, new TestPageProcessor())
                                   .AddPipeline(new TestPipeline());
            crawler.ThreadNum = 1;

            int n = 0;
            while (n < 10000)
            {
                crawler.AddStartUrl("http://www.baidu.com/" + n);
                n++;
            }

            crawler.RunAsync();
            Thread.Sleep(5000);

            // NOTE(review): presumably the callback fires once the pause has taken effect - confirm.
            crawler.Pause(() =>
            {
                crawler.Exit();
            });
            Thread.Sleep(5000);
        }
Exemplo n.º 13
0
        // Entry point: registers NLogMonitor as the IMonitorService singleton, builds a
        // single-threaded spider backed by a TestDownloader, queues 10000 start URLs and
        // then calls Run, Stop and RunAsync with five-second sleeps after each.
        public static void Main(string[] args)
        {
            IocExtension.ServiceCollection.AddSingleton <IMonitorService, NLogMonitor>();

            var site = new Site
            {
                EncodingName = "UTF-8",
                MinSleepTime = 1000
            };
            Spider crawler = Spider.Create(site, new SpiderTest.TestPageProcessor())
                                   .AddPipeline(new SpiderTest.TestPipeline())
                                   .SetThreadNum(1);
            crawler.SetDownloader(new TestDownloader());

            int n = 0;
            while (n < 10000)
            {
                crawler.AddStartUrl("http://www.baidu.com/" + n);
                n++;
            }

            crawler.Run();
            Thread.Sleep(5000);

            crawler.Stop();
            Thread.Sleep(5000);

            crawler.RunAsync();
            Thread.Sleep(5000);
        }
Exemplo n.º 14
0
        // Entry point: under NET_CORE registers the code-pages encoding provider, then
        // registers NLogMonitor as the IMonitor singleton, builds a single-threaded
        // spider backed by a TestDownloader, queues 10 start URLs and calls Run, Stop
        // and RunAsync with five-second sleeps after each.
        public static void Main(string[] args)
        {
#if NET_CORE
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
#endif
            IocContainer.Default.AddSingleton <IMonitor, NLogMonitor>();

            var site = new Site
            {
                EncodingName = "UTF-8",
                MinSleepTime = 1000
            };
            Spider crawler = Spider.Create(site, new SpiderTest.TestPageProcessor())
                                   .AddPipeline(new SpiderTest.TestPipeline())
                                   .SetThreadNum(1);
            crawler.SetDownloader(new TestDownloader());

            int n = 0;
            while (n < 10)
            {
                crawler.AddStartUrl("http://www.baidu.com/" + n);
                n++;
            }

            crawler.Run();
            Thread.Sleep(5000);

            crawler.Stop();
            Thread.Sleep(5000);

            crawler.RunAsync();
            Thread.Sleep(5000);
        }
Exemplo n.º 15
0
        // Skipped when the TRAVIS environment variable equals "1". Otherwise queues
        // 10000 start URLs on a single-threaded spider, starts it with RunAsync, and
        // after five seconds calls Pause with a callback that calls Contiune on the
        // spider; then waits another five seconds. Contains no assertions.
        public void RunAsyncAndContiune()
        {
            bool onTravis = Environment.GetEnvironmentVariable("TRAVIS") == "1";
            if (onTravis)
            {
                return;
            }

            var site = new Site
            {
                EncodingName = "UTF-8",
                SleepTime = 1000
            };
            Spider crawler = Spider.Create(site, new TestPageProcessor())
                                   .AddPipeline(new TestPipeline());
            crawler.ThreadNum = 1;

            int n = 0;
            while (n < 10000)
            {
                crawler.AddStartUrl("http://www.baidu.com/" + n);
                n++;
            }

            crawler.RunAsync();
            Thread.Sleep(5000);

            // NOTE(review): presumably the callback fires once the pause has taken effect - confirm.
            crawler.Pause(() =>
            {
                crawler.Contiune();
            });
            Thread.Sleep(5000);
        }