/// <summary>
/// Configures a spider for weixin.sogou.com and runs it.
/// </summary>
/// <remarks>
/// The spider is seeded with the site root plus search result pages 1 through 10
/// (the query is fixed inside the URL), uses <c>Processor2</c> as its page processor,
/// and is switched to the Redis scheduler at "localhost" before it is started.
/// </remarks>
public void Run()
{
    // "Buid" is the builder method's name as this code calls it; do not "correct" it here.
    Spider spider = SpiderBuilder.CreateBuilder()
        .AddRequest("https://weixin.sogou.com/")
        .Buid();
    spider.AddPageProcessor(new Processor2());

    // Queue search result pages 1..10.
    for (int i = 1; i <= 10; i++)
    {
        spider.AddRequest($"https://weixin.sogou.com/weixin?type=2&ie=utf8&page={i}&query=马云");
    }

    spider.NewRequestSleepInterval = 2000; // 2s (presumably milliseconds; confirm against Spider)
    spider.ThreadNumber = 5;

    // Optional settings, left disabled:
    // spider.EmptySleepTime = 60; // 60s
    // spider.UseNLog();

    // NOTE(review): the requests above are added before the scheduler is switched to Redis;
    // confirm that UseRedisScheduler carries already-queued requests over.
    spider.UseRedisScheduler("localhost");

    // Subscribe BEFORE starting the spider. Attaching the handler after Run() would miss
    // every status change raised while the spider is running.
    spider.OnStatusChanged += Spider_OnStatusChanged;

    spider.Run();
}
/// <summary>
/// Configures a spider for https://www.cnblogs.com/ and runs it.
/// </summary>
/// <remarks>
/// The spider uses <c>CNBlogProcessor</c> as its page processor, enables NLog, and replaces
/// the static sleep interval with a random one drawn from [100, 1000) for each new request.
/// Every chosen interval is written to the console.
/// </remarks>
public void Run()
{
    var builder = SpiderBuilder.CreateBuilder()
        .AddRequest("https://www.cnblogs.com/")
        .AddPageProcessor(new CNBlogProcessor());
    Spider spider = builder.Buid();

    spider.UseNLog();

    // ---- Optional configuration examples (disabled) ----
    //
    // Redis-backed scheduler:
    //   spider.UseRedisScheduler("localhost");
    //
    // Chrome WebDriver downloader (explicit driver directory, or default lookup):
    //   spider.UseChromeWebDriverDownloader(@"C:\Users\admin\.nuget\packages\selenium.webdriver.chromedriver\2.44.0\driver\win32\");
    //   spider.UseChromeWebDriverDownloader();
    //
    // Dapper database pipeline:
    //   spider.AddDapperDataBasePipeline(new DapperDatabaseStore()
    //   {
    //       OnSave = UseDapperStoreSave
    //   });
    //
    // Single downloader proxy:
    //   spider.SetDownloaderProxy(new WebProxy("127.0.0.1", 1080)
    //   {
    //       // Credentials = new NetworkCredential("[USERNAME]", "[PASSWORD]")
    //   });
    //   spider.SetDownloaderProxy(new DownloaderProxy(new WebProxy("127.0.0.1", 1080)));
    //
    // Simple proxy pool:
    //   spider.SetDownloaderProxy(new SimpleDownloaderProxyPools(
    //       new WebProxy("127.0.0.1", 1080),
    //       new WebProxy("192.168.1.1", 1080),
    //       new WebProxy("192.168.1.2", 1080)
    //   ));
    //
    // HTTP proxy pools:
    //   spider.UseHttpProxyPools(100, 100, new WebProxy("127.0.0.1", 1080)
    //   {
    //       Credentials = new NetworkCredential("[USERNAME]", "[PASSWORD]")
    //   });

    // Use a randomized delay instead of the fixed one.
    Random rng = new Random();
    spider.UseStaticSleepInterval = false;
    spider.NewRequestDynamicSleepInterval = () => rng.Next(100, 1000);

    // Report each interval as it is handed out.
    spider.OnNewRequesting += (sender, delay) =>
    {
        Console.WriteLine("sleep:" + delay);
    };

    spider.Run();
}