示例#1
0
        /// <summary>
        /// Starts the downloader, configures a spider host and launches a <c>Spider</c>
        /// with a single <c>MusicListDataParser</c> data flow on the given URL.
        /// </summary>
        /// <param name="loaction">Start URL passed to <c>AddRequests</c>.</param>
        public static void Run(string loaction)
        {
            Downloader.GetInstance().Start();

            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services =>
                {
                    services.AddThroughMessageQueue();
                    services.AddLocalDownloadCenter();
                    services.AddDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddStatisticsCenter(statistics => statistics.UseMemory());
                });
            var host = hostBuilder.Build();
            var musicSpider = host.Create<Spider>();

            // Task identifier.
            musicSpider.Id = Guid.NewGuid().ToString("N");
            // Task name.
            musicSpider.Name = "网易云音乐";
            // Crawl speed: requests downloaded per second; above 1, larger is faster;
            // below 1, smaller is slower; must not be 0.
            musicSpider.Speed = 10;
            // Crawl depth.
            musicSpider.Depth = 5;

            musicSpider.AddDataFlow(new MusicListDataParser());

            // Sample start URL: https://music.163.com/#/playlist?id=2964757969
            musicSpider.AddRequests(loaction);

            // Start the crawl; the value returned by RunAsync is discarded, so this method does not wait for it.
            musicSpider.RunAsync();
        }
示例#2
0
        /// <summary>
        /// Builds a spider host that registers Kafka via <c>AddKafka</c> and crawls two
        /// cnblogs news list pages, sending results to the spider's default storage.
        /// </summary>
        /// <returns>The task returned by <c>RunAsync</c>.</returns>
        public static Task Run()
        {
            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services => { services.AddKafka(); });
            var host = hostBuilder.Build();
            var newsSpider = host.Create<Spider>();

            // Task identifier.
            newsSpider.Id = Guid.NewGuid().ToString("N");
            // Task name.
            newsSpider.Name = "博客园全站采集";
            // Crawl speed: requests downloaded per second; above 1, larger is faster;
            // below 1, smaller is slower; must not be 0.
            newsSpider.Speed = 1;
            // Crawl depth.
            newsSpider.Depth = 3;

            newsSpider
                .AddDataFlow(new DataParser<EntitySpider.CnblogsEntry>())
                .AddDataFlow(newsSpider.GetDefaultStorage());

            // Both start requests carry the same key/value pair as request properties.
            var firstPage = new Request(
                "https://news.cnblogs.com/n/page/1/",
                new Dictionary<string, string> { { "网站", "博客园" } });
            var secondPage = new Request(
                "https://news.cnblogs.com/n/page/2/",
                new Dictionary<string, string> { { "网站", "博客园" } });
            newsSpider.AddRequests(firstPage, secondPage);

            // Start the crawl.
            return newsSpider.RunAsync();
        }
示例#3
0
        /// <summary>
        /// Builds a local (event bus + download center) spider host, registers
        /// <c>EntitySpider</c>, and crawls cnblogs with <c>CnblogsDataParser</c>,
        /// storing results through <c>MongoEntityStorage</c>.
        /// </summary>
        /// <returns>The task returned by <c>RunAsync</c>.</returns>
        public static Task Run2()
        {
            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services =>
                {
                    services.AddLocalEventBus();
                    services.AddLocalDownloadCenter();
                    services.AddDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddStatisticsCenter(statistics => statistics.UseMemory());
                })
                .Register<EntitySpider>();
            var host = hostBuilder.Build();
            var blogSpider = host.Create<Spider>();

            // Task identifier.
            blogSpider.Id = Guid.NewGuid().ToString("N");
            // Task name.
            blogSpider.Name = "博客园全站采集";
            // Crawl speed: requests downloaded per second; above 1, larger is faster;
            // below 1, smaller is slower; must not be 0.
            blogSpider.Speed = 1;
            // Crawl depth.
            blogSpider.Depth = 3;

            // The storage connection string comes from the registered SpiderOptions.
            var spiderOptions = host.GetRequiredService<SpiderOptions>();

            blogSpider
                .AddDataFlow(new CnblogsDataParser())
                .AddDataFlow(new MongoEntityStorage(spiderOptions.StorageConnectionString));

            // Start URL.
            blogSpider.AddRequests("http://www.cnblogs.com/");

            // Start the crawl.
            return blogSpider.RunAsync();
        }
示例#4
0
        /// <summary>
        /// Builds a local spider host and crawls cnblogs with an inline-configured
        /// <c>DataParser</c>, printing results through <c>ConsoleStorage</c>.
        /// </summary>
        public static void Run1()
        {
            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services =>
                {
                    services.AddLocalEventBus();
                    services.AddLocalDownloadCenter();
                    services.AddDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddStatisticsCenter(statistics => statistics.UseMemory());
                });

            var host = hostBuilder.Build();
            var blogSpider = host.Create<Spider>();

            // Task identifier.
            blogSpider.Id = Guid.NewGuid().ToString("N");
            // Task name.
            blogSpider.Name = "博客园全站采集";
            // Crawl speed: requests downloaded per second; above 1, larger is faster;
            // below 1, smaller is slower; must not be 0.
            blogSpider.Speed = 1;
            // Crawl depth.
            blogSpider.Depth = 3;

            blogSpider
                .AddDataFlow(new DataParser
                {
                    // Read responses as HTML.
                    SelectableFactory = context => context.GetSelectable(ContentType.Html),
                    // Requirement check built from the regex "cnblogs\.com".
                    Required = DataParserHelper.CheckIfRequiredByRegex("cnblogs\\.com"),
                    // Follow-up requests are queried with the XPath ".".
                    GetFollowRequests = DataParserHelper.QueryFollowRequestsByXPath(".")
                })
                // Print crawl results to the console.
                .AddDataFlow(new ConsoleStorage());

            // Start URL.
            blogSpider.AddRequests("http://www.cnblogs.com/");

            // Start the crawl; the value returned by RunAsync is discarded, so this method does not wait for it.
            blogSpider.RunAsync();
        }
示例#5
0
        /// <summary>
        /// Starts the image downloader, builds a spider host with <c>EntitySpider</c>
        /// registered, and crawls an nvshens gallery page through four Nvshens data parsers.
        /// </summary>
        public static void Run()
        {
            ImageDownloader.GetInstance().Start();

            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services =>
                {
                    services.AddThroughMessageQueue();
                    services.AddLocalDownloadCenter();
                    services.AddDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddStatisticsCenter(statistics => statistics.UseMemory());
                })
                .Register<EntitySpider>();
            var host = hostBuilder.Build();
            var gallerySpider = host.Create<Spider>();

            // Task identifier.
            gallerySpider.Id = Guid.NewGuid().ToString("N");
            // Task name.
            gallerySpider.Name = "宅男女神图片采集";
            // Crawl speed: requests downloaded per second; above 1, larger is faster;
            // below 1, smaller is slower; must not be 0.
            gallerySpider.Speed = 2;
            // Crawl depth.
            gallerySpider.Depth = 5;

            // NvshensTagIndexDataParser is not added (disabled in the original example).
            gallerySpider.AddDataFlow(new NvshensFirstPageTagDataParser());
            gallerySpider.AddDataFlow(new NvshensPageTagDataParser());
            gallerySpider.AddDataFlow(new NvshensFirstPageDetailDataParser());
            gallerySpider.AddDataFlow(new NvshensPageDetailDataParser());

            // Start URL (alternative kept for reference: https://www.nvshens.com/gallery/).
            gallerySpider.AddRequests("https://www.nvshens.com/gallery/luoli/");

            // Start the crawl; the value returned by RunAsync is discarded, so this method does not wait for it.
            gallerySpider.RunAsync();
        }
示例#6
0
        /// <summary>
        /// Builds a spider host from local message queue, downloader agent, download center
        /// and in-memory statistics registrations, then crawls cnblogs with
        /// <c>CnblogsDataParser</c> and prints results through <c>ConsoleStorage</c>.
        /// </summary>
        /// <returns>The task returned by <c>RunAsync</c>.</returns>
        public static Task Run()
        {
            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services =>
                {
                    services.AddLocalMessageQueue();
                    services.AddLocalDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddLocalDownloadCenter();
                    services.AddSpiderStatisticsCenter(statistics => statistics.UseMemory());
                });
            var host = hostBuilder.Build();
            var blogSpider = host.Create<Spider>();

            // Task identifier.
            blogSpider.NewGuidId();
            // Task name.
            blogSpider.Name = "博客园全站采集";
            // Crawl speed: requests downloaded per second; above 1, larger is faster;
            // below 1, smaller is slower; must not be 0.
            blogSpider.Speed = 1;
            // Crawl depth.
            blogSpider.Depth = 3;
            // Use the plain downloader: a clean HttpClient, no cookie handling involved.
            blogSpider.DownloaderSettings.Type = DownloaderType.HttpClient;

            blogSpider
                .AddDataFlow(new CnblogsDataParser())
                .AddDataFlow(new ConsoleStorage());

            // Start request, carrying one key/value pair.
            var startRequest = new Request(
                "http://www.cnblogs.com/",
                new Dictionary<string, string> { { "key1", "value1" } });
            blogSpider.AddRequests(startRequest);

            // Start the crawl.
            return blogSpider.RunAsync();
        }
示例#7
0
        /// <summary>
        /// Configures the Serilog logger, builds and starts a spider host with
        /// <c>IndexSpider</c> and <c>InfoSpider</c> registered, runs <c>IndexSpider</c>
        /// and schedules <c>InfoSpider</c> as a continuation of that run.
        /// </summary>
        /// <param name="args">Arguments added as a command-line configuration source and passed to <c>InfoSpider.RunAsync</c>.</param>
        public static void StartWithHost(string[] args)
        {
            // Verbose logging in DEBUG builds, Information otherwise; "Microsoft" sources are capped at Warning.
            var loggerConfiguration = new LoggerConfiguration()
#if DEBUG
                .MinimumLevel.Verbose()
#else
                .MinimumLevel.Information()
#endif
                .MinimumLevel.Override("Microsoft", LogEventLevel.Warning)
                .Enrich.FromLogContext()
                .WriteTo.Console()
                .WriteTo.RollingFile("dotnet-spider.log");

            Log.Logger = loggerConfiguration.CreateLogger();

            var hostBuilder = new SpiderHostBuilder()
                .ConfigureAppConfiguration(appConfig =>
                {
                    // appsettings.json is optional here: it is only added when the file exists.
                    if (File.Exists("appsettings.json"))
                    {
                        appConfig.AddJsonFile("appsettings.json");
                    }

                    appConfig.AddCommandLine(args);
                    // Environment variables are not loaded (AddEnvironmentVariables is not called).
                })
                .ConfigureLogging(logging => { logging.AddSerilog(); })
                .ConfigureServices(services =>
                {
                    services.AddLocalEventBus();
                    services.AddSingleton<IScheduler>(new MyScheduler());
                    services.AddLocalDownloadCenter();
                    services.AddDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddStatisticsCenter(statistics =>
                    {
                        statistics.UseMemory();
                    });
                });

            hostBuilder.Register<IndexSpider>();
            hostBuilder.Register<InfoSpider>();

            var host = hostBuilder.Build();
            host.Start();

            var indexSpider = host.Create<IndexSpider>();
            Task indexRun = indexSpider.RunAsync();

            // Once the index run's task completes, create and start the info spider.
            // Neither the continuation nor the second run is awaited by this method.
            indexRun.ContinueWith(finished =>
            {
                var infoSpider = host.Create<InfoSpider>();
                infoSpider.RunAsync(args);
            });
        }
示例#8
0
        /// <summary>
        /// Configures logging, application configuration sources and services on the
        /// supplied host builder, driven by the DOTNET_SPIDER_CONFIG and
        /// DOTNET_SPIDER_DISTRIBUTED configuration values.
        /// </summary>
        /// <param name="configuration">Configuration the two DOTNET_SPIDER_* values are read from.</param>
        /// <param name="builder">Host builder that receives the logging, configuration and service callbacks.</param>
        protected override void ConfigureService(IConfiguration configuration, SpiderHostBuilder builder)
        {
            builder.ConfigureLogging(logging =>
            {
#if DEBUG
                logging.SetMinimumLevel(LogLevel.Debug);
#else
                logging.SetMinimumLevel(LogLevel.Information);
#endif
                logging.AddSerilog();
            });

            var configPath = configuration["DOTNET_SPIDER_CONFIG"];
            builder.ConfigureAppConfiguration(appConfig =>
            {
                var hasCustomConfig = !string.IsNullOrWhiteSpace(configPath) && File.Exists(configPath);
                if (hasCustomConfig)
                {
                    // Add the explicitly configured JSON file.
                    appConfig.AddJsonFile(configPath);
                }
                else if (File.Exists("appsettings.json"))
                {
                    // Otherwise use appsettings.json, but only when it exists.
                    appConfig.AddJsonFile("appsettings.json");
                }

                appConfig.AddCommandLine(Environment.GetCommandLineArgs(), Framework.SwitchMappings);
                appConfig.AddEnvironmentVariables();
            });

            // Local services are selected only when the value is exactly "false";
            // any other value takes the Kafka branch.
            var useLocalServices = configuration["DOTNET_SPIDER_DISTRIBUTED"] == "false";

            builder.ConfigureServices(services =>
            {
                if (!useLocalServices)
                {
                    services.AddKafka();
                    return;
                }

                services.AddLocalMessageQueue();
                services.AddLocalDownloadCenter();
                services.AddDownloaderAgent(agent =>
                {
                    agent.UseFileLocker();
                    agent.UseDefaultAdslRedialer();
                    agent.UseDefaultInternetDetector();
                });
                services.AddStatisticsCenter(statistics =>
                {
                    // Register the in-memory statistics service.
                    statistics.UseMemory();
                });
            });
        }
示例#9
0
        /// <summary>
        /// Configures logging, application configuration sources and services on the
        /// supplied host builder, driven by the "config" and "local" configuration values.
        /// </summary>
        /// <param name="configuration">Configuration the "config" and "local" values are read from.</param>
        /// <param name="builder">Host builder that receives the logging, configuration and service callbacks.</param>
        protected override void ConfigureService(IConfiguration configuration, SpiderHostBuilder builder)
        {
            builder.ConfigureLogging(logging =>
            {
#if DEBUG
                logging.SetMinimumLevel(LogLevel.Debug);
#else
                logging.SetMinimumLevel(LogLevel.Information);
#endif
                logging.AddSerilog();
            });

            var configPath = configuration["config"];
            builder.ConfigureAppConfiguration(appConfig =>
            {
                // Use the configured JSON file when it is set and exists; otherwise appsettings.json.
                var hasCustomConfig = !string.IsNullOrWhiteSpace(configPath) && File.Exists(configPath);
                var jsonFile = hasCustomConfig ? configPath : "appsettings.json";
                appConfig.AddJsonFile(jsonFile);

                appConfig.AddCommandLine(Environment.GetCommandLineArgs(), Framework.SwitchMappings);
                appConfig.AddEnvironmentVariables();
            });

            // Local services are selected only when the value is exactly "true".
            var useLocalServices = configuration["local"] == "true";

            builder.ConfigureServices(services =>
            {
                if (!useLocalServices)
                {
                    services.AddKafkaEventBus();
                    return;
                }

                services.AddLocalEventBus();
                services.AddLocalDownloadCenter();
                services.AddDownloaderAgent(agent =>
                {
                    agent.UseFileLocker();
                    agent.UseDefaultAdslRedialer();
                    agent.UseDefaultInternetDetector();
                });
                services.AddStatisticsCenter(statistics =>
                {
                    // Register the in-memory statistics service.
                    statistics.UseMemory();
                });
            });
        }
示例#10
0
        /// <summary>
        /// Runs a spider on a single request that uses <c>DownloaderType.Empty</c> with
        /// <c>RetryWhenResultIsEmpty</c> enabled and <c>RetryTimes = 5</c>, then asserts
        /// the recorded spider statistics and download statistics.
        /// </summary>
        public void RetryWhenResultIsEmpty()
        {
            var builder = new SpiderHostBuilder()
                          .ConfigureLogging(x => x.AddSerilog())
                          .ConfigureAppConfiguration(x => x.AddJsonFile("appsettings.json"))
                          .ConfigureServices(services =>
            {
                services.AddLocalEventBus();
                services.AddLocalDownloadCenter();
                services.AddDownloaderAgent(x =>
                {
                    x.UseFileLocker();
                    x.UseDefaultAdslRedialer();
                    x.UseDefaultInternetDetector();
                });
                services.AddStatisticsCenter(x => x.UseMemory());
            });

            var provider = builder.Build();
            var spider   = provider.Create <Spider>();

            spider.NewGuidId();
            spider.Name                   = "RetryWhenResultIsEmpty";
            spider.EmptySleepTime         = 15;
            spider.RetryWhenResultIsEmpty = true;
            spider.Scheduler              = new QueueDistinctBfsScheduler();
            spider.AddRequests(new Request("http://www.RetryWhenResultIsEmpty.com")
            {
                DownloaderType = DownloaderType.Empty,
                RetryTimes     = 5
            });
            // Block until the spider run has finished before reading statistics.
            spider.RunAsync().Wait();

            var statisticsStore = provider.GetRequiredService <IStatisticsStore>();
            var s = statisticsStore.GetSpiderStatisticsAsync(spider.Id).Result;

            var dss = statisticsStore.GetDownloadStatisticsListAsync(1, 10).Result;

            // Wait for download statistics to appear. The store is re-queried on every
            // iteration so the loop condition can observe statistics recorded after the
            // first call, instead of spinning on the same result object.
            while (dss.Count == 0)
            {
                Thread.Sleep(1000);
                dss = statisticsStore.GetDownloadStatisticsListAsync(1, 10).Result;
            }

            var ds = dss[0];

            // Spider level: the single request is counted once and as failed.
            Assert.Equal(1, s.Total);
            Assert.Equal(1, s.Failed);
            Assert.Equal(0, s.Success);

            // Download level: six successful downloads and no failed ones are expected.
            Assert.Equal(0, ds.Failed);
            Assert.Equal(6, ds.Success);
        }
示例#11
0
        /// <summary>
        /// Program entry point: starts the image downloader, builds a local spider host and
        /// crawls an umei.cc listing page with <c>YouMeiSpider</c> and
        /// <c>YouMeiDetailSpider</c> data flows, then waits on <c>Console.Read()</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used by the active code.</param>
        static void Main(string[] args)
        {
            ImageDownloader.GetInstance().Start();

            // A manual Serilog LoggerConfiguration setup (console + "dotnet-spider.log" rolling file)
            // and the alternative entry point Startup.Execute<SsqSpider>(args) are disabled in this example.
            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services =>
                {
                    // The local event bus is used here; AddKafkaEventBus is the disabled alternative.
                    services.AddLocalEventBus();
                    services.AddLocalDownloadCenter();
                    services.AddDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddStatisticsCenter(statistics => statistics.UseMemory());
                });
            var host = hostBuilder.Build();
            var imageSpider = host.Create<Spider>();

            // Task identifier.
            imageSpider.Id = Guid.NewGuid().ToString("N");
            // Task name.
            imageSpider.Name = "优美图片采集";
            // Crawl speed: requests downloaded per second; above 1, larger is faster;
            // below 1, smaller is slower; must not be 0.
            imageSpider.Speed = 2;
            // Crawl depth.
            imageSpider.Depth = 5;

            // The Nvshens* parsers and the nvshens.com start URL are disabled in this example.
            imageSpider.AddDataFlow(new YouMeiSpider());
            imageSpider.AddDataFlow(new YouMeiDetailSpider());

            // Start URL.
            imageSpider.AddRequests("http://www.umei.cc/p/gaoqing/cn/1.htm");

            // Start the crawl; the value returned by RunAsync is discarded.
            imageSpider.RunAsync();

            // Block on console input (DistributedSpider.Run() is the disabled alternative).
            Console.Read();
        }
示例#12
0
        /// <summary>
        /// Builds a spider host on the Kafka event bus, publishes 100 events to "test-topic"
        /// with two console subscribers, waits for console input, then crawls cnblogs.
        /// </summary>
        /// <returns>A task that completes when the awaited spider run completes.</returns>
        public static async Task Run()
        {
            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services =>
                {
                    services.AddKafkaEventBus();
                    services.AddDownloadCenter(center => center.UseLocalDownloaderAgentStore());
                    services.AddDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddStatisticsCenter(statistics => statistics.UseMemory());
                });
            var host = hostBuilder.Build();

            // Event bus demo: two subscribers on the same topic, then 100 sequential publishes.
            var eventBus = host.GetRequiredService<IEventBus>();

            eventBus.Subscribe("test-topic", message => { Console.WriteLine("i am consumer 1"); });
            eventBus.Subscribe("test-topic", message => { Console.WriteLine("i am consumer 2"); });

            for (var published = 0; published < 100; published++)
            {
                await eventBus.PublishAsync("test-topic", new Event());
            }

            // Wait for console input before starting the spider.
            Console.Read();

            var blogSpider = host.Create<Spider>();

            // Task identifier.
            blogSpider.NewGuidId();
            // Task name.
            blogSpider.Name = "博客园全站采集";
            // Crawl speed: requests downloaded per second; above 1, larger is faster;
            // below 1, smaller is slower; must not be 0.
            blogSpider.Speed = 10;
            // Crawl depth.
            blogSpider.Depth = 3;

            blogSpider
                .AddDataFlow(new CnblogsDataParser())
                .AddDataFlow(new ConsoleStorage());

            // Start request, carrying one key/value pair.
            var startRequest = new Request(
                "http://www.cnblogs.com/",
                new Dictionary<string, string> { { "key1", "value1" } });
            blogSpider.AddRequests(startRequest);

            // Start the crawl and wait for it.
            await blogSpider.RunAsync();
        }
示例#13
0
        /// <summary>
        /// Creates a <c>SpiderHostBuilder</c> configured with Serilog logging, the
        /// appsettings.json file, the pass-through message queue, the local download
        /// center, a downloader agent and in-memory statistics.
        /// </summary>
        /// <returns>The configured builder; <c>Build()</c> is not called here.</returns>
        protected SpiderHostBuilder GetLocalSpiderHostBuilder()
        {
            return new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services =>
                {
                    services.AddThroughMessageQueue();
                    services.AddLocalDownloadCenter();
                    services.AddDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddStatisticsCenter(statistics => statistics.UseMemory());
                });
        }
示例#14
0
 /// <summary>
 /// Configures the spider host builder (Serilog logging, appsettings.json, local message
 /// queue, download center, downloader agent, in-memory statistics) and builds the
 /// provider, storing both in fields.
 /// </summary>
 public SpiderStart()
 {
     _hostBuilder = new SpiderHostBuilder()
         .ConfigureLogging(logging => logging.AddSerilog())
         .ConfigureAppConfiguration(appConfig =>
         {
             appConfig.AddJsonFile("appsettings.json");
         })
         .ConfigureServices(services =>
         {
             services.AddLocalMessageQueue();
             services.AddDownloadCenter(center => center.UseLocalDownloaderAgentStore());
             services.AddDownloaderAgent(agent =>
             {
                 agent.UseFileLocker();
                 agent.UseDefaultAdslRedialer();
                 agent.UseDefaultInternetDetector();
             });
             services.AddStatisticsCenter(statistics => statistics.UseMemory());
         });

     _provider = _hostBuilder.Build();
 }
示例#15
0
        /// <summary>
        /// Runs the spider: builds a local host with Redis-backed statistics, registers
        /// <c>PCNewsSpider</c>, creates it and starts it.
        /// </summary>
        /// <returns>The task returned by <c>RunAsync</c>.</returns>
        public static Task Run()
        {
            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services =>
                {
                    services.AddLocalEventBus();
                    services.AddLocalDownloadCenter();
                    services.AddDownloadAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddStatisticsCenter(statistics => statistics.UseRedis());
                })
                .Register<PCNewsSpider>();

            var host = hostBuilder.Build();
            var newsSpider = host.Create<PCNewsSpider>();

            return newsSpider.RunAsync();
        }
示例#16
0
		/// <summary>
		/// Builds a spider host from local message queue, downloader agent, download center
		/// and in-memory statistics registrations, registers <c>EntitySpider</c>, creates it
		/// and starts it.
		/// </summary>
		/// <returns>The task returned by <c>RunAsync</c>.</returns>
		public static Task Run()
		{
			var hostBuilder = new SpiderHostBuilder()
				.ConfigureLogging(logging => logging.AddSerilog())
				.ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
				.ConfigureServices(services =>
				{
					services.AddLocalMessageQueue();
					services.AddLocalDownloaderAgent(agent =>
					{
						agent.UseFileLocker();
						agent.UseDefaultAdslRedialer();
						agent.UseDefaultInternetDetector();
					});
					services.AddLocalDownloadCenter();
					services.AddSpiderStatisticsCenter(statistics => statistics.UseMemory());
				})
				.Register<EntitySpider>();

			var host = hostBuilder.Build();
			var entitySpider = host.Create<EntitySpider>();

			return entitySpider.RunAsync();
		}
示例#17
0
        /// <summary>
        /// Runs the spider: builds a local host with in-memory statistics, registers
        /// <c>WeiXinSoGouSpider</c>, creates it and starts it.
        /// </summary>
        /// <returns>The task returned by <c>RunAsync</c>.</returns>
        public static Task Run()
        {
            var hostBuilder = new SpiderHostBuilder()
                .ConfigureLogging(logging => logging.AddSerilog())
                .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                .ConfigureServices(services =>
                {
                    // Use the local event bus here as well; the distributed Kafka
                    // message queue is not needed for now.
                    services.AddLocalEventBus();
                    services.AddLocalDownloadCenter();
                    services.AddDownloadAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultInternetDetector();
                    });
                    services.AddStatisticsCenter(statistics => statistics.UseMemory());
                })
                .Register<WeiXinSoGouSpider>();

            var host = hostBuilder.Build();
            var weixinSpider = host.Create<WeiXinSoGouSpider>();

            return weixinSpider.RunAsync();
        }
示例#18
0
        /// <summary>
        /// Builds a local spider host and crawls cnblogs with <c>CnblogsDataParser</c>,
        /// printing results through <c>ConsoleStorage</c>. Any exception thrown during
        /// setup or the awaited run is written to the console instead of propagating.
        /// </summary>
        /// <returns>A task that completes when the run finishes or an exception has been printed.</returns>
        public static async Task Run()
        {
            try
            {
                var hostBuilder = new SpiderHostBuilder()
                    .ConfigureLogging(logging => logging.AddSerilog())
                    .ConfigureAppConfiguration(appConfig => appConfig.AddJsonFile("appsettings.json"))
                    .ConfigureServices(services =>
                    {
                        services.AddLocalEventBus();
                        services.AddDownloadCenter(center => center.UseLocalDownloaderAgentStore());
                        services.AddDownloaderAgent(agent =>
                        {
                            agent.UseFileLocker();
                            agent.UseDefaultAdslRedialer();
                            agent.UseDefaultInternetDetector();
                        });
                        services.AddStatisticsCenter(statistics => statistics.UseMemory());
                    });
                var host = hostBuilder.Build();
                var blogSpider = host.Create<Spider>();

                // Task identifier.
                blogSpider.NewGuidId();
                // Task name.
                blogSpider.Name = "博客园全站采集";
                // Crawl speed: requests downloaded per second; above 1, larger is faster;
                // below 1, smaller is slower; must not be 0.
                blogSpider.Speed = 10;
                // Crawl depth.
                blogSpider.Depth = 3;

                blogSpider
                    .AddDataFlow(new CnblogsDataParser())
                    .AddDataFlow(new ConsoleStorage());

                // Start request, carrying one key/value pair.
                var startRequest = new Request(
                    "http://www.cnblogs.com/",
                    new Dictionary<string, string> { { "key1", "value1" } });
                blogSpider.AddRequests(startRequest);

                // Start the crawl and wait for it.
                await blogSpider.RunAsync();
            }
            catch (Exception error)
            {
                Console.WriteLine(error.ToString());
            }
        }
示例#19
0
        /// <summary>
        /// Resolves the spider type named by the "type" configuration value (read from
        /// environment variables and the process command line), builds a host for it
        /// and starts the spider.
        /// </summary>
        /// <param name="args">
        /// Run arguments; passed to <c>PrintEnvironment</c> and added as a command-line
        /// configuration source of the spider host.
        /// </param>
        public static void Execute(params string[] args)
        {
            ConfigureSerialLog();

            Framework.SetEncoding();

            Framework.SetMultiThread();

            var configurationBuilder = new ConfigurationBuilder();

            configurationBuilder.SetBasePath(AppDomain.CurrentDomain.BaseDirectory);
            configurationBuilder.AddEnvironmentVariables();
            configurationBuilder.AddCommandLine(Environment.GetCommandLineArgs(), Framework.SwitchMappings);
            var configuration = configurationBuilder.Build();

            string spiderTypeName = configuration["type"];

            if (string.IsNullOrWhiteSpace(spiderTypeName))
            {
                Log.Logger.Error("未指定需要执行的爬虫类型");
                return;
            }

            var name      = configuration["name"];
            // Fall back to a fresh GUID when no id is configured.
            var id        = configuration["id"] ?? Guid.NewGuid().ToString("N");
            var config    = configuration["config"];
            var arguments = configuration["args"]?.Split(' ');
            var local     = configuration["local"] == "true";

            PrintEnvironment(args);

            var spiderTypes = DetectSpiders();

            if (spiderTypes == null || spiderTypes.Count == 0)
            {
                return;
            }

            // Case-insensitive ordinal match; avoids the culture-sensitive ToLower() round trip.
            var spiderType = spiderTypes.FirstOrDefault(
                x => string.Equals(x.Name, spiderTypeName, StringComparison.OrdinalIgnoreCase));

            if (spiderType == null)
            {
                // Pass the name as a template property (":l" renders it without quotes)
                // rather than interpolating it into the message template.
                Log.Logger.Error("未找到爬虫: {SpiderTypeName:l}", spiderTypeName);
                return;
            }

            var builder = new SpiderHostBuilder();

            builder.ConfigureLogging(b =>
            {
#if DEBUG
                b.SetMinimumLevel(LogLevel.Debug);
#else
                b.SetMinimumLevel(LogLevel.Information);
#endif
                b.AddSerilog();
            });
            builder.ConfigureAppConfiguration(b =>
            {
                // Add the JSON configuration file only when a path was supplied:
                // AddJsonFile rejects a null or empty path.
                if (!string.IsNullOrWhiteSpace(config))
                {
                    b.AddJsonFile(config);
                }

                b.AddCommandLine(args);
                b.AddEnvironmentVariables();
            });

            if (local)
            {
                builder.ConfigureServices(b =>
                {
                    b.AddLocalEventBus();
                    b.AddLocalDownloadCenter();
                    b.AddDownloaderAgent(x =>
                    {
                        x.UseFileLocker();
                        x.UseDefaultAdslRedialer();
                        x.UseDefaultInternetDetector();
                    });
                    b.AddStatisticsCenter(x =>
                    {
                        // Register the in-memory statistics service.
                        x.UseMemory();
                    });
                });
            }
            else
            {
                builder.ConfigureServices(b => { b.AddKafkaEventBus(); });
            }

            builder.Register(spiderType);
            var provider = builder.Build();
            var instance = provider.Create(spiderType);
            if (instance != null)
            {
                instance.Name = name;
                instance.Id   = id;
                // NOTE(review): the awaiter is obtained but GetResult() is never called, so this
                // does not wait for the run to finish — confirm whether callers rely on that.
                instance.RunAsync(arguments).ConfigureAwait(false).GetAwaiter();
            }
            else
            {
                Log.Logger.Error("创建爬虫对象失败");
            }
        }
示例#20
0
        /// <summary>
        /// Resolves the spider type named by the "spider" configuration value, builds a
        /// host for it and starts the spider.
        /// </summary>
        /// <param name="args">
        /// Run arguments; used to build the configuration, passed to
        /// <c>PrintEnvironment</c> and added as a command-line configuration source of the
        /// spider host.
        /// </param>
        /// <exception cref="SpiderException">Thrown when no "spider" value is configured.</exception>
        public static void Run(params string[] args)
        {
            Framework.SetEncoding();

            var    configurationBuilder = Framework.CreateConfigurationBuilder(null, args);
            var    configuration        = configurationBuilder.Build();
            string spider = configuration["spider"];

            if (string.IsNullOrWhiteSpace(spider))
            {
                throw new SpiderException("未指定需要执行的爬虫");
            }

            var name       = configuration["name"];
            // Fall back to a fresh GUID when no id is configured.
            var id         = configuration["id"] ?? Guid.NewGuid().ToString("N");
            var config     = configuration["config"];
            var arguments  = configuration["args"]?.Split(' ');
            var distribute = configuration["distribute"] == "true";

            PrintEnvironment(args);

            var spiderTypes = DetectSpiders();

            if (spiderTypes == null || spiderTypes.Count == 0)
            {
                return;
            }

            // Case-insensitive ordinal match; avoids the culture-sensitive ToLower() round trip.
            var spiderType = spiderTypes.FirstOrDefault(
                x => string.Equals(x.Name, spider, StringComparison.OrdinalIgnoreCase));

            if (spiderType == null)
            {
                ConsoleHelper.WriteLine($"未找到爬虫: {spider}", 0, ConsoleColor.DarkYellow);
                return;
            }

            var builder = new SpiderHostBuilder();

            builder.ConfigureLogging(b =>
            {
#if DEBUG
                b.SetMinimumLevel(LogLevel.Debug);
#else
                b.SetMinimumLevel(LogLevel.Information);
#endif
                b.AddSerilog();
            });
            builder.ConfigureAppConfiguration(b =>
            {
                // Add the JSON configuration file only when a path was supplied:
                // AddJsonFile rejects a null or empty path.
                if (!string.IsNullOrWhiteSpace(config))
                {
                    b.AddJsonFile(config);
                }

                b.AddCommandLine(args);
            });

            // Local services are registered here only when "distribute" is not "true";
            // nothing is registered by this method in the distributed case.
            if (!distribute)
            {
                builder.ConfigureServices(b =>
                {
                    b.AddLocalMessageQueue();
                    b.AddLocalDownloaderAgent(x =>
                    {
                        x.UseFileLocker();
                        x.UseDefaultAdslRedialer();
                        x.UseDefaultInternetDetector();
                    });
                    b.AddLocalDownloadCenter();
                    b.AddSpiderStatisticsCenter(x =>
                    {
                        // Register the in-memory statistics service.
                        x.UseMemory();
                    });
                });
            }

            builder.Register(spiderType);
            var provider = builder.Build();
            var instance = provider.Create(spiderType);
            if (instance != null)
            {
                instance.Name = name;
                instance.Id   = id;
                // The value returned by RunAsync is discarded, so this method does not wait for the run.
                instance.RunAsync(arguments);
            }
            else
            {
                ConsoleHelper.WriteLine("创建爬虫对象失败", 0, ConsoleColor.DarkYellow);
            }
        }
示例#21
0
        /// <summary>
        /// Entry point: reads the spider class, id and name from configuration, configures
        /// Serilog (console plus rolling file), creates the spider through a
        /// <c>SpiderHostBuilder</c> that registers the Kafka message queue, and runs it.
        /// </summary>
        /// <remarks>
        /// Settings read: "dotnetspider.spider.class", "dotnetspider.spider.id" and
        /// "dotnetspider.spider.name". Class and id fall back to built-in defaults when
        /// not supplied; the name is required. Any exception is logged, not rethrown.
        /// </remarks>
        /// <param name="args">Command-line arguments</param>
        static void Main(string[] args)
        {
            // Fallbacks used only when the corresponding setting is not supplied.
            const string defaultSpiderClass = "DotnetSpider.Spiders.CnblogsSpider";
            const string defaultSpiderId    = "xxxxxxxx";

            try
            {
                var builder = new SpiderHostBuilder();

                var configurationBuilder = Framework.CreateConfigurationBuilder(null, args);
                var configuration        = configurationBuilder.Build();
                var @class   = configuration["dotnetspider.spider.class"];
                var spiderId = configuration["dotnetspider.spider.id"];

                // Honour configured values; previously they were always overwritten.
                if (string.IsNullOrWhiteSpace(@class))
                {
                    @class = defaultSpiderClass;
                }

                if (string.IsNullOrWhiteSpace(spiderId))
                {
                    spiderId = defaultSpiderId;
                }

                var folder = Directory.Exists("/logs/") ? "/logs/" : "";

                // Defensive fallback to a timestamp name; the format avoids ':' because it
                // is not a valid file-name character on Windows.
                var logPath = string.IsNullOrWhiteSpace(spiderId)
                                        ? $"{folder}{DateTime.Now:yyyy-MM-dd_HH-mm-ss}.log"
                                        : $"{folder}{spiderId}.log";

                var loggerConfiguration = new LoggerConfiguration()
                                          .MinimumLevel.Information()
                                          .MinimumLevel.Override("Microsoft", LogEventLevel.Warning)
                                          .Enrich.FromLogContext()
                                          .WriteTo.Console().WriteTo
                                          .RollingFile(logPath);
                Log.Logger = loggerConfiguration.CreateLogger();

                var spiderName = configuration["dotnetspider.spider.name"];
                if (string.IsNullOrWhiteSpace(@class) ||
                    string.IsNullOrWhiteSpace(spiderId) ||
                    string.IsNullOrWhiteSpace(spiderName)
                    )
                {
                    Log.Logger.Error($"执行爬虫的参数不正确: class {@class}, id {spiderId}, name {spiderName}");
                    return;
                }

                var type = Type.GetType(@class);
                if (type == null)
                {
                    Log.Logger.Error($"未找到爬虫类型: {@class}");
                    return;
                }

                Log.Logger.Information($"获取爬虫类型 {type.FullName} 成功");
                builder.ConfigureAppConfiguration(x =>
                {
                    x.AddCommandLine(args);
                });
                builder.ConfigureLogging(x =>
                {
                    x.AddSerilog();
                });
                builder.ConfigureServices(services =>
                {
                    services.AddKafkaMessageQueue();
                });
                builder.Register(type);
                var provider = builder.Build();

                var spider = provider.Create(type);
                // Verify creation before reporting success, as the sibling Run method does.
                if (spider == null)
                {
                    Log.Logger.Error("创建爬虫对象失败");
                    return;
                }

                Log.Logger.Information($"创建爬虫实例成功");
                spider.Id   = spiderId;
                spider.Name = spiderName;

                Log.Logger.Information($"尝试启动爬虫实例");
                spider.Run();

                Log.Logger.Information($"爬虫实例退出");
            }
            catch (Exception e)
            {
                Log.Logger.Error($"执行失败: {e}");
            }
            finally
            {
                // Flush and dispose the Serilog sinks before the process exits, including
                // on the early-return paths above.
                Log.CloseAndFlush();
            }
        }