/// <summary>
/// Starts the downloader singleton, builds a spider host and launches a crawl
/// that begins at the supplied URL and is parsed by <c>MusicListDataParser</c>.
/// </summary>
/// <param name="loaction">Start URL passed to the spider as its initial request.</param>
/// <remarks>
/// The result of <c>RunAsync</c> is not awaited, so this method returns as soon
/// as the spider has been started.
/// </remarks>
public static void Run(string loaction)
{
    Downloader.GetInstance().Start();

    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddThroughMessageQueue();
            services.AddLocalDownloadCenter();
            services.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        });

    var host = hostBuilder.Build();
    var crawler = host.Create<Spider>();

    // Task identifier.
    crawler.Id = Guid.NewGuid().ToString("N");
    // Task name.
    crawler.Name = "网易云音乐";
    // Crawl speed: requests downloaded per second. Above 1, larger is faster;
    // below 1, smaller is slower; must not be 0.
    crawler.Speed = 10;
    // Crawl depth.
    crawler.Depth = 5;

    crawler.AddDataFlow(new MusicListDataParser());

    // Start link (a sample value is https://music.163.com/#/playlist?id=2964757969).
    crawler.AddRequests(loaction);

    // Launch; the returned value is intentionally not awaited here.
    crawler.RunAsync();
}
/// <summary>
/// Builds a spider host that registers Kafka and starts a crawl of two
/// cnblogs news list pages, parsing them into <c>EntitySpider.CnblogsEntry</c>
/// and sending the result to the spider's default storage.
/// </summary>
/// <returns>The task returned by the spider's <c>RunAsync</c>.</returns>
public static Task Run()
{
    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddKafka();
        });

    var host = hostBuilder.Build();
    var crawler = host.Create<Spider>();

    // Task identifier.
    crawler.Id = Guid.NewGuid().ToString("N");
    // Task name.
    crawler.Name = "博客园全站采集";
    // Crawl speed: requests downloaded per second. Above 1, larger is faster;
    // below 1, smaller is slower; must not be 0.
    crawler.Speed = 1;
    // Crawl depth.
    crawler.Depth = 3;

    // Parser first, then the default storage (resolved after the parser is attached).
    var flow = crawler.AddDataFlow(new DataParser<EntitySpider.CnblogsEntry>());
    flow.AddDataFlow(crawler.GetDefaultStorage());

    // Each start request carries its own property dictionary.
    crawler.AddRequests(
        new Request(
            "https://news.cnblogs.com/n/page/1/",
            new Dictionary<string, string> { { "网站", "博客园" } }),
        new Request(
            "https://news.cnblogs.com/n/page/2/",
            new Dictionary<string, string> { { "网站", "博客园" } }));

    // Launch.
    return crawler.RunAsync();
}
/// <summary>
/// Builds a spider host with the local event bus, download center, downloader
/// agent and memory statistics, then crawls cnblogs with
/// <c>CnblogsDataParser</c> and stores entities through
/// <c>MongoEntityStorage</c>.
/// </summary>
/// <returns>The task returned by the spider's <c>RunAsync</c>.</returns>
public static Task Run2()
{
    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddLocalEventBus();
            services.AddLocalDownloadCenter();
            services.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        })
        .Register<EntitySpider>();

    var host = hostBuilder.Build();
    var crawler = host.Create<Spider>();

    // Task identifier.
    crawler.Id = Guid.NewGuid().ToString("N");
    // Task name.
    crawler.Name = "博客园全站采集";
    // Crawl speed: requests downloaded per second. Above 1, larger is faster;
    // below 1, smaller is slower; must not be 0.
    crawler.Speed = 1;
    // Crawl depth.
    crawler.Depth = 3;

    // The storage connection string comes from the registered SpiderOptions.
    var spiderOptions = host.GetRequiredService<SpiderOptions>();
    var flow = crawler.AddDataFlow(new CnblogsDataParser());
    flow.AddDataFlow(new MongoEntityStorage(spiderOptions.StorageConnectionString));

    // Start link.
    crawler.AddRequests("http://www.cnblogs.com/");

    // Launch.
    return crawler.RunAsync();
}
/// <summary>
/// Builds a locally wired spider host and starts a cnblogs crawl whose
/// results are sent to <c>ConsoleStorage</c>.
/// </summary>
/// <remarks>
/// The result of <c>RunAsync</c> is not awaited, so this method returns as soon
/// as the spider has been started.
/// </remarks>
public static void Run1()
{
    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddLocalEventBus();
            services.AddLocalDownloadCenter();
            services.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        });

    var host = hostBuilder.Build();
    var crawler = host.Create<Spider>();

    // Task identifier.
    crawler.Id = Guid.NewGuid().ToString("N");
    // Task name.
    crawler.Name = "博客园全站采集";
    // Crawl speed: requests downloaded per second. Above 1, larger is faster;
    // below 1, smaller is slower; must not be 0.
    crawler.Speed = 1;
    // Crawl depth.
    crawler.Depth = 3;

    // Parser: HTML selectable, required-check by the cnblogs.com regex,
    // follow requests queried with the XPath ".".
    var parser = new DataParser
    {
        SelectableFactory = context => context.GetSelectable(ContentType.Html),
        Required = DataParserHelper.CheckIfRequiredByRegex("cnblogs\\.com"),
        GetFollowRequests = DataParserHelper.QueryFollowRequestsByXPath(".")
    };

    // Print crawl results to the console.
    var flow = crawler.AddDataFlow(parser);
    flow.AddDataFlow(new ConsoleStorage());

    // Start link.
    crawler.AddRequests("http://www.cnblogs.com/");

    // Launch; the returned value is intentionally not awaited here.
    crawler.RunAsync();
}
/// <summary>
/// Starts the image downloader singleton, builds a spider host and launches
/// the nvshens gallery crawl with its four tag/detail parsers.
/// </summary>
/// <remarks>
/// The result of <c>RunAsync</c> is not awaited, so this method returns as soon
/// as the spider has been started.
/// </remarks>
public static void Run()
{
    ImageDownloader.GetInstance().Start();

    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddThroughMessageQueue();
            services.AddLocalDownloadCenter();
            services.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        })
        .Register<EntitySpider>();

    var host = hostBuilder.Build();
    var crawler = host.Create<Spider>();

    // Task identifier.
    crawler.Id = Guid.NewGuid().ToString("N");
    // Task name.
    crawler.Name = "宅男女神图片采集";
    // Crawl speed: requests downloaded per second. Above 1, larger is faster;
    // below 1, smaller is slower; must not be 0.
    crawler.Speed = 2;
    // Crawl depth.
    crawler.Depth = 5;

    // NvshensTagIndexDataParser is deliberately not registered.
    crawler.AddDataFlow(new NvshensFirstPageTagDataParser());
    crawler.AddDataFlow(new NvshensPageTagDataParser());
    crawler.AddDataFlow(new NvshensFirstPageDetailDataParser());
    crawler.AddDataFlow(new NvshensPageDetailDataParser());

    // Start link (the gallery root https://www.nvshens.com/gallery/ is not used).
    crawler.AddRequests("https://www.nvshens.com/gallery/luoli/");

    // Launch; the returned value is intentionally not awaited here.
    crawler.RunAsync();
}
/// <summary>
/// Builds a spider host with the local message queue, downloader agent,
/// download center and memory statistics, then crawls cnblogs using the
/// HttpClient downloader and prints results through <c>ConsoleStorage</c>.
/// </summary>
/// <returns>The task returned by the spider's <c>RunAsync</c>.</returns>
public static Task Run()
{
    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddLocalMessageQueue();
            services.AddLocalDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddLocalDownloadCenter();
            services.AddSpiderStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        });

    var host = hostBuilder.Build();
    var crawler = host.Create<Spider>();

    // Task identifier.
    crawler.NewGuidId();
    // Task name.
    crawler.Name = "博客园全站采集";
    // Crawl speed: requests downloaded per second. Above 1, larger is faster;
    // below 1, smaller is slower; must not be 0.
    crawler.Speed = 1;
    // Crawl depth.
    crawler.Depth = 3;
    // Use the plain downloader: a clean HttpClient with no cookie involvement.
    crawler.DownloaderSettings.Type = DownloaderType.HttpClient;

    var flow = crawler.AddDataFlow(new CnblogsDataParser());
    flow.AddDataFlow(new ConsoleStorage());

    // Start link, with one custom property attached to the request.
    var startProperties = new Dictionary<string, string> { { "key1", "value1" } };
    crawler.AddRequests(new Request("http://www.cnblogs.com/", startProperties));

    // Launch.
    return crawler.RunAsync();
}
/// <summary>
/// Configures Serilog, builds and starts a spider host with
/// <c>IndexSpider</c> and <c>InfoSpider</c> registered, runs the index spider
/// and schedules the info spider as a continuation of it.
/// </summary>
/// <param name="args">
/// Command-line arguments; added to the host configuration and passed to the
/// info spider's <c>RunAsync</c>.
/// </param>
/// <remarks>
/// Neither the index spider task nor its continuation is awaited, so this
/// method returns once the continuation has been attached.
/// </remarks>
public static void StartWithHost(string[] args)
{
    var loggerConfiguration = new LoggerConfiguration()
#if DEBUG
        .MinimumLevel.Verbose()
#else
        .MinimumLevel.Information()
#endif
        .MinimumLevel.Override("Microsoft", LogEventLevel.Warning)
        .Enrich.FromLogContext()
        .WriteTo.Console()
        .WriteTo.RollingFile("dotnet-spider.log");
    Log.Logger = loggerConfiguration.CreateLogger();

    var hostBuilder = new SpiderHostBuilder()
        .ConfigureAppConfiguration(configuration =>
        {
            // The JSON file is optional here: it is only added when present.
            if (File.Exists("appsettings.json"))
            {
                configuration.AddJsonFile("appsettings.json");
            }

            configuration.AddCommandLine(args);
            // Environment variables are deliberately not added.
        })
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureServices(services =>
        {
            services.AddLocalEventBus();
            services.AddSingleton<IScheduler>(new MyScheduler());
            services.AddLocalDownloadCenter();
            services.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        });

    hostBuilder.Register<IndexSpider>();
    hostBuilder.Register<InfoSpider>();

    var host = hostBuilder.Build();
    host.Start();

    var indexSpider = host.Create<IndexSpider>();
    Task indexRun = indexSpider.RunAsync();

    // The info spider is created and started only after the index spider's task completes.
    indexRun.ContinueWith(finished =>
    {
        var infoSpider = host.Create<InfoSpider>();
        infoSpider.RunAsync(args);
    });
}
/// <summary>
/// Registers logging, configuration sources and services on the spider host builder.
/// </summary>
/// <param name="configuration">
/// Configuration consulted for the <c>DOTNET_SPIDER_CONFIG</c> and
/// <c>DOTNET_SPIDER_DISTRIBUTED</c> keys.
/// </param>
/// <param name="builder">Host builder that receives the registrations.</param>
protected override void ConfigureService(IConfiguration configuration, SpiderHostBuilder builder)
{
    builder.ConfigureLogging(logging =>
    {
#if DEBUG
        logging.SetMinimumLevel(LogLevel.Debug);
#else
        logging.SetMinimumLevel(LogLevel.Information);
#endif
        logging.AddSerilog();
    });

    var configFile = configuration["DOTNET_SPIDER_CONFIG"];
    builder.ConfigureAppConfiguration(sources =>
    {
        var customFileUsable = !string.IsNullOrWhiteSpace(configFile) && File.Exists(configFile);
        if (customFileUsable)
        {
            // Add the JSON configuration file named by DOTNET_SPIDER_CONFIG.
            sources.AddJsonFile(configFile);
        }
        else if (File.Exists("appsettings.json"))
        {
            sources.AddJsonFile("appsettings.json");
        }

        sources.AddCommandLine(Environment.GetCommandLineArgs(), Framework.SwitchMappings);
        sources.AddEnvironmentVariables();
    });

    // The local services are chosen only when DOTNET_SPIDER_DISTRIBUTED is
    // exactly "false"; every other value selects Kafka.
    var useLocalServices = configuration["DOTNET_SPIDER_DISTRIBUTED"] == "false";
    builder.ConfigureServices(services =>
    {
        if (!useLocalServices)
        {
            services.AddKafka();
            return;
        }

        services.AddLocalMessageQueue();
        services.AddLocalDownloadCenter();
        services.AddDownloaderAgent(agent =>
        {
            agent.UseFileLocker();
            agent.UseDefaultAdslRedialer();
            agent.UseDefaultInternetDetector();
        });
        services.AddStatisticsCenter(statistics =>
        {
            // Add the in-memory statistics service.
            statistics.UseMemory();
        });
    });
}
/// <summary>
/// Registers logging, configuration sources and services on the spider host builder.
/// </summary>
/// <param name="configuration">
/// Configuration consulted for the <c>config</c> and <c>local</c> keys.
/// </param>
/// <param name="builder">Host builder that receives the registrations.</param>
protected override void ConfigureService(IConfiguration configuration, SpiderHostBuilder builder)
{
    builder.ConfigureLogging(logging =>
    {
#if DEBUG
        logging.SetMinimumLevel(LogLevel.Debug);
#else
        logging.SetMinimumLevel(LogLevel.Information);
#endif
        logging.AddSerilog();
    });

    var configFile = configuration["config"];
    builder.ConfigureAppConfiguration(sources =>
    {
        // Use the file named by "config" when it is given and exists;
        // otherwise appsettings.json is added unconditionally.
        var customFileUsable = !string.IsNullOrWhiteSpace(configFile) && File.Exists(configFile);
        var jsonFile = customFileUsable ? configFile : "appsettings.json";
        sources.AddJsonFile(jsonFile);

        sources.AddCommandLine(Environment.GetCommandLineArgs(), Framework.SwitchMappings);
        sources.AddEnvironmentVariables();
    });

    // The local services are chosen only when "local" is exactly "true";
    // every other value selects the Kafka event bus.
    var useLocalServices = configuration["local"] == "true";
    builder.ConfigureServices(services =>
    {
        if (!useLocalServices)
        {
            services.AddKafkaEventBus();
            return;
        }

        services.AddLocalEventBus();
        services.AddLocalDownloadCenter();
        services.AddDownloaderAgent(agent =>
        {
            agent.UseFileLocker();
            agent.UseDefaultAdslRedialer();
            agent.UseDefaultInternetDetector();
        });
        services.AddStatisticsCenter(statistics =>
        {
            // Add the in-memory statistics service.
            statistics.UseMemory();
        });
    });
}
/// <summary>
/// Runs a spider with <c>RetryWhenResultIsEmpty</c> enabled against a single
/// request that uses <c>DownloaderType.Empty</c> with <c>RetryTimes = 5</c>,
/// then checks the recorded statistics: one request in total, counted as
/// failed for the spider, and six successful downloads for the downloader.
/// </summary>
public void RetryWhenResultIsEmpty()
{
    var builder = new SpiderHostBuilder()
        .ConfigureLogging(x => x.AddSerilog())
        .ConfigureAppConfiguration(x => x.AddJsonFile("appsettings.json"))
        .ConfigureServices(services =>
        {
            services.AddLocalEventBus();
            services.AddLocalDownloadCenter();
            services.AddDownloaderAgent(x =>
            {
                x.UseFileLocker();
                x.UseDefaultAdslRedialer();
                x.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(x => x.UseMemory());
        });
    var provider = builder.Build();

    var spider = provider.Create<Spider>();
    spider.NewGuidId();
    spider.Name = "RetryWhenResultIsEmpty";
    spider.EmptySleepTime = 15;
    spider.RetryWhenResultIsEmpty = true;
    spider.Scheduler = new QueueDistinctBfsScheduler();
    spider.AddRequests(new Request("http://www.RetryWhenResultIsEmpty.com")
    {
        DownloaderType = DownloaderType.Empty,
        RetryTimes = 5
    });
    spider.RunAsync().Wait();

    var statisticsStore = provider.GetRequiredService<IStatisticsStore>();
    var s = statisticsStore.GetSpiderStatisticsAsync(spider.Id).Result;

    // Poll the store until download statistics show up. The list has to be
    // re-queried on every attempt: waiting on the first snapshot can never
    // observe new entries and would spin forever when it starts out empty.
    // The attempt cap keeps a broken run from hanging the test suite.
    const int maxAttempts = 30;
    var dss = statisticsStore.GetDownloadStatisticsListAsync(1, 10).Result;
    for (var attempt = 1; dss.Count == 0 && attempt < maxAttempts; attempt++)
    {
        Thread.Sleep(1000);
        dss = statisticsStore.GetDownloadStatisticsListAsync(1, 10).Result;
    }

    Assert.True(dss.Count > 0, "No download statistics were recorded before the polling limit was reached.");
    var ds = dss[0];

    Assert.Equal(1, s.Total);
    Assert.Equal(1, s.Failed);
    Assert.Equal(0, s.Success);

    Assert.Equal(0, ds.Failed);
    Assert.Equal(6, ds.Success);
}
/// <summary>
/// Entry point: starts the image downloader singleton, builds a locally wired
/// spider host, launches the umei picture crawl and then blocks on
/// <c>Console.Read</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used by this method.</param>
static void Main(string[] args)
{
    ImageDownloader.GetInstance().Start();

    // A manual Serilog LoggerConfiguration (console + "dotnet-spider.log" rolling file)
    // and Startup.Execute<SsqSpider>(args) were previously tried here and are disabled.

    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            // The local event bus is used; AddKafkaEventBus is the disabled alternative.
            services.AddLocalEventBus();
            services.AddLocalDownloadCenter();
            services.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        });

    var host = hostBuilder.Build();
    var crawler = host.Create<Spider>();

    // Task identifier.
    crawler.Id = Guid.NewGuid().ToString("N");
    // Task name.
    crawler.Name = "优美图片采集";
    // Crawl speed: requests downloaded per second. Above 1, larger is faster;
    // below 1, smaller is slower; must not be 0.
    crawler.Speed = 2;
    // Crawl depth.
    crawler.Depth = 5;

    // Only the two YouMei data flows are registered; the Nvshens parsers are disabled.
    crawler.AddDataFlow(new YouMeiSpider());
    crawler.AddDataFlow(new YouMeiDetailSpider());

    // Start link (the https://www.nvshens.com/gallery/ start link is disabled).
    crawler.AddRequests("http://www.umei.cc/p/gaoqing/cn/1.htm");

    // Launch; the returned value is not awaited (DistributedSpider.Run() is a disabled alternative).
    crawler.RunAsync();

    // Block on console input after the spider has been launched.
    Console.Read();
}
/// <summary>
/// Builds a spider host on the Kafka event bus, publishes 100 events to
/// "test-topic" with two subscribers attached, waits for console input and
/// then runs a cnblogs crawl that prints results through <c>ConsoleStorage</c>.
/// </summary>
/// <returns>A task that completes when the spider's <c>RunAsync</c> completes.</returns>
public static async Task Run()
{
    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddKafkaEventBus();
            services.AddDownloadCenter(x => x.UseLocalDownloaderAgentStore());
            services.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        });

    var host = hostBuilder.Build();

    // Event bus exercise: two subscriptions on the same topic, then 100 published events.
    var eventBus = host.GetRequiredService<IEventBus>();
    eventBus.Subscribe("test-topic", evt => Console.WriteLine("i am consumer 1"));
    eventBus.Subscribe("test-topic", evt => Console.WriteLine("i am consumer 2"));

    for (var published = 0; published < 100; published++)
    {
        await eventBus.PublishAsync("test-topic", new Event());
    }

    // Block on console input before the spider is created.
    Console.Read();

    var crawler = host.Create<Spider>();

    // Task identifier.
    crawler.NewGuidId();
    // Task name.
    crawler.Name = "博客园全站采集";
    // Crawl speed: requests downloaded per second. Above 1, larger is faster;
    // below 1, smaller is slower; must not be 0.
    crawler.Speed = 10;
    // Crawl depth.
    crawler.Depth = 3;

    var flow = crawler.AddDataFlow(new CnblogsDataParser());
    flow.AddDataFlow(new ConsoleStorage());

    // Start link, with one custom property attached to the request.
    var startProperties = new Dictionary<string, string> { { "key1", "value1" } };
    crawler.AddRequests(new Request("http://www.cnblogs.com/", startProperties));

    // Launch.
    await crawler.RunAsync();
}
/// <summary>
/// Creates a spider host builder with Serilog logging, the
/// <c>appsettings.json</c> configuration file, the through message queue, the
/// local download center, a downloader agent and memory statistics registered.
/// </summary>
/// <returns>The configured builder; <c>Build</c> has not been called on it.</returns>
protected SpiderHostBuilder GetLocalSpiderHostBuilder()
{
    return new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddThroughMessageQueue();
            services.AddLocalDownloadCenter();
            services.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        });
}
/// <summary>
/// Configures the spider host builder (Serilog logging, <c>appsettings.json</c>,
/// local message queue, download center with the local downloader agent store,
/// downloader agent, memory statistics) and builds the provider from it.
/// Both are kept in the instance fields <c>_hostBuilder</c> and <c>_provider</c>.
/// </summary>
public SpiderStart()
{
    _hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddLocalMessageQueue();
            services.AddDownloadCenter(x => x.UseLocalDownloaderAgentStore());
            services.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        });

    // Built once, at construction time.
    _provider = _hostBuilder.Build();
}
/// <summary>
/// Runs the spider: builds a host with the local event bus, local download
/// center, a download agent and Redis-backed statistics, then creates and
/// starts <c>PCNewsSpider</c>.
/// </summary>
/// <returns>The task returned by the spider's <c>RunAsync</c>.</returns>
public static Task Run()
{
    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddLocalEventBus();
            services.AddLocalDownloadCenter();
            services.AddDownloadAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseRedis();
            });
        })
        .Register<PCNewsSpider>();

    var host = hostBuilder.Build();
    return host.Create<PCNewsSpider>().RunAsync();
}
/// <summary>
/// Builds a host with the local message queue, local downloader agent, local
/// download center and memory statistics, then creates and starts
/// <c>EntitySpider</c>.
/// </summary>
/// <returns>The task returned by the spider's <c>RunAsync</c>.</returns>
public static Task Run()
{
    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            services.AddLocalMessageQueue();
            services.AddLocalDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            services.AddLocalDownloadCenter();
            services.AddSpiderStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        })
        .Register<EntitySpider>();

    var host = hostBuilder.Build();
    return host.Create<EntitySpider>().RunAsync();
}
/// <summary>
/// Runs the spider: builds a host with the local event bus, local download
/// center, a download agent and memory statistics, then creates and starts
/// <c>WeiXinSoGouSpider</c>.
/// </summary>
/// <returns>The task returned by the spider's <c>RunAsync</c>.</returns>
public static Task Run()
{
    var hostBuilder = new SpiderHostBuilder()
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureAppConfiguration(configuration =>
        {
            configuration.AddJsonFile("appsettings.json");
        })
        .ConfigureServices(services =>
        {
            // The local event bus is injected here as well; the distributed
            // Kafka message queue is not needed for now.
            services.AddLocalEventBus();
            services.AddLocalDownloadCenter();
            services.AddDownloadAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultInternetDetector();
            });
            services.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        })
        .Register<WeiXinSoGouSpider>();

    var host = hostBuilder.Build();
    return host.Create<WeiXinSoGouSpider>().RunAsync();
}
/// <summary>
/// Builds a spider host on the local event bus and runs a cnblogs crawl that
/// prints results through <c>ConsoleStorage</c>. Any exception raised while
/// building or running is written to the console instead of propagating.
/// </summary>
/// <returns>A task that completes when the crawl finishes or an exception has been printed.</returns>
public static async Task Run()
{
    try
    {
        var hostBuilder = new SpiderHostBuilder()
            .ConfigureLogging(logging =>
            {
                logging.AddSerilog();
            })
            .ConfigureAppConfiguration(configuration =>
            {
                configuration.AddJsonFile("appsettings.json");
            })
            .ConfigureServices(services =>
            {
                services.AddLocalEventBus();
                services.AddDownloadCenter(x => x.UseLocalDownloaderAgentStore());
                services.AddDownloaderAgent(agent =>
                {
                    agent.UseFileLocker();
                    agent.UseDefaultAdslRedialer();
                    agent.UseDefaultInternetDetector();
                });
                services.AddStatisticsCenter(statistics =>
                {
                    statistics.UseMemory();
                });
            });

        var host = hostBuilder.Build();
        var crawler = host.Create<Spider>();

        // Task identifier.
        crawler.NewGuidId();
        // Task name.
        crawler.Name = "博客园全站采集";
        // Crawl speed: requests downloaded per second. Above 1, larger is faster;
        // below 1, smaller is slower; must not be 0.
        crawler.Speed = 10;
        // Crawl depth.
        crawler.Depth = 3;

        var flow = crawler.AddDataFlow(new CnblogsDataParser());
        flow.AddDataFlow(new ConsoleStorage());

        // Start link, with one custom property attached to the request.
        var startProperties = new Dictionary<string, string> { { "key1", "value1" } };
        crawler.AddRequests(new Request("http://www.cnblogs.com/", startProperties));

        // Launch.
        await crawler.RunAsync();
    }
    catch (Exception failure)
    {
        Console.WriteLine(failure.ToString());
    }
}
/// <summary>
/// Runs the spider selected through configuration. The spider type name is
/// read from the <c>type</c> key, and the optional keys <c>name</c>,
/// <c>id</c>, <c>config</c>, <c>args</c> and <c>local</c> control how the
/// host and the spider instance are set up.
/// </summary>
/// <param name="args">Run arguments; printed and added to the host configuration as command-line values.</param>
/// <remarks>
/// Problems (no type given, no spiders detected, unknown type, instance not
/// created) end the method early; all but the "no spiders detected" case log
/// an error first. The spider's <c>RunAsync</c> result is not awaited.
/// </remarks>
public static void Execute(params string[] args)
{
    ConfigureSerialLog();
    Framework.SetEncoding();
    Framework.SetMultiThread();

    // Bootstrap configuration: environment variables plus the process command line.
    var configurationBuilder = new ConfigurationBuilder();
    configurationBuilder.SetBasePath(AppDomain.CurrentDomain.BaseDirectory);
    configurationBuilder.AddEnvironmentVariables();
    configurationBuilder.AddCommandLine(Environment.GetCommandLineArgs(), Framework.SwitchMappings);
    var configuration = configurationBuilder.Build();

    string spiderTypeName = configuration["type"];
    if (string.IsNullOrWhiteSpace(spiderTypeName))
    {
        Log.Logger.Error("未指定需要执行的爬虫类型");
        return;
    }

    var name = configuration["name"];
    var id = configuration["id"] ?? Guid.NewGuid().ToString("N");
    var config = configuration["config"];
    var arguments = configuration["args"]?.Split(' ');
    var local = configuration["local"] == "true";

    PrintEnvironment(args);

    var spiderTypes = DetectSpiders();
    if (spiderTypes == null || spiderTypes.Count == 0)
    {
        return;
    }

    // Ordinal, case-insensitive match: type names are identifiers, so the
    // comparison must not depend on the current culture's casing rules.
    var spiderType = spiderTypes.FirstOrDefault(
        x => string.Equals(x.Name, spiderTypeName, StringComparison.OrdinalIgnoreCase));
    if (spiderType == null)
    {
        Log.Logger.Error($"未找到爬虫: {spiderTypeName}");
        return;
    }

    var builder = new SpiderHostBuilder();
    builder.ConfigureLogging(b =>
    {
#if DEBUG
        b.SetMinimumLevel(LogLevel.Debug);
#else
        b.SetMinimumLevel(LogLevel.Information);
#endif
        b.AddSerilog();
    });
    builder.ConfigureAppConfiguration(b =>
    {
        // Add the JSON configuration file. An explicitly requested file stays
        // mandatory; when none was requested, fall back to an optional
        // appsettings.json instead of passing a null path to AddJsonFile,
        // which would throw.
        if (!string.IsNullOrWhiteSpace(config))
        {
            b.AddJsonFile(config);
        }
        else
        {
            b.AddJsonFile("appsettings.json", true);
        }

        b.AddCommandLine(args);
        b.AddEnvironmentVariables();
    });

    if (local)
    {
        builder.ConfigureServices(b =>
        {
            b.AddLocalEventBus();
            b.AddLocalDownloadCenter();
            b.AddDownloaderAgent(x =>
            {
                x.UseFileLocker();
                x.UseDefaultAdslRedialer();
                x.UseDefaultInternetDetector();
            });
            b.AddStatisticsCenter(x =>
            {
                // Add the in-memory statistics service.
                x.UseMemory();
            });
        });
    }
    else
    {
        builder.ConfigureServices(b =>
        {
            b.AddKafkaEventBus();
        });
    }

    builder.Register(spiderType);
    var provider = builder.Build();
    var instance = provider.Create(spiderType);
    if (instance != null)
    {
        instance.Name = name;
        instance.Id = id;
        // Started without waiting for completion, as before; the caller is
        // responsible for keeping the process alive while the spider runs.
        instance.RunAsync(arguments);
    }
    else
    {
        Log.Logger.Error("创建爬虫对象失败");
    }
}
/// <summary>
/// Runs the spider selected through configuration. The spider type name is
/// read from the <c>spider</c> key, and the optional keys <c>name</c>,
/// <c>id</c>, <c>config</c>, <c>args</c> and <c>distribute</c> control how
/// the host and the spider instance are set up.
/// </summary>
/// <param name="args">Run arguments; used to build the configuration and printed with the environment.</param>
/// <exception cref="SpiderException">No spider was specified under the <c>spider</c> key.</exception>
/// <remarks>
/// When no spiders are detected the method returns silently; an unknown
/// spider name or a failed instance creation writes a console message. The
/// spider's <c>RunAsync</c> result is not awaited.
/// </remarks>
public static void Run(params string[] args)
{
    Framework.SetEncoding();

    var configurationBuilder = Framework.CreateConfigurationBuilder(null, args);
    var configuration = configurationBuilder.Build();

    string spider = configuration["spider"];
    if (string.IsNullOrWhiteSpace(spider))
    {
        throw new SpiderException("未指定需要执行的爬虫");
    }

    var name = configuration["name"];
    var id = configuration["id"] ?? Guid.NewGuid().ToString("N");
    var config = configuration["config"];
    var arguments = configuration["args"]?.Split(' ');
    var distribute = configuration["distribute"] == "true";

    PrintEnvironment(args);

    var spiderTypes = DetectSpiders();
    if (spiderTypes == null || spiderTypes.Count == 0)
    {
        return;
    }

    // Ordinal, case-insensitive match: type names are identifiers, so the
    // comparison must not depend on the current culture's casing rules.
    var spiderType = spiderTypes.FirstOrDefault(
        x => string.Equals(x.Name, spider, StringComparison.OrdinalIgnoreCase));
    if (spiderType == null)
    {
        ConsoleHelper.WriteLine($"未找到爬虫: {spider}", 0, ConsoleColor.DarkYellow);
        return;
    }

    var builder = new SpiderHostBuilder();
    builder.ConfigureLogging(b =>
    {
#if DEBUG
        b.SetMinimumLevel(LogLevel.Debug);
#else
        b.SetMinimumLevel(LogLevel.Information);
#endif
        b.AddSerilog();
    });
    builder.ConfigureAppConfiguration(b =>
    {
        // Add the JSON configuration file. An explicitly requested file stays
        // mandatory; when none was requested, fall back to an optional
        // appsettings.json instead of passing a null path to AddJsonFile,
        // which would throw.
        if (!string.IsNullOrWhiteSpace(config))
        {
            b.AddJsonFile(config);
        }
        else
        {
            b.AddJsonFile("appsettings.json", true);
        }

        b.AddCommandLine(args);
    });

    // Local services are registered unless "distribute" is exactly "true";
    // nothing is registered here for the distributed case.
    if (!distribute)
    {
        builder.ConfigureServices(b =>
        {
            b.AddLocalMessageQueue();
            b.AddLocalDownloaderAgent(x =>
            {
                x.UseFileLocker();
                x.UseDefaultAdslRedialer();
                x.UseDefaultInternetDetector();
            });
            b.AddLocalDownloadCenter();
            b.AddSpiderStatisticsCenter(x =>
            {
                // Add the in-memory statistics service.
                x.UseMemory();
            });
        });
    }

    builder.Register(spiderType);
    var provider = builder.Build();
    var instance = provider.Create(spiderType);
    if (instance != null)
    {
        instance.Name = name;
        instance.Id = id;
        // Started without waiting for completion; the caller is responsible
        // for keeping the process alive while the spider runs.
        instance.RunAsync(arguments);
    }
    else
    {
        ConsoleHelper.WriteLine("创建爬虫对象失败", 0, ConsoleColor.DarkYellow);
    }
}
/// <summary>
/// Entry point: reads the spider class, id and name from configuration, sets
/// up Serilog (console plus a rolling file named after the spider id),
/// resolves the spider type, builds a host on the Kafka message queue and
/// runs the spider. Any exception is logged as an error.
/// </summary>
/// <param name="args">Command-line arguments used to build the configuration.</param>
static void Main(string[] args)
{
    // Fallback values for development runs. They apply only when the
    // corresponding option is not supplied, so values passed through
    // configuration are no longer silently overwritten.
    const string defaultSpiderClass = "DotnetSpider.Spiders.CnblogsSpider";
    const string defaultSpiderId = "xxxxxxxx";

    try
    {
        var builder = new SpiderHostBuilder();

        var configurationBuilder = Framework.CreateConfigurationBuilder(null, args);
        var configuration = configurationBuilder.Build();

        var @class = configuration["dotnetspider.spider.class"];
        if (string.IsNullOrWhiteSpace(@class))
        {
            @class = defaultSpiderClass;
        }

        var spiderId = configuration["dotnetspider.spider.id"];
        if (string.IsNullOrWhiteSpace(spiderId))
        {
            spiderId = defaultSpiderId;
        }

        // Log into /logs/ when that directory exists, otherwise into the working directory.
        var folder = Directory.Exists("/logs/") ? "/logs/" : "";
        var logPath = $"{folder}{spiderId}.log";

        var loggerConfiguration = new LoggerConfiguration()
            .MinimumLevel.Information()
            .MinimumLevel.Override("Microsoft", LogEventLevel.Warning)
            .Enrich.FromLogContext()
            .WriteTo.Console()
            .WriteTo.RollingFile(logPath);
        Log.Logger = loggerConfiguration.CreateLogger();

        var spiderName = configuration["dotnetspider.spider.name"];

        if (string.IsNullOrWhiteSpace(@class) ||
            string.IsNullOrWhiteSpace(spiderId) ||
            string.IsNullOrWhiteSpace(spiderName))
        {
            Log.Logger.Error($"执行爬虫的参数不正确: class {@class}, id {spiderId}, name {spiderName}");
            return;
        }

        var type = Type.GetType(@class);
        if (type == null)
        {
            Log.Logger.Error($"未找到爬虫类型: {@class}");
            return;
        }

        Log.Logger.Information($"获取爬虫类型 {type.FullName} 成功");

        builder.ConfigureAppConfiguration(x =>
        {
            x.AddCommandLine(args);
        });
        builder.ConfigureLogging(x =>
        {
            x.AddSerilog();
        });
        builder.ConfigureServices(services =>
        {
            services.AddKafkaMessageQueue();
        });
        builder.Register(type);

        var provider = builder.Build();
        var spider = provider.Create(type);
        Log.Logger.Information($"创建爬虫实例成功");

        spider.Id = spiderId;
        spider.Name = spiderName;

        Log.Logger.Information($"尝试启动爬虫实例");
        spider.Run();
        Log.Logger.Information($"爬虫实例退出");
    }
    catch (Exception e)
    {
        Log.Logger.Error($"执行失败: {e}");
    }
}