/// <summary>
/// Configures the global Serilog logger, builds a spider host wired with the
/// local (in-process) services, starts it, and launches an <c>IndexSpider</c>
/// run followed by an <c>InfoSpider</c> run.
/// </summary>
/// <remarks>
/// Neither spider task is waited on here; the method returns as soon as the
/// first run has been started and the continuation has been registered.
/// </remarks>
/// <param name="args">
/// Command-line arguments. They are added to the host configuration and are
/// also passed to the <c>InfoSpider</c> run.
/// </param>
public static void StartWithHost(string[] args)
{
    const string settingsFile = "appsettings.json";

    // Log to the console and to a rolling file; the "Microsoft" source is
    // overridden to Warning regardless of the build configuration.
    Log.Logger = new LoggerConfiguration()
#if DEBUG
        .MinimumLevel.Verbose()
#else
        .MinimumLevel.Information()
#endif
        .MinimumLevel.Override("Microsoft", LogEventLevel.Warning)
        .Enrich.FromLogContext()
        .WriteTo.Console()
        .WriteTo.RollingFile("dotnet-spider.log")
        .CreateLogger();

    var builder = new SpiderHostBuilder()
        .ConfigureAppConfiguration(configuration =>
        {
            // The JSON settings file is only added when it exists in the
            // current working directory.
            if (File.Exists(settingsFile))
            {
                configuration.AddJsonFile(settingsFile);
            }

            configuration.AddCommandLine(args);
        })
        .ConfigureLogging(logging =>
        {
            logging.AddSerilog();
        })
        .ConfigureServices(collection =>
        {
            collection.AddLocalEventBus();
            collection.AddSingleton<IScheduler>(new MyScheduler());
            collection.AddLocalDownloadCenter();
            collection.AddDownloaderAgent(agent =>
            {
                agent.UseFileLocker();
                agent.UseDefaultAdslRedialer();
                agent.UseDefaultInternetDetector();
            });
            collection.AddStatisticsCenter(statistics =>
            {
                statistics.UseMemory();
            });
        });

    builder.Register<IndexSpider>();
    builder.Register<InfoSpider>();

    var spiderHost = builder.Build();
    spiderHost.Start();

    // Start the index spider first; the info spider is created and started
    // from the continuation once the index spider's task has completed.
    Task indexRun = spiderHost.Create<IndexSpider>().RunAsync();
    indexRun.ContinueWith(finished =>
    {
        spiderHost.Create<InfoSpider>().RunAsync(args);
    });
}
/// <summary>
/// Resolves a spider type by name from configuration, builds a spider host for
/// it (local services or Kafka event bus), creates the spider and starts it.
/// </summary>
/// <remarks>
/// The selection settings (<c>type</c>, <c>name</c>, <c>id</c>, <c>config</c>,
/// <c>args</c>, <c>local</c>) are read from environment variables and from
/// <see cref="Environment.GetCommandLineArgs"/>, not from <paramref name="args"/>.
/// Failures (no type given, type not found, instance not created) are logged
/// and the method returns without throwing.
/// </remarks>
/// <param name="args">
/// Run arguments; printed via <c>PrintEnvironment</c> and added to the host's
/// configuration as a command-line source.
/// </param>
public static void Execute(params string[] args)
{
    ConfigureSerialLog();
    Framework.SetEncoding();
    Framework.SetMultiThread();

    var configurationBuilder = new ConfigurationBuilder();
    configurationBuilder.SetBasePath(AppDomain.CurrentDomain.BaseDirectory);
    configurationBuilder.AddEnvironmentVariables();
    configurationBuilder.AddCommandLine(Environment.GetCommandLineArgs(), Framework.SwitchMappings);
    var configuration = configurationBuilder.Build();

    string spiderTypeName = configuration["type"];
    if (string.IsNullOrWhiteSpace(spiderTypeName))
    {
        Log.Logger.Error("未指定需要执行的爬虫类型");
        return;
    }

    var name = configuration["name"];
    // Fall back to a freshly generated identifier when none is configured.
    var id = configuration["id"] ?? Guid.NewGuid().ToString("N");
    var config = configuration["config"];
    var arguments = configuration["args"]?.Split(' ');
    var local = configuration["local"] == "true";

    PrintEnvironment(args);

    var spiderTypes = DetectSpiders();
    if (spiderTypes == null || spiderTypes.Count == 0)
    {
        return;
    }

    // Ordinal, case-insensitive match: type names are identifiers, so the
    // comparison must not depend on the current culture's casing rules.
    var spiderType = spiderTypes.FirstOrDefault(
        x => string.Equals(x.Name, spiderTypeName, StringComparison.OrdinalIgnoreCase));
    if (spiderType == null)
    {
        Log.Logger.Error($"未找到爬虫: {spiderTypeName}");
        return;
    }

    var builder = new SpiderHostBuilder();
    builder.ConfigureLogging(b =>
    {
#if DEBUG
        b.SetMinimumLevel(LogLevel.Debug);
#else
        b.SetMinimumLevel(LogLevel.Information);
#endif
        b.AddSerilog();
    });
    builder.ConfigureAppConfiguration(b =>
    {
        // Add the JSON configuration file only when one was specified;
        // AddJsonFile rejects a null or empty path.
        if (!string.IsNullOrWhiteSpace(config))
        {
            b.AddJsonFile(config);
        }

        b.AddCommandLine(args);
        b.AddEnvironmentVariables();
    });

    if (local)
    {
        builder.ConfigureServices(b =>
        {
            b.AddLocalEventBus();
            b.AddLocalDownloadCenter();
            b.AddDownloaderAgent(x =>
            {
                x.UseFileLocker();
                x.UseDefaultAdslRedialer();
                x.UseDefaultInternetDetector();
            });
            b.AddStatisticsCenter(x =>
            {
                // Add the in-memory statistics service.
                x.UseMemory();
            });
        });
    }
    else
    {
        builder.ConfigureServices(b => { b.AddKafkaEventBus(); });
    }

    builder.Register(spiderType);
    var provider = builder.Build();
    var instance = provider.Create(spiderType);
    if (instance != null)
    {
        instance.Name = name;
        instance.Id = id;
        // NOTE(review): GetAwaiter() without GetResult() does not wait for the
        // run to finish, so this call is effectively fire-and-forget. Left
        // unchanged because callers may rely on Execute returning immediately;
        // confirm whether blocking was intended.
        instance.RunAsync(arguments).ConfigureAwait(false).GetAwaiter();
    }
    else
    {
        Log.Logger.Error("创建爬虫对象失败");
    }
}
/// <summary>
/// Resolves a spider type by name from configuration, builds a spider host for
/// it, creates the spider and starts it.
/// </summary>
/// <remarks>
/// The settings <c>spider</c>, <c>name</c>, <c>id</c>, <c>config</c>,
/// <c>args</c> and <c>distribute</c> are read from the configuration built by
/// <c>Framework.CreateConfigurationBuilder(null, args)</c>. Local services are
/// registered unless <c>distribute</c> equals <c>"true"</c>. The spider's
/// <c>RunAsync</c> call is not waited on.
/// </remarks>
/// <param name="args">Run arguments used as a configuration source.</param>
/// <exception cref="SpiderException">
/// Thrown when the <c>spider</c> setting is missing or blank.
/// </exception>
public static void Run(params string[] args)
{
    Framework.SetEncoding();

    var configurationBuilder = Framework.CreateConfigurationBuilder(null, args);
    var configuration = configurationBuilder.Build();

    string spider = configuration["spider"];
    if (string.IsNullOrWhiteSpace(spider))
    {
        throw new SpiderException("未指定需要执行的爬虫");
    }

    var name = configuration["name"];
    // Fall back to a freshly generated identifier when none is configured.
    var id = configuration["id"] ?? Guid.NewGuid().ToString("N");
    var config = configuration["config"];
    var arguments = configuration["args"]?.Split(' ');
    var distribute = configuration["distribute"] == "true";

    PrintEnvironment(args);

    var spiderTypes = DetectSpiders();
    if (spiderTypes == null || spiderTypes.Count == 0)
    {
        return;
    }

    // Ordinal, case-insensitive match: type names are identifiers, so the
    // comparison must not depend on the current culture's casing rules.
    var spiderType = spiderTypes.FirstOrDefault(
        x => string.Equals(x.Name, spider, StringComparison.OrdinalIgnoreCase));
    if (spiderType == null)
    {
        ConsoleHelper.WriteLine($"未找到爬虫: {spider}", 0, ConsoleColor.DarkYellow);
        return;
    }

    var builder = new SpiderHostBuilder();
    builder.ConfigureLogging(b =>
    {
#if DEBUG
        b.SetMinimumLevel(LogLevel.Debug);
#else
        b.SetMinimumLevel(LogLevel.Information);
#endif
        b.AddSerilog();
    });
    builder.ConfigureAppConfiguration(b =>
    {
        // Add the JSON configuration file only when one was specified;
        // AddJsonFile rejects a null or empty path.
        if (!string.IsNullOrWhiteSpace(config))
        {
            b.AddJsonFile(config);
        }

        b.AddCommandLine(args);
    });

    if (!distribute)
    {
        builder.ConfigureServices(b =>
        {
            b.AddLocalMessageQueue();
            b.AddLocalDownloaderAgent(x =>
            {
                x.UseFileLocker();
                x.UseDefaultAdslRedialer();
                x.UseDefaultInternetDetector();
            });
            b.AddLocalDownloadCenter();
            b.AddSpiderStatisticsCenter(x =>
            {
                // Add the in-memory statistics service.
                x.UseMemory();
            });
        });
    }

    builder.Register(spiderType);
    var provider = builder.Build();
    var instance = provider.Create(spiderType);
    if (instance != null)
    {
        instance.Name = name;
        instance.Id = id;
        instance.RunAsync(arguments);
    }
    else
    {
        ConsoleHelper.WriteLine("创建爬虫对象失败", 0, ConsoleColor.DarkYellow);
    }
}
/// <summary>
/// Process entry point: reads the spider class, id and name from
/// configuration, sets up Serilog, builds a spider host backed by the Kafka
/// message queue, then creates and runs the spider synchronously.
/// </summary>
/// <remarks>
/// Any exception is caught and logged; the logger is flushed before the
/// process exits.
/// </remarks>
/// <param name="args">Command-line arguments used as a configuration source.</param>
static void Main(string[] args)
{
    try
    {
        var builder = new SpiderHostBuilder();
        var configurationBuilder = Framework.CreateConfigurationBuilder(null, args);
        var configuration = configurationBuilder.Build();

        // The values come from configuration only; the hard-coded debug
        // overrides that used to replace them have been removed.
        var @class = configuration["dotnetspider.spider.class"];
        var spiderId = configuration["dotnetspider.spider.id"];
        var spiderName = configuration["dotnetspider.spider.name"];

        // Prefer the "/logs/" directory when it exists, otherwise log
        // relative to the working directory. The timestamp fallback avoids
        // ':' because it is not a valid file-name character on Windows.
        var folder = Directory.Exists("/logs/") ? "/logs/" : "";
        var logPath = string.IsNullOrWhiteSpace(spiderId)
            ? $"{folder}{DateTime.Now:yyyy-MM-dd_HH-mm-ss}.log"
            : $"{folder}{spiderId}.log";

        var loggerConfiguration = new LoggerConfiguration()
            .MinimumLevel.Information()
            .MinimumLevel.Override("Microsoft", LogEventLevel.Warning)
            .Enrich.FromLogContext()
            .WriteTo.Console()
            .WriteTo.RollingFile(logPath);
        Log.Logger = loggerConfiguration.CreateLogger();

        if (string.IsNullOrWhiteSpace(@class) ||
            string.IsNullOrWhiteSpace(spiderId) ||
            string.IsNullOrWhiteSpace(spiderName))
        {
            Log.Logger.Error($"执行爬虫的参数不正确: class {@class}, id {spiderId}, name {spiderName}");
            return;
        }

        var type = Type.GetType(@class);
        if (type == null)
        {
            Log.Logger.Error($"未找到爬虫类型: {@class}");
            return;
        }

        Log.Logger.Information($"获取爬虫类型 {type.FullName} 成功");

        builder.ConfigureAppConfiguration(x => { x.AddCommandLine(args); });
        builder.ConfigureLogging(x => { x.AddSerilog(); });
        builder.ConfigureServices(services => { services.AddKafkaMessageQueue(); });
        builder.Register(type);

        var provider = builder.Build();
        var spider = provider.Create(type);
        Log.Logger.Information($"创建爬虫实例成功");

        spider.Id = spiderId;
        spider.Name = spiderName;

        Log.Logger.Information($"尝试启动爬虫实例");
        spider.Run();
        Log.Logger.Information($"爬虫实例退出");
    }
    catch (Exception e)
    {
        Log.Logger.Error($"执行失败: {e}");
    }
    finally
    {
        // Flush and dispose the Serilog sinks so buffered events reach the
        // log file before the process exits.
        Log.CloseAndFlush();
    }
}