/// <summary>
/// Runs the spider selected through the supplied arguments / configuration.
/// </summary>
/// <remarks>
/// Reads the "spider", "name", "id", "config", "args" and "Distribute" configuration
/// values, resolves the requested spider among the types returned by
/// <see cref="DetectSpiders"/>, builds a service provider, creates the spider through
/// <c>ISpiderFactory</c> and starts it. Returns silently (after printing a message
/// where the code does so) when no spider type can be detected, matched or created.
/// </remarks>
/// <param name="args">Run arguments.</param>
/// <exception cref="SpiderException">The "spider" configuration value is missing or blank.</exception>
public static void Run(params string[] args)
{
    Framework.SetEncoding();

    var configurationBuilder = Framework.CreateConfigurationBuilder(null, args);
    var configuration = configurationBuilder.Build();

    string spider = configuration["spider"];
    if (string.IsNullOrWhiteSpace(spider))
    {
        throw new SpiderException("未指定需要执行的爬虫");
    }

    var name = configuration["name"];
    // Fall back to a freshly generated identifier when none is configured.
    var id = configuration["id"] ?? Guid.NewGuid().ToString("N");
    var config = configuration["config"];
    var arguments = configuration["args"]?.Split(' ');
    var distribute = configuration["Distribute"] == "true";

    PrintEnvironment(args);

    var spiderTypes = DetectSpiders();
    if (spiderTypes == null || spiderTypes.Count == 0)
    {
        return;
    }

    // DetectSpiders returns a name -> type dictionary, so its entries are key/value
    // pairs rather than types. Prefer a match on the registered name (full type name
    // or task name); otherwise fall back to the short type name. Comparison is
    // ordinal and case-insensitive so it does not depend on the current culture.
    // A missing match yields default(KeyValuePair), whose Value is null.
    var spiderType =
        spiderTypes
            .FirstOrDefault(x => string.Equals(x.Key, spider, StringComparison.OrdinalIgnoreCase))
            .Value
        ?? spiderTypes.Values
            .FirstOrDefault(t => string.Equals(t.Name, spider, StringComparison.OrdinalIgnoreCase));

    if (spiderType == null)
    {
        ConsoleHelper.WriteLine($"未找到爬虫: {spider}", 0, ConsoleColor.DarkYellow);
        return;
    }

    var services = new ServiceCollection();
    services.AddDotnetSpider(builder =>
    {
        builder.UseConfiguration(config, args);
        builder.UseSerilog();
        // Standalone mode is used unless distribution was explicitly requested.
        if (!distribute)
        {
            builder.UseStandalone();
        }

        builder.UseDefaultDownloaderAllocator();
        builder.RegisterSpider(spiderType);
    });

    var factory = services.BuildServiceProvider().GetRequiredService<ISpiderFactory>();
    var instance = factory.Create(spiderType);
    if (instance == null)
    {
        ConsoleHelper.WriteLine("创建爬虫对象失败", 0, ConsoleColor.DarkYellow);
        return;
    }

    instance.Name = name;
    instance.Id = id;
    // NOTE(review): whatever RunAsync returns is discarded here, so this method does
    // not wait for the spider to finish - confirm that this is intended.
    instance.RunAsync(arguments);
}
/// <summary>
/// Detects spider types in the assemblies listed by <c>DetectDlls</c>.
/// </summary>
/// <remarks>
/// A type is treated as a spider when it is concrete, has a public parameterless
/// constructor and is assignable to <c>INamed</c>, <c>IIdentity</c> and
/// <c>IRunable</c>. Each spider is registered under its full type name and, when it
/// carries a <c>TaskName</c> attribute, additionally under that attribute's name.
/// </remarks>
/// <returns>
/// A dictionary from registered name to spider type, or <c>null</c> when a name is
/// registered twice or when no spider was found.
/// </returns>
public static Dictionary<string, Type> DetectSpiders()
{
    var spiderTypes = new Dictionary<string, Type>();
    var namedType = typeof(INamed);
    var identityType = typeof(IIdentity);
    var runnableType = typeof(IRunable);

    foreach (var file in DetectDlls())
    {
        var asm = Assembly.Load(file);
        var types = asm.GetTypes();
        Console.WriteLine($"Fetch assembly : {asm.GetName(false)}.");

        foreach (var type in types)
        {
            var fullName = type.FullName;
            if (string.IsNullOrWhiteSpace(fullName))
            {
                continue;
            }

            // Abstract classes and open generic definitions can expose a public
            // parameterless constructor but can never be instantiated, so they
            // must not be offered as runnable spiders.
            if (type.IsAbstract || type.ContainsGenericParameters)
            {
                continue;
            }

            // GetConstructors() only returns public instance constructors.
            var hasParameterlessConstructor =
                type.GetConstructors().Any(c => c.GetParameters().Length == 0);
            if (!hasParameterlessConstructor)
            {
                continue;
            }

            var isSpider = namedType.IsAssignableFrom(type)
                           && runnableType.IsAssignableFrom(type)
                           && identityType.IsAssignableFrom(type);
            if (!isSpider)
            {
                continue;
            }

            if (spiderTypes.ContainsKey(fullName))
            {
                ConsoleHelper.WriteLine($"Spider {type.Name} are duplicate.", 1);
                return null;
            }

            spiderTypes.Add(fullName, type);

            // Optional alias: the name given by a TaskName attribute (inherited
            // attributes included) is registered as a second key for the same type.
            var startupName =
                type.GetCustomAttributes(typeof(TaskName), true).FirstOrDefault() as TaskName;
            if (startupName == null)
            {
                continue;
            }

            if (spiderTypes.ContainsKey(startupName.Name))
            {
                ConsoleHelper.WriteLine($"Spider {type.Name} are duplicate.", 1);
                return null;
            }

            spiderTypes.Add(startupName.Name, type);
        }
    }

    if (spiderTypes.Count == 0)
    {
        ConsoleHelper.WriteLine("Did not detect any spider.", 1, ConsoleColor.DarkYellow);
        return null;
    }

    // Counts registered names, so a spider with a TaskName alias contributes two.
    Console.WriteLine($"Count of crawlers: {spiderTypes.Count}");
    return spiderTypes;
}
/// <summary>
/// Parses run arguments of the form <c>-key:value</c> or <c>-key</c>.
/// </summary>
/// <remarks>
/// Spaces inside an argument are removed before parsing. The key is everything
/// before the first ':' and must be a '-' followed by word characters; the value is
/// everything after the first ':' (so values may themselves contain ':'). A repeated
/// <c>-key:value</c> overwrites the earlier value, while a bare <c>-key</c> never
/// replaces an existing value. The keys <c>-s</c> and <c>-tid</c> are mandatory.
/// </remarks>
/// <param name="args">Run arguments.</param>
/// <returns>
/// The dictionary of parsed arguments, or <c>null</c> (after printing a usage or
/// error message) when an argument is blank or malformed or a mandatory key is missing.
/// </returns>
public static Dictionary<string, string> AnalyzeArguments(params string[] args)
{
    const string blankArgumentUsage =
        "Command: -s:[spider type name] -i:[identity] -a:[arg1,arg2...] -tid:[taskId] -n:[name] -e:[en1=value1,en2=value2,...]";
    const string malformedArgumentUsage =
        "Command: -s:[spider type name] -i:[identity] -a:[arg1,arg2...] -tid:[taskId] -n:[name]";

    var arguments = new Dictionary<string, string>();

    // A null array (explicitly passed by a caller) is treated like "no arguments".
    foreach (var arg in args ?? new string[0])
    {
        if (string.IsNullOrWhiteSpace(arg))
        {
            ConsoleHelper.WriteLine(blankArgumentUsage);
            return null;
        }

        // Split on the first ':' only, so a value such as a URL keeps its own colons.
        var parts = arg.Replace(" ", "").Split(new[] { ':' }, 2);

        var key = parts[0].Trim();
        // Anchored so the whole key must look like "-name", not merely contain it.
        if (!Regex.IsMatch(key, @"^-\w+$"))
        {
            ConsoleHelper.WriteLine(malformedArgumentUsage);
            return null;
        }

        if (parts.Length == 2)
        {
            // The last occurrence of a key wins.
            arguments[key] = parts[1].Trim();
        }
        else if (!arguments.ContainsKey(key))
        {
            // Bare flag: record it with an empty value unless a value already exists.
            arguments.Add(key, string.Empty);
        }
    }

    if (!arguments.ContainsKey("-s") || !arguments.ContainsKey("-tid"))
    {
        ConsoleHelper.WriteLine("Error: -s & -tid are necessary.");
        return null;
    }

    return arguments;
}