예제 #1
0
        /// <summary>
        /// Configures the global Serilog logger, builds and starts a spider host, then runs
        /// <c>IndexSpider</c> and, once that run's task has finished, <c>InfoSpider</c>.
        /// </summary>
        /// <remarks>
        /// Neither spider run is waited on: the method returns as soon as the continuation
        /// has been scheduled.
        /// </remarks>
        /// <param name="args">
        /// Arguments added to the host configuration as command-line values and passed to the
        /// <c>InfoSpider</c> run.
        /// </param>
        public static void StartWithHost(string[] args)
        {
            // Logger: Verbose in DEBUG builds, Information otherwise; writes to the console and a rolling file.
            var loggerSetup = new LoggerConfiguration();
#if DEBUG
            loggerSetup = loggerSetup.MinimumLevel.Verbose();
#else
            loggerSetup = loggerSetup.MinimumLevel.Information();
#endif
            loggerSetup = loggerSetup.MinimumLevel.Override("Microsoft", LogEventLevel.Warning);
            loggerSetup = loggerSetup.Enrich.FromLogContext();
            loggerSetup = loggerSetup.WriteTo.Console();
            loggerSetup = loggerSetup.WriteTo.RollingFile("dotnet-spider.log");
            Log.Logger  = loggerSetup.CreateLogger();

            var spiderHostBuilder = new SpiderHostBuilder()
                .ConfigureAppConfiguration(config =>
                {
                    // appsettings.json is only loaded when it exists in the working directory.
                    const string settingsFile = "appsettings.json";
                    if (File.Exists(settingsFile))
                    {
                        config.AddJsonFile(settingsFile);
                    }

                    // Environment variables are not added here; only the command line is.
                    config.AddCommandLine(args);
                })
                .ConfigureLogging(logging =>
                {
                    logging.AddSerilog();
                })
                .ConfigureServices(serviceCollection =>
                {
                    serviceCollection.AddLocalEventBus();
                    serviceCollection.AddSingleton<IScheduler>(new MyScheduler());
                    serviceCollection.AddLocalDownloadCenter();
                    serviceCollection.AddDownloaderAgent(agent =>
                    {
                        agent.UseFileLocker();
                        agent.UseDefaultAdslRedialer();
                        agent.UseDefaultInternetDetector();
                    });
                    serviceCollection.AddStatisticsCenter(statistics =>
                    {
                        statistics.UseMemory();
                    });
                });

            spiderHostBuilder.Register<IndexSpider>();
            spiderHostBuilder.Register<InfoSpider>();

            var spiderHost = spiderHostBuilder.Build();
            spiderHost.Start();

            // Kick off the index spider; the info spider is created and started from the continuation.
            var indexSpider = spiderHost.Create<IndexSpider>();
            Task indexRun = indexSpider.RunAsync();

            indexRun.ContinueWith(finished =>
            {
                var infoSpider = spiderHost.Create<InfoSpider>();
                infoSpider.RunAsync(args);
            });
        }
예제 #2
0
        /// <summary>
        /// Resolves the spider type requested through configuration, builds a spider host
        /// for it and launches the spider.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Spider selection is read from environment variables and the process command line
        /// (<c>Environment.GetCommandLineArgs()</c> with <c>Framework.SwitchMappings</c>).
        /// Keys used: <c>type</c> (required spider type name, matched case-insensitively),
        /// <c>name</c>, <c>id</c> (a new GUID when absent), <c>config</c> (optional JSON
        /// configuration file), <c>args</c> (split on spaces and passed to the spider) and
        /// <c>local</c> (<c>"true"</c> registers the local event bus, download center,
        /// downloader agent and in-memory statistics; anything else registers the Kafka event bus).
        /// </para>
        /// <para>
        /// The spider is started without being waited on, so this method returns once the run
        /// has been launched. A faulted run is written to the Serilog logger.
        /// </para>
        /// </remarks>
        /// <param name="args">
        /// Run arguments; passed to <c>PrintEnvironment</c> and added to the host configuration
        /// as command-line values.
        /// </param>
        public static void Execute(params string[] args)
        {
            ConfigureSerialLog();

            Framework.SetEncoding();

            Framework.SetMultiThread();

            var configurationBuilder = new ConfigurationBuilder();

            configurationBuilder.SetBasePath(AppDomain.CurrentDomain.BaseDirectory);
            configurationBuilder.AddEnvironmentVariables();
            configurationBuilder.AddCommandLine(Environment.GetCommandLineArgs(), Framework.SwitchMappings);
            var configuration = configurationBuilder.Build();

            string spiderTypeName = configuration["type"];

            if (string.IsNullOrWhiteSpace(spiderTypeName))
            {
                Log.Logger.Error("未指定需要执行的爬虫类型");
                return;
            }

            var name      = configuration["name"];
            var id        = configuration["id"] ?? Guid.NewGuid().ToString("N");
            var config    = configuration["config"];
            var arguments = configuration["args"]?.Split(' ');
            var local     = configuration["local"] == "true";

            PrintEnvironment(args);

            var spiderTypes = DetectSpiders();

            if (spiderTypes == null || spiderTypes.Count == 0)
            {
                return;
            }

            // Ordinal, case-insensitive match: type names are identifiers, so the comparison
            // must not depend on the current culture (as ToLower() does).
            var spiderType = spiderTypes.FirstOrDefault(
                x => string.Equals(x.Name, spiderTypeName, StringComparison.OrdinalIgnoreCase));

            if (spiderType == null)
            {
                // Constant message template; ":l" renders the string without surrounding quotes.
                Log.Logger.Error("未找到爬虫: {SpiderType:l}", spiderTypeName);
                return;
            }

            var builder = new SpiderHostBuilder();

            builder.ConfigureLogging(b =>
            {
#if DEBUG
                b.SetMinimumLevel(LogLevel.Debug);
#else
                b.SetMinimumLevel(LogLevel.Information);
#endif
                b.AddSerilog();
            });
            builder.ConfigureAppConfiguration(b =>
            {
                // Add the JSON configuration file only when one was specified:
                // AddJsonFile rejects a null or empty path.
                if (!string.IsNullOrWhiteSpace(config))
                {
                    b.AddJsonFile(config);
                }

                b.AddCommandLine(args);
                b.AddEnvironmentVariables();
            });

            if (local)
            {
                builder.ConfigureServices(b =>
                {
                    b.AddLocalEventBus();
                    b.AddLocalDownloadCenter();
                    b.AddDownloaderAgent(x =>
                    {
                        x.UseFileLocker();
                        x.UseDefaultAdslRedialer();
                        x.UseDefaultInternetDetector();
                    });
                    b.AddStatisticsCenter(x =>
                    {
                        // Use the in-memory statistics service.
                        x.UseMemory();
                    });
                });
            }
            else
            {
                builder.ConfigureServices(b => { b.AddKafkaEventBus(); });
            }

            builder.Register(spiderType);
            var provider = builder.Build();
            var instance = provider.Create(spiderType);
            if (instance != null)
            {
                instance.Name = name;
                instance.Id   = id;

                // The run is intentionally not waited on (the former bare GetAwaiter() call
                // never waited either). Observe a faulted run so its exception is logged
                // instead of being silently dropped.
                instance.RunAsync(arguments).ContinueWith(
                    t => Log.Logger.Error(t.Exception, "爬虫运行失败"),
                    System.Threading.Tasks.TaskContinuationOptions.OnlyOnFaulted);
            }
            else
            {
                Log.Logger.Error("创建爬虫对象失败");
            }
        }
예제 #3
0
        /// <summary>
        /// Resolves the spider named in configuration, builds a spider host for it and
        /// launches the spider.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Configuration comes from <c>Framework.CreateConfigurationBuilder(null, args)</c>.
        /// Keys used: <c>spider</c> (required spider type name, matched case-insensitively),
        /// <c>name</c>, <c>id</c> (a new GUID when absent), <c>config</c> (optional JSON
        /// configuration file), <c>args</c> (split on spaces and passed to the spider) and
        /// <c>distribute</c>. Unless <c>distribute</c> is <c>"true"</c>, the local message queue,
        /// downloader agent, download center and in-memory statistics are registered; when it is
        /// <c>"true"</c> this method registers no services itself.
        /// </para>
        /// <para>
        /// The spider is started without being waited on; this method returns once
        /// <c>RunAsync</c> has been called.
        /// </para>
        /// </remarks>
        /// <param name="args">
        /// Run arguments; used to build the configuration, passed to <c>PrintEnvironment</c>
        /// and added to the host configuration as command-line values.
        /// </param>
        /// <exception cref="SpiderException">No <c>spider</c> value was configured.</exception>
        public static void Run(params string[] args)
        {
            Framework.SetEncoding();

            var    configurationBuilder = Framework.CreateConfigurationBuilder(null, args);
            var    configuration        = configurationBuilder.Build();
            string spider = configuration["spider"];

            if (string.IsNullOrWhiteSpace(spider))
            {
                throw new SpiderException("未指定需要执行的爬虫");
            }

            var name       = configuration["name"];
            var id         = configuration["id"] ?? Guid.NewGuid().ToString("N");
            var config     = configuration["config"];
            var arguments  = configuration["args"]?.Split(' ');
            var distribute = configuration["distribute"] == "true";

            PrintEnvironment(args);

            var spiderTypes = DetectSpiders();

            if (spiderTypes == null || spiderTypes.Count == 0)
            {
                return;
            }

            // Ordinal, case-insensitive match: type names are identifiers, so the comparison
            // must not depend on the current culture (as ToLower() does).
            var spiderType = spiderTypes.FirstOrDefault(
                x => string.Equals(x.Name, spider, StringComparison.OrdinalIgnoreCase));

            if (spiderType == null)
            {
                ConsoleHelper.WriteLine($"未找到爬虫: {spider}", 0, ConsoleColor.DarkYellow);
                return;
            }

            var builder = new SpiderHostBuilder();

            builder.ConfigureLogging(b =>
            {
#if DEBUG
                b.SetMinimumLevel(LogLevel.Debug);
#else
                b.SetMinimumLevel(LogLevel.Information);
#endif
                b.AddSerilog();
            });
            builder.ConfigureAppConfiguration(b =>
            {
                // Add the JSON configuration file only when one was specified:
                // AddJsonFile rejects a null or empty path.
                if (!string.IsNullOrWhiteSpace(config))
                {
                    b.AddJsonFile(config);
                }

                b.AddCommandLine(args);
            });

            if (!distribute)
            {
                builder.ConfigureServices(b =>
                {
                    b.AddLocalMessageQueue();
                    b.AddLocalDownloaderAgent(x =>
                    {
                        x.UseFileLocker();
                        x.UseDefaultAdslRedialer();
                        x.UseDefaultInternetDetector();
                    });
                    b.AddLocalDownloadCenter();
                    b.AddSpiderStatisticsCenter(x =>
                    {
                        // Use the in-memory statistics service.
                        x.UseMemory();
                    });
                });
            }

            builder.Register(spiderType);
            var provider = builder.Build();
            var instance = provider.Create(spiderType);
            if (instance != null)
            {
                instance.Name = name;
                instance.Id   = id;

                // NOTE(review): the result of RunAsync is discarded, so the run is not waited on
                // and any failure it reports through its return value is not observed here.
                instance.RunAsync(arguments);
            }
            else
            {
                ConsoleHelper.WriteLine("创建爬虫对象失败", 0, ConsoleColor.DarkYellow);
            }
        }
예제 #4
0
        /// <summary>
        /// Program entry point: reads the spider class, id and name from configuration,
        /// configures Serilog, builds a spider host with the Kafka message queue and runs
        /// the spider.
        /// </summary>
        /// <remarks>
        /// Configuration keys: <c>dotnetspider.spider.class</c>, <c>dotnetspider.spider.id</c>
        /// and <c>dotnetspider.spider.name</c>. Class and id fall back to built-in sample values
        /// when they are not supplied; the name is required. Any exception is logged rather than
        /// propagated, and the logger is flushed before the method returns.
        /// </remarks>
        /// <param name="args">Command-line arguments used to build the configuration.</param>
        static void Main(string[] args)
        {
            try
            {
                var builder = new SpiderHostBuilder();

                var configurationBuilder = Framework.CreateConfigurationBuilder(null, args);
                var configuration        = configurationBuilder.Build();
                var @class   = configuration["dotnetspider.spider.class"];
                var spiderId = configuration["dotnetspider.spider.id"];

                // These sample values used to overwrite the configured class and id
                // unconditionally, which made both settings impossible to change.
                // They now act only as defaults for values that were not supplied.
                if (string.IsNullOrWhiteSpace(@class))
                {
                    @class = "DotnetSpider.Spiders.CnblogsSpider";
                }

                if (string.IsNullOrWhiteSpace(spiderId))
                {
                    spiderId = "xxxxxxxx";
                }

                var folder = Directory.Exists("/logs/") ? "/logs/" : "";

                // spiderId is always set at this point, so the log file is named after it.
                var logPath = $"{folder}{spiderId}.log";

                var loggerConfiguration = new LoggerConfiguration()
                                          .MinimumLevel.Information()
                                          .MinimumLevel.Override("Microsoft", LogEventLevel.Warning)
                                          .Enrich.FromLogContext()
                                          .WriteTo.Console().WriteTo
                                          .RollingFile(logPath);
                Log.Logger = loggerConfiguration.CreateLogger();

                var spiderName = configuration["dotnetspider.spider.name"];
                if (string.IsNullOrWhiteSpace(@class) ||
                    string.IsNullOrWhiteSpace(spiderId) ||
                    string.IsNullOrWhiteSpace(spiderName)
                    )
                {
                    Log.Logger.Error($"执行爬虫的参数不正确: class {@class}, id {spiderId}, name {spiderName}");
                    return;
                }

                var type = Type.GetType(@class);
                if (type == null)
                {
                    Log.Logger.Error($"未找到爬虫类型: {@class}");
                    return;
                }

                Log.Logger.Information($"获取爬虫类型 {type.FullName} 成功");
                builder.ConfigureAppConfiguration(x =>
                {
                    x.AddCommandLine(args);
                });
                builder.ConfigureLogging(x =>
                {
                    x.AddSerilog();
                });
                builder.ConfigureServices(services =>
                {
                    services.AddKafkaMessageQueue();
                });
                builder.Register(type);
                var provider = builder.Build();

                var spider = provider.Create(type);
                if (spider == null)
                {
                    // Report a failed creation explicitly instead of failing on the first member access.
                    Log.Logger.Error("创建爬虫对象失败");
                    return;
                }

                Log.Logger.Information("创建爬虫实例成功");
                spider.Id   = spiderId;
                spider.Name = spiderName;

                Log.Logger.Information("尝试启动爬虫实例");
                spider.Run();

                Log.Logger.Information("爬虫实例退出");
            }
            catch (Exception e)
            {
                // Pass the exception object itself so sinks receive the full exception.
                Log.Logger.Error(e, "执行失败");
            }
            finally
            {
                // Flush buffered events (e.g. the file sink) before the process exits.
                Log.CloseAndFlush();
            }
        }