Пример #1
0
 /// <summary>
 /// Creates a local downloader agent. Every argument is forwarded unchanged to the base constructor.
 /// </summary>
 /// <param name="options">Downloader agent options.</param>
 /// <param name="spiderOptions">Spider options.</param>
 /// <param name="eventBus">Event bus (message queue).</param>
 /// <param name="networkCenter">Network center.</param>
 /// <param name="logger">Logger.</param>
 public LocalDownloaderAgent(
     DownloaderAgentOptions options,
     SpiderOptions spiderOptions,
     IEventBus eventBus,
     NetworkCenter networkCenter,
     ILogger<LocalDownloaderAgent> logger)
     : base(options, spiderOptions, eventBus, networkCenter, logger)
 {
     // Nothing to initialize here. Disabled hook kept for reference:
     // ConfigureDownloader = downloader => downloader.Logger = null;
 }
Пример #2
0
 /// <summary>
 /// Creates a local download agent. Every argument is forwarded unchanged to the base constructor.
 /// </summary>
 /// <param name="options">Download agent options.</param>
 /// <param name="spiderOptions">Spider options.</param>
 /// <param name="eventBus">Event bus (message queue).</param>
 /// <param name="logger">Logger.</param>
 public LocalDownloadedAgent(
     DownloadAgentOptions options,
     SpiderOptions spiderOptions,
     IEventBus eventBus,
     ILogger<LocalDownloadedAgent> logger)
     : base(options, spiderOptions, eventBus, logger)
 {
     // Nothing to initialize here. Disabled hook kept for reference:
     // ConfigureDownload = download => download.Logger = null;
 }
Пример #3
0
        /// <summary>
        /// Spiders the site starting at <paramref name="url"/> using the in-memory
        /// workload manager, prints the spider status, and then prints either the
        /// bad links collected by the report or a message saying none were found.
        /// </summary>
        /// <param name="url">The URL to check for bad links.</param>
        public void check(Uri url)
        {
            var spiderOptions = new SpiderOptions
            {
                WorkloadManager = typeof(MemoryWorkloadManager).FullName
            };
            var linkReport = new LinkReport();
            var crawler    = new Spider(spiderOptions, linkReport);

            crawler.AddURL(url, null, 1);
            crawler.Process();
            Console.WriteLine(crawler.Status);

            // Nothing bad was recorded: report that and stop.
            if (linkReport.Bad.Count <= 0)
            {
                Console.WriteLine("No bad links were found.");
                return;
            }

            Console.WriteLine("Bad Links Found:");
            foreach (String badLink in linkReport.Bad)
            {
                Console.WriteLine(badLink);
            }
        }
Пример #4
0
        /// <summary>
        /// Creates the storage named by <c>options.Storage</c>: resolves the type, checks that it
        /// derives from <see cref="StorageBase"/>, and invokes its public static
        /// <c>CreateFromOptions(SpiderOptions)</c> factory method via reflection.
        /// </summary>
        /// <param name="options">Spider options; <c>Storage</c> holds the type name to resolve.</param>
        /// <returns>The storage instance returned by the factory method.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
        /// <exception cref="SpiderException">
        /// The type cannot be resolved, is not a <see cref="StorageBase"/>, has no suitable
        /// factory method, or the factory returned null.
        /// </exception>
        internal static StorageBase GetDefaultStorage(SpiderOptions options)
        {
            if (options == null)
            {
                throw new ArgumentNullException(nameof(options));
            }

            var type = Type.GetType(options.Storage);

            if (type == null)
            {
                throw new SpiderException("存储器类型配置不正确,或者未添加对应的库");
            }

            if (!typeof(StorageBase).IsAssignableFrom(type))
            {
                throw new SpiderException("存储器类型配置不正确");
            }

            // Look up the exact public static overload. A name-only lookup throws
            // AmbiguousMatchException when overloads exist and may return an instance
            // method, which cannot be invoked with a null target below.
            var method = type.GetMethod(
                "CreateFromOptions",
                System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.Static,
                null,
                new[] { typeof(SpiderOptions) },
                null);

            if (method == null)
            {
                throw new SpiderException("存储器未实现 CreateFromOptions 方法,无法自动创建");
            }

            // Static method: no target instance, options is the single argument.
            var storage = method.Invoke(null, new object[] { options });

            if (storage == null)
            {
                throw new SpiderException("创建默认存储器失败");
            }

            return (StorageBase)storage;
        }
Пример #5
0
 /// <summary>
 /// Creates the statistics center and stores its dependencies in private fields.
 /// </summary>
 /// <param name="eventBus">Event bus (message queue) interface.</param>
 /// <param name="options">Spider options.</param>
 /// <param name="statisticsStore">Statistics store interface.</param>
 /// <param name="logger">Logger.</param>
 public StatisticsCenter(
     IEventBus eventBus,
     SpiderOptions options,
     IStatisticsStore statisticsStore,
     ILogger<StatisticsCenter> logger)
 {
     _eventBus = eventBus;
     _options = options;
     _statisticsStore = statisticsStore;
     _logger = logger;
 }
Пример #6
0
        /// <summary>
        /// Creates a spider from seed URIs and options, then applies the HTTP client
        /// credentials and configures the URI crawl validators.
        /// </summary>
        /// <param name="seedUris">URIs stored as the spider's seeds.</param>
        /// <param name="options">Spider options.</param>
        /// <param name="creds">Credentials passed to <c>SetHttpClientCredentials</c>.</param>
        public Spider(List<Uri> seedUris, SpiderOptions options, ProjectCredentials creds)
        {
            // Fields are assigned before the setup calls below run.
            _options = options;
            _seedUris = seedUris;

            SetHttpClientCredentials(creds);
            ConfigureUriCrawlValidators();
        }
 /// <summary>
 /// Creates the proxy background service. The proxy supplier is resolved from
 /// <paramref name="serviceProvider"/> instead of being injected directly.
 /// </summary>
 /// <param name="pool">Proxy service stored as the pool.</param>
 /// <param name="logger">Logger.</param>
 /// <param name="serviceProvider">Service provider used to look up <c>IProxySupplier</c>.</param>
 /// <param name="options">Options wrapper; its <c>Value</c> is stored.</param>
 public ProxyBackgroundService(
     IProxyService pool,
     ILogger<ProxyService> logger,
     IServiceProvider serviceProvider,
     IOptions<SpiderOptions> options)
 {
     // The "as" cast leaves the field null when the lookup yields nothing usable.
     object resolvedSupplier = serviceProvider.GetService(typeof(IProxySupplier));

     _proxySupplier = resolvedSupplier as IProxySupplier;
     _pool = pool;
     _logger = logger;
     _options = options.Value;
 }
Пример #8
0
 /// <summary>
 /// Creates the agent controller and stores its dependencies in private fields.
 /// </summary>
 /// <param name="dbContext">Portal database context.</param>
 /// <param name="eventBus">Message queue.</param>
 /// <param name="options">Options wrapper; its <c>Value</c> is stored.</param>
 /// <param name="mapper">Object mapper.</param>
 public AgentController(
     PortalDbContext dbContext,
     IMessageQueue eventBus,
     IOptions<SpiderOptions> options,
     IMapper mapper)
 {
     _dbContext = dbContext;
     _mq = eventBus;
     _mapper = mapper;
     _options = options.Value;
 }
 /// <summary>
 /// Builds a PostgreSQL entity storage from the spider options.
 /// </summary>
 /// <param name="options">Options supplying the storage type, connection string, and storage flags.</param>
 /// <returns>The configured storage.</returns>
 public new static PostgreSqlEntityStorage CreateFromOptions(SpiderOptions options)
 {
     var storage = new PostgreSqlEntityStorage(options.StorageType, options.StorageConnectionString)
     {
         IgnoreCase     = options.StorageIgnoreCase,
         RetryTimes     = options.StorageRetryTimes,
         UseTransaction = options.StorageUseTransaction
     };

     return storage;
 }
Пример #10
0
 /// <summary>
 /// Creates a data flow context from the given service provider, options, request, and response.
 /// </summary>
 /// <param name="serviceProvider">Service provider.</param>
 /// <param name="options">Spider options.</param>
 /// <param name="request">Request.</param>
 /// <param name="response">Result returned by the downloader.</param>
 public DataFlowContext(
     IServiceProvider serviceProvider,
     SpiderOptions options,
     Request request,
     Response response)
 {
     ServiceProvider = serviceProvider;
     Options = options;
     Request = request;
     Response = response;
 }
 /// <summary>
 /// Initializes the register center base with its message queue, agent store, options, and logger.
 /// </summary>
 /// <param name="eventBus">Message queue.</param>
 /// <param name="downloaderAgentStore">Downloader agent store.</param>
 /// <param name="options">System options.</param>
 /// <param name="logger">Logger.</param>
 protected DownloadAgentRegisterCenterBase(
     IMq eventBus,
     IDownloaderAgentStore downloaderAgentStore,
     SpiderOptions options,
     ILogger logger)
 {
     Mq                   = eventBus;
     DownloaderAgentStore = downloaderAgentStore;
     Options              = options;
     Logger               = logger;
 }
Пример #12
0
        /// <summary>
        /// Builds a MySQL entity storage from the spider options.
        /// </summary>
        /// <param name="options">Options supplying the storage type, connection string, and storage flags.</param>
        /// <returns>The configured storage.</returns>
        public static MySqlEntityStorage CreateFromOptions(SpiderOptions options)
        {
            return new MySqlEntityStorage(options.StorageType, options.StorageConnectionString)
            {
                IgnoreCase = options.StorageIgnoreCase,
                RetryTimes = options.StorageRetryTimes,
                UseTransaction = options.StorageUseTransaction
            };
        }
Пример #13
0
        /// <summary>
        /// Initializes the download agent base with its options, event bus, and logger.
        /// </summary>
        /// <param name="options">Download agent options.</param>
        /// <param name="spiderOptions">Spider options.</param>
        /// <param name="eventBus">Event bus (message queue).</param>
        /// <param name="logger">Logger.</param>
        protected DownloadAgentBase(
            DownloadAgentOptions options,
            SpiderOptions spiderOptions,
            IEventBus eventBus,
            ILogger logger)
        {
            _options = options;
            _spiderOptions = spiderOptions;
            _eventBus = eventBus;
            Logger = logger;
        }
Пример #14
0
        /// <summary>
        /// Builds a MySQL file entity storage from the spider options.
        /// </summary>
        /// <param name="options">
        /// Options supplying the file type name and the ignore-case flag. A null, empty, or
        /// whitespace file type name selects <c>MySqlFileType.InsertSql</c>.
        /// </param>
        /// <returns>The configured storage.</returns>
        public static MySqlFileEntityStorage CreateFromOptions(SpiderOptions options)
        {
            MySqlFileType fileType;

            if (string.IsNullOrWhiteSpace(options.MySqlFileType))
            {
                fileType = MySqlFileType.InsertSql;
            }
            else
            {
                // Enum.Parse throws when the configured name is not a member of the enum.
                fileType = (MySqlFileType)Enum.Parse(typeof(MySqlFileType), options.MySqlFileType);
            }

            var storage = new MySqlFileEntityStorage(fileType)
            {
                IgnoreCase = options.StorageIgnoreCase
            };

            return storage;
        }
Пример #15
0
 /// <summary>
 /// Creates a data flow context from the given service provider, options, request, and
 /// response, and starts with an empty list of follow requests.
 /// </summary>
 /// <param name="serviceProvider">Service provider.</param>
 /// <param name="options">Spider options.</param>
 /// <param name="request">Request.</param>
 /// <param name="response">Result returned by the downloader.</param>
 public DataFlowContext(
     IServiceProvider serviceProvider,
     SpiderOptions options,
     Request request,
     Response response)
 {
     ServiceProvider = serviceProvider;
     Options = options;
     Request = request;
     Response = response;
     FollowRequests = new List<Request>();
 }
        /// <summary>
        /// Downloads an entire site: loads the spider options from a configuration file,
        /// processes the site starting at <paramref name="baseHost"/>, and prints the spider status.
        /// </summary>
        /// <param name="config">The spider configuration file to load.</param>
        /// <param name="baseHost">The URL to start from.</param>
        /// <param name="local">The local path passed to the report.</param>
        public void Download(String config, Uri baseHost, String local)
        {
            var siteReport = new WorldSpiderReport(local);

            var spiderOptions = new SpiderOptions();
            spiderOptions.Load(config);

            var crawler = new Spider(spiderOptions, siteReport);
            crawler.AddURL(baseHost, null, 1);
            crawler.Process();

            Console.WriteLine(crawler.Status);
        }
Пример #17
0
 /// <summary>
 /// Initializes the downloader agent base with its options, event bus, and logger, and
 /// assigns the given network center to <c>Framework.NetworkCenter</c>.
 /// </summary>
 /// <param name="options">Downloader agent options.</param>
 /// <param name="spiderOptions">Spider options.</param>
 /// <param name="eventBus">Event bus (message queue).</param>
 /// <param name="networkCenter">Network center.</param>
 /// <param name="logger">Logger.</param>
 protected DownloaderAgentBase(
     DownloaderAgentOptions options,
     SpiderOptions spiderOptions,
     IEventBus eventBus,
     NetworkCenter networkCenter,
     ILogger logger)
 {
     _options = options;
     _spiderOptions = spiderOptions;
     _eventBus = eventBus;
     Logger = logger;

     Framework.NetworkCenter = networkCenter;
 }
Пример #18
0
        /// <summary>
        /// Initializes the downloader agent base with its options and message queue, and
        /// assigns the given network center to <c>Framework.NetworkCenter</c>. The logger is
        /// left null when the message queue is a <c>ThroughMessageQueue</c>.
        /// </summary>
        /// <param name="options">Downloader agent options.</param>
        /// <param name="spiderOptions">Spider options.</param>
        /// <param name="eventBus">Message queue.</param>
        /// <param name="networkCenter">Network center.</param>
        /// <param name="logger">Logger.</param>
        protected DownloaderAgentBase(
            DownloaderAgentOptions options,
            SpiderOptions spiderOptions,
            IMq eventBus,
            NetworkCenter networkCenter,
            ILogger logger)
        {
            _options = options;
            _spiderOptions = spiderOptions;
            _mq = eventBus;
            Framework.NetworkCenter = networkCenter;

            if (_mq is ThroughMessageQueue)
            {
                Logger = null;
            }
            else
            {
                Logger = logger;
            }
        }
Пример #19
0
        /// <summary>
        /// Creates the proxy validator and validates the configured proxy test URL.
        /// </summary>
        /// <param name="options">Options wrapper; its <c>Value</c> is stored and supplies <c>ProxyTestUrl</c>.</param>
        /// <param name="httpClientFactory">HTTP client factory.</param>
        /// <param name="logger">Logger.</param>
        /// <exception cref="ArgumentException">The proxy test URL cannot be parsed as a URI.</exception>
        public DefaultProxyValidator(
            IOptions<SpiderOptions> options,
            IHttpClientFactory httpClientFactory,
            ILogger<DefaultProxyValidator> logger)
        {
            _httpClientFactory = httpClientFactory;
            _logger = logger;
            _options = options.Value;

            var testUrl = _options.ProxyTestUrl;

            testUrl.NotNullOrWhiteSpace(nameof(_options.ProxyTestUrl));

            // NOTE(review): RelativeOrAbsolute accepts relative URIs too; confirm that is intended.
            var parsed = Uri.TryCreate(testUrl, UriKind.RelativeOrAbsolute, out _);

            if (!parsed)
            {
                throw new ArgumentException($"{nameof(_options.ProxyTestUrl)} is not a valid uri");
            }
        }
Пример #20
0
 /// <summary>
 /// Creates the spider base, stores its dependencies, and subscribes
 /// <c>ConsoleCancelKeyPress</c> to <see cref="Console.CancelKeyPress"/>.
 /// </summary>
 /// <param name="eventBus">Event bus.</param>
 /// <param name="statisticsService">Statistics service.</param>
 /// <param name="options">Spider options.</param>
 /// <param name="logger">Logger.</param>
 /// <param name="services">Service provider.</param>
 public Spider(
     IEventBus eventBus,
     IStatisticsService statisticsService,
     SpiderOptions options,
     ILogger<Spider> logger,
     IServiceProvider services)
 {
     _eventBus = eventBus;
     _statisticsService = statisticsService;
     _options = options;
     _logger = logger;
     _services = services;

     // NOTE(review): no matching unsubscribe is visible in this block.
     Console.CancelKeyPress += ConsoleCancelKeyPress;
 }
        /// <summary>
        /// Registers the spider services on the service collection.
        /// </summary>
        /// <param name="services">Service collection to register into.</param>
        /// <param name="action">Optional callback that configures the new <c>SpiderOptions</c> before it is registered.</param>
        /// <returns>A spider builder created from the same options instance.</returns>
        public static ISpiderBuilder AddSpider(this IServiceCollection services, Action<SpiderOptions> action = null)
        {
            var spiderOptions = new SpiderOptions();

            if (action != null)
            {
                action(spiderOptions);
            }

            // Same registrations, in the same order, expressed as one fluent chain.
            services.AddSingleton(spiderOptions)
                    .AddHttpClient()
                    .AddSingleton<ISpiderHttpClientFactory, SpiderHttpClientFactory>()
                    .AddSingleton<IMonitorHealthJob, MonitorHealthJob>()
                    .AddHttpContextAccessor();

            return new SpiderBuilder(spiderOptions);
        }
Пример #22
0
        /// <summary>
        /// Creates the Kafka event bus and builds a producer whose values are
        /// serialized with <c>ProtobufSerializer</c>.
        /// </summary>
        /// <param name="options">Spider options; supplies the Kafka bootstrap servers.</param>
        /// <param name="logger">Logger.</param>
        public KafkaEventBus(SpiderOptions options, ILogger<KafkaEventBus> logger)
        {
            _options = options;
            _logger = logger;

            var producerConfig = new ProducerConfig
            {
                BootstrapServers = options.KafkaBootstrapServers,
                Partitioner = Partitioner.ConsistentRandom
            };

            _producer = new ProducerBuilder<Null, Event>(producerConfig)
                        .SetValueSerializer(new ProtobufSerializer<Event>())
                        .Build();
        }
Пример #23
0
        /// <summary>
        /// Downloads an entire site: loads the spider options from a configuration file,
        /// turns on console and file logging, processes the site starting at
        /// <paramref name="baseURL"/>, and prints the spider status.
        /// </summary>
        /// <param name="config">The spider configuration file to load.</param>
        /// <param name="baseURL">The URL to start from.</param>
        /// <param name="local">The local path passed to the report.</param>
        public void Download(String config, Uri baseURL, String local)
        {
            var siteReport = new SpiderReport(local);

            var spiderOptions = new SpiderOptions();
            spiderOptions.Load(config);

            var crawler = new Spider(spiderOptions, siteReport);

            // Log to the console and to a fixed file; Clear() is called before crawling starts.
            var logging = crawler.Logging;
            logging.Console = true;
            logging.Filename = "c:\\spider.log";
            logging.Clear();

            crawler.AddURL(baseURL, null, 1);
            crawler.Process();

            Console.WriteLine(crawler.Status);
        }
Пример #24
0
 /// <summary>
 /// Initializes the downloader agent. Creates an HTTP proxy pool only when
 /// <c>options.ProxySupplyUrl</c> is not null or empty.
 /// </summary>
 /// <param name="agentId">Agent identifier; checked with <c>Check.NotNull</c>.</param>
 /// <param name="name">Agent name; checked with <c>Check.NotNull</c>.</param>
 /// <param name="mq">Message queue.</param>
 /// <param name="options">Spider options; supplies the proxy supply URL.</param>
 /// <param name="downloaderAllocator">Downloader allocator.</param>
 /// <param name="loggerFactory">Factory used to create a logger named after the runtime type.</param>
 protected AbstractDownloaderAgent(
     string agentId,
     string name,
     IMessageQueue mq,
     SpiderOptions options,
     IDownloaderAllocator downloaderAllocator,
     ILoggerFactory loggerFactory)
 {
     Check.NotNull(agentId, nameof(agentId));
     Check.NotNull(name, nameof(name));

     _agentId = agentId;
     _name = name;
     _mq = mq;
     _downloaderAllocator = downloaderAllocator;
     Logger = loggerFactory.CreateLogger(GetType().FullName);

     var proxySupplyUrl = options.ProxySupplyUrl;

     if (string.IsNullOrEmpty(proxySupplyUrl))
     {
         return;
     }

     _httpProxyPool = new HttpProxyPool(new HttpRowTextProxySupplier(proxySupplyUrl));
 }
Пример #25
0
        /// <summary>
        /// Creates the controller and stores its dependencies. It also resolves a transport
        /// service, reads the current UI culture and a localized string, and tags the current
        /// activity when one exists.
        /// </summary>
        /// <param name="logger">Logger.</param>
        /// <param name="func">Factory that returns a transport service for a given name.</param>
        /// <param name="stringLocalizer">String localizer.</param>
        /// <param name="httpContextAccessor">HTTP context accessor.</param>
        /// <param name="spiderHttpClientFactory">Spider HTTP client factory.</param>
        /// <param name="spiderOptions">Spider options.</param>
        /// <param name="userApi">User API client.</param>
        public WeatherForecastController(
            ILogger<WeatherForecastController> logger,
            Func<string, ITransPortService> func,
            IStringLocalizer<WeatherForecastController> stringLocalizer,
            IHttpContextAccessor httpContextAccessor,
            ISpiderHttpClientFactory spiderHttpClientFactory,
            SpiderOptions spiderOptions,
            IUserApi userApi)
        {
            _logger = logger;
            _userApi = userApi;
            _httpContextAccessor = httpContextAccessor;
            _spiderHttpClientFactory = spiderHttpClientFactory;
            _spiderOptions = spiderOptions;

            // The results of these three lookups are not used further in this constructor.
            var transport = func(MongodbConstant.MONGODBNAME);
            var uiCulture = CultureInfo.CurrentUICulture;
            var localized = stringLocalizer["test"];

            var currentActivity = System.Diagnostics.Activity.Current;

            if (currentActivity != null)
            {
                currentActivity.AddTag("1", "1");
                currentActivity.AddBaggage("1", "1");
            }
        }
        /// <summary>
        /// Application entry point. Expects the path to a spider configuration file as the
        /// first argument, loads it into <c>SpiderOptions</c>, and runs the main form with it.
        /// When no argument is given, a message box explains the usage and the program exits.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the configuration file path.</param>
        static void Main(String[] args)
        {
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            // Validate the arguments before constructing the form, so the early exit
            // does not leave behind a MainForm that is never shown or disposed.
            if (args.Length < 1)
            {
                MessageBox.Show("Please pass a path to a spider configuration file as an argument to this program (i.e. Recipe13_4 c:\\spider.conf).", "Heaton Research Spider");
                return;
            }

            MainForm form = new MainForm();

            SpiderOptions options = new SpiderOptions();

            options.Load(args[0]);
            form.Options = options;

            Application.Run(form);
        }
Пример #27
0
        /// <summary>
        /// Creates the Kafka event bus: stores a consumer configuration and builds a
        /// producer with string values.
        /// </summary>
        /// <param name="options">Spider options; supplies the consumer group and bootstrap servers.</param>
        /// <param name="logger">Logger.</param>
        public KafkaEventBus(SpiderOptions options, ILogger<KafkaEventBus> logger)
        {
            _logger = logger;

            var consumerConfig = new ConsumerConfig
            {
                GroupId = options.KafkaConsumerGroup,
                BootstrapServers = options.KafkaBootstrapServers,
                // AutoOffsetReset decides where to start when the consumer group has no
                // committed offsets yet for the topic/partitions of interest. Earliest
                // means consumption starts from the oldest available message in that case.
                AutoOffsetReset = AutoOffsetReset.Earliest
            };
            _config = consumerConfig;

            var producerConfig = new ProducerConfig
            {
                BootstrapServers = options.KafkaBootstrapServers
            };
            var producerBuilder = new ProducerBuilder<Null, string>(producerConfig);

            _producer = producerBuilder.Build();
        }
Пример #28
0
 /// <summary>
 /// Creates the default download agent register center. Every argument is forwarded
 /// unchanged to the base constructor.
 /// </summary>
 /// <param name="eventBus">Event bus (message queue).</param>
 /// <param name="downloaderAgentStore">Downloader agent store.</param>
 /// <param name="options">System options.</param>
 /// <param name="logger">Logger.</param>
 public DefaultDownloadAgentRegisterCenter(
     IEventBus eventBus,
     IDownloaderAgentStore downloaderAgentStore,
     SpiderOptions options,
     ILogger<DefaultDownloadAgentRegisterCenter> logger)
     : base(eventBus, downloaderAgentStore, options, logger)
 {
 }
Пример #29
0
        /// <summary>
        /// Builds an HBase storage from the spider options.
        /// </summary>
        /// <param name="options">Options supplying <c>HBaseRestServer</c>.</param>
        /// <returns>The new storage.</returns>
        public static HBaseStorage CreateFromOptions(SpiderOptions options)
        {
            return new HBaseStorage(options.HBaseRestServer);
        }
Пример #30
0
		/// <summary>
		/// Creates the MySQL downloader agent store and keeps the given options.
		/// </summary>
		/// <param name="options">Spider options stored for later use.</param>
		public MySqlDownloaderAgentStore(SpiderOptions options) => _options = options;