/// <summary>
/// Creates a local downloader agent. Every argument is forwarded unchanged to the base constructor.
/// </summary>
/// <param name="options">Downloader agent options.</param>
/// <param name="spiderOptions">Spider options.</param>
/// <param name="eventBus">Message queue.</param>
/// <param name="networkCenter">Network center.</param>
/// <param name="logger">Logger.</param>
public LocalDownloaderAgent(
    DownloaderAgentOptions options,
    SpiderOptions spiderOptions,
    IEventBus eventBus,
    NetworkCenter networkCenter,
    ILogger<LocalDownloaderAgent> logger)
    : base(options, spiderOptions, eventBus, networkCenter, logger)
{
    // No additional initialization. Disabled statement kept from the original:
    // ConfigureDownloader = downloader => downloader.Logger = null;
}
/// <summary>
/// Creates a local download agent. Every argument is forwarded unchanged to the base constructor.
/// </summary>
/// <param name="options">Download agent options.</param>
/// <param name="spiderOptions">Spider options.</param>
/// <param name="eventBus">Message queue.</param>
/// <param name="logger">Logger.</param>
public LocalDownloadedAgent(
    DownloadAgentOptions options,
    SpiderOptions spiderOptions,
    IEventBus eventBus,
    ILogger<LocalDownloadedAgent> logger)
    : base(options, spiderOptions, eventBus, logger)
{
    // No additional initialization. Disabled statement kept from the original:
    // ConfigureDownload = download => download.Logger = null;
}
/// <summary>
/// Spiders the site starting at <paramref name="url"/>, prints the spider's status,
/// and then prints every bad link recorded in the report (or a message saying none were found).
/// </summary>
/// <param name="url">The URL to check for bad links.</param>
public void check(Uri url)
{
    SpiderOptions options = new SpiderOptions();
    options.WorkloadManager = typeof(MemoryWorkloadManager).FullName;

    LinkReport report = new LinkReport();
    Spider spider = new Spider(options, report);

    spider.AddURL(url, null, 1);
    spider.Process();
    Console.WriteLine(spider.Status);

    // Nothing recorded: report that and stop.
    if (report.Bad.Count <= 0)
    {
        Console.WriteLine("No bad links were found.");
        return;
    }

    Console.WriteLine("Bad Links Found:");
    foreach (string badLink in report.Bad)
    {
        Console.WriteLine(badLink);
    }
}
/// <summary>
/// Creates the storage configured in <paramref name="options"/> by reflection.
/// The type named by <c>options.Storage</c> must be assignable to <see cref="StorageBase"/> and
/// expose a public static <c>CreateFromOptions(SpiderOptions)</c> factory method.
/// </summary>
/// <param name="options">Spider options; <c>Storage</c> is the type name handed to <c>Type.GetType</c>.</param>
/// <returns>The storage instance returned by the factory method.</returns>
/// <exception cref="SpiderException">
/// The type cannot be resolved, is not a <see cref="StorageBase"/>, has no matching factory method,
/// or the factory returned <c>null</c>.
/// </exception>
internal static StorageBase GetDefaultStorage(SpiderOptions options)
{
    var type = Type.GetType(options.Storage);
    if (type == null)
    {
        throw new SpiderException("存储器类型配置不正确,或者未添加对应的库");
    }

    if (!typeof(StorageBase).IsAssignableFrom(type))
    {
        throw new SpiderException("存储器类型配置不正确");
    }

    // Request the public static overload that accepts SpiderOptions explicitly.
    // A name-only lookup throws AmbiguousMatchException when overloads exist and also
    // matches instance methods, which cannot be invoked with a null target below.
    var method = type.GetMethod(
        "CreateFromOptions",
        System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.Static,
        null,
        new[] { typeof(SpiderOptions) },
        null);
    if (method == null)
    {
        throw new SpiderException("存储器未实现 CreateFromOptions 方法,无法自动创建");
    }

    // Static factory: no target instance, the options are the only argument.
    var storage = method.Invoke(null, new object[] { options });
    if (storage == null)
    {
        throw new SpiderException("创建默认存储器失败");
    }

    return (StorageBase)storage;
}
/// <summary>
/// Creates the statistics center and stores the supplied dependencies in its fields.
/// </summary>
/// <param name="eventBus">Message queue interface.</param>
/// <param name="options">Spider options.</param>
/// <param name="statisticsStore">Statistics store interface.</param>
/// <param name="logger">Logger.</param>
public StatisticsCenter(
    IEventBus eventBus,
    SpiderOptions options,
    IStatisticsStore statisticsStore,
    ILogger<StatisticsCenter> logger)
{
    _eventBus = eventBus;
    _options = options;
    _statisticsStore = statisticsStore;
    _logger = logger;
}
/// <summary>
/// Creates a spider: stores the seed URIs and options, then applies the credentials
/// and configures the URI crawl validators.
/// </summary>
/// <param name="seedUris">Seed URIs stored in <c>_seedUris</c>.</param>
/// <param name="options">Spider options stored in <c>_options</c>.</param>
/// <param name="creds">Credentials passed to <c>SetHttpClientCredentials</c>.</param>
public Spider(List<Uri> seedUris, SpiderOptions options, ProjectCredentials creds)
{
    // Fields are assigned before the helper calls, as in the original ordering.
    _options = options;
    _seedUris = seedUris;

    SetHttpClientCredentials(creds);
    ConfigureUriCrawlValidators();
}
/// <summary>
/// Creates the proxy background service and stores its dependencies.
/// </summary>
/// <param name="pool">Proxy service stored in <c>_pool</c>.</param>
/// <param name="logger">Logger.</param>
/// <param name="serviceProvider">Provider used to look up an <c>IProxySupplier</c>.</param>
/// <param name="options">Options wrapper; its <c>Value</c> is stored in <c>_options</c>.</param>
public ProxyBackgroundService(
    IProxyService pool,
    ILogger<ProxyService> logger,
    IServiceProvider serviceProvider,
    IOptions<SpiderOptions> options)
{
    // `as` leaves the field null when the provider returns null or an object
    // that is not an IProxySupplier.
    object resolved = serviceProvider.GetService(typeof(IProxySupplier));
    _proxySupplier = resolved as IProxySupplier;

    _logger = logger;
    _pool = pool;
    _options = options.Value;
}
/// <summary>
/// Creates the controller and stores its dependencies.
/// </summary>
/// <param name="dbContext">Database context stored in <c>_dbContext</c>.</param>
/// <param name="eventBus">Message queue stored in <c>_mq</c>.</param>
/// <param name="options">Options wrapper; its <c>Value</c> is stored in <c>_options</c>.</param>
/// <param name="mapper">Mapper stored in <c>_mapper</c>.</param>
public AgentController(
    PortalDbContext dbContext,
    IMessageQueue eventBus,
    IOptions<SpiderOptions> options,
    IMapper mapper)
{
    _mapper = mapper;
    _mq = eventBus;
    _dbContext = dbContext;
    _options = options.Value;
}
/// <summary>
/// Builds a <see cref="PostgreSqlEntityStorage"/> from the given configuration.
/// </summary>
/// <param name="options">Configuration supplying the storage type, connection string,
/// ignore-case flag, retry count and transaction flag.</param>
/// <returns>The configured storage.</returns>
public new static PostgreSqlEntityStorage CreateFromOptions(SpiderOptions options)
{
    var storage = new PostgreSqlEntityStorage(options.StorageType, options.StorageConnectionString);

    storage.IgnoreCase = options.StorageIgnoreCase;
    storage.RetryTimes = options.StorageRetryTimes;
    storage.UseTransaction = options.StorageUseTransaction;

    return storage;
}
/// <summary>
/// Creates a data flow context holding the request, response, options and service provider.
/// </summary>
/// <param name="serviceProvider">Service provider.</param>
/// <param name="options">Spider options.</param>
/// <param name="request">Request.</param>
/// <param name="response">Result returned by the downloader.</param>
public DataFlowContext(
    IServiceProvider serviceProvider,
    SpiderOptions options,
    Request request,
    Response response)
{
    ServiceProvider = serviceProvider;
    Options = options;
    Request = request;
    Response = response;
}
/// <summary>
/// Base constructor: stores the message queue, agent store, options and logger.
/// </summary>
/// <param name="eventBus">Message queue.</param>
/// <param name="downloaderAgentStore">Downloader agent store.</param>
/// <param name="options">System options.</param>
/// <param name="logger">Logger.</param>
protected DownloadAgentRegisterCenterBase(
    IMq eventBus,
    IDownloaderAgentStore downloaderAgentStore,
    SpiderOptions options,
    ILogger logger)
{
    Mq = eventBus;
    DownloaderAgentStore = downloaderAgentStore;
    Options = options;
    Logger = logger;
}
/// <summary>
/// Builds a <see cref="MySqlEntityStorage"/> from the given configuration.
/// </summary>
/// <param name="options">Configuration supplying the storage type, connection string,
/// ignore-case flag, retry count and transaction flag.</param>
/// <returns>The configured storage.</returns>
public static MySqlEntityStorage CreateFromOptions(SpiderOptions options)
{
    return new MySqlEntityStorage(options.StorageType, options.StorageConnectionString)
    {
        IgnoreCase = options.StorageIgnoreCase,
        RetryTimes = options.StorageRetryTimes,
        UseTransaction = options.StorageUseTransaction
    };
}
/// <summary>
/// Base constructor: stores the agent options, spider options, message queue and logger.
/// </summary>
/// <param name="options">Download agent options.</param>
/// <param name="spiderOptions">Spider options.</param>
/// <param name="eventBus">Message queue.</param>
/// <param name="logger">Logger.</param>
protected DownloadAgentBase(
    DownloadAgentOptions options,
    SpiderOptions spiderOptions,
    IEventBus eventBus,
    ILogger logger)
{
    _options = options;
    _spiderOptions = spiderOptions;
    _eventBus = eventBus;
    Logger = logger;
}
/// <summary>
/// Builds a <see cref="MySqlFileEntityStorage"/> from the given configuration.
/// </summary>
/// <param name="options">
/// Configuration. <c>MySqlFileType</c> names a <c>MySqlFileType</c> enum member; when it is null,
/// empty or whitespace, <c>MySqlFileType.InsertSql</c> is used.
/// </param>
/// <returns>The configured storage, with <c>IgnoreCase</c> taken from <c>options.StorageIgnoreCase</c>.</returns>
/// <exception cref="ArgumentException">The configured file type is not a member of the enum.</exception>
public static MySqlFileEntityStorage CreateFromOptions(SpiderOptions options)
{
    var configuredFileType = options.MySqlFileType;

    var fileType = MySqlFileType.InsertSql;
    if (!string.IsNullOrWhiteSpace(configuredFileType))
    {
        // The value comes from configuration text, so parse it case-insensitively:
        // every spelling that was accepted before still parses to the same member.
        fileType = (MySqlFileType)Enum.Parse(typeof(MySqlFileType), configuredFileType, true);
    }

    return new MySqlFileEntityStorage(fileType)
    {
        IgnoreCase = options.StorageIgnoreCase
    };
}
/// <summary>
/// Creates a data flow context holding the request, response, options and service provider,
/// and starts with an empty list of follow-up requests.
/// </summary>
/// <param name="serviceProvider">Service provider.</param>
/// <param name="options">Spider options.</param>
/// <param name="request">Request.</param>
/// <param name="response">Result returned by the downloader.</param>
public DataFlowContext(
    IServiceProvider serviceProvider,
    SpiderOptions options,
    Request request,
    Response response)
{
    ServiceProvider = serviceProvider;
    Options = options;
    Request = request;
    Response = response;
    FollowRequests = new List<Request>();
}
/// <summary>
/// Downloads a site: loads the spider configuration, spiders from the start URL using a
/// <c>WorldSpiderReport</c> created for the local path, and prints the spider's status.
/// </summary>
/// <param name="config">The spider configuration file to load.</param>
/// <param name="baseHost">The URL to start from.</param>
/// <param name="local">The local path passed to the report.</param>
public void Download(String config, Uri baseHost, String local)
{
    WorldSpiderReport report = new WorldSpiderReport(local);

    SpiderOptions options = new SpiderOptions();
    options.Load(config);

    Spider spider = new Spider(options, report);
    spider.AddURL(baseHost, null, 1);
    spider.Process();

    Console.WriteLine(spider.Status);
}
/// <summary>
/// Base constructor: stores the options, message queue and logger, and assigns
/// <c>Framework.NetworkCenter</c> from <paramref name="networkCenter"/>.
/// </summary>
/// <param name="options">Downloader agent options.</param>
/// <param name="spiderOptions">Spider options.</param>
/// <param name="eventBus">Message queue.</param>
/// <param name="networkCenter">Network center.</param>
/// <param name="logger">Logger.</param>
protected DownloaderAgentBase(
    DownloaderAgentOptions options,
    SpiderOptions spiderOptions,
    IEventBus eventBus,
    NetworkCenter networkCenter,
    ILogger logger)
{
    _options = options;
    _spiderOptions = spiderOptions;
    _eventBus = eventBus;

    Framework.NetworkCenter = networkCenter;
    Logger = logger;
}
/// <summary>
/// Base constructor: stores the options and message queue, assigns
/// <c>Framework.NetworkCenter</c>, and sets the logger.
/// </summary>
/// <param name="options">Downloader agent options.</param>
/// <param name="spiderOptions">Spider options.</param>
/// <param name="eventBus">Message queue.</param>
/// <param name="networkCenter">Network center.</param>
/// <param name="logger">Logger; not used when the message queue is a <c>ThroughMessageQueue</c>.</param>
protected DownloaderAgentBase(
    DownloaderAgentOptions options,
    SpiderOptions spiderOptions,
    IMq eventBus,
    NetworkCenter networkCenter,
    ILogger logger)
{
    _options = options;
    _spiderOptions = spiderOptions;
    _mq = eventBus;

    Framework.NetworkCenter = networkCenter;

    // With a ThroughMessageQueue the logger is deliberately left null.
    if (_mq is ThroughMessageQueue)
    {
        Logger = null;
    }
    else
    {
        Logger = logger;
    }
}
/// <summary>
/// Creates the validator, stores its dependencies and validates the configured proxy test URL.
/// </summary>
/// <param name="options">Options wrapper; its <c>Value</c> is stored in <c>_options</c>.</param>
/// <param name="httpClientFactory">HTTP client factory.</param>
/// <param name="logger">Logger.</param>
/// <exception cref="ArgumentException"><c>ProxyTestUrl</c> cannot be parsed as a URI.</exception>
public DefaultProxyValidator(
    IOptions<SpiderOptions> options,
    IHttpClientFactory httpClientFactory,
    ILogger<DefaultProxyValidator> logger)
{
    _httpClientFactory = httpClientFactory;
    _logger = logger;
    _options = options.Value;

    var testUrl = _options.ProxyTestUrl;
    testUrl.NotNullOrWhiteSpace(nameof(_options.ProxyTestUrl));

    // NOTE(review): UriKind.RelativeOrAbsolute accepts relative references as well,
    // so this is a very permissive check - confirm whether an absolute URL is required.
    var isParsable = Uri.TryCreate(testUrl, UriKind.RelativeOrAbsolute, out _);
    if (!isParsable)
    {
        throw new ArgumentException($"{nameof(_options.ProxyTestUrl)} is not a valid uri");
    }
}
/// <summary>
/// Creates the spider, stores its dependencies and subscribes
/// <c>ConsoleCancelKeyPress</c> to <c>Console.CancelKeyPress</c>.
/// </summary>
/// <param name="eventBus">Message queue.</param>
/// <param name="statisticsService">Statistics service.</param>
/// <param name="options">Spider options.</param>
/// <param name="logger">Logger.</param>
/// <param name="services">Service provider interface.</param>
public Spider(
    IEventBus eventBus,
    IStatisticsService statisticsService,
    SpiderOptions options,
    ILogger<Spider> logger,
    IServiceProvider services)
{
    _eventBus = eventBus;
    _statisticsService = statisticsService;
    _options = options;
    _logger = logger;
    _services = services;

    // NOTE(review): no matching unsubscribe is visible here - confirm the handler
    // is removed elsewhere when the spider is disposed.
    Console.CancelKeyPress += ConsoleCancelKeyPress;
}
/// <summary>
/// Registers the spider services on <paramref name="services"/> and returns a builder
/// created from the same <c>SpiderOptions</c> instance that was registered.
/// </summary>
/// <param name="services">Service collection to register into.</param>
/// <param name="action">Optional callback that configures the new options; skipped when null.</param>
/// <returns>A <c>SpiderBuilder</c> wrapping the options.</returns>
public static ISpiderBuilder AddSpider(this IServiceCollection services, Action<SpiderOptions> action = null)
{
    var spiderOptions = new SpiderOptions();
    if (action != null)
    {
        action(spiderOptions);
    }

    // Registration order is kept exactly as in the original.
    services.AddSingleton(spiderOptions);
    services.AddHttpClient();
    services.AddSingleton<ISpiderHttpClientFactory, SpiderHttpClientFactory>();
    services.AddSingleton<IMonitorHealthJob, MonitorHealthJob>();
    services.AddHttpContextAccessor();

    return new SpiderBuilder(spiderOptions);
}
/// <summary>
/// Creates the Kafka event bus: stores the options and logger, and builds a producer for
/// <c>Event</c> values serialized with <c>ProtobufSerializer</c>.
/// </summary>
/// <param name="options">Spider options; <c>KafkaBootstrapServers</c> configures the producer.</param>
/// <param name="logger">Logger.</param>
public KafkaEventBus(SpiderOptions options, ILogger<KafkaEventBus> logger)
{
    _options = options;
    _logger = logger;

    var producerConfig = new ProducerConfig();
    producerConfig.BootstrapServers = options.KafkaBootstrapServers;
    producerConfig.Partitioner = Partitioner.ConsistentRandom;

    _producer = new ProducerBuilder<Null, Event>(producerConfig)
        .SetValueSerializer(new ProtobufSerializer<Event>())
        .Build();
}
/// <summary>
/// Downloads a site: loads the spider configuration, turns on console and file logging,
/// spiders from the start URL and prints the spider's status.
/// </summary>
/// <param name="config">The spider configuration file to load.</param>
/// <param name="baseURL">The URL to start from.</param>
/// <param name="local">The local path passed to the report.</param>
public void Download(String config, Uri baseURL, String local)
{
    SpiderReport report = new SpiderReport(local);

    SpiderOptions options = new SpiderOptions();
    options.Load(config);

    Spider spider = new Spider(options, report);

    // Logging setup: console output plus a fixed log file, then Clear() is called on the log.
    spider.Logging.Console = true;
    spider.Logging.Filename = "c:\\spider.log";
    spider.Logging.Clear();

    spider.AddURL(baseURL, null, 1);
    spider.Process();

    Console.WriteLine(spider.Status);
}
/// <summary>
/// Base constructor: validates the agent id and name, stores the dependencies, creates a
/// logger named after the concrete type, and creates an HTTP proxy pool when a proxy
/// supply URL is configured.
/// </summary>
/// <param name="agentId">Agent id; checked with <c>Check.NotNull</c>.</param>
/// <param name="name">Agent name; checked with <c>Check.NotNull</c>.</param>
/// <param name="mq">Message queue.</param>
/// <param name="options">Spider options; <c>ProxySupplyUrl</c> decides whether a proxy pool is created.</param>
/// <param name="downloaderAllocator">Downloader allocator.</param>
/// <param name="loggerFactory">Factory used to create the logger.</param>
protected AbstractDownloaderAgent(
    string agentId,
    string name,
    IMessageQueue mq,
    SpiderOptions options,
    IDownloaderAllocator downloaderAllocator,
    ILoggerFactory loggerFactory)
{
    Check.NotNull(agentId, nameof(agentId));
    Check.NotNull(name, nameof(name));

    _agentId = agentId;
    _name = name;
    _mq = mq;
    _downloaderAllocator = downloaderAllocator;

    var loggerName = GetType().FullName;
    Logger = loggerFactory.CreateLogger(loggerName);

    // A proxy pool is only created when a supply URL is present.
    var proxySupplyUrl = options.ProxySupplyUrl;
    if (string.IsNullOrEmpty(proxySupplyUrl))
    {
        return;
    }

    var supplier = new HttpRowTextProxySupplier(proxySupplyUrl);
    _httpProxyPool = new HttpProxyPool(supplier);
}
/// <summary>
/// Creates the controller, stores its dependencies, and exercises the transport-service factory,
/// the string localizer and the current diagnostics activity.
/// </summary>
/// <param name="logger">Logger.</param>
/// <param name="func">Factory invoked with <c>MongodbConstant.MONGODBNAME</c>; the result is not kept.</param>
/// <param name="stringLocalizer">Localizer queried for the key "test"; the result is not kept.</param>
/// <param name="httpContextAccessor">HTTP context accessor.</param>
/// <param name="spiderHttpClientFactory">Spider HTTP client factory.</param>
/// <param name="spiderOptions">Spider options.</param>
/// <param name="userApi">User API client.</param>
public WeatherForecastController(
    ILogger<WeatherForecastController> logger,
    Func<string, ITransPortService> func,
    IStringLocalizer<WeatherForecastController> stringLocalizer,
    IHttpContextAccessor httpContextAccessor,
    ISpiderHttpClientFactory spiderHttpClientFactory,
    SpiderOptions spiderOptions,
    IUserApi userApi)
{
    _logger = logger;
    _userApi = userApi;
    _httpContextAccessor = httpContextAccessor;
    _spiderHttpClientFactory = spiderHttpClientFactory;
    _spiderOptions = spiderOptions;

    // The results below are never used; the calls are kept, in the original order,
    // so that the constructor performs the same work as before.
    _ = func.Invoke(MongodbConstant.MONGODBNAME);
    _ = CultureInfo.CurrentUICulture;
    _ = stringLocalizer["test"];

    // Tag the ambient activity, if there is one.
    var currentActivity = System.Diagnostics.Activity.Current;
    if (currentActivity != null)
    {
        currentActivity.AddTag("1", "1");
        currentActivity.AddBaggage("1", "1");
    }
}
/// <summary>
/// Application entry point. Expects the path to a spider configuration file as the first
/// argument; shows a message box and exits when it is missing. Otherwise loads the options,
/// hands them to the main form and runs the message loop.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the configuration file path.</param>
static void Main(String[] args)
{
    Application.EnableVisualStyles();
    Application.SetCompatibleTextRenderingDefault(false);

    // Validate the arguments before creating the form, so that no form is constructed
    // (and left undisposed) when the program is going to exit immediately.
    if (args.Length < 1)
    {
        MessageBox.Show("Please pass a path to a spider configuration file as an argument to this program (i.e. Recipe13_4 c:\\spider.conf).", "Heaton Research Spider");
        return;
    }

    SpiderOptions options = new SpiderOptions();
    options.Load(args[0]);

    // The using block guarantees the form is disposed even if Run throws.
    using (MainForm form = new MainForm())
    {
        form.Options = options;
        Application.Run(form);
    }
}
/// <summary>
/// Creates the Kafka event bus: stores the logger, prepares the consumer configuration
/// and builds a string-valued producer.
/// </summary>
/// <param name="options">Spider options; supplies the Kafka consumer group and bootstrap servers.</param>
/// <param name="logger">Logger.</param>
public KafkaEventBus(SpiderOptions options, ILogger<KafkaEventBus> logger)
{
    _logger = logger;

    var consumerConfig = new ConsumerConfig();
    consumerConfig.GroupId = options.KafkaConsumerGroup;
    consumerConfig.BootstrapServers = options.KafkaBootstrapServers;
    // AutoOffsetReset determines the start offset when the consumer group has no committed
    // offsets yet for the topic/partitions of interest; Earliest starts from the earliest message.
    consumerConfig.AutoOffsetReset = AutoOffsetReset.Earliest;
    _config = consumerConfig;

    var producerConfig = new ProducerConfig();
    producerConfig.BootstrapServers = options.KafkaBootstrapServers;

    var producerBuilder = new ProducerBuilder<Null, string>(producerConfig);
    _producer = producerBuilder.Build();
}
/// <summary>
/// Creates the default download agent register center. Every argument is forwarded
/// unchanged to the base constructor.
/// </summary>
/// <param name="eventBus">Message queue.</param>
/// <param name="downloaderAgentStore">Downloader agent store.</param>
/// <param name="options">System options.</param>
/// <param name="logger">Logger.</param>
public DefaultDownloadAgentRegisterCenter(
    IEventBus eventBus,
    IDownloaderAgentStore downloaderAgentStore,
    SpiderOptions options,
    ILogger<DefaultDownloadAgentRegisterCenter> logger)
    : base(eventBus, downloaderAgentStore, options, logger)
{
    // No additional initialization.
}
/// <summary>
/// Builds an <see cref="HBaseStorage"/> from the given configuration.
/// </summary>
/// <param name="options">Configuration; <c>HBaseRestServer</c> is passed to the storage constructor.</param>
/// <returns>The new storage.</returns>
public static HBaseStorage CreateFromOptions(SpiderOptions options)
{
    return new HBaseStorage(options.HBaseRestServer);
}
/// <summary>
/// Creates the store and keeps the supplied spider options in <c>_options</c>.
/// </summary>
/// <param name="options">Spider options.</param>
public MySqlDownloaderAgentStore(SpiderOptions options) => _options = options;