/// <summary>
/// Re-injects cookies once the scheduled refresh time has passed and then
/// schedules the next refresh.
/// </summary>
/// <param name="page">Downloaded page; not read by this handler.</param>
/// <param name="spider">Spider handed to the cookie injector.</param>
public override void Handle(ref Page page, ISpider spider)
{
    // Still before the scheduled refresh time: nothing to do.
    if (DateTime.Now <= _nextTime)
    {
        return;
    }

    // A missing injector is tolerated; the schedule still advances.
    _cookieInjector?.Inject(spider);

    // The next deadline is computed after the injection call has returned.
    _nextTime = DateTime.Now.AddSeconds(_dueTime);
}
/// <summary>
/// Re-injects cookies and fails the page when the downloaded content contains
/// the configured marker string.
/// </summary>
/// <param name="page">Downloaded page whose content is inspected.</param>
/// <param name="spider">Spider handed to the cookie injector.</param>
/// <exception cref="SpiderException">
/// Thrown when the page content contains the marker string.
/// </exception>
public override void Handle(ref Page page, ISpider spider)
{
    // Nothing to inspect, or no marker configured: leave the page alone.
    // (A null marker would make Contains throw; an empty one matches every page.)
    if (string.IsNullOrEmpty(page?.Content) || string.IsNullOrEmpty(_content))
    {
        return;
    }

    if (!page.Content.Contains(_content))
    {
        return;
    }

    _cookieInjector?.Inject(spider);

    // Only a page that actually contains the marker is reported as failed.
    throw new SpiderException($"Content downloaded contains string: {_content}.");
}
/// <summary>
/// Update cookies regularly: once the scheduled time has passed, schedule the
/// next update and run the cookie injector.
/// </summary>
/// <param name="page">Page data <see cref="Page"/>; not read by this handler.</param>
/// <param name="downloader">Downloader <see cref="IDownloader"/> passed to the injector.</param>
/// <param name="spider">Spider <see cref="ISpider"/> passed to the injector.</param>
public override void Handle(ref Page page, IDownloader downloader, ISpider spider)
{
    // Not due yet.
    if (DateTime.Now <= _next)
    {
        return;
    }

    // The next deadline is set before injecting, so it advances even if the
    // injector throws.
    _next = DateTime.Now.AddSeconds(_interval);
    _cookieInjector.Inject(downloader, spider);
}
/// <summary>
/// Update cookies regularly: once the scheduled time has passed, schedule the
/// next update and run the cookie injector.
/// </summary>
/// <param name="response">Page data <see cref="Response"/>; not read by this handler.</param>
/// <param name="downloader">Downloader <see cref="IDownloader"/> passed to the injector.</param>
public override void Handle(ref Response response, IDownloader downloader)
{
    // Not due yet.
    if (DateTime.Now <= _next)
    {
        return;
    }

    // The next deadline is set before injecting, so it advances even if the
    // injector throws.
    _next = DateTime.Now.AddSeconds(_interval);
    _cookieInjector.Inject(downloader);
}
/// <summary>
/// Returns the cookie container cached for the given container/injector pair,
/// creating it on first use by running the injector (if any) and copying
/// <paramref name="cookieContainer"/>.
/// </summary>
/// <param name="spider">Spider passed to the injector on first use.</param>
/// <param name="downloader">Downloader passed to the injector on first use.</param>
/// <param name="cookieContainer">Source container; copied on first use.</param>
/// <param name="cookieInjector">Optional injector run once before the copy is taken.</param>
/// <returns>The cached copy associated with the computed key.</returns>
private CookieContainer GenerateNewCookieContainer(ISpider spider, IDownloader downloader, CookieContainer cookieContainer, ICookieInjector cookieInjector = null)
{
    // The cache key is derived from the hash codes of the container and, when
    // present, the injector.
    var key = cookieInjector == null
        ? $"{cookieContainer.GetHashCode()}"
        : $"{cookieContainer.GetHashCode()}_{cookieInjector.GetHashCode()}";

    // Single lookup instead of ContainsKey + indexer.
    CookieContainer cached;
    if (_initedCookieContainers.TryGetValue(key, out cached))
    {
        return cached;
    }

    cookieInjector?.Inject(downloader, spider);

    // A new container is copied here because some sites set values in cookies
    // and successive requests depend on one another, so each must be managed
    // by its own independent CookieContainer.
    var copy = CopyCookieContainer(cookieContainer);
    _initedCookieContainers.Add(key, copy);
    return copy;
}
/// <summary>
/// When the downloaded content contains the configured marker string: redial,
/// queue the request for a cycle retry, re-inject cookies and record a
/// <see cref="DownloadException"/> on the page.
/// </summary>
/// <param name="page">Downloaded page whose content is inspected.</param>
/// <param name="spider">Spider that is exited if redial fails and passed to the injector.</param>
public override void Handle(ref Page page, ISpider spider)
{
    // Preconditions: page content, a marker, and an injector must all be present.
    if (string.IsNullOrEmpty(page?.Content) || string.IsNullOrEmpty(_content) || _cookieInjector == null)
    {
        return;
    }

    if (!page.Content.Contains(_content))
    {
        return;
    }

    var redialOutcome = NetworkCenter.Current.Executor.Redial();
    if (redialOutcome == RedialResult.Failed)
    {
        spider.Exit();
    }

    // The remaining steps run regardless of the redial outcome.
    Spider.AddToCycleRetry(page.Request, spider.Site);
    _cookieInjector?.Inject(spider);
    page.Exception = new DownloadException($"Content downloaded contains string: {_content}.");
}
/// <summary>
/// Triggers an ADSL redial and re-obtains cookies when the page data contains
/// one of the specified contents.
/// </summary>
/// <param name="page">Page data.</param>
/// <param name="spider">Spider.</param>
public override void Handle(ref Page page, ISpider spider)
{
    if (string.IsNullOrEmpty(page?.Content))
    {
        return;
    }

    // Copied to a local because a ref parameter cannot be captured by the lambda.
    var body = page.Content;
    var matched = _contents.FirstOrDefault(marker => body.Contains(marker));
    if (matched == null)
    {
        return;
    }

    var redialOutcome = NetworkCenter.Current.Executor.Redial();
    if (redialOutcome == RedialResult.Failed)
    {
        spider.Exit();
    }

    // The remaining steps run regardless of the redial outcome.
    Spider.AddToCycleRetry(page.Request, spider.Site);
    _cookieInjector.Inject(spider);
    page.Exception = new DownloadException($"Downloaded content contains: {matched}.");
}
/// <summary>
/// Redial ADSL and re-obtain cookie when <see cref="Page.Content"/> contains specified contents.
/// </summary>
/// <param name="page">Page data; replaced by the page returned from the cycle-retry call when a match is found.</param>
/// <param name="downloader">Downloader passed to the cookie injector.</param>
/// <param name="spider">Spider that is exited if redial fails and passed to the injector.</param>
public override void Handle(ref Page page, IDownloader downloader, ISpider spider)
{
    if (page == null || string.IsNullOrWhiteSpace(page.Content))
    {
        return;
    }

    // Copied to a local because a ref parameter cannot be captured by the lambda.
    var body = page.Content;
    var matched = _contents.FirstOrDefault(marker => body.Contains(marker));
    if (matched == null)
    {
        return;
    }

    var redialOutcome = NetworkCenter.Current.Executor.Redial();
    if (redialOutcome == RedialResult.Failed)
    {
        spider.Exit();
    }

    // NOTE(review): page is replaced by whatever AddToCycleRetry returns and is
    // dereferenced below — confirm that call can never return null.
    page = spider.Site.AddToCycleRetry(page.Request);
    _cookieInjector.Inject(downloader, spider);
    page.Exception = new DownloadException($"Downloaded content contains: {matched}.");
}