/// <summary>
/// Loads the module settings from the <c>CONFIG_SECTION</c> node: the optional extended
/// data provider and, when <c>useFilter</c> parses to <c>true</c>, the blacklist URL filter.
/// </summary>
/// <remarks>
/// A missing section or a missing <c>useFilter</c> / <c>extendDataPrivoder</c> child node is
/// treated as "not configured" instead of failing with a <see cref="NullReferenceException"/>.
/// </remarks>
/// <exception cref="ArgumentException">
/// The configured <c>extendDataPrivoder</c> object does not implement
/// <c>IGathererDataPrivoder</c>.
/// </exception>
protected override void LoadModuleConfig()
{
    var configNode = GathererSection.Instance.TryGetNode(CONFIG_SECTION);

    // TryGetNode may yield null (the blackList lookup below already accounts for that),
    // so without a section there is nothing to load.
    if (configNode == null)
    {
        return;
    }

    // Both child nodes are optional; read their "value" attribute only when present.
    string strUseFilter = null;
    var useFilterNode = configNode.TryGetNode("useFilter");
    if (useFilterNode != null)
    {
        strUseFilter = useFilterNode.Attributes["value"];
    }

    string privoder = null;
    var privoderNode = configNode.TryGetNode("extendDataPrivoder");
    if (privoderNode != null)
    {
        privoder = privoderNode.Attributes["value"];
    }

    if (!String.IsNullOrEmpty(privoder))
    {
        // The created object must implement the provider interface; anything else
        // (including null) is a configuration error.
        var createdPrivoder = FastActivator.Create(privoder) as IGathererDataPrivoder;
        if (createdPrivoder == null)
        {
            throw new ArgumentException("配置中的extendDataPrivoder对象没有实现IGathererDataPrivoder接口");
        }

        this.extendDataPrivoder = createdPrivoder;
    }

    // Determine whether the filter is enabled; an absent or unparsable value yields false.
    Boolean.TryParse(strUseFilter, out this.useFilter);
    if (useFilter)
    {
        var node = configNode.TryGetNode("blackList");
        if (node != null)
        {
            this.blackList = node.Attributes["value"];
            this.urlFilter = new BlacklistUrlFilter(this.blackList);
        }
    }
}
/// <summary>
/// Loads the module settings from the <c>CONFIG_SECTION</c> node: the optional extended
/// data provider and, when <c>useFilter</c> parses to <c>true</c>, the whitelist URL filter.
/// </summary>
/// <remarks>
/// A missing section or a missing <c>useFilter</c> / <c>extendDataPrivoder</c> child node is
/// treated as "not configured" instead of failing with a <see cref="NullReferenceException"/>.
/// </remarks>
/// <exception cref="ArgumentException">
/// The configured <c>extendDataPrivoder</c> object does not implement
/// <c>IGathererDataPrivoder</c>.
/// </exception>
protected override void LoadModuleConfig()
{
    var configNode = GathererSection.Instance.TryGetNode(CONFIG_SECTION);

    // TryGetNode may yield null (the whiteList lookup below already accounts for that),
    // so without a section there is nothing to load.
    if (configNode == null)
    {
        return;
    }

    // Both child nodes are optional; read their "value" attribute only when present.
    string strUseFilter = null;
    var useFilterNode = configNode.TryGetNode("useFilter");
    if (useFilterNode != null)
    {
        strUseFilter = useFilterNode.Attributes["value"];
    }

    string privoder = null;
    var privoderNode = configNode.TryGetNode("extendDataPrivoder");
    if (privoderNode != null)
    {
        privoder = privoderNode.Attributes["value"];
    }

    if (!String.IsNullOrEmpty(privoder))
    {
        // The created object must implement the provider interface; anything else
        // (including null) is a configuration error.
        var createdPrivoder = FastActivator.Create(privoder) as IGathererDataPrivoder;
        if (createdPrivoder == null)
        {
            throw new ArgumentException("配置中的extendDataPrivoder对象没有实现IGathererDataPrivoder接口");
        }

        this.extendDataPrivoder = createdPrivoder;
    }

    // Determine whether the filter is enabled; an absent or unparsable value yields false.
    Boolean.TryParse(strUseFilter, out this.useFilter);
    if (useFilter)
    {
        var node = configNode.TryGetNode("whiteList");
        if (node != null)
        {
            this.whiteList = node.Attributes["value"];
            this.urlFilter = new WhitelistUrlFilter(this.whiteList);
        }
    }
}
/// <summary>
/// Creates the factory: stores the URL filter, creates a <c>Control</c> and keeps that
/// control's <c>Dispatcher</c> in <c>_dispatcher</c>.
/// </summary>
/// <param name="urlFilter">Filter kept in <c>_urlFilter</c>; validated with <c>ArgumentUtility.CheckNotNull</c>.</param>
public FilteredHttpsProtocolFactory(IUrlFilter urlFilter)
{
    ArgumentUtility.CheckNotNull("urlFilter", urlFilter);

    // The control is held in a field; its dispatcher is the one cached below.
    _ctrl = new Control();
    _dispatcher = _ctrl.Dispatcher;

    _urlFilter = urlFilter;
}
/// <summary>
/// Logs which portal is being scraped, extracts the page URLs, lets the filter modify
/// them, and passes the result to <c>UpdateUrls</c>.
/// </summary>
/// <param name="jobPortal">Portal whose description is written to the log.</param>
/// <param name="filter">Filter applied (by reference) to the extracted URLs.</param>
public void Do(JobPortals jobPortal, IUrlFilter filter)
{
    var portalDescription = jobPortal.GetDescription();
    Log.Information($"Scraping urls for: {portalDescription}");

    var pageUrls = ExtractPageUrls();

    // The filter receives the collection by ref, so it may replace it entirely.
    filter.Apply(ref pageUrls);

    UpdateUrls(pageUrls);
}
/// <summary>
/// Starts the response distributer, assigns a <c>RedisUrlFilter</c> to <c>UrlFilter</c>, and
/// then starts the thread named "ResponseDistributeThread" that runs
/// <c>ListenToRedisTopic</c>.
/// </summary>
public RedisScheduler()
{
    ResponseDistributer.StartDistribuiter();
    //RedisSchedulerContext.InitContext();

    // Assign the filter BEFORE the listener thread starts: ListenToRedisTopic is an
    // instance method, so starting it first would let it run against an instance whose
    // UrlFilter has not been set yet.
    UrlFilter = new RedisUrlFilter();

    var listenerThread = new Thread(ListenToRedisTopic)
    {
        Name = "ResponseDistributeThread"
    };
    listenerThread.Start();
}
/// <summary>
/// Stores the URL filter and raises the thread pool's maximum worker-thread count to
/// <c>c_requiredMaxWorkerThreads</c> when the current maximum is lower.
/// </summary>
/// <param name="dispatcher">Validated as non-null. NOTE(review): it is not stored or used further in this constructor — confirm that is intended.</param>
/// <param name="urlFilter">Filter kept in <c>_urlFilter</c>.</param>
public ExtendedHttpProtocol(Control dispatcher, IUrlFilter urlFilter)
{
    ArgumentUtility.CheckNotNull("dispatcher", dispatcher);
    ArgumentUtility.CheckNotNull("urlFilter", urlFilter);

    _urlFilter = urlFilter;

    int maxWorkers;
    int maxIoThreads;
    ThreadPool.GetMaxThreads(out maxWorkers, out maxIoThreads);

    // Only the worker limit is raised; the I/O completion limit is passed back unchanged.
    var workerLimitSufficient = maxWorkers >= c_requiredMaxWorkerThreads;
    if (!workerLimitSufficient)
    {
        ThreadPool.SetMaxThreads(c_requiredMaxWorkerThreads, maxIoThreads);
    }
}
/// <summary>
/// Wires up the CvOnline data service: parser, unit of work, and the analyser, scraper,
/// URL filter and HTTP client built for <c>JobPortals.CvOnline</c>.
/// </summary>
/// <param name="httpClientFactory">Creates the HTTP client, named after the portal's description.</param>
/// <param name="scraperFactory">Builds the analyser, scraper and URL filter for the portal.</param>
/// <param name="unitOfWork">Kept in <c>_unitOfWork</c>.</param>
public CvOnlineDataService(
    IHttpClientFactory httpClientFactory,
    IScraperFactory scraperFactory,
    IUnitOfWork unitOfWork)
{
    var portal = JobPortals.CvOnline;

    _parser = new CvOnlineParser();
    _unitOfWork = unitOfWork;

    _analyser = scraperFactory.BuildAnalyser(portal);
    _scraper = scraperFactory.BuildScraper(portal);
    _filter = scraperFactory.BuildUrlFilter(portal);

    var clientName = portal.GetDescription();
    _httpClient = httpClientFactory.CreateClient(clientName);

    // The scrape client combines the HTTP client and scraper created above.
    _scrapeClient = new ScrapeClient(_httpClient, _scraper);
}
/// <summary>
/// Stores the URL filter and dispatcher, then uses the dispatcher to create the
/// <c>HttpsProtocol</c> handler that is kept (as <c>IInternetProtocol</c>) in <c>_wrapped</c>.
/// </summary>
/// <param name="dispatcher">Dispatcher on which the wrapped handler is created.</param>
/// <param name="urlFilter">Filter kept in <c>_urlFilter</c>.</param>
public FilteredHttpsProtocol(Dispatcher dispatcher, IUrlFilter urlFilter)
{
    ArgumentUtility.CheckNotNull("dispatcher", dispatcher);
    ArgumentUtility.CheckNotNull("urlFilter", urlFilter);

    _urlFilter = urlFilter;
    _dispatcher = dispatcher;

    // The original handler is instantiated inside the dispatcher callback.
    _dispatcher.Invoke(
        () =>
        {
            _wrapped = (IInternetProtocol) new HttpsProtocol();
        });
}
/// <summary>
/// Loads the configuration from the <c>CONFIG_SECTION</c> node: the enabled flag, the
/// optional extended data provider and, when <c>useFilter</c> parses to <c>true</c>,
/// the whitelist URL filter.
/// </summary>
/// <remarks>
/// Returns early when the section is missing or the module is not enabled. Missing
/// <c>useFilter</c> / <c>extendDataPrivoder</c> child nodes are treated as "not configured"
/// instead of failing with a <see cref="NullReferenceException"/>.
/// </remarks>
/// <exception cref="ArgumentException">
/// The configured <c>extendDataPrivoder</c> object does not implement
/// <c>IGathererDataPrivoder</c>.
/// </exception>
protected override void LoadConfig()
{
    var configNode = GathererSection.Instance.TryGetNode(CONFIG_SECTION);

    // Without a configuration node the module does not need to be enabled.
    if (configNode == null)
    {
        return;
    }

    // Determine whether this processor is enabled. The flag is a machine-readable
    // token, so it is compared ordinally rather than with the current culture.
    var enabledValue = configNode.Attributes["enabled"];
    if (enabledValue != null && enabledValue.Equals("true", StringComparison.OrdinalIgnoreCase))
    {
        isEnabled = true;
    }

    // If the module is not enabled, nothing further needs to be loaded.
    if (!isEnabled)
    {
        return;
    }

    // Both child nodes are optional; read their "value" attribute only when present.
    string strUseFilter = null;
    var useFilterNode = configNode.TryGetNode("useFilter");
    if (useFilterNode != null)
    {
        strUseFilter = useFilterNode.Attributes["value"];
    }

    string privoder = null;
    var privoderNode = configNode.TryGetNode("extendDataPrivoder");
    if (privoderNode != null)
    {
        privoder = privoderNode.Attributes["value"];
    }

    if (!String.IsNullOrEmpty(privoder))
    {
        // The created object must implement the provider interface; anything else
        // (including null) is a configuration error.
        var createdPrivoder = FastActivator.Create(privoder) as IGathererDataPrivoder;
        if (createdPrivoder == null)
        {
            throw new ArgumentException("配置中的extendDataPrivoder对象没有实现IGathererDataPrivoder接口");
        }

        extendDataPrivoder = createdPrivoder;
    }

    // Determine whether the filter is enabled; an absent or unparsable value yields false.
    Boolean.TryParse(strUseFilter, out useFilter);
    if (useFilter)
    {
        var node = configNode.TryGetNode("whiteList");
        if (node != null)
        {
            whiteList = node.Attributes["value"];
            urlFilter = new WhitelistUrlFilter(whiteList);
        }
    }
}
/// <summary>
/// Creates a scheduler whose key is "Crawler.Schedulers.RedisScheduler." followed by
/// <paramref name="name"/>, connects to Redis, and takes the URL filter from
/// <c>UrlFilterManager.Current</c>.
/// </summary>
/// <param name="name">Scheduler name; appended to the key prefix.</param>
/// <param name="connectionString">Connection string passed to <c>ConnectionMultiplexer.Connect</c>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="name"/> or <paramref name="connectionString"/> is null, empty or whitespace.
/// </exception>
public RedisScheduler(string name, string connectionString)
{
    var nameMissing = string.IsNullOrWhiteSpace(name);
    if (nameMissing)
    {
        throw new ArgumentNullException(nameof(name));
    }

    var connectionStringMissing = string.IsNullOrWhiteSpace(connectionString);
    if (connectionStringMissing)
    {
        throw new ArgumentNullException(nameof(connectionString));
    }

    _redisSchedulerKey = "Crawler.Schedulers.RedisScheduler." + name;

    var multiplexer = ConnectionMultiplexer.Connect(connectionString);
    _database = multiplexer.GetDatabase();

    _urlFilter = UrlFilterManager.Current;
}
/// <summary>
/// Stores the browser control and the three URL filters, and initialises
/// <c>_isExternal</c> to <c>false</c>. All arguments are validated as non-null.
/// </summary>
/// <param name="browserControl">Browser control kept in <c>_browserControl</c>.</param>
/// <param name="nonApplicationUrlFilter">Kept in <c>_nonApplicationUrlFilter</c>.</param>
/// <param name="applicationUrlFiler">Kept in <c>_applicationUrlFiler</c>.</param>
/// <param name="entryPointFilter">Kept in <c>_entryPointFilter</c>.</param>
public WebBrowserEvents(
    TridentWebBrowser browserControl,
    IUrlFilter nonApplicationUrlFilter,
    IUrlFilter applicationUrlFiler,
    IUrlFilter entryPointFilter)
{
    // Validate everything before touching any state.
    ArgumentUtility.CheckNotNull("browserControl", browserControl);
    ArgumentUtility.CheckNotNull("nonApplicationUrlFilter", nonApplicationUrlFilter);
    ArgumentUtility.CheckNotNull("applicationUrlFiler", applicationUrlFiler);
    ArgumentUtility.CheckNotNull("entryPointFilter", entryPointFilter);

    _isExternal = false;
    _browserControl = browserControl;

    _entryPointFilter = entryPointFilter;
    _applicationUrlFiler = applicationUrlFiler;
    _nonApplicationUrlFilter = nonApplicationUrlFilter;
}
/// <summary>
/// Creates the factory and stores the document-handle registry, the subscription
/// provider and the three URL filters in the corresponding properties. All arguments
/// are validated as non-null.
/// </summary>
/// <param name="htmlDocumentHandleRegistry">Assigned to <c>HtmlDocumentHandleRegistry</c>.</param>
/// <param name="subscriptionProvider">Assigned to <c>SubscriptionProvider</c>.</param>
/// <param name="nonApplicationUrlFilter">Assigned to <c>NonApplicationUrlFilter</c>.</param>
/// <param name="entryPointFilter">Assigned to <c>EntryPointFilter</c>.</param>
/// <param name="applicationUrlFilter">Assigned to <c>ApplicationUrlFilter</c>.</param>
public TridentWebBrowserFactory(
    IHtmlDocumentHandleRegistry htmlDocumentHandleRegistry,
    ISubscriptionProvider subscriptionProvider,
    IUrlFilter nonApplicationUrlFilter,
    IUrlFilter entryPointFilter,
    IUrlFilter applicationUrlFilter)
{
    // Argument validation, in parameter order.
    ArgumentUtility.CheckNotNull("htmlDocumentHandleRegistry", htmlDocumentHandleRegistry);
    ArgumentUtility.CheckNotNull("subscriptionProvider", subscriptionProvider);
    ArgumentUtility.CheckNotNull("nonApplicationUrlFilter", nonApplicationUrlFilter);
    ArgumentUtility.CheckNotNull("entryPointFilter", entryPointFilter);
    ArgumentUtility.CheckNotNull("applicationUrlFilter", applicationUrlFilter);

    // URL filters.
    NonApplicationUrlFilter = nonApplicationUrlFilter;
    EntryPointFilter = entryPointFilter;
    ApplicationUrlFilter = applicationUrlFilter;

    // Supporting services.
    SubscriptionProvider = subscriptionProvider;
    HtmlDocumentHandleRegistry = htmlDocumentHandleRegistry;
}
/// <summary>
/// Creates the browser: builds its <c>WebBrowserEvents</c>, navigates to
/// <c>c_blankSite</c>, stores the collaborators, installs a <c>DocumentHostUIHandler</c>
/// and subscribes to the document-completed and document-registered events.
/// </summary>
/// <param name="documentHandleRegistry">Kept in <c>_documentHandleRegistry</c>; its <c>DocumentRegistered</c> event is subscribed to.</param>
/// <param name="subscriptionProvider">Kept in <c>_subscriptionProvider</c>.</param>
/// <param name="nonApplicationUrlFilter">Passed on to <c>WebBrowserEvents</c>.</param>
/// <param name="entryPointFilter">Passed on to <c>WebBrowserEvents</c>.</param>
/// <param name="applicationUrlFilter">Passed on to <c>WebBrowserEvents</c> and kept in <c>_applicationUrlFiler</c>.</param>
public TridentWebBrowser(
    IHtmlDocumentHandleRegistry documentHandleRegistry,
    ISubscriptionProvider subscriptionProvider,
    IUrlFilter nonApplicationUrlFilter,
    IUrlFilter entryPointFilter,
    IUrlFilter applicationUrlFilter)
{
    ArgumentUtility.CheckNotNull("documentHandleRegistry", documentHandleRegistry);
    ArgumentUtility.CheckNotNull("subscriptionProvider", subscriptionProvider);
    ArgumentUtility.CheckNotNull("nonApplicationUrlFilter", nonApplicationUrlFilter);
    ArgumentUtility.CheckNotNull("entryPointFilter", entryPointFilter);
    ArgumentUtility.CheckNotNull("applicationUrlFilter", applicationUrlFilter);

    // Note the argument order expected by WebBrowserEvents: application filter
    // comes before the entry-point filter.
    BrowserEvents = new WebBrowserEvents(
        this,
        nonApplicationUrlFilter,
        applicationUrlFilter,
        entryPointFilter);

    // Bootstrap navigation; the statement order below is kept exactly as written.
    Navigate(c_blankSite);

    _documentHandleRegistry = documentHandleRegistry;
    _subscriptionProvider = subscriptionProvider;
    _applicationUrlFiler = applicationUrlFilter;

    var uiHandler = new DocumentHostUIHandler(this);
    InstallCustomUIHandler(uiHandler);

    DocumentCompleted += OnDocumentCompleted;
    _documentHandleRegistry.DocumentRegistered += OnDocumentRegistered;
}
/// <summary>
/// Loads the configuration from the <c>CONFIG_SECTION</c> node: the enabled flag, the
/// optional extended data provider and, when <c>useFilter</c> parses to <c>true</c>,
/// the blacklist URL filter.
/// </summary>
/// <remarks>
/// Returns early when the section is missing or the module is not enabled. Missing
/// <c>useFilter</c> / <c>extendDataPrivoder</c> child nodes are treated as "not configured"
/// instead of failing with a <see cref="NullReferenceException"/>.
/// </remarks>
/// <exception cref="ArgumentException">
/// The configured <c>extendDataPrivoder</c> object does not implement
/// <c>IGathererDataPrivoder</c>.
/// </exception>
protected override void LoadConfig()
{
    var configNode = GathererSection.Instance.TryGetNode(CONFIG_SECTION);

    // Without a configuration node the module does not need to be enabled.
    if (configNode == null)
    {
        return;
    }

    // Determine whether this processor is enabled. The flag is a machine-readable
    // token, so it is compared ordinally rather than with the current culture.
    var enabledValue = configNode.Attributes["enabled"];
    if (enabledValue != null && enabledValue.Equals("true", StringComparison.OrdinalIgnoreCase))
    {
        isEnabled = true;
    }

    // If the module is not enabled, nothing further needs to be loaded.
    if (!isEnabled)
    {
        return;
    }

    // Both child nodes are optional; read their "value" attribute only when present.
    string strUseFilter = null;
    var useFilterNode = configNode.TryGetNode("useFilter");
    if (useFilterNode != null)
    {
        strUseFilter = useFilterNode.Attributes["value"];
    }

    string privoder = null;
    var privoderNode = configNode.TryGetNode("extendDataPrivoder");
    if (privoderNode != null)
    {
        privoder = privoderNode.Attributes["value"];
    }

    if (!String.IsNullOrEmpty(privoder))
    {
        // The created object must implement the provider interface; anything else
        // (including null) is a configuration error.
        var createdPrivoder = FastActivator.Create(privoder) as IGathererDataPrivoder;
        if (createdPrivoder == null)
        {
            throw new ArgumentException("配置中的extendDataPrivoder对象没有实现IGathererDataPrivoder接口");
        }

        extendDataPrivoder = createdPrivoder;
    }

    // Determine whether the filter is enabled; an absent or unparsable value yields false.
    Boolean.TryParse(strUseFilter, out useFilter);
    if (useFilter)
    {
        var node = configNode.TryGetNode("blackList");
        if (node != null)
        {
            blackList = node.Attributes["value"];
            urlFilter = new BlacklistUrlFilter(blackList);
        }
    }
}
/// <summary>
/// Creates the service and stores its three collaborators in the fields of the same name.
/// </summary>
/// <param name="htmlParser">HTML parser kept in <c>htmlParser</c>.</param>
/// <param name="client">Client kept in <c>client</c>.</param>
/// <param name="urlFilter">URL filter kept in <c>urlFilter</c>.</param>
public SpiderService(
    IHtmlParser htmlParser,
    IClient client,
    IUrlFilter urlFilter)
{
    // Plain field assignments; no validation is performed on the arguments.
    this.urlFilter = urlFilter;
    this.client = client;
    this.htmlParser = htmlParser;
}
/// <summary>
/// Starts the request receiver and the response distributer, then assigns a new
/// <c>InMemoryUrlFilter</c> to <c>urlFilter</c>.
/// </summary>
public InMemoryScheduler()
{
    // Start the static receiver/distributer first, in this order.
    RequestReceiver.StartReceiver();
    ResponseDistributer.StartDistribuiter();

    var inMemoryFilter = new InMemoryUrlFilter();
    urlFilter = inMemoryFilter;
}
/// <summary>
/// Invokes <paramref name="func"/> once and stores its result as the current URL filter.
/// </summary>
/// <param name="func">Factory that produces the URL filter; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="func"/> is null.</exception>
public static void SetUrlFilter(Func<IUrlFilter> func)
{
    // Fail with a descriptive argument error instead of a bare NullReferenceException.
    if (func == null)
    {
        throw new ArgumentNullException(nameof(func));
    }

    _urlFilter = func();
}
/// <summary>
/// Stores <paramref name="urlFilter"/> as the current URL filter.
/// </summary>
/// <param name="urlFilter">Filter to store; assigned as-is without validation.</param>
public static void SetUrlFilter(IUrlFilter urlFilter) => _urlFilter = urlFilter;
/// <summary>
/// Loads the configuration for the given manifest location and populates this object
/// from it: the <c>Application</c> info, the URL and add-in filters, the tab colour
/// codes and the resource-filter switch.
/// </summary>
/// <param name="manifestLocation">Location of the manifest; also stored as <c>ManifestUri</c>.</param>
public void LoadFrom(Uri manifestLocation)
{
    ArgumentUtility.CheckNotNull("manifestLocation", manifestLocation);

    var provider = DesktopGapConfigurationProvider.Create(String.Empty, manifestLocation.ToString());
    var configuration = provider.GetConfiguration();

    // Application description.
    var appConfig = configuration.Application;
    Application = new ApplicationInfo
    {
        Name = appConfig.Name,
        BaseUri = appConfig.GetBaseUri(),
        IconUri = appConfig.GetIconUri(),
        ManifestUri = manifestLocation,
        AllowCloseHomeTab = appConfig.AllowCloseHomeTab,
        AlwaysShowUrl = appConfig.AlwaysShowUrl,
        AlwaysOpenHomeUrl = appConfig.AlwaysOpenHomeUrl,
        HomeUri = appConfig.GetHomeUri()
    };

    // Security rules.
    var securityConfig = configuration.Security;
    var nonApplicationRules = securityConfig.NonApplicationUrlRules;
    var applicationRules = securityConfig.ApplicationUrlRules;
    var startupRules = securityConfig.StartupUrlRules;

    // The resource filter covers both non-application and application rules.
    var combinedResourceRules = nonApplicationRules.Union(applicationRules);
    var addInRuleSet = securityConfig.AddInRules;

    ResourceFilter = new UrlFilter(combinedResourceRules);
    NonApplicationUrlFilter = new UrlFilter(nonApplicationRules);
    AddInAllowedFilter = new UrlFilter(applicationRules);
    StartUpFilter = new UrlFilter(startupRules);
    AddInFilter = new AddInFilter(addInRuleSet);

    // Tab colours.
    var tabColors = appConfig.TabColors;
    HomeTabColorCode = tabColors.Home;
    NonApplicationTabColorCode = tabColors.NonApplication;
    ApplicationTabColorCode = tabColors.Application;

    EnableResourceFilter = securityConfig.EnableResourceFilter;
}