예제 #1
0
        /// <summary>
        /// Loads this module's settings from the <c>CONFIG_SECTION</c> node of the gatherer configuration:
        /// the optional extended data provider and, when filtering is switched on, a blacklist URL filter.
        /// </summary>
        /// <remarks>
        /// A missing configuration section or a missing child node is treated as "not configured"
        /// instead of failing with a <see cref="NullReferenceException"/>.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// The object created for <c>extendDataPrivoder</c> does not implement <see cref="IGathererDataPrivoder"/>.
        /// </exception>
        protected override void LoadModuleConfig()
        {
            var configNode = GathererSection.Instance.TryGetNode(CONFIG_SECTION);
            if (configNode == null)
            {
                // Nothing is configured for this module; leave the current settings untouched.
                return;
            }

            // TryGetNode yields null for absent nodes (the blackList lookup below relies on that),
            // so every child node is checked before its attributes are read.
            var useFilterNode = configNode.TryGetNode("useFilter");
            string strUseFilter = useFilterNode != null ? useFilterNode.Attributes["value"] : null;

            var privoderNode = configNode.TryGetNode("extendDataPrivoder");
            string privoder = privoderNode != null ? privoderNode.Attributes["value"] : null;

            if (!String.IsNullOrEmpty(privoder))
            {
                // A null or wrongly typed instance both end up as null here and are rejected.
                var dataPrivoder = FastActivator.Create(privoder) as IGathererDataPrivoder;
                if (dataPrivoder == null)
                {
                    throw new ArgumentException("配置中的extendDataPrivoder对象没有实现IGathererDataPrivoder接口");
                }

                this.extendDataPrivoder = dataPrivoder;
            }

            // Determine whether the filter is used; an absent or unparsable value yields false.
            Boolean.TryParse(strUseFilter, out this.useFilter);
            if (useFilter)
            {
                var node = configNode.TryGetNode("blackList");
                if (node != null)
                {
                    this.blackList = node.Attributes["value"];
                    this.urlFilter = new BlacklistUrlFilter(this.blackList);
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Loads this module's settings from the <c>CONFIG_SECTION</c> node of the gatherer configuration:
        /// the optional extended data provider and, when filtering is switched on, a whitelist URL filter.
        /// </summary>
        /// <remarks>
        /// A missing configuration section or a missing child node is treated as "not configured"
        /// instead of failing with a <see cref="NullReferenceException"/>.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// The object created for <c>extendDataPrivoder</c> does not implement <see cref="IGathererDataPrivoder"/>.
        /// </exception>
        protected override void LoadModuleConfig()
        {
            var configNode = GathererSection.Instance.TryGetNode(CONFIG_SECTION);
            if (configNode == null)
            {
                // Nothing is configured for this module; leave the current settings untouched.
                return;
            }

            // TryGetNode yields null for absent nodes (the whiteList lookup below relies on that),
            // so every child node is checked before its attributes are read.
            var useFilterNode = configNode.TryGetNode("useFilter");
            string strUseFilter = useFilterNode != null ? useFilterNode.Attributes["value"] : null;

            var privoderNode = configNode.TryGetNode("extendDataPrivoder");
            string privoder = privoderNode != null ? privoderNode.Attributes["value"] : null;

            if (!String.IsNullOrEmpty(privoder))
            {
                // A null or wrongly typed instance both end up as null here and are rejected.
                var dataPrivoder = FastActivator.Create(privoder) as IGathererDataPrivoder;
                if (dataPrivoder == null)
                {
                    throw new ArgumentException("配置中的extendDataPrivoder对象没有实现IGathererDataPrivoder接口");
                }

                this.extendDataPrivoder = dataPrivoder;
            }

            // Determine whether the filter is used; an absent or unparsable value yields false.
            Boolean.TryParse(strUseFilter, out this.useFilter);
            if (useFilter)
            {
                var node = configNode.TryGetNode("whiteList");
                if (node != null)
                {
                    this.whiteList = node.Attributes["value"];
                    this.urlFilter = new WhitelistUrlFilter(this.whiteList);
                }
            }
        }
        /// <summary>
        /// Creates the factory, keeping the given URL filter together with a freshly created
        /// <c>Control</c> and that control's dispatcher.
        /// </summary>
        /// <param name="urlFilter">Filter stored in <c>_urlFilter</c>; checked with <c>ArgumentUtility.CheckNotNull</c>.</param>
        public FilteredHttpsProtocolFactory(IUrlFilter urlFilter)
        {
            ArgumentUtility.CheckNotNull("urlFilter", urlFilter);

            _urlFilter = urlFilter;

            // The control is created here and its dispatcher is captured right away.
            var control = new Control();
            _ctrl = control;
            _dispatcher = control.Dispatcher;
        }
예제 #4
0
        /// <summary>
        /// Logs which portal is being scraped, extracts the page URLs, lets the filter
        /// adjust them in place and passes the result on to <c>UpdateUrls</c>.
        /// </summary>
        /// <param name="jobPortal">Portal whose description is written to the log.</param>
        /// <param name="filter">Filter applied to the extracted URLs by reference.</param>
        public void Do(JobPortals jobPortal, IUrlFilter filter)
        {
            var portalDescription = jobPortal.GetDescription();
            Log.Information($"Scraping urls for: {portalDescription}");

            var pageUrls = ExtractPageUrls();
            filter.Apply(ref pageUrls); // the filter may replace the collection, hence ref
            UpdateUrls(pageUrls);
        }
예제 #5
0
        /// <summary>
        /// Starts the response distributer, assigns the Redis-backed URL filter and then starts
        /// the thread named "ResponseDistributeThread" that runs <c>ListenToRedisTopic</c>.
        /// </summary>
        public RedisScheduler()
        {
            ResponseDistributer.StartDistribuiter();
            //RedisSchedulerContext.InitContext();

            // Assign instance state BEFORE the listener thread starts: the thread runs an instance
            // method and could otherwise observe UrlFilter while it is still unassigned.
            UrlFilter = new RedisUrlFilter();

            var listener = new Thread(ListenToRedisTopic)
            {
                Name = "ResponseDistributeThread"
            };
            listener.Start();
        }
        /// <summary>
        /// Stores the URL filter and raises the thread pool's maximum worker thread count to
        /// <c>c_requiredMaxWorkerThreads</c> when the current maximum is lower.
        /// </summary>
        /// <param name="dispatcher">Checked with <c>ArgumentUtility.CheckNotNull</c>; not otherwise used in this constructor.</param>
        /// <param name="urlFilter">Filter stored in <c>_urlFilter</c>; checked with <c>ArgumentUtility.CheckNotNull</c>.</param>
        public ExtendedHttpProtocol(Control dispatcher, IUrlFilter urlFilter)
        {
            ArgumentUtility.CheckNotNull("dispatcher", dispatcher);
            ArgumentUtility.CheckNotNull("urlFilter", urlFilter);

            _urlFilter = urlFilter;

            int maxWorkerThreads;
            int maxIoThreads;
            ThreadPool.GetMaxThreads(out maxWorkerThreads, out maxIoThreads);

            if (maxWorkerThreads >= c_requiredMaxWorkerThreads)
            {
                return;
            }

            // Only the worker limit is raised; the I/O completion limit is passed back unchanged.
            ThreadPool.SetMaxThreads(c_requiredMaxWorkerThreads, maxIoThreads);
        }
예제 #7
0
        /// <summary>
        /// Wires up the CvOnline data service: a parser, the unit of work, the analyser, scraper and
        /// URL filter built by the scraper factory, a named HTTP client and the scrape client on top of it.
        /// </summary>
        /// <param name="httpClientFactory">Creates the HTTP client named after the portal's description.</param>
        /// <param name="scraperFactory">Builds the analyser, scraper and URL filter for <c>JobPortals.CvOnline</c>.</param>
        /// <param name="unitOfWork">Stored in <c>_unitOfWork</c>.</param>
        public CvOnlineDataService(IHttpClientFactory httpClientFactory, IScraperFactory scraperFactory, IUnitOfWork unitOfWork)
        {
            var portal = JobPortals.CvOnline;

            _parser = new CvOnlineParser();
            _unitOfWork = unitOfWork;

            // Portal-specific collaborators all come from the same factory.
            _analyser = scraperFactory.BuildAnalyser(portal);
            _scraper = scraperFactory.BuildScraper(portal);
            _filter = scraperFactory.BuildUrlFilter(portal);

            var clientName = portal.GetDescription();
            _httpClient = httpClientFactory.CreateClient(clientName);
            _scrapeClient = new ScrapeClient(_httpClient, _scraper);
        }
        /// <summary>
        /// Stores the filter and dispatcher, then creates the wrapped <c>HttpsProtocol</c> handler
        /// through the dispatcher.
        /// </summary>
        /// <param name="dispatcher">Dispatcher used to create the wrapped handler; checked with <c>ArgumentUtility.CheckNotNull</c>.</param>
        /// <param name="urlFilter">Filter stored in <c>_urlFilter</c>; checked with <c>ArgumentUtility.CheckNotNull</c>.</param>
        public FilteredHttpsProtocol(Dispatcher dispatcher, IUrlFilter urlFilter)
        {
            ArgumentUtility.CheckNotNull("dispatcher", dispatcher);
            ArgumentUtility.CheckNotNull("urlFilter", urlFilter);

            _urlFilter = urlFilter;
            _dispatcher = dispatcher;

            // The handler is instantiated inside the dispatcher callback rather than on the calling thread.
            _dispatcher.Invoke(
                () =>
                {
                    _wrapped = (IInternetProtocol) new HttpsProtocol();
                });
        }
예제 #9
0
        /// <summary>
        /// Loads the processor configuration from the <c>CONFIG_SECTION</c> node: the enabled flag,
        /// the optional extended data provider and, when filtering is switched on, a whitelist URL filter.
        /// </summary>
        /// <remarks>
        /// Loading stops early when the section is absent or the module is not enabled.
        /// Missing <c>useFilter</c> / <c>extendDataPrivoder</c> child nodes are treated as "not configured"
        /// instead of failing with a <see cref="NullReferenceException"/>.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// The object created for <c>extendDataPrivoder</c> does not implement <see cref="IGathererDataPrivoder"/>.
        /// </exception>
        protected override void LoadConfig()
        {
            var configNode = GathererSection.Instance.TryGetNode(CONFIG_SECTION);

            // Without a configuration node the module does not need to be enabled.
            if (configNode == null)
            {
                return;
            }

            // Determine whether this processor is enabled. The attribute is read once and compared
            // ordinally because it is a machine-readable token, not user-facing text.
            var enabledValue = configNode.Attributes["enabled"];
            if (enabledValue != null && enabledValue.Equals("true", StringComparison.OrdinalIgnoreCase))
            {
                isEnabled = true;
            }

            // A module that is not enabled needs no further loading.
            if (!isEnabled)
            {
                return;
            }

            // TryGetNode yields null for absent nodes (the whiteList lookup below relies on that),
            // so every child node is checked before its attributes are read.
            var useFilterNode = configNode.TryGetNode("useFilter");
            string strUseFilter = useFilterNode != null ? useFilterNode.Attributes["value"] : null;

            var privoderNode = configNode.TryGetNode("extendDataPrivoder");
            string privoder = privoderNode != null ? privoderNode.Attributes["value"] : null;

            if (!String.IsNullOrEmpty(privoder))
            {
                // A null or wrongly typed instance both end up as null here and are rejected.
                var dataPrivoder = FastActivator.Create(privoder) as IGathererDataPrivoder;
                if (dataPrivoder == null)
                {
                    throw new ArgumentException("配置中的extendDataPrivoder对象没有实现IGathererDataPrivoder接口");
                }

                extendDataPrivoder = dataPrivoder;
            }

            // Determine whether the filter is used; an absent or unparsable value yields false.
            Boolean.TryParse(strUseFilter, out useFilter);
            if (useFilter)
            {
                var node = configNode.TryGetNode("whiteList");
                if (node != null)
                {
                    whiteList = node.Attributes["value"];
                    urlFilter = new WhitelistUrlFilter(whiteList);
                }
            }
        }
예제 #10
0
        /// <summary>
        /// Creates a scheduler whose Redis key is derived from <paramref name="name"/> and whose
        /// database handle comes from a connection opened with <paramref name="connectionString"/>.
        /// </summary>
        /// <param name="name">Scheduler name appended to the Redis key prefix.</param>
        /// <param name="connectionString">Connection string handed to <c>ConnectionMultiplexer.Connect</c>.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="name"/> or <paramref name="connectionString"/> is null, empty or whitespace.
        /// </exception>
        public RedisScheduler(string name, string connectionString)
        {
            if (string.IsNullOrWhiteSpace(name))
            {
                throw new ArgumentNullException(nameof(name));
            }

            if (string.IsNullOrWhiteSpace(connectionString))
            {
                throw new ArgumentNullException(nameof(connectionString));
            }

            _redisSchedulerKey = "Crawler.Schedulers.RedisScheduler." + name;

            var multiplexer = ConnectionMultiplexer.Connect(connectionString);
            _database = multiplexer.GetDatabase();

            _urlFilter = UrlFilterManager.Current;
        }
예제 #11
0
        /// <summary>
        /// Creates the event handler object for a browser control, storing the control and the
        /// three URL filters and initialising <c>_isExternal</c> to <c>false</c>.
        /// </summary>
        /// <param name="browserControl">Browser control stored in <c>_browserControl</c>.</param>
        /// <param name="nonApplicationUrlFilter">Stored in <c>_nonApplicationUrlFilter</c>.</param>
        /// <param name="applicationUrlFiler">Stored in <c>_applicationUrlFiler</c>.</param>
        /// <param name="entryPointFilter">Stored in <c>_entryPointFilter</c>.</param>
        public WebBrowserEvents(
            TridentWebBrowser browserControl,
            IUrlFilter nonApplicationUrlFilter,
            IUrlFilter applicationUrlFiler,
            IUrlFilter entryPointFilter)
        {
            // Every argument is checked with ArgumentUtility.CheckNotNull before anything is stored.
            ArgumentUtility.CheckNotNull("browserControl", browserControl);
            ArgumentUtility.CheckNotNull("nonApplicationUrlFilter", nonApplicationUrlFilter);
            ArgumentUtility.CheckNotNull("applicationUrlFiler", applicationUrlFiler);
            ArgumentUtility.CheckNotNull("entryPointFilter", entryPointFilter);

            _isExternal = false;

            _browserControl = browserControl;
            _nonApplicationUrlFilter = nonApplicationUrlFilter;
            _applicationUrlFiler = applicationUrlFiler;
            _entryPointFilter = entryPointFilter;
        }
        /// <summary>
        /// Creates the factory and exposes the supplied collaborators through its
        /// <c>NonApplicationUrlFilter</c>, <c>EntryPointFilter</c>, <c>ApplicationUrlFilter</c>,
        /// <c>SubscriptionProvider</c> and <c>HtmlDocumentHandleRegistry</c> members.
        /// </summary>
        /// <param name="htmlDocumentHandleRegistry">Assigned to <c>HtmlDocumentHandleRegistry</c>.</param>
        /// <param name="subscriptionProvider">Assigned to <c>SubscriptionProvider</c>.</param>
        /// <param name="nonApplicationUrlFilter">Assigned to <c>NonApplicationUrlFilter</c>.</param>
        /// <param name="entryPointFilter">Assigned to <c>EntryPointFilter</c>.</param>
        /// <param name="applicationUrlFilter">Assigned to <c>ApplicationUrlFilter</c>.</param>
        public TridentWebBrowserFactory(
            IHtmlDocumentHandleRegistry htmlDocumentHandleRegistry,
            ISubscriptionProvider subscriptionProvider,
            IUrlFilter nonApplicationUrlFilter,
            IUrlFilter entryPointFilter,
            IUrlFilter applicationUrlFilter)
        {
            // Checks run in parameter order, so the first offending argument is the one reported.
            ArgumentUtility.CheckNotNull("htmlDocumentHandleRegistry", htmlDocumentHandleRegistry);
            ArgumentUtility.CheckNotNull("subscriptionProvider", subscriptionProvider);
            ArgumentUtility.CheckNotNull("nonApplicationUrlFilter", nonApplicationUrlFilter);
            ArgumentUtility.CheckNotNull("entryPointFilter", entryPointFilter);
            ArgumentUtility.CheckNotNull("applicationUrlFilter", applicationUrlFilter);

            // URL filters first ...
            NonApplicationUrlFilter = nonApplicationUrlFilter;
            EntryPointFilter = entryPointFilter;
            ApplicationUrlFilter = applicationUrlFilter;

            // ... then the remaining services.
            SubscriptionProvider = subscriptionProvider;
            HtmlDocumentHandleRegistry = htmlDocumentHandleRegistry;
        }
예제 #13
0
    /// <summary>
    /// Creates the browser control: builds its <c>WebBrowserEvents</c>, navigates to the blank site,
    /// stores the registry, subscription provider and application URL filter, installs a custom
    /// UI handler and subscribes to document events.
    /// </summary>
    /// <param name="documentHandleRegistry">Stored in <c>_documentHandleRegistry</c>; its <c>DocumentRegistered</c> event is subscribed to.</param>
    /// <param name="subscriptionProvider">Stored in <c>_subscriptionProvider</c>.</param>
    /// <param name="nonApplicationUrlFilter">Only forwarded to <c>WebBrowserEvents</c>; not stored by this constructor.</param>
    /// <param name="entryPointFilter">Only forwarded to <c>WebBrowserEvents</c>; not stored by this constructor.</param>
    /// <param name="applicationUrlFilter">Forwarded to <c>WebBrowserEvents</c> and stored in <c>_applicationUrlFiler</c>.</param>
    public TridentWebBrowser (
        IHtmlDocumentHandleRegistry documentHandleRegistry,
        ISubscriptionProvider subscriptionProvider,
        IUrlFilter nonApplicationUrlFilter,
        IUrlFilter entryPointFilter,
        IUrlFilter applicationUrlFilter)
    {
      // Argument checks run first, in parameter order.
      ArgumentUtility.CheckNotNull ("documentHandleRegistry", documentHandleRegistry);
      ArgumentUtility.CheckNotNull ("subscriptionProvider", subscriptionProvider);
      ArgumentUtility.CheckNotNull ("nonApplicationUrlFilter", nonApplicationUrlFilter);
      ArgumentUtility.CheckNotNull ("entryPointFilter", entryPointFilter);
      ArgumentUtility.CheckNotNull ("applicationUrlFilter", applicationUrlFilter);

      // Note the argument order: the application filter is passed BEFORE the entry point filter,
      // which differs from this constructor's own parameter order.
      BrowserEvents = new WebBrowserEvents (this, nonApplicationUrlFilter, applicationUrlFilter, entryPointFilter);
      // NOTE(review): navigation is triggered before the fields below are assigned and before
      // DocumentCompleted is subscribed; presumably intentional for bootstrapping - confirm before reordering.
      Navigate (c_blankSite); // bootstrap

      _documentHandleRegistry = documentHandleRegistry;
      _subscriptionProvider = subscriptionProvider;
      _applicationUrlFiler = applicationUrlFilter;

      // The UI handler receives this browser instance.
      InstallCustomUIHandler (new DocumentHostUIHandler (this));

      // Event subscriptions come last, after all fields are in place.
      DocumentCompleted += OnDocumentCompleted;
      _documentHandleRegistry.DocumentRegistered += OnDocumentRegistered;
    }
예제 #14
0
        /// <summary>
        /// Loads the processor configuration from the <c>CONFIG_SECTION</c> node: the enabled flag,
        /// the optional extended data provider and, when filtering is switched on, a blacklist URL filter.
        /// </summary>
        /// <remarks>
        /// Loading stops early when the section is absent or the module is not enabled.
        /// Missing <c>useFilter</c> / <c>extendDataPrivoder</c> child nodes are treated as "not configured"
        /// instead of failing with a <see cref="NullReferenceException"/>.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// The object created for <c>extendDataPrivoder</c> does not implement <see cref="IGathererDataPrivoder"/>.
        /// </exception>
        protected override void LoadConfig()
        {
            var configNode = GathererSection.Instance.TryGetNode(CONFIG_SECTION);

            // Without a configuration node the module does not need to be enabled.
            if (configNode == null)
            {
                return;
            }

            // Determine whether this processor is enabled. The attribute is read once and compared
            // ordinally because it is a machine-readable token, not user-facing text.
            var enabledValue = configNode.Attributes["enabled"];
            if (enabledValue != null && enabledValue.Equals("true", StringComparison.OrdinalIgnoreCase))
            {
                isEnabled = true;
            }

            // A module that is not enabled needs no further loading.
            if (!isEnabled)
            {
                return;
            }

            // TryGetNode yields null for absent nodes (the blackList lookup below relies on that),
            // so every child node is checked before its attributes are read.
            var useFilterNode = configNode.TryGetNode("useFilter");
            string strUseFilter = useFilterNode != null ? useFilterNode.Attributes["value"] : null;

            var privoderNode = configNode.TryGetNode("extendDataPrivoder");
            string privoder = privoderNode != null ? privoderNode.Attributes["value"] : null;

            if (!String.IsNullOrEmpty(privoder))
            {
                // A null or wrongly typed instance both end up as null here and are rejected.
                var dataPrivoder = FastActivator.Create(privoder) as IGathererDataPrivoder;
                if (dataPrivoder == null)
                {
                    throw new ArgumentException("配置中的extendDataPrivoder对象没有实现IGathererDataPrivoder接口");
                }

                extendDataPrivoder = dataPrivoder;
            }

            // Determine whether the filter is used; an absent or unparsable value yields false.
            Boolean.TryParse(strUseFilter, out useFilter);
            if (useFilter)
            {
                var node = configNode.TryGetNode("blackList");
                if (node != null)
                {
                    blackList = node.Attributes["value"];
                    urlFilter = new BlacklistUrlFilter(blackList);
                }
            }
        }
예제 #15
0
 /// <summary>
 /// Creates the spider service from its three collaborators, storing each one in the
 /// field of the same name.
 /// </summary>
 /// <param name="htmlParser">Parser stored in <c>htmlParser</c>.</param>
 /// <param name="client">Client stored in <c>client</c>.</param>
 /// <param name="urlFilter">URL filter stored in <c>urlFilter</c>.</param>
 public SpiderService(IHtmlParser htmlParser, IClient client, IUrlFilter urlFilter)
 {
     // Field and parameter names coincide, so the fields are qualified with "this".
     this.urlFilter = urlFilter;
     this.client = client;
     this.htmlParser = htmlParser;
 }
예제 #16
0
 /// <summary>
 /// Creates the in-memory scheduler: starts the request receiver and the response distributer,
 /// then assigns a new <c>InMemoryUrlFilter</c> to <c>urlFilter</c>.
 /// </summary>
 public InMemoryScheduler()
 {
     // Both start calls are static and run before the filter is assigned.
     // NOTE(review): if either started component reads urlFilter from this instance,
     // the assignment below would need to come first - confirm.
     RequestReceiver.StartReceiver();
     ResponseDistributer.StartDistribuiter();
     urlFilter = new InMemoryUrlFilter();
 }
예제 #17
0
 /// <summary>
 /// Invokes <paramref name="func"/> once and stores its result as the current URL filter.
 /// </summary>
 /// <param name="func">Factory producing the URL filter; invoked immediately.</param>
 /// <exception cref="ArgumentNullException"><paramref name="func"/> is null.</exception>
 public static void SetUrlFilter(Func <IUrlFilter> func)
 {
     // Fail with a descriptive exception instead of a NullReferenceException on invocation.
     if (func == null)
     {
         throw new ArgumentNullException(nameof(func));
     }

     _urlFilter = func();
 }
예제 #18
0
 /// <summary>
 /// Stores <paramref name="urlFilter"/> as the current URL filter.
 /// </summary>
 /// <param name="urlFilter">Filter assigned to <c>_urlFilter</c> as given; no validation is performed.</param>
 public static void SetUrlFilter(IUrlFilter urlFilter) => _urlFilter = urlFilter;
        /// <summary>
        /// Loads the configuration for the manifest at <paramref name="manifestLocation"/> and populates
        /// the application info, the URL / add-in filters, the tab colour codes and the
        /// resource-filter switch from it.
        /// </summary>
        /// <param name="manifestLocation">Location of the manifest; also stored as <c>ManifestUri</c>.</param>
        public void LoadFrom(Uri manifestLocation)
        {
            ArgumentUtility.CheckNotNull("manifestLocation", manifestLocation);

            var provider = DesktopGapConfigurationProvider.Create(String.Empty, manifestLocation.ToString());
            var config = provider.GetConfiguration();

            var appSettings = config.Application;
            Application = new ApplicationInfo
            {
                Name = appSettings.Name,
                BaseUri = appSettings.GetBaseUri(),
                IconUri = appSettings.GetIconUri(),
                ManifestUri = manifestLocation,
                AllowCloseHomeTab = appSettings.AllowCloseHomeTab,
                AlwaysShowUrl = appSettings.AlwaysShowUrl,
                AlwaysOpenHomeUrl = appSettings.AlwaysOpenHomeUrl,
                HomeUri = appSettings.GetHomeUri()
            };

            var securitySettings = config.Security;
            var nonApplicationRules = securitySettings.NonApplicationUrlRules;
            var applicationRules = securitySettings.ApplicationUrlRules;
            var startupRules = securitySettings.StartupUrlRules;

            // The resource filter accepts both non-application and application URL rules.
            var combinedRules = nonApplicationRules.Union(applicationRules);
            var addInRules = securitySettings.AddInRules;

            ResourceFilter = new UrlFilter(combinedRules);
            NonApplicationUrlFilter = new UrlFilter(nonApplicationRules);
            AddInAllowedFilter = new UrlFilter(applicationRules);
            StartUpFilter = new UrlFilter(startupRules);
            AddInFilter = new AddInFilter(addInRules);

            var tabColors = appSettings.TabColors;
            HomeTabColorCode = tabColors.Home;
            NonApplicationTabColorCode = tabColors.NonApplication;
            ApplicationTabColorCode = tabColors.Application;

            EnableResourceFilter = securitySettings.EnableResourceFilter;
        }