コード例 #1
0
        /// <summary>
        /// Creates a consumer from its four collaborators, all of which are required.
        /// </summary>
        /// <param name="crawlerFactory">Factory stored for later use by this consumer.</param>
        /// <param name="processorProvider">Provider stored for later use by this consumer.</param>
        /// <param name="processorContext">Context stored for later use by this consumer.</param>
        /// <param name="config">Configuration stored for later use by this consumer.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown for the first argument, in declaration order, that is null.
        /// </exception>
        public DomainConsumer(IWebCrawlerFactory crawlerFactory, IProcessorProvider processorProvider, ProcessorContext processorContext, CrawlDaddyConfig config)
        {
            // Validate every dependency before touching any field.
            if (crawlerFactory == null)
            {
                throw new ArgumentNullException("crawlerFactory");
            }
            if (processorProvider == null)
            {
                throw new ArgumentNullException("processorProvider");
            }
            if (processorContext == null)
            {
                throw new ArgumentNullException("processorContext");
            }
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            // All arguments are known to be non-null from here on.
            _crawlerFactory = crawlerFactory;
            _processorProvider = processorProvider;
            _processorContext = processorContext;
            _config = config;
        }
コード例 #2
0
        /// <summary>
        /// Builds a new <see cref="StatsGLogger"/> from the host and port values held in
        /// <paramref name="config"/>.
        /// </summary>
        /// <param name="config">
        /// Configuration whose <c>StatsGHostConfigName</c> and <c>StatsGPortConfigName</c>
        /// values are passed to the logger constructor. It is dereferenced without a null check.
        /// </param>
        /// <returns>The newly constructed logger.</returns>
        private IStatsGLogger GetDefaultStatsGLoggerInstance(CrawlDaddyConfig config)
        {
            // NOTE(review): despite the "ConfigName" suffix, both properties are used here
            // directly as the host string and the port number.
            string host = config.StatsGHostConfigName;
            int portNumber = config.StatsGPortConfigName;

            return new StatsGLogger(host, portNumber);
        }
コード例 #3
0
        /// <summary>
        /// Creates an appender, substituting a default for any argument that is null.
        /// </summary>
        /// <param name="config">
        /// Configuration to use. When null, a new <see cref="CrawlDaddyConfig"/> is created instead.
        /// </param>
        /// <param name="statsGLogger">
        /// Logger to use. When null, one is obtained from
        /// <c>GetDefaultStatsGLoggerInstance</c> using the effective configuration.
        /// </param>
        public StatsGLoggerAppender(CrawlDaddyConfig config, IStatsGLogger statsGLogger)
        {
            // Resolve the configuration first: the fallback logger and both ids are read from it.
            CrawlDaddyConfig effectiveConfig = config ?? new CrawlDaddyConfig();
            _config = effectiveConfig;

            _statsGLogger = statsGLogger ?? GetDefaultStatsGLoggerInstance(effectiveConfig);
            _productAuthId = effectiveConfig.CrawlDaddyAuthIdForSEV;
            _productAppId = effectiveConfig.CrawlDaddyAppIdForSEV;
        }
コード例 #4
0
        /// <summary>
        /// Logs a value for the given item through the shared appender, but only when
        /// <c>StatsGEnabled</c> is set on <paramref name="configuration"/>.
        /// </summary>
        /// <param name="statsGTypeName">Name forwarded to the appender's <c>LogItem</c>.</param>
        /// <param name="configuration">
        /// Configuration consulted for <c>StatsGEnabled</c> and passed to <c>GetSingleton</c>.
        /// It is dereferenced without a null check.
        /// </param>
        /// <param name="value">Value forwarded to the appender; defaults to 1.</param>
        public static void LogItem(string statsGTypeName, CrawlDaddyConfig configuration, float value = 1)
        {
            // Nothing to do when stats logging is switched off.
            if (!configuration.StatsGEnabled)
            {
                return;
            }

            GetSingleton(configuration).LogItem(statsGTypeName, value);
        }
コード例 #5
0
        /// <summary>
        /// Verifies that a freshly constructed <see cref="CrawlDaddyConfig"/> reports zero
        /// for each of its numeric limits checked below.
        /// </summary>
        // NOTE(review): the method name refers to a rate limiter, yet the body only checks
        // default property values of CrawlDaddyConfig - consider renaming the test.
        public void Constructor_IsChildOfAbotRateLimiter()
        {
            var defaultConfig = new CrawlDaddyConfig();

            Assert.AreEqual(0, defaultConfig.MinTimeBetweenDomainRetrievalRequestsInSecs);
            Assert.AreEqual(0, defaultConfig.MaxDomainRetrievalCount);
            Assert.AreEqual(0, defaultConfig.MaxConcurrentCrawls);
            Assert.AreEqual(0, defaultConfig.MaxPageProcessorTimeInMilliSecs);
            Assert.AreEqual(0, defaultConfig.MaxDomainProcessorTimeInMilliSecs);
        }
コード例 #6
0
        /// <summary>
        /// Prepares the fixture: fresh producer and consumer mocks, a configuration with
        /// <c>MaxConcurrentCrawls</c> set to 5, and a <c>_uut</c> built through <c>GetInstance</c>.
        /// </summary>
        public void SetUp()
        {
            var producerMock = new Mock<IDomainProducer>();
            var consumerMock = new Mock<IDomainConsumer>();
            var config = new CrawlDaddyConfig { MaxConcurrentCrawls = 5 };

            _fakeDomainProducer = producerMock;
            _fakeDomainConsumer = consumerMock;
            _dummyConfig = config;

            // The concrete crawler type is decided by GetInstance.
            _uut = GetInstance(producerMock.Object, consumerMock.Object, config);
        }
コード例 #7
0
        /// <summary>
        /// Prepares the fixture: fresh provider, consumer and rate-limiter mocks, a configuration
        /// with <c>MaxConcurrentCrawls</c> = 11 and <c>MaxDomainRetrievalCount</c> = 10, and a new
        /// <see cref="DomainProducer"/> assigned to <c>_uut</c>.
        /// </summary>
        public void SetUp()
        {
            var providerMock = new Mock<IDomainProvider>();
            var consumerMock = new Mock<IDomainConsumer>();
            var rateLimiterMock = new Mock<IRateLimiter>();
            var config = new CrawlDaddyConfig
            {
                MaxConcurrentCrawls = 11,
                MaxDomainRetrievalCount = 10
            };

            _fakeDomainProvider = providerMock;
            _fakeDomainConsumer = consumerMock;
            _fakeRateLimiter = rateLimiterMock;
            _dummyConfig = config;

            // The consumer mock is created for the tests but is not passed to the producer.
            _uut = new DomainProducer(providerMock.Object, rateLimiterMock.Object, config);
        }
コード例 #8
0
        /// <summary>
        /// Returns the shared <see cref="StatsGLoggerAppender"/>, creating it on first use.
        /// </summary>
        /// <param name="config">
        /// Configuration handed to the appender constructor when the instance is created.
        /// Once an instance exists this argument is not used.
        /// </param>
        /// <returns>The shared appender instance.</returns>
        private static StatsGLoggerAppender GetSingleton(CrawlDaddyConfig config)
        {
            // Fast path: the instance already exists, so no lock is taken.
            if (singleton != null)
            {
                return singleton;
            }

            lock (singletonLock)
            {
                // Re-check under the lock: another thread may have created it in the meantime.
                if (singleton == null)
                {
                    singleton = new StatsGLoggerAppender(config);
                }

                return singleton;
            }
        }
コード例 #9
0
        /// <summary>
        /// Creates a bulk crawler from a producer, a consumer and a configuration,
        /// all of which are required.
        /// </summary>
        /// <param name="domainProducer">Producer stored for later use by this crawler.</param>
        /// <param name="domainConsumer">Consumer stored for later use by this crawler.</param>
        /// <param name="config">Configuration stored for later use by this crawler.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown for the first argument, in declaration order, that is null.
        /// </exception>
        public ProducerConsumerBulkCrawler(IDomainProducer domainProducer, IDomainConsumer domainConsumer, CrawlDaddyConfig config)
        {
            // Reject missing dependencies up front.
            if (domainProducer == null)
            {
                throw new ArgumentNullException("domainProducer");
            }
            if (domainConsumer == null)
            {
                throw new ArgumentNullException("domainConsumer");
            }
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            _domainProducer = domainProducer;
            _domainConsumer = domainConsumer;
            _config = config;
        }
コード例 #10
0
        /// <summary>
        /// Creates a producer from a domain provider, a rate limiter and a configuration,
        /// all of which are required.
        /// </summary>
        /// <param name="domainProvider">Provider stored for later use by this producer.</param>
        /// <param name="domainRetrievalRateLimiter">Rate limiter stored for later use by this producer.</param>
        /// <param name="config">Configuration stored for later use by this producer.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown for the first argument, in declaration order, that is null.
        /// </exception>
        public DomainProducer(IDomainProvider domainProvider, IRateLimiter domainRetrievalRateLimiter, CrawlDaddyConfig config)
        {
            // Reject missing dependencies up front.
            if (domainProvider == null)
            {
                throw new ArgumentNullException("domainProvider");
            }
            if (domainRetrievalRateLimiter == null)
            {
                throw new ArgumentNullException("domainRetrievalRateLimiter");
            }
            if (config == null)
            {
                throw new ArgumentNullException("config");
            }

            _domainProvider = domainProvider;
            _domainRetrievalRateLimiter = domainRetrievalRateLimiter;
            _config = config;
        }
コード例 #11
0
        /// <summary>
        /// Prepares the fixture for the <see cref="DomainConsumer"/> tests: creates the mocks,
        /// the dummy processor list, processor context, configuration and cancellation source,
        /// wires the two mock setups, and builds <c>_uut</c>.
        /// </summary>
        public void SetUp()
        {
            // Mocks for the collaborators.
            _fakeWebCrawlerFactory = new Mock<IWebCrawlerFactory>();
            _fakeWebCrawler = new Mock<IWebCrawler>();
            _fakeProcessorProvider = new Mock<IProcessorProvider>();

            // Three processors that the provider mock hands back.
            _fakeProcessor1 = new Mock<ICrawlProcessor>();
            _fakeProcessor2 = new Mock<ICrawlProcessor>();
            _fakeProcessor3 = new Mock<ICrawlProcessor>();
            _dummyCrawlProcessors = new List<ICrawlProcessor>
            {
                _fakeProcessor1.Object,
                _fakeProcessor2.Object,
                _fakeProcessor3.Object
            };

            // Context with independent primary and backup persistence mocks.
            var primaryPersistence = new Mock<IPersistenceProvider>();
            var backupPersistence = new Mock<IPersistenceProvider>();
            _dummyProcessorContext = new ProcessorContext
            {
                PrimaryPersistenceProvider = primaryPersistence.Object,
                BackupPersistenceProvider = backupPersistence.Object
            };

            _dummyConfig = new CrawlDaddyConfig
            {
                MaxConcurrentCrawls = 11,
                MaxDomainProcessorTimeInMilliSecs = 2000,
                MaxPageProcessorTimeInMilliSecs = 2000,
                MaxDomainRetrievalCount = 10,
                MimeTypesToProcess = new[] { "text/html" },
                HttpStatusesToProcess = new[] { "200" }
            };
            _dummyCancellationToken = new CancellationTokenSource();

            // Canned responses used by the tests.
            _fakeProcessorProvider.Setup(f => f.GetProcessors()).Returns(_dummyCrawlProcessors);
            _fakeWebCrawler.Setup(f => f.CrawlBag).Returns(new DummyCrawlBag());

            _uut = new DomainConsumer(
                _fakeWebCrawlerFactory.Object,
                _fakeProcessorProvider.Object,
                _dummyProcessorContext,
                _dummyConfig);
        }
コード例 #12
0
 /// <summary>
 /// Creates a rate limiter by passing the base constructor the value 1 and a
 /// <see cref="TimeSpan"/> of <c>MinTimeBetweenDomainRetrievalRequestsInSecs</c> seconds.
 /// </summary>
 /// <param name="config">
 /// Configuration supplying the interval in seconds. It is dereferenced without a null check.
 /// </param>
 // NOTE(review): presumably the base arguments mean "one occurrence per interval" -
 // confirm against the base class, which is not visible here.
 public CustomRateLimiter(CrawlDaddyConfig config)
     : base(
         1,
         TimeSpan.FromSeconds(config.MinTimeBetweenDomainRetrievalRequestsInSecs))
 {
 }
コード例 #13
0
 /// <summary>
 /// Prepares the fixture with a default <see cref="CrawlDaddyConfig"/> and a fresh
 /// <c>IStatsGLogger</c> mock.
 /// </summary>
 public void SetUp()
 {
     // The two objects are independent of each other.
     _config = new CrawlDaddyConfig();
     _fakeStatsGLogger = new Mock<IStatsGLogger>();
 }
コード例 #14
0
 /// <summary>
 /// Supplies a <see cref="ProducerConsumerBulkCrawler"/> built from the given arguments.
 /// </summary>
 /// <param name="producer">Producer passed to the crawler constructor.</param>
 /// <param name="consumer">Consumer passed to the crawler constructor.</param>
 /// <param name="config">Configuration passed to the crawler constructor.</param>
 /// <returns>The newly constructed crawler.</returns>
 public override IBulkCrawler GetInstance(IDomainProducer producer, IDomainConsumer consumer, CrawlDaddyConfig config)
 {
     IBulkCrawler crawler = new ProducerConsumerBulkCrawler(producer, consumer, config);

     return crawler;
 }
コード例 #15
0
 /// <summary>
 /// Implemented by derived classes to supply the <see cref="IBulkCrawler"/> they work with.
 /// </summary>
 /// <param name="producer">Domain producer made available to the implementation.</param>
 /// <param name="consumer">Domain consumer made available to the implementation.</param>
 /// <param name="config">Configuration made available to the implementation.</param>
 /// <returns>An <see cref="IBulkCrawler"/> chosen by the derived class.</returns>
 public abstract IBulkCrawler GetInstance(IDomainProducer producer, IDomainConsumer consumer, CrawlDaddyConfig config);
コード例 #16
0
 /// <summary>
 /// Creates an appender from a configuration only, delegating to the two-argument
 /// constructor with a null logger.
 /// </summary>
 /// <param name="config">Configuration forwarded unchanged to the two-argument constructor.</param>
 public StatsGLoggerAppender(CrawlDaddyConfig config)
     : this(config, statsGLogger: null)
 {
 }