/// <summary>
/// Builds a consumer that crawls domains using crawlers from <paramref name="crawlerFactory"/>
/// and runs the processors supplied by <paramref name="processorProvider"/>.
/// All four collaborators are required; a null for any of them is rejected up front.
/// </summary>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
public DomainConsumer(IWebCrawlerFactory crawlerFactory, IProcessorProvider processorProvider, ProcessorContext processorContext, CrawlDaddyConfig config)
{
    // Guard clauses: fail fast before any field is assigned.
    if (crawlerFactory == null)
    {
        throw new ArgumentNullException("crawlerFactory");
    }

    if (processorProvider == null)
    {
        throw new ArgumentNullException("processorProvider");
    }

    if (processorContext == null)
    {
        throw new ArgumentNullException("processorContext");
    }

    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    _crawlerFactory = crawlerFactory;
    _processorProvider = processorProvider;
    _processorContext = processorContext;
    _config = config;
}
/// <summary>
/// Creates the fallback StatsG logger, pointed at the host and port carried by
/// <paramref name="config"/>. Used when the caller did not inject a logger.
/// </summary>
/// <param name="config">Source of the StatsG host name and port.</param>
/// <returns>A freshly constructed <see cref="StatsGLogger"/>.</returns>
private IStatsGLogger GetDefaultStatsGLoggerInstance(CrawlDaddyConfig config)
{
    // The host/port locals in the original added nothing; pass the config values straight through.
    return new StatsGLogger(config.StatsGHostConfigName, config.StatsGPortConfigName);
}
/// <summary>
/// Builds an appender over the given config and StatsG logger.
/// Both arguments are optional: a null <paramref name="config"/> falls back to a default
/// <see cref="CrawlDaddyConfig"/>, and a null <paramref name="statsGLogger"/> falls back to
/// a logger built from that config.
/// </summary>
/// <param name="config">Configuration source; defaulted when null.</param>
/// <param name="statsGLogger">Logger to use; defaulted from the config when null.</param>
public StatsGLoggerAppender(CrawlDaddyConfig config, IStatsGLogger statsGLogger)
{
    _config = config ?? new CrawlDaddyConfig();
    // Field access style made consistent with the rest of the file: bare "_field",
    // no "this." prefix (matches the DomainConsumer/DomainProducer constructors).
    _statsGLogger = statsGLogger ?? GetDefaultStatsGLoggerInstance(_config);
    _productAuthId = _config.CrawlDaddyAuthIdForSEV;
    _productAppId = _config.CrawlDaddyAppIdForSEV;
}
/// <summary>
/// Logs a single StatsG data point through the shared singleton appender.
/// No-ops when StatsG is disabled in <paramref name="configuration"/>.
/// </summary>
/// <param name="statsGTypeName">Name of the StatsG metric to record.</param>
/// <param name="configuration">Config controlling whether logging happens; must not be null.</param>
/// <param name="value">Metric value; defaults to 1 (a simple counter increment).</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="configuration"/> is null.</exception>
public static void LogItem(string statsGTypeName, CrawlDaddyConfig configuration, float value = 1)
{
    // Guard added: the original dereferenced configuration unchecked and would have
    // surfaced a NullReferenceException instead of a clear argument error. Guard style
    // matches the other entry points in this file.
    if (configuration == null) { throw new ArgumentNullException("configuration"); }
    if (configuration.StatsGEnabled)
    {
        StatsGLoggerAppender logger = GetSingleton(configuration);
        logger.LogItem(statsGTypeName, value);
    }
}
// Verifies that a freshly constructed CrawlDaddyConfig has all numeric settings
// defaulted to zero.
// NOTE(review): the test name says "IsChildOfAbotRateLimiter" but nothing here checks
// inheritance — the body only asserts default property values. Looks like a stale or
// copy-pasted name; confirm intent and consider renaming in the test project.
public void Constructor_IsChildOfAbotRateLimiter() { CrawlDaddyConfig uut = new CrawlDaddyConfig();
    // Every tunable should start at 0 until explicitly configured.
    Assert.AreEqual(0, uut.MinTimeBetweenDomainRetrievalRequestsInSecs); Assert.AreEqual(0, uut.MaxDomainRetrievalCount); Assert.AreEqual(0, uut.MaxConcurrentCrawls); Assert.AreEqual(0, uut.MaxPageProcessorTimeInMilliSecs); Assert.AreEqual(0, uut.MaxDomainProcessorTimeInMilliSecs); }
/// <summary>
/// Per-test fixture setup: fresh producer/consumer mocks, a config capped at
/// five concurrent crawls, and the unit under test built from all three.
/// </summary>
public void SetUp()
{
    // Config first, then collaborators — all three assignments are independent.
    _dummyConfig = new CrawlDaddyConfig { MaxConcurrentCrawls = 5 };
    _fakeDomainProducer = new Mock<IDomainProducer>();
    _fakeDomainConsumer = new Mock<IDomainConsumer>();

    _uut = GetInstance(_fakeDomainProducer.Object, _fakeDomainConsumer.Object, _dummyConfig);
}
/// <summary>
/// Per-test fixture setup for DomainProducer: fresh provider/consumer/rate-limiter
/// mocks plus a config, then the unit under test.
/// NOTE: the consumer mock is created but not handed to the producer — kept for
/// parity with the original fixture.
/// </summary>
public void SetUp()
{
    _dummyConfig = new CrawlDaddyConfig
    {
        MaxConcurrentCrawls = 11,
        MaxDomainRetrievalCount = 10
    };
    _fakeDomainProvider = new Mock<IDomainProvider>();
    _fakeDomainConsumer = new Mock<IDomainConsumer>();
    _fakeRateLimiter = new Mock<IRateLimiter>();

    _uut = new DomainProducer(_fakeDomainProvider.Object, _fakeRateLimiter.Object, _dummyConfig);
}
// Lazily creates and returns the process-wide StatsGLoggerAppender using
// double-checked locking: the outer null check skips the lock on the hot path,
// the inner one guards against a racing thread having already constructed it.
// NOTE(review): the config argument is only honored on the very first call — later
// callers get the singleton built from whichever config arrived first. Confirm that
// is acceptable to all call sites.
// NOTE(review): for double-checked locking to be fully safe the `singleton` field
// should be declared volatile — its declaration is outside this view; verify.
private static StatsGLoggerAppender GetSingleton(CrawlDaddyConfig config) { // if this is the first time it is being called, we set up the private static members
    if (singleton == null) { lock (singletonLock) { if (singleton == null) { singleton = new StatsGLoggerAppender(config); } } } return(singleton); }
/// <summary>
/// Wires a producer and consumer pair into a bulk crawler under the supplied
/// configuration. Every collaborator is mandatory.
/// </summary>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
public ProducerConsumerBulkCrawler(IDomainProducer domainProducer, IDomainConsumer domainConsumer, CrawlDaddyConfig config)
{
    // Reject nulls before touching any state.
    if (domainProducer == null)
    {
        throw new ArgumentNullException("domainProducer");
    }

    if (domainConsumer == null)
    {
        throw new ArgumentNullException("domainConsumer");
    }

    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    _domainProducer = domainProducer;
    _domainConsumer = domainConsumer;
    _config = config;
}
/// <summary>
/// Builds a producer that pulls domains from <paramref name="domainProvider"/>,
/// throttled by <paramref name="domainRetrievalRateLimiter"/>. All arguments required.
/// </summary>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
public DomainProducer(IDomainProvider domainProvider, IRateLimiter domainRetrievalRateLimiter, CrawlDaddyConfig config)
{
    // Fail fast on missing collaborators.
    if (domainProvider == null)
    {
        throw new ArgumentNullException("domainProvider");
    }

    if (domainRetrievalRateLimiter == null)
    {
        throw new ArgumentNullException("domainRetrievalRateLimiter");
    }

    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    _domainProvider = domainProvider;
    _domainRetrievalRateLimiter = domainRetrievalRateLimiter;
    _config = config;
}
/// <summary>
/// Per-test fixture setup for DomainConsumer: mocks for the crawler factory,
/// crawler, processor provider and three processors; a processor context with
/// mocked persistence; a fully-populated config; then the unit under test.
/// </summary>
public void SetUp()
{
    // --- Crawler-side mocks -------------------------------------------------
    _fakeWebCrawlerFactory = new Mock<IWebCrawlerFactory>();
    _fakeWebCrawler = new Mock<IWebCrawler>();

    // --- Processor mocks and the list handed out by the provider ------------
    _fakeProcessorProvider = new Mock<IProcessorProvider>();
    _fakeProcessor1 = new Mock<ICrawlProcessor>();
    _fakeProcessor2 = new Mock<ICrawlProcessor>();
    _fakeProcessor3 = new Mock<ICrawlProcessor>();
    _dummyCrawlProcessors = new List<ICrawlProcessor>
    {
        _fakeProcessor1.Object,
        _fakeProcessor2.Object,
        _fakeProcessor3.Object
    };

    // --- Context, config, cancellation --------------------------------------
    _dummyProcessorContext = new ProcessorContext
    {
        PrimaryPersistenceProvider = new Mock<IPersistenceProvider>().Object,
        BackupPersistenceProvider = new Mock<IPersistenceProvider>().Object
    };
    _dummyConfig = new CrawlDaddyConfig
    {
        MaxConcurrentCrawls = 11,
        MaxDomainProcessorTimeInMilliSecs = 2000,
        MaxPageProcessorTimeInMilliSecs = 2000,
        MaxDomainRetrievalCount = 10,
        MimeTypesToProcess = new[] { "text/html" },
        HttpStatusesToProcess = new[] { "200" }
    };
    _dummyCancellationToken = new CancellationTokenSource();

    // --- Mock behavior wiring (after the mocks and data they return exist) ---
    _fakeProcessorProvider.Setup(f => f.GetProcessors()).Returns(_dummyCrawlProcessors);
    _fakeWebCrawler.Setup(f => f.CrawlBag).Returns(new DummyCrawlBag());

    _uut = new DomainConsumer(_fakeWebCrawlerFactory.Object, _fakeProcessorProvider.Object, _dummyProcessorContext, _dummyConfig);
}
/// <summary>
/// Rate limiter allowing a single occurrence per window, where the window length
/// comes from the configured minimum seconds between domain-retrieval requests.
/// </summary>
/// <param name="config">Supplies MinTimeBetweenDomainRetrievalRequestsInSecs for the base window.</param>
public CustomRateLimiter(CrawlDaddyConfig config) : base(1, TimeSpan.FromSeconds(config.MinTimeBetweenDomainRetrievalRequestsInSecs)) { }
/// <summary>
/// Per-test fixture setup: a default config and a mocked StatsG logger.
/// </summary>
public void SetUp()
{
    // The two assignments are independent; order is immaterial.
    _config = new CrawlDaddyConfig();
    _fakeStatsGLogger = new Mock<IStatsGLogger>();
}
/// <summary>
/// Factory override: composes the given producer/consumer pair and config into
/// a <see cref="ProducerConsumerBulkCrawler"/>.
/// </summary>
/// <returns>A new producer/consumer bulk crawler.</returns>
public override IBulkCrawler GetInstance(IDomainProducer producer, IDomainConsumer consumer, CrawlDaddyConfig config)
{
    var crawler = new ProducerConsumerBulkCrawler(producer, consumer, config);
    return crawler;
}
/// <summary>
/// Creates a bulk crawler from a domain producer, a domain consumer, and a config.
/// Implementations decide which concrete <see cref="IBulkCrawler"/> to build.
/// </summary>
/// <param name="producer">Supplies domains to crawl.</param>
/// <param name="consumer">Processes the crawled domains.</param>
/// <param name="config">Crawl configuration.</param>
/// <returns>A bulk crawler wired from the three collaborators.</returns>
public abstract IBulkCrawler GetInstance(IDomainProducer producer, IDomainConsumer consumer, CrawlDaddyConfig config);
/// <summary>
/// Convenience constructor: delegates to the two-argument constructor with a null
/// logger, which causes the default StatsG logger to be built from the config.
/// </summary>
/// <param name="config">Configuration source; defaulted by the delegated constructor when null.</param>
public StatsGLoggerAppender(CrawlDaddyConfig config) : this(config, null) { }