コード例 #1
0
        // Converts the configuration section held in _config and checks that the
        // values exposed by the returned CrawlConfiguration equal the values on
        // the section they are compared against.
        //
        // Assertions use the (expected, actual) argument order: the section value
        // is the expectation and the converted value is what is under test, so a
        // failure message reports the two the right way round.
        public void Convert_CovertsFromSectionObjectToDtoObject()
        {
            CrawlConfiguration result = _config.Convert();

            Assert.IsNotNull(result);

            // Values compared against the CrawlBehavior element.
            Assert.AreEqual(_config.CrawlBehavior.CrawlTimeoutSeconds, result.CrawlTimeoutSeconds);
            Assert.AreEqual(_config.CrawlBehavior.DownloadableContentTypes, result.DownloadableContentTypes);
            Assert.AreEqual(_config.CrawlBehavior.IsUriRecrawlingEnabled, result.IsUriRecrawlingEnabled);
            Assert.AreEqual(_config.CrawlBehavior.MaxConcurrentThreads, result.MaxConcurrentThreads);
            Assert.AreEqual(_config.CrawlBehavior.MaxPagesToCrawl, result.MaxPagesToCrawl);
            Assert.AreEqual(_config.CrawlBehavior.MaxPagesToCrawlPerDomain, result.MaxPagesToCrawlPerDomain);
            Assert.AreEqual(_config.CrawlBehavior.MaxPageSizeInBytes, result.MaxPageSizeInBytes);
            Assert.AreEqual(_config.CrawlBehavior.UserAgentString, result.UserAgentString);
            Assert.AreEqual(_config.CrawlBehavior.IsExternalPageCrawlingEnabled, result.IsExternalPageCrawlingEnabled);
            Assert.AreEqual(_config.CrawlBehavior.IsExternalPageLinksCrawlingEnabled, result.IsExternalPageLinksCrawlingEnabled);
            Assert.AreEqual(_config.CrawlBehavior.HttpServicePointConnectionLimit, result.HttpServicePointConnectionLimit);
            Assert.AreEqual(_config.CrawlBehavior.HttpRequestTimeoutInSeconds, result.HttpRequestTimeoutInSeconds);
            Assert.AreEqual(_config.CrawlBehavior.HttpRequestMaxAutoRedirects, result.HttpRequestMaxAutoRedirects);

            // NOTE(review): the next two assertions inspect only the section itself,
            // not the converted result - confirm whether result should be checked too.
            Assert.AreEqual(true, _config.CrawlBehavior.IsHttpRequestAutoRedirectsEnabled);
            Assert.AreEqual(true, _config.CrawlBehavior.IsHttpRequestAutomaticDecompressionEnabled);

            Assert.AreEqual(_config.CrawlBehavior.MinAvailableMemoryRequiredInMb, result.MinAvailableMemoryRequiredInMb);
            Assert.AreEqual(_config.CrawlBehavior.MaxMemoryUsageInMb, result.MaxMemoryUsageInMb);
            Assert.AreEqual(_config.CrawlBehavior.MaxMemoryUsageCacheTimeInSeconds, result.MaxMemoryUsageCacheTimeInSeconds);
            Assert.AreEqual(_config.CrawlBehavior.MaxCrawlDepth, result.MaxCrawlDepth);
            Assert.AreEqual(_config.CrawlBehavior.IsForcedLinkParsingEnabled, result.IsForcedLinkParsingEnabled);

            // Values compared against the Politeness element.
            Assert.AreEqual(_config.Politeness.IsRespectRobotsDotTextEnabled, result.IsRespectRobotsDotTextEnabled);
            Assert.AreEqual(_config.Politeness.RobotsDotTextUserAgentString, result.RobotsDotTextUserAgentString);
            Assert.AreEqual(_config.Politeness.MinCrawlDelayPerDomainMilliSeconds, result.MinCrawlDelayPerDomainMilliSeconds);
            Assert.AreEqual(_config.Politeness.MaxRobotsDotTextCrawlDelayInSeconds, result.MaxRobotsDotTextCrawlDelayInSeconds);

            // Extension values: looked up by key on the result, by position on the section.
            Assert.IsNotNull(result.ConfigurationExtensions);
            Assert.AreEqual(_config.ExtensionValues[0].Value, result.ConfigurationExtensions["key1"]);
            Assert.AreEqual(_config.ExtensionValues[1].Value, result.ConfigurationExtensions["key2"]);
        }
コード例 #2
0
        /// <summary>
        /// Loads the abot configuration section via
        /// <c>AbotConfigurationSectionHandler.LoadFromXml()</c> and converts it
        /// into a <c>CrawlConfiguration</c>.
        /// </summary>
        /// <returns>The result of calling <c>Convert()</c> on the loaded section.</returns>
        /// <exception cref="ApplicationException">
        /// Thrown when <c>LoadFromXml()</c> returns null.
        /// </exception>
        private CrawlConfiguration GetCrawlConfigurationFromConfigFile()
        {
            AbotConfigurationSectionHandler section = AbotConfigurationSectionHandler.LoadFromXml();
            if (section != null)
            {
                return section.Convert();
            }

            // No section was loaded, so there is nothing to convert.
            throw new ApplicationException("Config section \"abot\" was NOT found");
        }
コード例 #3
0
ファイル: WebCrawler.cs プロジェクト: yhtsnda/abot
        /// <summary>
        /// Loads the abot configuration section via
        /// <c>AbotConfigurationSectionHandler.LoadFromXml()</c>, logs that it was
        /// found, and converts it into a <c>CrawlConfiguration</c>.
        /// </summary>
        /// <returns>The result of calling <c>Convert()</c> on the loaded section.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>LoadFromXml()</c> returns null.
        /// </exception>
        private CrawlConfiguration GetCrawlConfigurationFromConfigFile()
        {
            AbotConfigurationSectionHandler section = AbotConfigurationSectionHandler.LoadFromXml();
            if (section != null)
            {
                _logger.DebugFormat("abot config section was found");
                return section.Convert();
            }

            // No section was loaded, so there is nothing to convert.
            throw new InvalidOperationException("abot config section was NOT found");
        }
コード例 #4
0
        // Converts the configuration section held in _uut and checks that the
        // values exposed by the returned CrawlConfiguration equal the values on
        // the section they are compared against.
        //
        // Assertions use the (expected, actual) argument order: the section value
        // (or literal) is the expectation and the converted value is what is under
        // test, so a failure message reports the two the right way round.
        public void Convert_CovertsFromSectionObjectToDtoObject()
        {
            CrawlConfiguration result = _uut.Convert();

            Assert.IsNotNull(result);

            // Values compared against the CrawlBehavior element.
            Assert.AreEqual(_uut.CrawlBehavior.CrawlTimeoutSeconds, result.CrawlTimeoutSeconds);
            Assert.AreEqual(_uut.CrawlBehavior.DownloadableContentTypes, result.DownloadableContentTypes);
            Assert.AreEqual(_uut.CrawlBehavior.IsUriRecrawlingEnabled, result.IsUriRecrawlingEnabled);
            Assert.AreEqual(_uut.CrawlBehavior.MaxConcurrentThreads, result.MaxConcurrentThreads);
            Assert.AreEqual(_uut.CrawlBehavior.MaxPagesToCrawl, result.MaxPagesToCrawl);
            Assert.AreEqual(_uut.CrawlBehavior.MaxPagesToCrawlPerDomain, result.MaxPagesToCrawlPerDomain);
            Assert.AreEqual(_uut.CrawlBehavior.MaxPageSizeInBytes, result.MaxPageSizeInBytes);
            Assert.AreEqual(_uut.CrawlBehavior.UserAgentString, result.UserAgentString);
            Assert.AreEqual(HttpProtocolVersion.Version10, result.HttpProtocolVersion);
            Assert.AreEqual(_uut.CrawlBehavior.IsExternalPageCrawlingEnabled, result.IsExternalPageCrawlingEnabled);
            Assert.AreEqual(_uut.CrawlBehavior.IsExternalPageLinksCrawlingEnabled, result.IsExternalPageLinksCrawlingEnabled);
            Assert.AreEqual(_uut.CrawlBehavior.HttpServicePointConnectionLimit, result.HttpServicePointConnectionLimit);
            Assert.AreEqual(_uut.CrawlBehavior.HttpRequestTimeoutInSeconds, result.HttpRequestTimeoutInSeconds);
            Assert.AreEqual(_uut.CrawlBehavior.HttpRequestMaxAutoRedirects, result.HttpRequestMaxAutoRedirects);

            // NOTE(review): the next five assertions inspect only the section itself,
            // not the converted result - confirm whether result should be checked too.
            Assert.AreEqual(true, _uut.CrawlBehavior.IsHttpRequestAutoRedirectsEnabled);
            Assert.AreEqual(true, _uut.CrawlBehavior.IsHttpRequestAutomaticDecompressionEnabled);
            Assert.AreEqual(true, _uut.CrawlBehavior.IsSendingCookiesEnabled);
            Assert.AreEqual(false, _uut.CrawlBehavior.IsSslCertificateValidationEnabled);
            Assert.AreEqual(true, _uut.CrawlBehavior.IsRespectUrlNamedAnchorOrHashbangEnabled);

            Assert.AreEqual(_uut.CrawlBehavior.MinAvailableMemoryRequiredInMb, result.MinAvailableMemoryRequiredInMb);
            Assert.AreEqual(_uut.CrawlBehavior.MaxMemoryUsageInMb, result.MaxMemoryUsageInMb);
            Assert.AreEqual(_uut.CrawlBehavior.MaxMemoryUsageCacheTimeInSeconds, result.MaxMemoryUsageCacheTimeInSeconds);
            Assert.AreEqual(_uut.CrawlBehavior.MaxCrawlDepth, result.MaxCrawlDepth);
            Assert.AreEqual(_uut.CrawlBehavior.MaxLinksPerPage, result.MaxLinksPerPage);
            Assert.AreEqual(_uut.CrawlBehavior.IsForcedLinkParsingEnabled, result.IsForcedLinkParsingEnabled);
            Assert.AreEqual(_uut.CrawlBehavior.MaxRetryCount, result.MaxRetryCount);
            Assert.AreEqual(_uut.CrawlBehavior.MinRetryDelayInMilliseconds, result.MinRetryDelayInMilliseconds);

            // Values compared against the Politeness element.
            Assert.AreEqual(_uut.Politeness.IsRespectRobotsDotTextEnabled, result.IsRespectRobotsDotTextEnabled);
            Assert.AreEqual(_uut.Politeness.IsRespectMetaRobotsNoFollowEnabled, result.IsRespectMetaRobotsNoFollowEnabled);
            Assert.AreEqual(_uut.Politeness.IsRespectHttpXRobotsTagHeaderNoFollowEnabled, result.IsRespectHttpXRobotsTagHeaderNoFollowEnabled);
            Assert.AreEqual(_uut.Politeness.IsRespectAnchorRelNoFollowEnabled, result.IsRespectAnchorRelNoFollowEnabled);

            Assert.AreEqual(_uut.Politeness.IsIgnoreRobotsDotTextIfRootDisallowedEnabled, result.IsIgnoreRobotsDotTextIfRootDisallowedEnabled);
            Assert.AreEqual(_uut.Politeness.RobotsDotTextUserAgentString, result.RobotsDotTextUserAgentString);

            Assert.AreEqual(_uut.Politeness.MinCrawlDelayPerDomainMilliSeconds, result.MinCrawlDelayPerDomainMilliSeconds);
            Assert.AreEqual(_uut.Politeness.MaxRobotsDotTextCrawlDelayInSeconds, result.MaxRobotsDotTextCrawlDelayInSeconds);

            // Values compared against the Authorization element.
            Assert.AreEqual(_uut.Authorization.IsAlwaysLogin, result.IsAlwaysLogin);
            Assert.AreEqual(_uut.Authorization.LoginPassword, result.LoginPassword);
            Assert.AreEqual(_uut.Authorization.LoginUser, result.LoginUser);
            Assert.AreEqual(_uut.Authorization.UseDefaultCredentials, result.UseDefaultCredentials);

            // Extension values: looked up by key on the result, by position on the section.
            Assert.IsNotNull(result.ConfigurationExtensions);
            Assert.AreEqual(_uut.ExtensionValues[0].Value, result.ConfigurationExtensions["key1"]);
            Assert.AreEqual(_uut.ExtensionValues[1].Value, result.ConfigurationExtensions["key2"]);
        }
コード例 #5
0
ファイル: WebCrawler.cs プロジェクト: dagstuan/abot
        /// <summary>
        /// Attempts to load the abot configuration section via
        /// <c>AbotConfigurationSectionHandler.LoadFromXml()</c> and convert it into
        /// a <c>CrawlConfiguration</c>. Loading is best-effort: a failure to load
        /// is logged and treated the same as a missing section.
        /// </summary>
        /// <returns>
        /// The result of calling <c>Convert()</c> on the loaded section, or null
        /// when the section is absent or loading threw an exception.
        /// </returns>
        private CrawlConfiguration GetCrawlConfigurationFromConfigFile()
        {
            AbotConfigurationSectionHandler configFromFile = null;

            try
            {
                configFromFile = AbotConfigurationSectionHandler.LoadFromXml();
            }
            catch (System.Exception e)
            {
                // Keep the best-effort contract (the caller still gets null), but
                // record why loading failed instead of discarding the exception.
                _logger.DebugFormat("abot config section could not be loaded: {0}", e);
            }

            if (configFromFile == null)
            {
                _logger.DebugFormat("abot config section was NOT found");
                return null;
            }

            _logger.DebugFormat("abot config section was found");
            return configFromFile.Convert();
        }