Пример #1
0
        /// <summary>
        /// Builds a <see cref="DefaultCrawler"/> from the supplied crawler model.
        /// </summary>
        /// <remarks>
        /// The model's site URL, limits and ignore lists are copied into a
        /// <see cref="CrawlerConfiguration"/>. An <see cref="ImageHandler"/>, a
        /// <see cref="ScriptHandler"/> and an <see cref="HtmlAgilityPackLoader"/> are then
        /// chained together using the model's directory and handed to the crawler.
        /// </remarks>
        /// <param name="crawlerConfig">Crawler configuration</param>
        /// <returns>
        /// A crawler constructed from the derived configuration and loader.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="crawlerConfig"/> is <c>null</c>.
        /// </exception>
        public DefaultCrawler ConfigureCrawler(CrawlerModel crawlerConfig)
        {
            // Fail with a descriptive argument exception instead of a NullReferenceException
            // raised from inside the object initializer below.
            if (crawlerConfig == null)
            {
                throw new ArgumentNullException("crawlerConfig");
            }

            var crawlerCfg = new CrawlerConfiguration
            {
                // Exceptions thrown by the Uri constructor for an unusable SiteUrl
                // propagate to the caller unchanged.
                Uri = new Uri(crawlerConfig.SiteUrl),
                CountLimit = crawlerConfig.CountLimit,
                DepthLimit = crawlerConfig.DepthLimit,
                IgnoredPrefixes = crawlerConfig.IgnoredPrefixes,
                IgnoredPaths = crawlerConfig.IgnoredPaths
            };

            var directory = crawlerConfig.Directory;

            // NOTE(review): the second ImageHandler argument is deliberately passed as null here;
            // confirm against ImageHandler's constructor that null is an accepted value.
            var imageHandler = new ImageHandler(directory, null);
            var scriptHandler = new ScriptHandler(directory, imageHandler);
            var loader = new HtmlAgilityPackLoader(scriptHandler);

            return new DefaultCrawler(crawlerCfg, loader);
        }
Пример #2
0
        /// <summary>
        /// Initializes a new instance of the <see cref="DefaultCrawler"/> class.
        /// </summary>
        /// <param name="configuration">
        /// Supplies the base URI, the count and depth limits, and the ignored paths and prefixes
        /// that are copied into this instance.
        /// </param>
        /// <param name="loader">The loader stored on this instance.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="configuration"/> or <paramref name="loader"/> is <c>null</c>.
        /// </exception>
        public DefaultCrawler(CrawlerConfiguration configuration, HtmlAgilityPackLoader loader)
        {
            // Validate both dependencies before any member is assigned;
            // configuration is checked first, then loader.
            if (configuration == null)
            {
                throw new ArgumentNullException("configuration");
            }

            if (loader == null)
            {
                throw new ArgumentNullException("loader");
            }

            // Crawl target and stop conditions.
            baseUri = configuration.Uri;
            countLimit = configuration.CountLimit;
            depthLimit = configuration.DepthLimit;

            // "this." is required here because the parameter shadows the member.
            this.loader = loader;

            // Exclusion lists.
            ignoredPaths = configuration.IgnoredPaths;
            ignoredPrefixes = configuration.IgnoredPrefixes;
        }