/// <summary>
/// Builds a <see cref="DefaultCrawler"/> from the supplied crawler settings,
/// wiring up the handler/loader chain the crawler is constructed with.
/// </summary>
/// <param name="crawlerConfig">
/// Crawler settings: site URL, count and depth limits, ignored prefixes and paths,
/// and the directory handed to the image and script handlers.
/// </param>
/// <returns>
/// A new <see cref="DefaultCrawler"/> created from the derived
/// <see cref="CrawlerConfiguration"/> and an <see cref="HtmlAgilityPackLoader"/>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="crawlerConfig"/> is <c>null</c>.
/// </exception>
public DefaultCrawler ConfigureCrawler(CrawlerModel crawlerConfig)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // on the first property access below.
    if (crawlerConfig == null)
    {
        throw new ArgumentNullException("crawlerConfig");
    }

    // Note: the Uri constructor itself rejects a null or malformed SiteUrl.
    var crawlerCfg = new CrawlerConfiguration
    {
        Uri = new Uri(crawlerConfig.SiteUrl),
        CountLimit = crawlerConfig.CountLimit,
        DepthLimit = crawlerConfig.DepthLimit,
        IgnoredPrefixes = crawlerConfig.IgnoredPrefixes,
        IgnoredPaths = crawlerConfig.IgnoredPaths
    };

    // Both handlers receive the same directory; the script handler is given the
    // image handler, and the loader is given the script handler.
    var directory = crawlerConfig.Directory;

    // NOTE(review): the second ImageHandler argument is passed as null; its meaning
    // is defined by ImageHandler, which is not visible here - confirm this is intended.
    var imageHandler = new ImageHandler(directory, null);
    var scriptHandler = new ScriptHandler(directory, imageHandler);
    var loader = new HtmlAgilityPackLoader(scriptHandler);

    return new DefaultCrawler(crawlerCfg, loader);
}
/// <summary>
/// Initializes a new instance of the <see cref="DefaultCrawler"/> class from a
/// crawler configuration and a page loader.
/// </summary>
/// <param name="configuration">
/// Supplies the base URI, the count and depth limits, and the ignored paths and
/// prefixes that are copied onto this instance.
/// </param>
/// <param name="loader">The loader stored on this instance.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="configuration"/> or <paramref name="loader"/> is <c>null</c>.
/// </exception>
public DefaultCrawler(CrawlerConfiguration configuration, HtmlAgilityPackLoader loader)
{
    // Validate arguments up front; configuration is checked before loader.
    if (configuration == null)
    {
        throw new ArgumentNullException("configuration");
    }

    if (loader == null)
    {
        throw new ArgumentNullException("loader");
    }

    // Collaborator supplied by the caller.
    this.loader = loader;

    // Settings copied from the configuration object.
    this.baseUri = configuration.Uri;
    this.countLimit = configuration.CountLimit;
    this.depthLimit = configuration.DepthLimit;
    this.ignoredPaths = configuration.IgnoredPaths;
    this.ignoredPrefixes = configuration.IgnoredPrefixes;
}