/// <summary>
/// Processes the crawl requests.
/// Dequeues and processes CrawlRequests from UncrawledCrawlRequests for as long as the Engine
/// is in the Start state and the queue reports a non-zero Count.
/// </summary>
private void ProcessCrawlRequests()
{
    while (Crawler.Engine.State == EngineState.Start && UncrawledCrawlRequests.Count != 0)
    {
        _crawlInfo.CurrentCrawlRequest = UncrawledCrawlRequests.Dequeue();

        // Guard BEFORE touching the dequeued request: dereferencing it ahead of this check
        // would throw a NullReferenceException whenever Dequeue yields null.
        if (_crawlInfo.CurrentCrawlRequest != null)
        {
            _crawlInfo.CurrentCrawlRequest.Crawl = this;

            _crawlInfo.EnqueuedCrawlRequests = UncrawledCrawlRequests.Count;

            // Track the deepest CurrentDepth seen so far by this Crawl.
            if (_crawlInfo.CurrentCrawlRequest.CurrentDepth > _crawlInfo.MaximumCrawlDepth)
            {
                _crawlInfo.MaximumCrawlDepth = _crawlInfo.CurrentCrawlRequest.CurrentDepth;
            }

            // Queue-driven requests are always processed with both flags set to true.
            ProcessCrawlRequest(_crawlInfo.CurrentCrawlRequest, true, true);

            // Send a status message to the crawler peers whenever the processed total is a multiple of 10.
            if (_crawlInfo.TotalCrawlRequestsProcessed % 10 == 0)
            {
                _crawler.CrawlerPeerManager.SendStatusMessageToCrawlerPeers(_arachnodeDAO);
            }
        }
    }

    // Nothing is being processed once the loop exits.
    _crawlInfo.CurrentCrawlRequest = null;

    // Pause for 100 ms before returning.
    // NOTE(review): presumably this throttles a polling caller - confirm against the call site.
    Thread.Sleep(100);
}
/// <summary>
/// Begins a Crawl. This method bypasses the Cache, and is experimental/for advanced users.
/// This method does not function with the DEMO version.
/// Processes the supplied CrawlRequest, then keeps dequeuing from UncrawledCrawlRequests and
/// processing each result until Dequeue returns null.
/// </summary>
/// <param name = "crawlRequest">The first CrawlRequest to process. Must not be null.</param>
/// <param name = "obeyCrawlRules">Passed through to ProcessCrawlRequest.</param>
/// <param name = "executeCrawlActions">Passed through to ProcessCrawlRequest.</param>
/// <param name = "processDiscoveriesAsynchronously">Value assigned to IsProcessingDiscoveriesAsynchronously on this Crawl.</param>
/// <exception cref = "ArgumentNullException">Thrown when <paramref name = "crawlRequest" /> is null (non-DEMO builds).</exception>
public void BeginCrawl(CrawlRequest<TArachnodeDAO> crawlRequest, bool obeyCrawlRules, bool executeCrawlActions, bool processDiscoveriesAsynchronously)
{
#if DEMO
    return;
#endif
    // Fail with a descriptive exception instead of a NullReferenceException on first dereference.
    if (crawlRequest == null)
    {
        throw new ArgumentNullException("crawlRequest");
    }

    _crawlInfo.ThreadNumber = -1;

    do
    {
        crawlRequest.Crawl = this;

        // Honor the caller's choice as given; the flag must not be negated on the way in.
        crawlRequest.Crawl.IsProcessingDiscoveriesAsynchronously = processDiscoveriesAsynchronously;

        // NOTE(review): every request processed here has CurrentDepth forced to its MaximumDepth -
        // confirm this is the intended depth handling for cache-bypassing crawls.
        crawlRequest.CurrentDepth = crawlRequest.MaximumDepth;

        lock (_beginCrawlLock)
        {
            ProcessCrawlRequest(crawlRequest, obeyCrawlRules, executeCrawlActions);

            // The loop condition relies on Dequeue returning null once nothing is left.
            // NOTE(review): verify the queue type returns null rather than throwing when empty.
            crawlRequest = UncrawledCrawlRequests.Dequeue();
        }
    } while (crawlRequest != null);
}
/// <summary>
/// Saves the crawl requests to database.
/// Drains UncrawledCrawlRequests one entry at a time: an allowed CrawlRequest is inserted as a
/// CrawlRequest, a disallowed one is recorded as a disallowed AbsoluteUri, each subject to the
/// corresponding application setting.
/// </summary>
internal void SaveCrawlRequestsToDatabase()
{
    while (UncrawledCrawlRequests.Count != 0)
    {
        // Report progress using the count as it stands before this iteration's dequeue.
        string progressMessage = "Saving Crawl.UncrawledCrawlRequests: " + _crawlInfo.ThreadNumber + " : " + UncrawledCrawlRequests.Count + " CrawlRequests remaining to be inserted.";
        _consoleManager.OutputString(progressMessage, ConsoleColor.Gray, ConsoleColor.Gray);

        CrawlRequest<TArachnodeDAO> pendingRequest = UncrawledCrawlRequests.Dequeue();

        bool isDisallowed = _ruleManager.IsDisallowed(pendingRequest, CrawlRuleType.PreRequest, _arachnodeDAO);

        if (isDisallowed)
        {
            if (_applicationSettings.InsertDisallowedAbsoluteUris)
            {
                _arachnodeDAO.InsertDisallowedAbsoluteUri(
                    pendingRequest.DataType.ContentTypeID,
                    (int)pendingRequest.DataType.DiscoveryType,
                    pendingRequest.Parent.Uri.AbsoluteUri,
                    pendingRequest.Discovery.Uri.AbsoluteUri,
                    pendingRequest.IsDisallowedReason,
                    _applicationSettings.ClassifyAbsoluteUris);
            }
        }
        else if (_applicationSettings.InsertCrawlRequests)
        {
            // A request without an Originator is stored with a null originator AbsoluteUri.
            string originatorAbsoluteUri = null;

            if (pendingRequest.Originator != null)
            {
                originatorAbsoluteUri = pendingRequest.Originator.Uri.AbsoluteUri;
            }

            _arachnodeDAO.InsertCrawlRequest(
                pendingRequest.Created,
                originatorAbsoluteUri,
                pendingRequest.Parent.Uri.AbsoluteUri,
                pendingRequest.Discovery.Uri.AbsoluteUri,
                pendingRequest.CurrentDepth,
                pendingRequest.MaximumDepth,
                pendingRequest.RestrictCrawlTo,
                pendingRequest.RestrictDiscoveriesTo,
                pendingRequest.Priority,
                (byte)pendingRequest.RenderType,
                (byte)pendingRequest.RenderTypeForChildren);
        }

        // Counted for every dequeued request, whether or not anything was written.
        Counters.GetInstance().CrawlRequestRemoved();
    }
}