Example #1
0
        /// <summary>
        ///     Processes the crawl requests.
        ///     Dequeues CrawlRequests from UncrawledCrawlRequests and processes them one at a time
        ///     for as long as the Engine is in the Start state and requests remain queued.
        ///     When the loop ends, the current CrawlRequest is cleared and the thread sleeps for 100 ms.
        /// </summary>
        private void ProcessCrawlRequests()
        {
            while (Crawler.Engine.State == EngineState.Start && UncrawledCrawlRequests.Count != 0)
            {
                _crawlInfo.CurrentCrawlRequest = UncrawledCrawlRequests.Dequeue();

                // Dequeue may hand back null, so the request must be checked BEFORE it is
                // dereferenced; assigning .Crawl ahead of this check would throw a
                // NullReferenceException and make the guard pointless.
                if (_crawlInfo.CurrentCrawlRequest != null)
                {
                    _crawlInfo.CurrentCrawlRequest.Crawl = this;

                    _crawlInfo.EnqueuedCrawlRequests = UncrawledCrawlRequests.Count;

                    // Track the deepest depth seen so far by this Crawl.
                    if (_crawlInfo.CurrentCrawlRequest.CurrentDepth > _crawlInfo.MaximumCrawlDepth)
                    {
                        _crawlInfo.MaximumCrawlDepth = _crawlInfo.CurrentCrawlRequest.CurrentDepth;
                    }

                    ProcessCrawlRequest(_crawlInfo.CurrentCrawlRequest, true, true);

                    // Send a status message to the crawler peers whenever the processed total is a multiple of 10.
                    if (_crawlInfo.TotalCrawlRequestsProcessed % 10 == 0)
                    {
                        _crawler.CrawlerPeerManager.SendStatusMessageToCrawlerPeers(_arachnodeDAO);
                    }
                }
            }

            _crawlInfo.CurrentCrawlRequest = null;

            // NOTE(review): presumably a short back-off so the caller does not spin when the queue is empty - confirm.
            Thread.Sleep(100);
        }
Example #2
0
        /// <summary>
        ///     Begins a Crawl.  This method bypasses the Cache, and is experimental/for advanced users.
        ///     This method does not function with the DEMO version.
        ///     Processes the supplied CrawlRequest, then keeps dequeuing and processing requests from
        ///     UncrawledCrawlRequests until Dequeue returns null.
        /// </summary>
        /// <param name = "crawlRequest">The first CrawlRequest to process.  Must not be null.</param>
        /// <param name = "obeyCrawlRules">Forwarded to ProcessCrawlRequest for every request processed.</param>
        /// <param name = "executeCrawlActions">Forwarded to ProcessCrawlRequest for every request processed.</param>
        /// <param name = "processDiscoveriesAsynchronously">The value assigned to IsProcessingDiscoveriesAsynchronously on this Crawl.</param>
        public void BeginCrawl(CrawlRequest <TArachnodeDAO> crawlRequest, bool obeyCrawlRules, bool executeCrawlActions, bool processDiscoveriesAsynchronously)
        {
#if DEMO
            return;
#endif
            _crawlInfo.ThreadNumber = -1;

            do
            {
                crawlRequest.Crawl = this;

                // Pass the caller's choice through as-is; negating it here would silently
                // invert the behavior the caller requested.
                crawlRequest.Crawl.IsProcessingDiscoveriesAsynchronously = processDiscoveriesAsynchronously;

                // NOTE(review): CurrentDepth is forced to MaximumDepth for every request handled here - confirm this is intended.
                crawlRequest.CurrentDepth = crawlRequest.MaximumDepth;

                // Processing and fetching the next request happen under the same lock.
                lock (_beginCrawlLock)
                {
                    ProcessCrawlRequest(crawlRequest, obeyCrawlRules, executeCrawlActions);

                    crawlRequest = UncrawledCrawlRequests.Dequeue();
                }
            } while (crawlRequest != null);
        }
Example #3
0
        /// <summary>
        ///     Drains UncrawledCrawlRequests and persists each entry through the DAO.
        ///     A request that is not disallowed by the PreRequest crawl rules is inserted as a CrawlRequest
        ///     (when InsertCrawlRequests is enabled); a disallowed request is inserted as a disallowed
        ///     AbsoluteUri (when InsertDisallowedAbsoluteUris is enabled).
        /// </summary>
        internal void SaveCrawlRequestsToDatabase()
        {
            while (UncrawledCrawlRequests.Count != 0)
            {
                _consoleManager.OutputString("Saving Crawl.UncrawledCrawlRequests: " + _crawlInfo.ThreadNumber + " : " + UncrawledCrawlRequests.Count + " CrawlRequests remaining to be inserted.", ConsoleColor.Gray, ConsoleColor.Gray);

                CrawlRequest <TArachnodeDAO> pending = UncrawledCrawlRequests.Dequeue();

                bool isDisallowed = _ruleManager.IsDisallowed(pending, CrawlRuleType.PreRequest, _arachnodeDAO);

                if (isDisallowed)
                {
                    if (_applicationSettings.InsertDisallowedAbsoluteUris)
                    {
                        _arachnodeDAO.InsertDisallowedAbsoluteUri(pending.DataType.ContentTypeID, (int)pending.DataType.DiscoveryType, pending.Parent.Uri.AbsoluteUri, pending.Discovery.Uri.AbsoluteUri, pending.IsDisallowedReason, _applicationSettings.ClassifyAbsoluteUris);
                    }
                }
                else if (_applicationSettings.InsertCrawlRequests)
                {
                    // A request without an Originator is stored with a null originator AbsoluteUri.
                    string originatorAbsoluteUri = pending.Originator != null ? pending.Originator.Uri.AbsoluteUri : null;

                    _arachnodeDAO.InsertCrawlRequest(pending.Created, originatorAbsoluteUri, pending.Parent.Uri.AbsoluteUri, pending.Discovery.Uri.AbsoluteUri, pending.CurrentDepth, pending.MaximumDepth, pending.RestrictCrawlTo, pending.RestrictDiscoveriesTo, pending.Priority, (byte)pending.RenderType, (byte)pending.RenderTypeForChildren);
                }

                // Counted once per dequeued request, whether or not anything was inserted.
                Counters.GetInstance().CrawlRequestRemoved();
            }
        }