Exemplo n.º 1
0
        /// <summary>
        ///     Queues a new step on the crawler queue and then triggers queue processing.
        /// </summary>
        /// <remarks>
        ///     The call returns silently when the crawl has already been stopped. A step is dropped
        ///     without being queued when the url is not http(s), when a positive
        ///     <c>MaximumCrawlDepth</c> is set and <paramref name = "depth" /> has reached it, when the
        ///     crawler rules disallow the url, or when the crawler history does not register it.
        ///     Dropping a step at depth 0 additionally stops the crawl.
        /// </remarks>
        /// <param name = "uri">Url to crawl; must not be null</param>
        /// <param name = "depth">Depth of the url</param>
        /// <param name = "referrer">Step in which the url was located</param>
        /// <param name = "properties">Custom properties stored on the queue entry</param>
        /// <exception cref = "InvalidOperationException">
        ///     The crawler is not running, or <paramref name = "uri" /> is a relative uri
        ///     (raised by <c>Uri.Scheme</c>).
        /// </exception>
        /// <exception cref = "ArgumentNullException"><paramref name = "uri" /> is null</exception>
        public void AddStep(Uri uri, int depth, CrawlStep referrer, Dictionary <string, object> properties)
        {
            if (!m_Crawling)
            {
                throw new InvalidOperationException("Crawler must be running before adding steps");
            }

            if (m_CrawlStopped)
            {
                return;
            }

            // Validated after the state guards on purpose: calls made while the crawler is not
            // running or already stopped keep their previous outcome. Past this point a null
            // uri would otherwise only surface as a NullReferenceException on uri.Scheme.
            if (uri == null)
            {
                throw new ArgumentNullException("uri");
            }

            // The order of the checks matters: || short-circuits, so the history Register call
            // is only reached for urls that passed the scheme, depth and rule checks.
            if ((uri.Scheme != Uri.UriSchemeHttps && uri.Scheme != Uri.UriSchemeHttp) ||             // Only accept http(s) schema
                (MaximumCrawlDepth.HasValue && MaximumCrawlDepth.Value > 0 && depth >= MaximumCrawlDepth.Value) ||
                !m_CrawlerRules.IsAllowedUrl(uri, referrer) ||
                !m_CrawlerHistory.Register(uri.GetUrlKeyString(UriSensitivity)))
            {
                // A rejected step at depth 0 ends the whole crawl
                // (presumably because depth 0 is the start url - confirm against callers).
                if (depth == 0)
                {
                    StopCrawl();
                }

                return;
            }

            // Make new crawl step
            CrawlStep crawlStep = new CrawlStep(uri, depth)
            {
                IsExternalUrl = m_CrawlerRules.IsExternalUrl(uri),
                IsAllowed     = true,
            };

            m_CrawlerQueue.Push(new CrawlerQueueEntry
            {
                CrawlStep  = crawlStep,
                Referrer   = referrer,
                Properties = properties
            });

            // A missing referrer is logged as an empty string rather than dereferenced.
            m_Logger.Verbose("Added {0} to queue referred from {1}",
                             crawlStep.Uri, referrer.IsNull() ? string.Empty : referrer.Uri.ToString());
            ProcessQueue();
        }