Beispiel #1
0
        /// <summary>
        /// Pushes one fully populated entry onto the queue, pops it again and checks that the
        /// crawl step, referrer and custom properties are all present on the popped entry.
        /// </summary>
        /// <param name="crawlQueue">Queue implementation under test</param>
        public void Test5(ICrawlerQueue crawlQueue)
        {
            Assert.NotNull(crawlQueue);
            DateTime now = DateTime.Now;

            crawlQueue.Push(new CrawlerQueueEntry
            {
                CrawlStep  = new CrawlStep(new Uri("http://www.biz.org/"), 0),
                Properties = new Dictionary <string, object>
                {
                    { "one", "string" },
                    { "two", 123 },
                    { "three", now },
                },
                Referrer = new CrawlStep(new Uri("http://www.biz3.org/"), 1)
            });
            Assert.AreEqual(1, crawlQueue.Count);
            CrawlerQueueEntry entry = crawlQueue.Pop();

            Assert.AreEqual(0, crawlQueue.Count);
            Assert.NotNull(entry);
            Assert.NotNull(entry.CrawlStep);
            Assert.NotNull(entry.Properties);
            Assert.NotNull(entry.Referrer);
            Assert.AreEqual(0, entry.CrawlStep.Depth);
            Assert.AreEqual("http://www.biz.org/", entry.CrawlStep.Uri.ToString());

            // Dictionary enumeration order is unspecified, so verify that each key is
            // present instead of asserting the position at which it is enumerated.
            Assert.IsTrue(entry.Properties.ContainsKey("one"));
            Assert.IsTrue(entry.Properties.ContainsKey("two"));
            Assert.IsTrue(entry.Properties.ContainsKey("three"));

            Assert.AreEqual("string", entry.Properties["one"]);
            Assert.AreEqual(123, entry.Properties["two"]);
            Assert.AreEqual(now, entry.Properties["three"]);

            // Reading the popped entry must not have changed the queue.
            Assert.AreEqual(0, crawlQueue.Count);
        }
Beispiel #2
0
 /// <summary>
 /// Checks that the queue reports a count of zero after a single entry is pushed and popped.
 /// </summary>
 /// <param name="crawlQueue">Queue implementation under test</param>
 public void Test3(ICrawlerQueue crawlQueue)
 {
     Assert.NotNull(crawlQueue);

     // One push followed by one pop; the popped value itself is not inspected here.
     var singleEntry = new CrawlerQueueEntry();
     crawlQueue.Push(singleEntry);
     crawlQueue.Pop();

     var remaining = crawlQueue.Count;
     Assert.AreEqual(0, remaining);
 }
Beispiel #3
0
 /// <summary>
 /// Pushes one fully populated entry onto the queue, pops it again and checks that the
 /// crawl step, referrer and custom properties are all present on the popped entry.
 /// </summary>
 /// <param name="crawlQueue">Queue implementation under test</param>
 public void Test5(ICrawlerQueue crawlQueue)
 {
     Assert.NotNull(crawlQueue);
     DateTime now = DateTime.Now;
     crawlQueue.Push(new CrawlerQueueEntry
         {
             CrawlStep = new CrawlStep(new Uri("http://www.biz.org/"), 0),
             Properties = new Dictionary<string, object>
                 {
                     {"one", "string"},
                     {"two", 123},
                     {"three", now},
                 },
             Referrer = new CrawlStep(new Uri("http://www.biz3.org/"), 1)
         });
     Assert.AreEqual(1, crawlQueue.Count);
     CrawlerQueueEntry entry = crawlQueue.Pop();
     Assert.AreEqual(0, crawlQueue.Count);
     Assert.NotNull(entry);
     Assert.NotNull(entry.CrawlStep);
     Assert.NotNull(entry.Properties);
     Assert.NotNull(entry.Referrer);
     Assert.AreEqual(0, entry.CrawlStep.Depth);
     Assert.AreEqual("http://www.biz.org/", entry.CrawlStep.Uri.ToString());

     // Dictionary enumeration order is unspecified, so verify that each key is
     // present instead of asserting the position at which it is enumerated.
     Assert.IsTrue(entry.Properties.ContainsKey("one"));
     Assert.IsTrue(entry.Properties.ContainsKey("two"));
     Assert.IsTrue(entry.Properties.ContainsKey("three"));

     Assert.AreEqual("string", entry.Properties["one"]);
     Assert.AreEqual(123, entry.Properties["two"]);
     Assert.AreEqual(now, entry.Properties["three"]);

     // Reading the popped entry must not have changed the queue.
     Assert.AreEqual(0, crawlQueue.Count);
 }
Beispiel #4
0
 /// <summary>
 /// Checks that the queue reports a count of zero after a single entry is pushed and popped.
 /// </summary>
 /// <param name="crawlQueue">Queue implementation under test</param>
 public void Test3(ICrawlerQueue crawlQueue)
 {
     Assert.NotNull(crawlQueue);

     // One push followed by one pop; the popped value itself is not inspected here.
     var singleEntry = new CrawlerQueueEntry();
     crawlQueue.Push(singleEntry);
     crawlQueue.Pop();

     var remaining = crawlQueue.Count;
     Assert.AreEqual(0, remaining);
 }
Beispiel #5
0
 /// <summary>
 /// Checks that popping from a queue that has already been emptied yields null.
 /// </summary>
 /// <param name="crawlQueue">Queue implementation under test</param>
 public void Test4(ICrawlerQueue crawlQueue)
 {
     Assert.NotNull(crawlQueue);

     // Fill and drain the queue so that it is known to be empty.
     var onlyEntry = new CrawlerQueueEntry();
     crawlQueue.Push(onlyEntry);
     crawlQueue.Pop();
     Assert.AreEqual(0, crawlQueue.Count);

     // A further pop on the drained queue is expected to return null.
     Assert.IsNull(crawlQueue.Pop());
 }
Beispiel #6
0
        /// <summary>
        /// Checks that popping from a queue that has already been emptied yields null.
        /// </summary>
        /// <param name="crawlQueue">Queue implementation under test</param>
        public void Test4(ICrawlerQueue crawlQueue)
        {
            Assert.NotNull(crawlQueue);

            // Fill and drain the queue so that it is known to be empty.
            var onlyEntry = new CrawlerQueueEntry();
            crawlQueue.Push(onlyEntry);
            crawlQueue.Pop();
            Assert.AreEqual(0, crawlQueue.Count);

            // A further pop on the drained queue is expected to return null.
            Assert.IsNull(crawlQueue.Pop());
        }
Beispiel #7
0
        /// <summary>
        /// Checks that the queue reports a count of one after a single entry is pushed,
        /// then disposes the queue if it implements <see cref="IDisposable"/>.
        /// </summary>
        /// <param name="crawlQueue">Queue implementation under test</param>
        public void Test2(ICrawlerQueue crawlQueue)
        {
            try
            {
                Assert.NotNull(crawlQueue);
                crawlQueue.Push(new CrawlerQueueEntry());
                Assert.AreEqual(1, crawlQueue.Count);
            }
            finally
            {
                // Dispose in a finally block so the queue is released even when one of the
                // assertions above throws; "as" yields null for a null or non-disposable queue.
                IDisposable disposable = crawlQueue as IDisposable;
                if (disposable != null)
                {
                    disposable.Dispose();
                }
            }
        }
        /// <summary>
        /// Checks that the queue reports a count of one after a single entry is pushed,
        /// then disposes the queue if it implements <see cref="IDisposable"/>.
        /// </summary>
        /// <param name="crawlQueue">Queue implementation under test</param>
        public void Test2(ICrawlerQueue crawlQueue)
        {
            try
            {
                Assert.NotNull(crawlQueue);
                crawlQueue.Push(new CrawlerQueueEntry());
                Assert.AreEqual(1, crawlQueue.Count);
            }
            finally
            {
                // Dispose in a finally block so the queue is released even when one of the
                // assertions above throws; "as" yields null for a null or non-disposable queue.
                IDisposable disposable = crawlQueue as IDisposable;
                if (disposable != null)
                {
                    disposable.Dispose();
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Queues a new step on the crawler queue, provided the crawler is running and the
        /// url passes the scheme, depth, allow-list and history checks.
        /// </summary>
        /// <param name="uri">Url to crawl</param>
        /// <param name="depth">Depth of the url</param>
        /// <param name="referrer">Step on which the url was located; stored on the queue entry</param>
        /// <param name="properties">Custom properties stored on the queue entry</param>
        /// <exception cref="InvalidOperationException">The crawler is not currently crawling</exception>
        public void AddStep(Uri uri, int depth, CrawlStep referrer, Dictionary <string, object> properties)
        {
            if (!m_Crawling)
            {
                throw new InvalidOperationException("Crawler must be running before adding steps");
            }

            // Once the crawl has been stopped, further steps are dropped without error.
            if (m_CrawlStopped)
            {
                return;
            }

            // A url is accepted only when it uses http(s), does not reach a positive maximum
            // crawl depth, and is allowed for this referrer. The checks short-circuit left to
            // right, so IsAllowedUrl is consulted only after the scheme and depth checks pass.
            bool accepted =
                (uri.Scheme == Uri.UriSchemeHttps || uri.Scheme == Uri.UriSchemeHttp) &&
                !(MaximumCrawlDepth.HasValue && MaximumCrawlDepth.Value > 0 && depth >= MaximumCrawlDepth.Value) &&
                IsAllowedUrl(uri, referrer);

            if (!accepted)
            {
                // Rejecting a depth-0 url stops the whole crawl.
                if (depth == 0)
                {
                    StopCrawl();
                }

                return;
            }

            // Register returning false ends processing here
            // (presumably the url key is already known to the history - confirm).
            bool registered = m_CrawlerHistory.Register(uri.GetUrlKeyString(UriSensitivity));
            if (!registered)
            {
                return;
            }

            // Build the step and wrap it in a queue entry
            CrawlStep crawlStep = new CrawlStep(uri, depth)
            {
                IsExternalUrl = IsExternalUrl(uri),
                IsAllowed     = true,
            };
            CrawlerQueueEntry queueEntry = new CrawlerQueueEntry
            {
                CrawlStep  = crawlStep,
                Referrer   = referrer,
                Properties = properties
            };

            m_CrawlerQueue.Push(queueEntry);

            string referrerText = referrer.IsNull() ? string.Empty : referrer.Uri.ToString();
            m_Logger.Verbose("Added {0} to queue referred from {1}", crawlStep.Uri, referrerText);

            StartNew();
        }