Ejemplo n.º 1
0
        /// <summary>
        /// Pushes one fully populated entry, pops it back, and checks that the crawl step,
        /// the referrer and the property bag are returned with the values that were pushed.
        /// </summary>
        /// <param name="crawlQueue">Queue implementation under test.</param>
        public void Test5(ICrawlerQueue crawlQueue)
        {
            Assert.NotNull(crawlQueue);
            DateTime timestamp = DateTime.Now;

            // Build the entry step by step, in the same order the members are asserted below.
            var pushed = new CrawlerQueueEntry();
            pushed.CrawlStep = new CrawlStep(new Uri("http://www.biz.org/"), 0);

            var bag = new Dictionary<string, object>();
            bag.Add("one", "string");
            bag.Add("two", 123);
            bag.Add("three", timestamp);
            pushed.Properties = bag;

            pushed.Referrer = new CrawlStep(new Uri("http://www.biz3.org/"), 1);

            crawlQueue.Push(pushed);
            Assert.AreEqual(1, crawlQueue.Count);

            CrawlerQueueEntry popped = crawlQueue.Pop();
            Assert.AreEqual(0, crawlQueue.Count);

            // The popped entry and each of its parts must be present.
            Assert.NotNull(popped);
            Assert.NotNull(popped.CrawlStep);
            Assert.NotNull(popped.Properties);
            Assert.NotNull(popped.Referrer);

            Assert.AreEqual(0, popped.CrawlStep.Depth);
            Assert.AreEqual("http://www.biz.org/", popped.CrawlStep.Uri.ToString());

            // Keys are expected in the order they were added.
            var keys = popped.Properties.Keys;
            Assert.AreEqual("one", keys.First());
            Assert.AreEqual("two", keys.Skip(1).First());
            Assert.AreEqual("three", keys.Skip(2).First());

            Assert.AreEqual("string", popped.Properties["one"]);
            Assert.AreEqual(123, popped.Properties["two"]);
            Assert.AreEqual(timestamp, popped.Properties["three"]);

            // Reading the entry must not have changed the queue size.
            Assert.AreEqual(0, crawlQueue.Count);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Pushes a single empty entry, pops it again and expects the queue to report zero entries.
 /// </summary>
 /// <param name="crawlQueue">Queue implementation under test.</param>
 public void Test3(ICrawlerQueue crawlQueue)
 {
     Assert.NotNull(crawlQueue);

     var emptyEntry = new CrawlerQueueEntry();
     crawlQueue.Push(emptyEntry);
     crawlQueue.Pop();

     var remaining = crawlQueue.Count;
     Assert.AreEqual(0, remaining);
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Verifies that one push followed by one pop leaves the queue with a count of zero.
 /// </summary>
 /// <param name="crawlQueue">Queue implementation under test.</param>
 public void Test3(ICrawlerQueue crawlQueue)
 {
     Assert.NotNull(crawlQueue);

     // Round-trip a default-constructed entry; the popped value itself is not inspected.
     var entry = new CrawlerQueueEntry();
     crawlQueue.Push(entry);
     crawlQueue.Pop();

     var countAfterPop = crawlQueue.Count;
     Assert.AreEqual(0, countAfterPop);
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Round-trips a populated entry through the queue and checks that the crawl step,
 /// referrer and every property value come back unchanged.
 /// </summary>
 /// <param name="crawlQueue">Queue implementation under test.</param>
 public void Test5(ICrawlerQueue crawlQueue)
 {
     Assert.NotNull(crawlQueue);
     DateTime pushedAt = DateTime.Now;

     // Assemble the entry member by member instead of with a nested initializer.
     var outgoing = new CrawlerQueueEntry();
     outgoing.CrawlStep = new CrawlStep(new Uri("http://www.biz.org/"), 0);

     var values = new Dictionary<string, object>();
     values.Add("one", "string");
     values.Add("two", 123);
     values.Add("three", pushedAt);
     outgoing.Properties = values;

     outgoing.Referrer = new CrawlStep(new Uri("http://www.biz3.org/"), 1);

     crawlQueue.Push(outgoing);
     Assert.AreEqual(1, crawlQueue.Count);

     CrawlerQueueEntry incoming = crawlQueue.Pop();
     Assert.AreEqual(0, crawlQueue.Count);

     Assert.NotNull(incoming);
     Assert.NotNull(incoming.CrawlStep);
     Assert.NotNull(incoming.Properties);
     Assert.NotNull(incoming.Referrer);

     Assert.AreEqual(0, incoming.CrawlStep.Depth);
     Assert.AreEqual("http://www.biz.org/", incoming.CrawlStep.Uri.ToString());

     // Keys are expected in insertion order.
     var keyNames = incoming.Properties.Keys;
     Assert.AreEqual("one", keyNames.First());
     Assert.AreEqual("two", keyNames.Skip(1).First());
     Assert.AreEqual("three", keyNames.Skip(2).First());

     Assert.AreEqual("string", incoming.Properties["one"]);
     Assert.AreEqual(123, incoming.Properties["two"]);
     Assert.AreEqual(pushedAt, incoming.Properties["three"]);

     Assert.AreEqual(0, crawlQueue.Count);
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Resolves the crawler's collaborators from the lifetime scope, then either resumes
        /// from a non-empty queue or adds the base URI as the first step, and waits for the
        /// completion event before raising the finished notification.
        /// </summary>
        /// <returns>A task that completes when the crawl has ended.</returns>
        /// <exception cref="InvalidOperationException">This instance has already been used for a crawl.</exception>
        public virtual async Task CrawlAsync()
        {
            if (m_OnlyOneCrawlPerInstance)
            {
                throw new InvalidOperationException("Crawler instance cannot be reused");
            }

            m_OnlyOneCrawlPerInstance = true;

            Parameter[] parameters =
            {
                new TypedParameter(typeof(Uri), m_BaseUri),
                new NamedParameter("crawlStart", m_BaseUri),
                new TypedParameter(typeof(Crawler), this),
            };

            // Each resolved component is appended to the parameter list before the next
            // Resolve call, so the order of the statements below must not change.
            m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
            var queueParameter = new TypedParameter(typeof(ICrawlerQueue), m_CrawlerQueue);
            parameters = parameters.AddToEnd(queueParameter).ToArray();

            m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
            var historyParameter = new TypedParameter(typeof(ICrawlerHistory), m_CrawlerHistory);
            parameters = parameters.AddToEnd(historyParameter).ToArray();

            m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
            var runnerParameter = new TypedParameter(typeof(ITaskRunner), m_TaskRunner);
            parameters = parameters.AddToEnd(runnerParameter).ToArray();

            m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
            var loggerParameter = new TypedParameter(typeof(ILog), m_Logger);
            parameters = parameters.AddToEnd(loggerParameter).ToArray();

            m_CrawlerRules = m_LifetimeScope.Resolve<ICrawlerRules>(parameters);
            m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);
            m_WebDownloaderFactory = m_LifetimeScope.Resolve<Func<IWebDownloader>>();

            using (m_CrawlCompleteEvent = new ManualResetEvent(false))
            {
                m_Crawling = true;
                m_Runtime = Stopwatch.StartNew();

                if (m_CrawlerQueue.Count <= 0)
                {
                    // Nothing queued: start from the base URI at depth 0.
                    await this.AddStepAsync(m_BaseUri, 0);
                }
                else
                {
                    // Resume enabled
                    ProcessQueue();
                }

                // NOTE(review): WaitOne blocks the current thread even though this method is async.
                bool mustWait = !m_CrawlStopped;
                if (mustWait)
                {
                    m_CrawlCompleteEvent.WaitOne();
                }

                m_Runtime.Stop();
                m_Crawling = false;
            }

            if (m_Cancelled)
            {
                OnCancelled();
            }

            m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
            OnCrawlFinished();
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Empties the queue with one push/pop pair, then checks that a further pop returns null.
 /// </summary>
 /// <param name="crawlQueue">Queue implementation under test.</param>
 public void Test4(ICrawlerQueue crawlQueue)
 {
     Assert.NotNull(crawlQueue);

     var placeholder = new CrawlerQueueEntry();
     crawlQueue.Push(placeholder);
     crawlQueue.Pop();

     var remaining = crawlQueue.Count;
     Assert.AreEqual(0, remaining);

     // Popping from the now-empty queue is expected to yield null.
     var poppedFromEmpty = crawlQueue.Pop();
     Assert.IsNull(poppedFromEmpty);
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Pushes and pops one entry so the queue is empty, then expects a second pop to return null.
        /// </summary>
        /// <param name="crawlQueue">Queue implementation under test.</param>
        public void Test4(ICrawlerQueue crawlQueue)
        {
            Assert.NotNull(crawlQueue);

            var onlyEntry = new CrawlerQueueEntry();
            crawlQueue.Push(onlyEntry);
            crawlQueue.Pop();

            var countAfterPop = crawlQueue.Count;
            Assert.AreEqual(0, countAfterPop);

            // The queue is empty at this point, so this pop has nothing to return.
            var extraPop = crawlQueue.Pop();
            Assert.IsNull(extraPop);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Checks that a freshly supplied queue is non-null and reports a count of zero.
        /// The queue is disposed afterwards when it implements <see cref="IDisposable"/>,
        /// including when one of the assertions fails.
        /// </summary>
        /// <param name="crawlQueue">Queue implementation under test.</param>
        public void Test1(ICrawlerQueue crawlQueue)
        {
            // "as" yields null for a null or non-disposable queue, and "using" skips Dispose
            // for null; otherwise Dispose runs even if an assertion below throws.
            using (crawlQueue as IDisposable)
            {
                Assert.NotNull(crawlQueue);
                Assert.AreEqual(0, crawlQueue.Count);
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Checks that the supplied queue is non-null and initially reports zero entries.
        /// A queue that implements <see cref="IDisposable"/> is disposed on exit, whether
        /// the assertions pass or throw.
        /// </summary>
        /// <param name="crawlQueue">Queue implementation under test.</param>
        public void Test1(ICrawlerQueue crawlQueue)
        {
            // The "using" statement accepts null, so a null or non-disposable queue is simply
            // not disposed; a disposable one is released even when an assertion fails.
            using (crawlQueue as IDisposable)
            {
                Assert.NotNull(crawlQueue);
                Assert.AreEqual(0, crawlQueue.Count);
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Pushes one empty entry and checks that the queue then reports a count of one.
        /// The queue is disposed afterwards when it implements <see cref="IDisposable"/>,
        /// including when the push or an assertion throws.
        /// </summary>
        /// <param name="crawlQueue">Queue implementation under test.</param>
        public void Test2(ICrawlerQueue crawlQueue)
        {
            // "as" yields null for a null or non-disposable queue, and "using" skips Dispose
            // for null; otherwise Dispose runs on every exit path.
            using (crawlQueue as IDisposable)
            {
                Assert.NotNull(crawlQueue);
                crawlQueue.Push(new CrawlerQueueEntry());
                Assert.AreEqual(1, crawlQueue.Count);
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Verifies that pushing a single empty entry raises the queue count to one.
        /// A queue that implements <see cref="IDisposable"/> is disposed on exit, whether
        /// the body completes or throws.
        /// </summary>
        /// <param name="crawlQueue">Queue implementation under test.</param>
        public void Test2(ICrawlerQueue crawlQueue)
        {
            // The "using" statement accepts null, so a null or non-disposable queue is simply
            // not disposed; a disposable one is released even when an assertion fails.
            using (crawlQueue as IDisposable)
            {
                Assert.NotNull(crawlQueue);
                crawlQueue.Push(new CrawlerQueueEntry());
                Assert.AreEqual(1, crawlQueue.Count);
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Resolves the queue, history, robot, task runner and logger from the lifetime scope,
        /// adds the base URI as the first step, and waits on the completion event unless the
        /// crawl has already been flagged as stopped.
        /// </summary>
        /// <exception cref="InvalidOperationException">A crawl is already running on this instance.</exception>
        public virtual void Crawl()
        {
            if (m_Crawling)
            {
                throw new InvalidOperationException("Crawler already running");
            }

            // The same parameter set is passed to every Resolve call below.
            var parameters = new Parameter[]
            {
                new TypedParameter(typeof(Uri), m_BaseUri),
                new NamedParameter("crawlStart", m_BaseUri),
                new NamedParameter("resume", false),
                new NamedParameter("crawler", this),
            };

            m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
            m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);

            // A robot is only resolved when robot rules are honoured; otherwise a dummy is used.
            if (AdhereToRobotRules)
            {
                m_Robot = m_LifetimeScope.Resolve<IRobot>(parameters);
            }
            else
            {
                m_Robot = new DummyRobot();
            }

            m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
            m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
            m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);

            using (m_CrawlCompleteEvent = new ManualResetEvent(false))
            {
                m_Crawling = true;
                m_Runtime = Stopwatch.StartNew();
                AddStep(m_BaseUri, 0);

                // Block until the completion event is signalled, unless already stopped.
                bool mustWait = !m_CrawlStopped;
                if (mustWait)
                {
                    m_CrawlCompleteEvent.WaitOne();
                }

                m_Runtime.Stop();
                m_Crawling = false;
            }

            if (m_Cancelled)
            {
                OnCancelled();
            }

            m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
            OnCrawlFinished();
        }
Ejemplo n.º 13
0
 /// <summary>
 /// Checks that the supplied queue is non-null and reports zero entries.
 /// </summary>
 /// <param name="crawlQueue">Queue implementation under test.</param>
 public void Test1(ICrawlerQueue crawlQueue)
 {
     Assert.NotNull(crawlQueue);

     var initialCount = crawlQueue.Count;
     Assert.AreEqual(0, initialCount);
 }
Ejemplo n.º 14
0
 /// <summary>
 /// Verifies that a non-null queue starts out with a count of zero.
 /// </summary>
 /// <param name="crawlQueue">Queue implementation under test.</param>
 public void Test1(ICrawlerQueue crawlQueue)
 {
     Assert.NotNull(crawlQueue);

     var entriesAtStart = crawlQueue.Count;
     Assert.AreEqual(0, entriesAtStart);
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Starts a crawl at the base URI: resolves the collaborators from the lifetime scope,
        /// queues the first step, and waits on the completion event unless the crawl has
        /// already been flagged as stopped.
        /// </summary>
        /// <exception cref="InvalidOperationException">A crawl is already running on this instance.</exception>
        public virtual void Crawl()
        {
            if (m_Crawling)
            {
                throw new InvalidOperationException("Crawler already running");
            }

            // One parameter set, reused unchanged for every Resolve call.
            Parameter[] parameters =
            {
                new TypedParameter(typeof(Uri), m_BaseUri),
                new NamedParameter("crawlStart", m_BaseUri),
                new NamedParameter("resume", false),
                new NamedParameter("crawler", this),
            };

            m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
            m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);

            // Without robot rules a DummyRobot is used and nothing is resolved for it.
            if (!AdhereToRobotRules)
            {
                m_Robot = new DummyRobot();
            }
            else
            {
                m_Robot = m_LifetimeScope.Resolve<IRobot>(parameters);
            }

            m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
            m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
            m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);

            using (m_CrawlCompleteEvent = new ManualResetEvent(false))
            {
                m_Crawling = true;
                m_Runtime = Stopwatch.StartNew();
                AddStep(m_BaseUri, 0);

                bool alreadyStopped = m_CrawlStopped;
                if (!alreadyStopped)
                {
                    // Blocks until the completion event is signalled.
                    m_CrawlCompleteEvent.WaitOne();
                }

                m_Runtime.Stop();
                m_Crawling = false;
            }

            if (m_Cancelled)
            {
                OnCancelled();
            }

            m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
            OnCrawlFinished();
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Runs the single crawl this instance allows: resolves the collaborators from the
        /// lifetime scope, resumes from a non-empty queue or queues the base URI, and waits
        /// on the completion event unless the crawl has already been flagged as stopped.
        /// </summary>
        /// <exception cref="InvalidOperationException">This instance has already been used for a crawl.</exception>
        public virtual void Crawl()
        {
            if (m_OnlyOneCrawlPerInstance)
            {
                throw new InvalidOperationException("Crawler instance cannot be reused");
            }

            m_OnlyOneCrawlPerInstance = true;

            var parameters = new Parameter[]
            {
                new TypedParameter(typeof(Uri), m_BaseUri),
                new NamedParameter("crawlStart", m_BaseUri),
                new TypedParameter(typeof(Crawler), this),
            };

            // Every resolved component is appended to the parameter list before the next
            // Resolve call, so the statement order below is significant.
            m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
            var queueParameter = new TypedParameter(typeof(ICrawlerQueue), m_CrawlerQueue);
            parameters = parameters.AddToEnd(queueParameter).ToArray();

            m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
            var historyParameter = new TypedParameter(typeof(ICrawlerHistory), m_CrawlerHistory);
            parameters = parameters.AddToEnd(historyParameter).ToArray();

            m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
            var runnerParameter = new TypedParameter(typeof(ITaskRunner), m_TaskRunner);
            parameters = parameters.AddToEnd(runnerParameter).ToArray();

            m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
            var loggerParameter = new TypedParameter(typeof(ILog), m_Logger);
            parameters = parameters.AddToEnd(loggerParameter).ToArray();

            m_CrawlerRules = m_LifetimeScope.Resolve<ICrawlerRules>(parameters);
            m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);
            m_WebDownloaderFactory = m_LifetimeScope.Resolve<Func<IWebDownloader>>();

            using (m_CrawlCompleteEvent = new ManualResetEvent(false))
            {
                m_Crawling = true;
                m_Runtime = Stopwatch.StartNew();

                if (m_CrawlerQueue.Count <= 0)
                {
                    // Nothing queued: start from the base URI at depth 0.
                    AddStep(m_BaseUri, 0);
                }
                else
                {
                    // Resume enabled
                    ProcessQueue();
                }

                bool mustWait = !m_CrawlStopped;
                if (mustWait)
                {
                    m_CrawlCompleteEvent.WaitOne();
                }

                m_Runtime.Stop();
                m_Crawling = false;
            }

            if (m_Cancelled)
            {
                OnCancelled();
            }

            m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
            OnCrawlFinished();
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Loads "OriginalWebSite.txt" from the application base directory into the cache,
        /// then runs the single crawl this instance allows: resolves the collaborators from
        /// the lifetime scope, resumes from a non-empty queue or queues the base URI, and
        /// waits on the completion event unless the crawl has already been flagged as stopped.
        /// </summary>
        /// <exception cref="InvalidOperationException">This instance has already been used for a crawl.</exception>
        public virtual void Crawl()
        {
            string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

            // The cache key stays a plain concatenation so that any other code building the
            // key the same way still finds the entry.
            string cacheKey = baseDirectory + "OriginalWebSite";

            // Path.Combine inserts a directory separator when the base directory lacks a
            // trailing one; plain concatenation would produce a wrong file name in that case.
            string sourcePath = Path.Combine(baseDirectory, "OriginalWebSite.txt");

            using (var stream = new StreamReader(sourcePath, Encoding.UTF8))
            {
                var jsonStr = stream.ReadToEnd();
                var policy = new CacheItemPolicy();
                policy.Priority = CacheItemPriority.NotRemovable;
                policy.AbsoluteExpiration = DateTimeOffset.Now.AddDays(1);
                cache.Set(cacheKey, jsonStr, policy);
                Console.WriteLine("cache --" + baseDirectory + " :" + cache.Get(cacheKey));
            }

            if (m_OnlyOneCrawlPerInstance)
            {
                throw new InvalidOperationException("Crawler instance cannot be reused");
            }

            m_OnlyOneCrawlPerInstance = true;

            Parameter[] parameters = new Parameter[]
                {
                    new TypedParameter(typeof (Uri), m_BaseUri),
                    new NamedParameter("crawlStart", m_BaseUri),
                    new TypedParameter(typeof (Crawler), this),
                };

            // Every resolved component is appended to the parameter list before the next
            // Resolve call, so the statement order below is significant.
            m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerQueue), m_CrawlerQueue)).ToArray();
            m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerHistory), m_CrawlerHistory)).ToArray();
            m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof(ITaskRunner), m_TaskRunner)).ToArray();
            m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof(ILog), m_Logger)).ToArray();
            m_CrawlerRules = m_LifetimeScope.Resolve<ICrawlerRules>(parameters);
            m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);
            m_WebDownloaderFactory = m_LifetimeScope.Resolve<Func<IWebDownloader>>();
            using (m_CrawlCompleteEvent = new ManualResetEvent(false))
            {
                m_Crawling = true;
                m_Runtime = Stopwatch.StartNew();

                if (m_CrawlerQueue.Count > 0)
                {
                    // Resume enabled
                    ProcessQueue();
                }
                else
                {
                    AddStep(m_BaseUri, 0);
                }

                // Block until the completion event is signalled, unless already stopped.
                if (!m_CrawlStopped)
                {
                    m_CrawlCompleteEvent.WaitOne();
                }

                m_Runtime.Stop();
                m_Crawling = false;
            }

            if (m_Cancelled)
            {
                OnCancelled();
            }

            m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
            OnCrawlFinished();
        }