// Round-trip: a fully populated entry pushed onto the queue should come back intact.
// Note: the key-order assertions rely on Dictionary<string, object> enumerating in
// insertion order, which the BCL does not guarantee.
public void Test5(ICrawlerQueue crawlQueue)
{
    Assert.NotNull(crawlQueue);
    DateTime now = DateTime.Now;
    crawlQueue.Push(new CrawlerQueueEntry
        {
            CrawlStep = new CrawlStep(new Uri("http://www.biz.org/"), 0),
            Properties = new Dictionary<string, object>
                {
                    { "one", "string" },
                    { "two", 123 },
                    { "three", now },
                },
            Referrer = new CrawlStep(new Uri("http://www.biz3.org/"), 1)
        });
    Assert.AreEqual(1, crawlQueue.Count);

    CrawlerQueueEntry entry = crawlQueue.Pop();
    Assert.AreEqual(0, crawlQueue.Count);
    Assert.NotNull(entry);
    Assert.NotNull(entry.CrawlStep);
    Assert.NotNull(entry.Properties);
    Assert.NotNull(entry.Referrer);
    Assert.AreEqual(0, entry.CrawlStep.Depth);
    Assert.AreEqual("http://www.biz.org/", entry.CrawlStep.Uri.ToString());
    Assert.AreEqual("one", entry.Properties.Keys.First());
    Assert.AreEqual("two", entry.Properties.Keys.Skip(1).First());
    Assert.AreEqual("three", entry.Properties.Keys.Skip(2).First());
    Assert.AreEqual("string", entry.Properties["one"]);
    Assert.AreEqual(123, entry.Properties["two"]);
    Assert.AreEqual(now, entry.Properties["three"]);
    Assert.AreEqual(0, crawlQueue.Count);
}
// Pop removes the pushed entry, leaving the queue empty.
public void Test3(ICrawlerQueue crawlQueue)
{
    Assert.NotNull(crawlQueue);
    crawlQueue.Push(new CrawlerQueueEntry());
    crawlQueue.Pop();
    Assert.AreEqual(0, crawlQueue.Count);
}
/// <summary>
/// Start the crawl process.
/// </summary>
public virtual async Task CrawlAsync()
{
    if (this.m_OnlyOneCrawlPerInstance)
    {
        throw new InvalidOperationException("Crawler instance cannot be reused");
    }

    this.m_OnlyOneCrawlPerInstance = true;

    var parameters = new Parameter[]
        {
            new TypedParameter(typeof(Uri), this.m_BaseUri),
            new NamedParameter("crawlStart", this.m_BaseUri),
            new TypedParameter(typeof(Crawler), this),
        };

    // Resolve collaborators one by one, feeding each resolved service back
    // into the parameter list so later resolutions can depend on it.
    this.m_CrawlerQueue = this.m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerQueue), this.m_CrawlerQueue)).ToArray();
    this.m_CrawlerHistory = this.m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerHistory), this.m_CrawlerHistory)).ToArray();
    this.m_TaskRunner = this.m_LifetimeScope.Resolve<ITaskRunner>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ITaskRunner), this.m_TaskRunner)).ToArray();
    this.m_Logger = this.m_LifetimeScope.Resolve<ILog>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ILog), this.m_Logger)).ToArray();
    this.m_CrawlerRules = this.m_LifetimeScope.Resolve<ICrawlerRules>(parameters);

    this.m_Logger.Verbose("Crawl started @ {0}", this.m_BaseUri);
    this.m_WebDownloaderFactory = this.m_LifetimeScope.Resolve<Func<IWebDownloader>>();

    using (this.m_CrawlCompleteEvent = new ManualResetEvent(false))
    {
        this.m_Crawling = true;
        this.m_Runtime = Stopwatch.StartNew();

        if (this.m_CrawlerQueue.Count > 0)
        {
            // Resume enabled: continue from the persisted queue instead of starting over.
            ProcessQueue();
        }
        else
        {
            await this.AddStepAsync(this.m_BaseUri, 0);
        }

        if (!this.m_CrawlStopped)
        {
            this.m_CrawlCompleteEvent.WaitOne();
        }

        this.m_Runtime.Stop();
        this.m_Crawling = false;
    }

    if (this.m_Cancelled)
    {
        OnCancelled();
    }

    this.m_Logger.Verbose("Crawl ended @ {0} in {1}", this.m_BaseUri, this.m_Runtime.Elapsed);
    OnCrawlFinished();
}
// Popping beyond the last entry should return null rather than throw.
public void Test4(ICrawlerQueue crawlQueue)
{
    Assert.NotNull(crawlQueue);
    crawlQueue.Push(new CrawlerQueueEntry());
    crawlQueue.Pop();
    Assert.AreEqual(0, crawlQueue.Count);
    var actualValue = crawlQueue.Pop();
    Assert.IsNull(actualValue);
}
public void Test1(ICrawlerQueue crawlQueue)
{
    Assert.NotNull(crawlQueue);
    Assert.AreEqual(0, crawlQueue.Count);
    if (crawlQueue is IDisposable)
    {
        ((IDisposable)crawlQueue).Dispose();
    }
}
public void Test2(ICrawlerQueue crawlQueue)
{
    Assert.NotNull(crawlQueue);
    crawlQueue.Push(new CrawlerQueueEntry());
    Assert.AreEqual(1, crawlQueue.Count);
    if (crawlQueue is IDisposable)
    {
        ((IDisposable)crawlQueue).Dispose();
    }
}
/// <summary>
/// Start the crawl process.
/// </summary>
public virtual void Crawl()
{
    if (m_Crawling)
    {
        throw new InvalidOperationException("Crawler already running");
    }

    Parameter[] parameters = new Parameter[]
        {
            new TypedParameter(typeof(Uri), m_BaseUri),
            new NamedParameter("crawlStart", m_BaseUri),
            new NamedParameter("resume", false),
            new NamedParameter("crawler", this),
        };
    m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
    m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
    m_Robot = AdhereToRobotRules
        ? m_LifetimeScope.Resolve<IRobot>(parameters)
        : new DummyRobot();
    m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
    m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
    m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);

    using (m_CrawlCompleteEvent = new ManualResetEvent(false))
    {
        m_Crawling = true;
        m_Runtime = Stopwatch.StartNew();
        AddStep(m_BaseUri, 0);
        if (!m_CrawlStopped)
        {
            m_CrawlCompleteEvent.WaitOne();
        }

        m_Runtime.Stop();
        m_Crawling = false;
    }

    if (m_Cancelled)
    {
        OnCancelled();
    }

    m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
    OnCrawlFinished();
}
public void Test1(ICrawlerQueue crawlQueue)
{
    Assert.NotNull(crawlQueue);
    Assert.AreEqual(0, crawlQueue.Count);
}
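// The tests above exercise only the ICrawlerQueue contract: Push adds an entry,
// Pop removes and returns the next one (null once the queue is empty), and Count
// reports how many entries are pending. A minimal in-memory implementation that
// would satisfy them might look like the sketch below; the class name, the FIFO
// ordering, and the exact member signatures are assumptions, not taken from the source.
using System.Collections.Generic;

public class InMemoryCrawlerQueue : ICrawlerQueue
{
    private readonly Queue<CrawlerQueueEntry> m_Entries = new Queue<CrawlerQueueEntry>();

    // Assumed to be long; the tests only compare it against integer literals.
    public long Count
    {
        get { return m_Entries.Count; }
    }

    public void Push(CrawlerQueueEntry crawlerQueueEntry)
    {
        m_Entries.Enqueue(crawlerQueueEntry);
    }

    public CrawlerQueueEntry Pop()
    {
        // Test4 expects null, not an exception, when popping an empty queue.
        return m_Entries.Count == 0 ? null : m_Entries.Dequeue();
    }
}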
/// <summary>
/// Start the crawl process.
/// </summary>
public virtual void Crawl()
{
    if (m_OnlyOneCrawlPerInstance)
    {
        throw new InvalidOperationException("Crawler instance cannot be reused");
    }

    m_OnlyOneCrawlPerInstance = true;

    Parameter[] parameters = new Parameter[]
        {
            new TypedParameter(typeof(Uri), m_BaseUri),
            new NamedParameter("crawlStart", m_BaseUri),
            new TypedParameter(typeof(Crawler), this),
        };

    // Resolve collaborators one by one, feeding each resolved service back
    // into the parameter list so later resolutions can depend on it.
    m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerQueue), m_CrawlerQueue)).ToArray();
    m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerHistory), m_CrawlerHistory)).ToArray();
    m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ITaskRunner), m_TaskRunner)).ToArray();
    m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ILog), m_Logger)).ToArray();
    m_CrawlerRules = m_LifetimeScope.Resolve<ICrawlerRules>(parameters);

    m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);
    m_WebDownloaderFactory = m_LifetimeScope.Resolve<Func<IWebDownloader>>();

    using (m_CrawlCompleteEvent = new ManualResetEvent(false))
    {
        m_Crawling = true;
        m_Runtime = Stopwatch.StartNew();

        if (m_CrawlerQueue.Count > 0)
        {
            // Resume enabled: continue from the persisted queue instead of starting over.
            ProcessQueue();
        }
        else
        {
            AddStep(m_BaseUri, 0);
        }

        if (!m_CrawlStopped)
        {
            m_CrawlCompleteEvent.WaitOne();
        }

        m_Runtime.Stop();
        m_Crawling = false;
    }

    if (m_Cancelled)
    {
        OnCancelled();
    }

    m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
    OnCrawlFinished();
}
/// <summary>
/// Start the crawl process.
/// </summary>
public virtual void Crawl()
{
    // Load the original web site snapshot and pin it in the cache for one day.
    using (var stream = new StreamReader(AppDomain.CurrentDomain.BaseDirectory + "OriginalWebSite.txt", Encoding.UTF8))
    {
        var jsonStr = stream.ReadToEnd();
        var policy = new CacheItemPolicy();
        policy.Priority = CacheItemPriority.NotRemovable;
        policy.AbsoluteExpiration = DateTimeOffset.Now.AddDays(1);
        cache.Set(AppDomain.CurrentDomain.BaseDirectory + "OriginalWebSite", jsonStr, policy);
        Console.WriteLine("cache --" + AppDomain.CurrentDomain.BaseDirectory + " :" + cache.Get(AppDomain.CurrentDomain.BaseDirectory + "OriginalWebSite"));
    }

    if (m_OnlyOneCrawlPerInstance)
    {
        throw new InvalidOperationException("Crawler instance cannot be reused");
    }

    m_OnlyOneCrawlPerInstance = true;

    Parameter[] parameters = new Parameter[]
        {
            new TypedParameter(typeof(Uri), m_BaseUri),
            new NamedParameter("crawlStart", m_BaseUri),
            new TypedParameter(typeof(Crawler), this),
        };

    // Resolve collaborators one by one, feeding each resolved service back
    // into the parameter list so later resolutions can depend on it.
    m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerQueue), m_CrawlerQueue)).ToArray();
    m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerHistory), m_CrawlerHistory)).ToArray();
    m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ITaskRunner), m_TaskRunner)).ToArray();
    m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
    parameters = parameters.AddToEnd(new TypedParameter(typeof(ILog), m_Logger)).ToArray();
    m_CrawlerRules = m_LifetimeScope.Resolve<ICrawlerRules>(parameters);

    m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);
    m_WebDownloaderFactory = m_LifetimeScope.Resolve<Func<IWebDownloader>>();

    using (m_CrawlCompleteEvent = new ManualResetEvent(false))
    {
        m_Crawling = true;
        m_Runtime = Stopwatch.StartNew();

        if (m_CrawlerQueue.Count > 0)
        {
            // Resume enabled: continue from the persisted queue instead of starting over.
            ProcessQueue();
        }
        else
        {
            AddStep(m_BaseUri, 0);
        }

        if (!m_CrawlStopped)
        {
            m_CrawlCompleteEvent.WaitOne();
        }

        m_Runtime.Stop();
        m_Crawling = false;
    }

    if (m_Cancelled)
    {
        OnCancelled();
    }

    m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
    OnCrawlFinished();
}
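// The Resolve chains above grow the Autofac parameter list with an AddToEnd
// extension as each service is resolved, so later resolutions can inject the
// services created before them. The extension's implementation is not shown in
// this section; a plausible sketch, assuming a plain IEnumerable<T> helper, is:
using System.Collections.Generic;
using System.Linq;

public static class EnumerableExtensions
{
    // Yields the source sequence followed by the extra items (lazy, like Concat;
    // the real AddToEnd used above may differ).
    public static IEnumerable<T> AddToEnd<T>(this IEnumerable<T> source, params T[] items)
    {
        return source.Concat(items);
    }
}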