/// <summary>
/// Start crawl process
/// </summary>
public virtual void Crawl()
{
	if (m_OnlyOneCrawlPerInstance)
	{
		throw new InvalidOperationException("Crawler instance cannot be reused");
	}

	m_OnlyOneCrawlPerInstance = true;

	// Seed the Autofac parameter list; each service resolved below is appended
	// to it so that later resolutions can depend on the earlier ones.
	Parameter[] parameters = new Parameter[]
		{
			new TypedParameter(typeof(Uri), m_BaseUri),
			new NamedParameter("crawlStart", m_BaseUri),
			new TypedParameter(typeof(Crawler), this),
		};
	m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
	parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerQueue), m_CrawlerQueue)).ToArray();
	m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
	parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerHistory), m_CrawlerHistory)).ToArray();
	m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
	parameters = parameters.AddToEnd(new TypedParameter(typeof(ITaskRunner), m_TaskRunner)).ToArray();
	m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
	parameters = parameters.AddToEnd(new TypedParameter(typeof(ILog), m_Logger)).ToArray();
	m_CrawlerRules = m_LifetimeScope.Resolve<ICrawlerRules>(parameters);

	m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);

	m_WebDownloaderFactory = m_LifetimeScope.Resolve<Func<IWebDownloader>>();
	using (m_CrawlCompleteEvent = new ManualResetEvent(false))
	{
		m_Crawling = true;
		m_Runtime = Stopwatch.StartNew();

		if (m_CrawlerQueue.Count > 0)
		{
			// Resume: the queue already holds work from a previous crawl
			ProcessQueue();
		}
		else
		{
			AddStep(m_BaseUri, 0);
		}

		// Block until every queued step has completed (or the crawl was stopped)
		if (!m_CrawlStopped)
		{
			m_CrawlCompleteEvent.WaitOne();
		}

		m_Runtime.Stop();
		m_Crawling = false;
	}

	if (m_Cancelled)
	{
		OnCancelled();
	}

	m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
	OnCrawlFinished();
}
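// Usage sketch (illustrative, not part of the class above): driving a crawl with the
// stock NCrawler entry points. The Crawler(Uri crawlStart, params IPipelineStep[] pipeline)
// constructor, HtmlDocumentProcessor, MaximumThreadCount and MaximumCrawlDepth are
// assumed from the standard NCrawler build; adapt them to the container setup in use.
public static void RunCrawlSketch()
{
	using (var crawler = new Crawler(new Uri("http://example.com"), new HtmlDocumentProcessor()))
	{
		crawler.MaximumThreadCount = 2; // assumed knob: limit concurrent downloads
		crawler.MaximumCrawlDepth = 3;  // assumed knob: stop following links past depth 3
		crawler.Crawl();                // synchronous: returns after OnCrawlFinished has fired
	}
}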
/// <summary>
/// Start crawl process
/// </summary>
public virtual void Crawl()
{
	// Modified entry point: before crawling, load a saved snapshot of the site
	// (OriginalWebSite.txt next to the executable) into the in-memory cache so it
	// can be read later without touching disk. The cache key is the file path
	// without the ".txt" extension.
	string cacheKey = AppDomain.CurrentDomain.BaseDirectory + "OriginalWebSite";
	using (var stream = new StreamReader(cacheKey + ".txt", Encoding.UTF8))
	{
		string jsonStr = stream.ReadToEnd();
		var policy = new CacheItemPolicy
			{
				Priority = CacheItemPriority.NotRemovable,          // keep under memory pressure
				AbsoluteExpiration = DateTimeOffset.Now.AddDays(1), // refresh after one day
			};
		cache.Set(cacheKey, jsonStr, policy);
		Console.WriteLine("cache --" + AppDomain.CurrentDomain.BaseDirectory + " :" + cache.Get(cacheKey));
	}

	if (m_OnlyOneCrawlPerInstance)
	{
		throw new InvalidOperationException("Crawler instance cannot be reused");
	}

	m_OnlyOneCrawlPerInstance = true;

	Parameter[] parameters = new Parameter[]
		{
			new TypedParameter(typeof(Uri), m_BaseUri),
			new NamedParameter("crawlStart", m_BaseUri),
			new TypedParameter(typeof(Crawler), this),
		};
	m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
	parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerQueue), m_CrawlerQueue)).ToArray();
	m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
	parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerHistory), m_CrawlerHistory)).ToArray();
	m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
	parameters = parameters.AddToEnd(new TypedParameter(typeof(ITaskRunner), m_TaskRunner)).ToArray();
	m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
	parameters = parameters.AddToEnd(new TypedParameter(typeof(ILog), m_Logger)).ToArray();
	m_CrawlerRules = m_LifetimeScope.Resolve<ICrawlerRules>(parameters);

	m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);

	m_WebDownloaderFactory = m_LifetimeScope.Resolve<Func<IWebDownloader>>();
	using (m_CrawlCompleteEvent = new ManualResetEvent(false))
	{
		m_Crawling = true;
		m_Runtime = Stopwatch.StartNew();

		if (m_CrawlerQueue.Count > 0)
		{
			// Resume: the queue already holds work from a previous crawl
			ProcessQueue();
		}
		else
		{
			AddStep(m_BaseUri, 0);
		}

		if (!m_CrawlStopped)
		{
			m_CrawlCompleteEvent.WaitOne();
		}

		m_Runtime.Stop();
		m_Crawling = false;
	}

	if (m_Cancelled)
	{
		OnCancelled();
	}

	m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
	OnCrawlFinished();
}
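// Standalone sketch of the caching pattern introduced above, assuming `cache` is
// System.Runtime.Caching.MemoryCache.Default. Priority = NotRemovable exempts the entry
// from memory-pressure eviction, while the absolute expiration still drops it after a
// day. The SnapshotCache name and the path-as-key convention are illustrative only.
using System;
using System.IO;
using System.Runtime.Caching;
using System.Text;

public static class SnapshotCache
{
	private static readonly MemoryCache Cache = MemoryCache.Default;

	public static string GetOrLoad(string path)
	{
		// Serve the cached snapshot when present; otherwise read the file and cache it.
		string cached = Cache.Get(path) as string;
		if (cached != null)
		{
			return cached;
		}

		string text = File.ReadAllText(path, Encoding.UTF8);
		Cache.Set(path, text, new CacheItemPolicy
			{
				Priority = CacheItemPriority.NotRemovable,
				AbsoluteExpiration = DateTimeOffset.Now.AddDays(1),
			});
		return text;
	}
}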