Example #1
        /// <summary>
        /// Starts the crawl process.
        /// </summary>
        public virtual void Crawl()
        {
            if (m_OnlyOneCrawlPerInstance)
            {
                throw new InvalidOperationException("Crawler instance cannot be reused");
            }

            m_OnlyOneCrawlPerInstance = true;

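            // Resolve collaborators from the Autofac lifetime scope. The parameter array is
            // rebuilt after every resolution so that each later service (history, task runner,
            // logger, rules) can receive the previously created ones as constructor arguments.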
            Parameter[] parameters = new Parameter[]
                {
                    new TypedParameter(typeof (Uri), m_BaseUri),
                    new NamedParameter("crawlStart", m_BaseUri),
                    new TypedParameter(typeof (Crawler), this),
                };
            m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof (ICrawlerQueue), m_CrawlerQueue)).ToArray();
            m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof (ICrawlerHistory), m_CrawlerHistory)).ToArray();
            m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof (ITaskRunner), m_TaskRunner)).ToArray();
            m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof (ILog), m_Logger)).ToArray();
            m_CrawlerRules = m_LifetimeScope.Resolve<ICrawlerRules>(parameters);
            m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);
            m_WebDownloaderFactory = m_LifetimeScope.Resolve<Func<IWebDownloader>>();
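            // Run the crawl: resume from a previously populated queue if one exists,
            // otherwise seed it with the base URI; then, unless the crawl was already
            // stopped, block until every step has been processed and
            // m_CrawlCompleteEvent is signalled.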
            using (m_CrawlCompleteEvent = new ManualResetEvent(false))
            {
                m_Crawling = true;
                m_Runtime = Stopwatch.StartNew();

                if (m_CrawlerQueue.Count > 0)
                {
                    // Resume enabled
                    ProcessQueue();
                }
                else
                {
                    AddStep(m_BaseUri, 0);
                }

                if (!m_CrawlStopped)
                {
                    m_CrawlCompleteEvent.WaitOne();
                }

                m_Runtime.Stop();
                m_Crawling = false;
            }

            if (m_Cancelled)
            {
                OnCancelled();
            }

            m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
            OnCrawlFinished();
        }
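
For context, Example #1's Crawl() is normally driven from client code along the following lines. This is a minimal sketch only: the Crawler(Uri, params IPipelineStep[]) constructor, the NCrawler and NCrawler.HtmlProcessor namespaces, the HtmlDocumentProcessor pipeline step, and the MaximumCrawlDepth property are assumed from NCrawler's published samples and are not part of the excerpt above.

        using System;
        using NCrawler;                    // assumed namespace of the Crawler class
        using NCrawler.HtmlProcessor;      // assumed namespace of HtmlDocumentProcessor

        public static class CrawlExample
        {
            public static void Main()
            {
                // Build a crawler for a start URI (this becomes m_BaseUri in the excerpt)
                // with a single pipeline step, then run it. Crawl() blocks until the
                // crawler queue has been drained and m_CrawlCompleteEvent is signalled.
                using (Crawler crawler = new Crawler(
                    new Uri("http://example.com"),   // hypothetical start address
                    new HtmlDocumentProcessor()))    // assumed HTML link/text extraction step
                {
                    crawler.MaximumCrawlDepth = 2;   // assumed property limiting link depth
                    crawler.Crawl();
                }
            }
        }
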
Example #2
        /// <summary>
        /// Starts the crawl process.
        /// </summary>
        public virtual void Crawl()
        {
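            // Prime the in-memory cache before crawling: read OriginalWebSite.txt from the
            // application base directory and store its contents under a NotRemovable policy
            // that expires after one day.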
            using (var stream = new StreamReader(AppDomain.CurrentDomain.BaseDirectory + "OriginalWebSite.txt", Encoding.UTF8))
            {
                var jsonStr = stream.ReadToEnd();
                var policy = new CacheItemPolicy();
                policy.Priority = CacheItemPriority.NotRemovable;
                policy.AbsoluteExpiration = DateTimeOffset.Now.AddDays(1);
                cache.Set(AppDomain.CurrentDomain.BaseDirectory + "OriginalWebSite", jsonStr, policy);
                Console.WriteLine("cache --" + AppDomain.CurrentDomain.BaseDirectory + " :" + cache.Get(AppDomain.CurrentDomain.BaseDirectory + "OriginalWebSite"));
            }
            
            if (m_OnlyOneCrawlPerInstance)
            {
                throw new InvalidOperationException("Crawler instance cannot be reused");
            }

            m_OnlyOneCrawlPerInstance = true;

            Parameter[] parameters = new Parameter[]
                {
                    new TypedParameter(typeof (Uri), m_BaseUri),
                    new NamedParameter("crawlStart", m_BaseUri),
                    new TypedParameter(typeof (Crawler), this),
                };
            m_CrawlerQueue = m_LifetimeScope.Resolve<ICrawlerQueue>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerQueue), m_CrawlerQueue)).ToArray();
            m_CrawlerHistory = m_LifetimeScope.Resolve<ICrawlerHistory>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof(ICrawlerHistory), m_CrawlerHistory)).ToArray();
            m_TaskRunner = m_LifetimeScope.Resolve<ITaskRunner>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof(ITaskRunner), m_TaskRunner)).ToArray();
            m_Logger = m_LifetimeScope.Resolve<ILog>(parameters);
            parameters = parameters.AddToEnd(new TypedParameter(typeof(ILog), m_Logger)).ToArray();
            m_CrawlerRules = m_LifetimeScope.Resolve<ICrawlerRules>(parameters);
            m_Logger.Verbose("Crawl started @ {0}", m_BaseUri);
            m_WebDownloaderFactory = m_LifetimeScope.Resolve<Func<IWebDownloader>>();
            using (m_CrawlCompleteEvent = new ManualResetEvent(false))
            {
                m_Crawling = true;
                m_Runtime = Stopwatch.StartNew();

                if (m_CrawlerQueue.Count > 0)
                {
                    // Resume enabled
                    ProcessQueue();
                }
                else
                {
                    AddStep(m_BaseUri, 0);
                }

                if (!m_CrawlStopped)
                {
                    m_CrawlCompleteEvent.WaitOne();
                }

                m_Runtime.Stop();
                m_Crawling = false;
            }

            if (m_Cancelled)
            {
                OnCancelled();
            }

            m_Logger.Verbose("Crawl ended @ {0} in {1}", m_BaseUri, m_Runtime.Elapsed);
            OnCrawlFinished();
        }