Beispiel #1
0
        public void ErrorOccurred_ErrorExceptionIsNull_ReturnsFalse()
        {
            // Arrange: a result whose exception is explicitly cleared.
            var result = new CrawlResult { ErrorException = null };

            // Assert: no error is reported and the exception stays null.
            Assert.IsFalse(result.ErrorOccurred);
            Assert.IsNull(result.ErrorException);
        }
Beispiel #2
0
 public void Constructor_ValidUri_CreatesInstance()
 {
     // A freshly constructed result should expose nothing but default values.
     var result = new CrawlResult();

     Assert.AreEqual(TimeSpan.Zero, result.Elapsed);
     Assert.IsNull(result.ErrorException);
     Assert.IsFalse(result.ErrorOccurred);
     Assert.IsNull(result.RootUri);
     Assert.IsNull(result.CrawlContext);
 }
Beispiel #3
0
        public void ErrorOccurred_ErrorExceptionNotNull_ReturnsTrue()
        {
            // Arrange: attach a known exception instance to the result.
            var expected = new Exception("oh no");
            var result = new CrawlResult { ErrorException = expected };

            // Assert: the error flag is raised and the very same instance is exposed.
            Assert.IsTrue(result.ErrorOccurred);
            Assert.AreSame(expected, result.ErrorException);
        }
Beispiel #4
0
        /// <summary>
        /// Begins a synchronous crawl using the uri param, subscribe to events to process data as it becomes available
        /// </summary>
        /// <param name="uri">Root uri of the site to crawl; must not be null.</param>
        /// <returns>
        /// The result of this crawl: its root uri, the crawl context, the elapsed time and,
        /// when crawling threw, the exception in <c>ErrorException</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="uri"/> is null.</exception>
        public virtual CrawlResult Crawl(Uri uri)
        {
            if (uri == null)
                throw new ArgumentNullException("uri");

            _crawlContext.RootUri = uri;

            // Start every crawl with a fresh result that points back at the shared context.
            _crawlResult = new CrawlResult();
            _crawlResult.RootUri = _crawlContext.RootUri;
            _crawlResult.CrawlContext = _crawlContext;
            _crawlComplete = false;

            _logger.InfoFormat("About to crawl site [{0}]", uri.AbsoluteUri);

            if (_memoryManager != null)
            {
                _crawlContext.MemoryUsageBeforeCrawlInMb = _memoryManager.GetCurrentUsageInMb();
                _logger.InfoFormat("Starting memory usage for site [{0}] is [{1}mb]", uri.AbsoluteUri, _crawlContext.MemoryUsageBeforeCrawlInMb);
            }

            PrintConfigValues(_crawlContext.CrawlConfiguration);

            // Seed the scheduler with the root page; the root is its own parent.
            _scheduler.Add(new PageToCrawl(uri) { ParentUri = uri, IsInternal = true, IsRoot = true });

            _crawlContext.CrawlStartDate = DateTime.Now;
            Stopwatch timer = Stopwatch.StartNew();

            // A timeout timer is only armed when a positive timeout is configured (seconds -> milliseconds).
            if (_crawlContext.CrawlConfiguration.CrawlTimeoutSeconds > 0)
            {
                _timeoutTimer = new Timer(_crawlContext.CrawlConfiguration.CrawlTimeoutSeconds * 1000);
                _timeoutTimer.Elapsed += HandleCrawlTimeout;
                _timeoutTimer.Start();
            }

            try
            {
                VerifyRequiredAvailableMemory();
                CrawlSite();
            }
            catch (Exception e)
            {
                // Failures are reported through the result instead of being rethrown to the caller.
                _crawlResult.ErrorException = e;
                _logger.FatalFormat("An error occurred while crawling site [{0}]", uri);
                _logger.Fatal(e);
            }
            finally
            {
                // Release the timeout timer first so it is stopped, detached and disposed even if
                // disposing the thread manager throws. Stop/Dispose are safe to repeat on a timer
                // left over from an earlier crawl.
                if (_timeoutTimer != null)
                {
                    _timeoutTimer.Stop();
                    _timeoutTimer.Elapsed -= HandleCrawlTimeout;
                    _timeoutTimer.Dispose();
                }

                if (_threadManager != null)
                    _threadManager.Dispose();
            }

            timer.Stop();

            if (_memoryManager != null)
            {
                _crawlContext.MemoryUsageAfterCrawlInMb = _memoryManager.GetCurrentUsageInMb();
                _logger.InfoFormat("Ending memory usage for site [{0}] is [{1}mb]", uri.AbsoluteUri, _crawlContext.MemoryUsageAfterCrawlInMb);
            }

            _crawlResult.Elapsed = timer.Elapsed;
            _logger.InfoFormat("Crawl complete for site [{0}]: [{1}]", _crawlResult.RootUri.AbsoluteUri, _crawlResult.Elapsed);

            return _crawlResult;
        }
Beispiel #5
0
 /// <summary>
 /// Logs download statistics for a finished crawl: page count, page rate,
 /// downloaded volume, download rate and total elapsed time.
 /// </summary>
 /// <param name="result">Crawl result whose <c>Elapsed</c> time is used for the rate calculations.</param>
 static void LogCrawledCompletedStatistics(CrawlResult result)
 {
     _log.Info("Pages downloaded: " + pagesDownloaded);
     _log.Info("Pages downloaded per second: " + pagesDownloaded / result.Elapsed.TotalSeconds);

     // Dividing by 1024 twice yields megabytes, not gigabytes, so the labels say MB.
     // NOTE(review): assumes bytesDownloaded holds a byte count, as its name suggests - confirm.
     _log.Info("MB downloaded: " + bytesDownloaded / 1024 / 1024);
     _log.Info("MB downloaded per second: " + (float)bytesDownloaded / 1024f / 1024f / (float)result.Elapsed.TotalSeconds);
     _log.Info("Time: " + result.Elapsed);
 }