public void ErrorOccurred_ErrorExceptionIsNull_ReturnsFalse()
{
    CrawlResult unitUnderTest = new CrawlResult();
    unitUnderTest.ErrorException = null;

    Assert.IsFalse(unitUnderTest.ErrorOccurred);
    Assert.IsNull(unitUnderTest.ErrorException);
}
public void Constructor_CreatesInstanceWithDefaultValues()
{
    CrawlResult unitUnderTest = new CrawlResult();

    Assert.AreEqual(default(TimeSpan), unitUnderTest.Elapsed);
    Assert.IsNull(unitUnderTest.ErrorException);
    Assert.IsFalse(unitUnderTest.ErrorOccurred);
    Assert.IsNull(unitUnderTest.RootUri);
    Assert.IsNull(unitUnderTest.CrawlContext);
}
public void ErrorOccurred_ErrorExceptionNotNull_ReturnsTrue()
{
    CrawlResult unitUnderTest = new CrawlResult();
    Exception ex = new Exception("oh no");

    unitUnderTest.ErrorException = ex;

    Assert.IsTrue(unitUnderTest.ErrorOccurred);
    Assert.AreSame(ex, unitUnderTest.ErrorException);
}
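// Taken together, the tests above pin ErrorOccurred down as a property derived
// from ErrorException rather than an independently settable flag. A minimal
// sketch of a shape that satisfies them follows; the property names come from
// the tests, but the class body is an assumption, not the library's source.
public class CrawlResultSketch
{
    public Uri RootUri { get; set; }
    public TimeSpan Elapsed { get; set; }
    public Exception ErrorException { get; set; }
    public CrawlContext CrawlContext { get; set; }

    // True exactly when an exception was recorded, matching both ErrorOccurred_* tests.
    public bool ErrorOccurred
    {
        get { return ErrorException != null; }
    }
}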
/// <summary>
/// Begins a synchronous crawl using the uri param. Subscribe to events to process data as it becomes available.
/// </summary>
public virtual CrawlResult Crawl(Uri uri)
{
    if (uri == null)
        throw new ArgumentNullException("uri");

    _crawlContext.RootUri = uri;

    _crawlResult = new CrawlResult();
    _crawlResult.RootUri = _crawlContext.RootUri;
    _crawlResult.CrawlContext = _crawlContext;
    _crawlComplete = false;

    _logger.InfoFormat("About to crawl site [{0}]", uri.AbsoluteUri);

    if (_memoryManager != null)
    {
        _crawlContext.MemoryUsageBeforeCrawlInMb = _memoryManager.GetCurrentUsageInMb();
        _logger.InfoFormat("Starting memory usage for site [{0}] is [{1}mb]", uri.AbsoluteUri, _crawlContext.MemoryUsageBeforeCrawlInMb);
    }

    PrintConfigValues(_crawlContext.CrawlConfiguration);

    _scheduler.Add(new PageToCrawl(uri) { ParentUri = uri, IsInternal = true, IsRoot = true });

    _crawlContext.CrawlStartDate = DateTime.Now;
    Stopwatch timer = Stopwatch.StartNew();

    if (_crawlContext.CrawlConfiguration.CrawlTimeoutSeconds > 0)
    {
        _timeoutTimer = new Timer(_crawlContext.CrawlConfiguration.CrawlTimeoutSeconds * 1000);
        _timeoutTimer.Elapsed += HandleCrawlTimeout;
        _timeoutTimer.Start();
    }

    try
    {
        VerifyRequiredAvailableMemory();
        CrawlSite();
    }
    catch (Exception e)
    {
        _crawlResult.ErrorException = e;
        _logger.FatalFormat("An error occurred while crawling site [{0}]", uri);
        _logger.Fatal(e);
    }
    finally
    {
        if (_threadManager != null)
            _threadManager.Dispose();
    }

    if (_timeoutTimer != null)
        _timeoutTimer.Stop();

    timer.Stop();

    if (_memoryManager != null)
    {
        _crawlContext.MemoryUsageAfterCrawlInMb = _memoryManager.GetCurrentUsageInMb();
        _logger.InfoFormat("Ending memory usage for site [{0}] is [{1}mb]", uri.AbsoluteUri, _crawlContext.MemoryUsageAfterCrawlInMb);
    }

    _crawlResult.Elapsed = timer.Elapsed;
    _logger.InfoFormat("Crawl complete for site [{0}]: [{1}]", _crawlResult.RootUri.AbsoluteUri, _crawlResult.Elapsed);

    return _crawlResult;
}
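// The doc comment's contract is worth a caller-side illustration: Crawl(uri)
// blocks until the crawl finishes, while subscribed events fire as each page
// completes. This is a sketch only; the concrete crawler type, its parameterless
// constructor, and the PageCrawlCompleted event with its CrawledPage args are
// assumptions inferred from the doc comment, not confirmed by this section.
static void RunCrawlExample()
{
    var crawler = new WebCrawler();

    // Fires per page while Crawl(...) is still blocking.
    crawler.PageCrawlCompleted += (sender, e) =>
        Console.WriteLine("Crawled [{0}]", e.CrawledPage.Uri);

    CrawlResult result = crawler.Crawl(new Uri("http://example.com/"));

    // ErrorOccurred is derived from ErrorException (see the tests above).
    if (result.ErrorOccurred)
        Console.WriteLine("Crawl of {0} failed: {1}", result.RootUri, result.ErrorException.Message);
    else
        Console.WriteLine("Crawl of {0} completed in {1}", result.RootUri, result.Elapsed);
}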
static void LogCrawledCompletedStatistics(CrawlResult result)
{
    // bytes / 1024 / 1024 yields megabytes, so these figures are MB, not GB
    // as the original labels claimed; compute in double to avoid integer truncation.
    double megabytesDownloaded = bytesDownloaded / 1024d / 1024d;

    _log.Info("Pages downloaded: " + pagesDownloaded);
    _log.Info("Pages downloaded per second: " + pagesDownloaded / result.Elapsed.TotalSeconds);
    _log.Info("MB downloaded: " + megabytesDownloaded);
    _log.Info("MB downloaded per second: " + megabytesDownloaded / result.Elapsed.TotalSeconds);
    _log.Info("Time: " + result.Elapsed);
}