/// <summary>
/// Simulates a crawl for test purposes: blocks the calling thread for five
/// seconds, builds a result that contains a single placeholder external link,
/// and passes that result to <c>proxy.ReturnCrawlingResults</c> on a task
/// started through <see cref="Task.Factory"/>.
/// </summary>
/// <remarks>
/// The started task is not awaited or observed by this method.
/// </remarks>
public void StartTestCrawl()
{
    // Fixed delay standing in for the duration of a crawl.
    Thread.Sleep(5000);

    var placeholderLink = new ExternalLinkDTO
    {
        LinkAnchor = "Test Run",
        LinkPath = "",
        OriginalPageLink = "",
        PageSeedLink = ""
    };

    var testResults = new CrawlerResultsDTO
    {
        ExternalLinksList = new List<ExternalLinkDTO> { placeholderLink }
    };

    // Deliver the canned result on a separate task so this method returns immediately afterwards.
    Task.Factory.StartNew(() => proxy.ReturnCrawlingResults(testResults));
}
/// <summary>
/// Crawls every seed in <paramref name="seedsToCrawl"/>: for each seed with a
/// non-empty domain name it calls <c>FindLinks</c> on the starting address and
/// then keeps popping entries from <c>_internalUnprocessedLinks</c> and calling
/// <c>FindLinks</c> on them until the stack is empty or <c>_forceStop</c> is set.
/// </summary>
/// <param name="seedsToCrawl">
/// Seeds to crawl. The sequence is enumerated exactly once. Seeds whose
/// <c>SeedDomainName</c> is null or empty are skipped.
/// </param>
/// <param name="maxCrawlLevel">Value stored in <c>_maxPageLevel</c> for this run.</param>
/// <returns>
/// A <c>CrawlerResultsDTO</c> holding snapshots of the bad, external and internal
/// link collections plus batch information. <c>ProcessedSeed</c> and
/// <c>BatchInfo.SeedId</c> are taken from the first seed only; when the sequence
/// is empty, <c>ProcessedSeed</c> is null and <c>SeedId</c> keeps its default value.
/// </returns>
public CrawlerResultsDTO StartCrawlingProcess(IEnumerable<SeedDTO> seedsToCrawl, int maxCrawlLevel = 2)
{
    var timetracker = Stopwatch.StartNew();
    var startTime = DateTime.Now;

    // Materialize once: the sequence is needed both for the crawl loop and for
    // the first-seed lookup, and a lazy IEnumerable would be re-evaluated each time.
    var seeds = seedsToCrawl.ToList();
    var firstSeed = seeds.FirstOrDefault();

    _maxPageLevel = maxCrawlLevel;
    _forceStop = false;

    foreach (var seed in seeds)
    {
        var startingAddress = seed.SeedDomainName;
        _internalLinksIdCounter = 1;

        if (string.IsNullOrEmpty(startingAddress))
        {
            continue;
        }

        FindLinks(startingAddress, 0, startingAddress);

        // Drain the links queued for this seed unless a stop was requested.
        while (_internalUnprocessedLinks.Count > 0 && !_forceStop)
        {
            var selectedLink = _internalUnprocessedLinks.Pop();
            selectedLink.IsProcessed = true;
            FindLinks(selectedLink.PageLink, selectedLink.PageLevel, startingAddress);
        }
    }

    // Clear any stop request so that it does not leak into the next run.
    _forceStop = false;

    timetracker.Stop();

    // TotalSeconds, not Seconds: Seconds is only the 0-59 component of the
    // elapsed time and would under-report any crawl longer than a minute.
    var runningTime = (int)timetracker.Elapsed.TotalSeconds;

    var batchInfo = new BatchDTO();
    batchInfo.CrawlingTime = runningTime;
    batchInfo.StartTime = startTime;
    batchInfo.NumberOfCrawledExternalLinks = _externalLinksDictionary.Count;
    batchInfo.NumberOfCrawledInternalLinks = _internalLinksIdCounter;

    // An empty seed sequence has no first seed; leave SeedId at its default
    // instead of dereferencing null.
    if (firstSeed != null)
    {
        batchInfo.SeedId = firstSeed.SeedIndex;
    }

    var result = new CrawlerResultsDTO
    {
        BadLinksList = _badLinksList.ToList(),
        ExternalLinksList = _externalLinksDictionary.Select(pair => pair.Value).ToList(),
        InternalLinksList = _internalLinksDictionary.Select(pair => pair.Value).ToList(),
        BatchInfo = batchInfo,
        ProcessedSeed = firstSeed
    };

    return result;
}