/// <summary>
/// Main crawl loop. Repeats until <c>_crawlComplete</c> is set, running the pre-work checks
/// at the top of every iteration.
/// </summary>
/// <remarks>
/// Each iteration does exactly one of the following, checked in this order:
/// <list type="number">
/// <item><description>Paused (<c>_crawlPause</c>): log a warning and sleep 2.5 seconds.</description></item>
/// <item><description>Scheduler has links: log the queue size and hand one unit of work to the thread manager.</description></item>
/// <item><description>Thread manager still reports running threads: sleep 2.5 seconds and look again.</description></item>
/// <item><description>Otherwise: mark the crawl as complete.</description></item>
/// </list>
/// </remarks>
protected virtual void Crawl()
{
    while (!_crawlComplete)
    {
        RunPreWorkChecks();

        // Paused: do not touch the scheduler, just idle and re-evaluate.
        if (_crawlPause)
        {
            _logger.LogWarning("爬行线程暂停中...");
            Thread.Sleep(2500);
            continue;
        }

        // Work available: dispatch it.
        if (_scheduler.Count > 0)
        {
            _logger.LogInformation($"当前队列有[{_scheduler.Count}]个待爬链接");

            // GetNext() is evaluated inside the delegate, i.e. when the thread manager
            // actually runs the work item, not at the moment it is handed over here.
            _threadManager.DoWork(() => ProcessPage(_scheduler.GetNext()));
            continue;
        }

        // Queue is empty but work is still in flight; it may schedule more links.
        if (_threadManager.HasRunningThreads())
        {
            _logger.LogDebug("Waiting for links to be scheduled...");
            Thread.Sleep(2500);
            continue;
        }

        // Nothing queued and nothing running: the crawl is finished.
        _crawlComplete = true;
    }
}
/// <summary>
/// Asynchronous crawl loop. Repeats until <c>_crawlComplete</c> is set, running the pre-work
/// checks at the top of every iteration.
/// </summary>
/// <returns>A task that completes once the loop has observed <c>_crawlComplete</c> as true.</returns>
protected virtual async Task CrawlSite()
{
    while (!_crawlComplete)
    {
        RunPreWorkChecks();

        // Snapshot the count once so the check and the log message agree.
        var pendingLinkCount = _scheduler.Count;
        if (pendingLinkCount > 0)
        {
            Log.Debug($"There are [{pendingLinkCount}] links to schedule...");

            // GetNext() runs inside the delegate, when the thread manager executes it.
            _threadManager.DoWork(async () => await ProcessPage(_scheduler.GetNext()));
            continue;
        }

        // Ok that _processingPageCount could be a race condition, will be caught on the next loop iteration
        var nothingInFlight = !_threadManager.HasRunningThreads() && _processingPageCount < 1;
        if (nothingInFlight)
        {
            Log.Debug("No links to schedule, no threads/tasks in progress...");
            _crawlComplete = true;
            continue;
        }

        Log.Debug("Waiting for links to be scheduled...");

        // Beware of issues here... https://github.com/sjdirect/abot/issues/203
        await Task.Delay(2500).ConfigureAwait(false);
    }
}
/// <summary>
/// Verifies that <c>HasRunningThreads()</c> on the unit under test is false before any work
/// is queued, true while a queued work item is still executing, and false again after that
/// work item has finished.
/// </summary>
/// <remarks>
/// The work item is coordinated with explicit signals instead of fixed sleeps, so the result
/// does not depend on how quickly the machine schedules threads. Every wait is bounded by a
/// timeout so a misbehaving implementation fails the test rather than hanging it.
/// </remarks>
public void HasRunningThreads()
{
    var timeout = System.TimeSpan.FromSeconds(5);

    // No threads should be running
    Assert.IsFalse(_unitUnderTest.HasRunningThreads());

    var workStarted = new System.Threading.ManualResetEventSlim(false);
    var releaseWork = new System.Threading.ManualResetEventSlim(false);
    try
    {
        // Add work that announces it has started and then parks until the test releases it.
        // The park is bounded too, so an implementation that ran the delegate inline would
        // not block this test forever.
        _unitUnderTest.DoWork(() =>
        {
            workStarted.Set();
            releaseWork.Wait(timeout);
        });

        Assert.IsTrue(workStarted.Wait(timeout), "The queued work item did not start within the timeout.");

        // Should have 1 running thread: the work item is parked on releaseWork
        Assert.IsTrue(_unitUnderTest.HasRunningThreads());
    }
    finally
    {
        // Always let the work item go, even when an assertion above failed.
        releaseWork.Set();
    }

    // Should have 0 threads running once the released work item has completed
    Assert.IsTrue(
        System.Threading.SpinWait.SpinUntil(() => !_unitUnderTest.HasRunningThreads(), timeout),
        "HasRunningThreads() was still true after the work item was released.");

    // Dispose only on the success path: at this point the work item is known to be done
    // with both events. On a failure path it might still touch them.
    workStarted.Dispose();
    releaseWork.Dispose();
}
/// <summary>
/// Synchronous crawl loop. Repeats until <c>_crawlComplete</c> is set, running the pre-work
/// checks at the top of every iteration.
/// </summary>
/// <remarks>
/// While the scheduler has links, one unit of work per iteration is handed to the thread
/// manager. When the scheduler is empty the loop either sleeps 2.5 seconds (the thread
/// manager still reports running threads) or marks the crawl as complete.
/// </remarks>
protected virtual void CrawlSite()
{
    while (!_crawlComplete)
    {
        RunPreWorkChecks();

        if (_scheduler.Count > 0)
        {
            // GetNext() is evaluated when the thread manager runs the delegate.
            _threadManager.DoWork(() => ProcessPage(_scheduler.GetNext()));
            continue;
        }

        if (_threadManager.HasRunningThreads())
        {
            // In-flight work may still schedule more links; check again shortly.
            _logger.DebugFormat("Waiting for links to be scheduled...");
            Thread.Sleep(2500);
        }
        else
        {
            // Nothing queued and nothing running.
            _crawlComplete = true;
        }
    }
}
//private CrawlConfiguration GetCrawlConfigurationFromConfigFile()
//{
//    AbotConfigurationSectionHandler configFromFile = AbotConfigurationSectionHandler.LoadFromXml();

//    if (configFromFile == null)
//        throw new InvalidOperationException("abot config section was NOT found");

//    _logger.LogDebug($"abot config section was found");
//    return configFromFile.Convert();
//}

/// <summary>
/// Asynchronous crawl loop. Repeats until <c>_crawlComplete</c> is set, running the pre-work
/// checks at the top of every iteration.
/// </summary>
/// <remarks>
/// While the scheduler has links, one unit of work per iteration is handed to the thread
/// manager. When the scheduler is empty the loop either waits 2.5 seconds (the thread
/// manager still reports running threads) or marks the crawl as complete.
/// </remarks>
/// <returns>A task that completes once the loop has observed <c>_crawlComplete</c> as true.</returns>
protected virtual async Task CrawlSite()
{
    while (!_crawlComplete)
    {
        RunPreWorkChecks();

        if (_scheduler.Count > 0)
        {
            // GetNext() is evaluated when the thread manager runs the delegate.
            _threadManager.DoWork(() => ProcessPageAsync(_scheduler.GetNext()));
        }
        else if (!_threadManager.HasRunningThreads())
        {
            // Nothing queued and nothing running.
            _crawlComplete = true;
        }
        else
        {
            _logger.LogDebug("Waiting for links to be scheduled...");

            // Do not resume on the captured synchronization context: nothing in this loop
            // needs it, and resuming there can deadlock when a caller blocks on the
            // returned task from a single-threaded context.
            await Task.Delay(2500).ConfigureAwait(false);
        }
    }
}
/// <summary>
/// Asynchronous crawl loop. Repeats until <c>_crawlComplete</c> is set, running the pre-work
/// checks at the top of every iteration.
/// </summary>
/// <remarks>
/// Unlike a fire-and-forget dispatch, this loop takes the next page from the scheduler
/// itself and awaits the thread manager's <c>DoWork</c> call before looping again.
/// </remarks>
/// <returns>A task that completes once the loop has observed <c>_crawlComplete</c> as true.</returns>
async Task CrawlSite()
{
    while (!_crawlComplete)
    {
        RunPreWorkChecks();

        if (_scheduler.Count > 0)
        {
            // Dequeue here, before dispatching, so the delegate works on a fixed page.
            var nextPage = _scheduler.GetNext();

            await _threadManager
                .DoWork(async () =>
                {
                    await ProcessPage(nextPage).ConfigureAwait(false);
                })
                .ConfigureAwait(false);
            continue;
        }

        if (!_threadManager.HasRunningThreads())
        {
            // Nothing queued and nothing running.
            _crawlComplete = true;
            continue;
        }

        // Queue is empty but work is still in flight; check again in 3.5 seconds.
        await Task.Delay(3500).ConfigureAwait(false);
    }
}