예제 #1
0
        // Crawl loop: keeps iterating until _crawlComplete is set. Each pass either hands
        // one scheduled link to the thread manager, marks the crawl complete, or pauses
        // before checking again.
        protected virtual async Task CrawlSite()
        {
            while (!_crawlComplete)
            {
                RunPreWorkChecks();

                // Snapshot the count once so the check and the log message agree.
                var pendingLinkCount = _scheduler.Count;
                if (pendingLinkCount > 0)
                {
                    Log.Debug($"There are [{pendingLinkCount}] links to schedule...");
                    _threadManager.DoWork(async() => await ProcessPage(_scheduler.GetNext()));
                    continue;
                }

                // _processingPageCount may be read racily here; that is acceptable because a
                // stale value is re-evaluated on the next loop iteration.
                var nothingInFlight = !_threadManager.HasRunningThreads() && _processingPageCount < 1;
                if (!nothingInFlight)
                {
                    Log.Debug("Waiting for links to be scheduled...");

                    //Beware of issues here... https://github.com/sjdirect/abot/issues/203
                    await Task.Delay(2500).ConfigureAwait(false);
                    continue;
                }

                Log.Debug("No links to schedule, no threads/tasks in progress...");
                _crawlComplete = true;
            }
        }
예제 #2
0
        // Blocking crawl loop: runs until _crawlComplete is set. While _crawlPause is set it
        // only logs a warning and sleeps; otherwise it dispatches one scheduled link per pass,
        // sleeps while the thread manager still reports running threads, or finishes.
        protected virtual void Crawl()
        {
            while (!_crawlComplete)
            {
                RunPreWorkChecks();

                // Paused: warn, back off, and re-evaluate from the top of the loop.
                if (_crawlPause)
                {
                    _logger.LogWarning("爬行线程暂停中...");
                    Thread.Sleep(2500);
                    continue;
                }

                // Links are queued: hand the next one to the thread manager.
                if (_scheduler.Count > 0)
                {
                    _logger.LogInformation($"当前队列有[{_scheduler.Count}]个待爬链接");
                    _threadManager.DoWork(() => ProcessPage(_scheduler.GetNext()));
                    continue;
                }

                // Queue is empty but work is still running: wait before checking again.
                if (_threadManager.HasRunningThreads())
                {
                    _logger.LogDebug("Waiting for links to be scheduled...");
                    Thread.Sleep(2500);
                    continue;
                }

                // Nothing queued and nothing running.
                _crawlComplete = true;
            }
        }
예제 #3
0
        // Checks HasRunningThreads() before, during and after a single 300 ms work item.
        // The test is timing-based: it relies on the sleeps below being long enough.
        public void HasRunningThreads()
        {
            // Nothing has been queued yet, so nothing should be reported as running.
            var runningBeforeWork = _unitUnderTest.HasRunningThreads();
            Assert.IsFalse(runningBeforeWork);

            // Queue work to be run on a thread, then pause briefly before checking.
            _unitUnderTest.DoWork(() => System.Threading.Thread.Sleep(300));
            System.Threading.Thread.Sleep(20);

            // The 300 ms work item should still be in progress.
            var runningDuringWork = _unitUnderTest.HasRunningThreads();
            Assert.IsTrue(runningDuringWork);

            // Wait longer than the work item takes.
            System.Threading.Thread.Sleep(400);

            // The work item should have completed by now, leaving nothing running.
            var runningAfterWork = _unitUnderTest.HasRunningThreads();
            Assert.IsFalse(runningAfterWork);
        }
예제 #4
0
        // With an instance created via GetInstance(1), submits MAXTHREADS work items and
        // asserts, without any waiting, that every one of them has already run.
        public void DoWork_SingleThreaded_WorkIsCompletedSynchronously()
        {
            _unitUnderTest = GetInstance(1);

            int completedCount = 0;
            int submittedCount = 0;

            while (submittedCount < MAXTHREADS)
            {
                _unitUnderTest.DoWork(() =>
                {
                    System.Threading.Thread.Sleep(5);
                    Interlocked.Increment(ref completedCount);
                });

                submittedCount++;
            }

            // No wait here: the assertion only holds if DoWork ran each item before returning.
            Assert.AreEqual(MAXTHREADS, completedCount);
        }
예제 #5
0
파일: WebCrawler.cs 프로젝트: yhtsnda/abot
        // Blocking crawl loop: runs until _crawlComplete is set. Each pass dispatches one
        // scheduled link, sleeps while the thread manager still reports running threads,
        // or marks the crawl complete.
        protected virtual void CrawlSite()
        {
            while (!_crawlComplete)
            {
                RunPreWorkChecks();

                var hasQueuedLinks = _scheduler.Count > 0;
                if (hasQueuedLinks)
                {
                    _threadManager.DoWork(() => ProcessPage(_scheduler.GetNext()));
                    continue;
                }

                // Only consulted when the queue is empty, as in the original else-if chain.
                var workersBusy = _threadManager.HasRunningThreads();
                if (workersBusy)
                {
                    _logger.DebugFormat("Waiting for links to be scheduled...");
                    Thread.Sleep(2500);
                }
                else
                {
                    _crawlComplete = true;
                }
            }
        }
예제 #6
0
        //private CrawlConfiguration GetCrawlConfigurationFromConfigFile()
        //{
        //    AbotConfigurationSectionHandler configFromFile = AbotConfigurationSectionHandler.LoadFromXml();

        //    if (configFromFile == null)
        //        throw new InvalidOperationException("abot config section was NOT found");

        //    _logger.LogDebug($"abot config section was found");
        //    return configFromFile.Convert();
        //}

        // Crawl loop: runs until _crawlComplete is set. Each pass dispatches one scheduled
        // link through the thread manager, waits 2.5 s while threads are still reported as
        // running, or marks the crawl complete.
        protected virtual async Task CrawlSite()
        {
            while (!_crawlComplete)
            {
                RunPreWorkChecks();

                // Links are queued: hand the next one to the thread manager.
                if (_scheduler.Count > 0)
                {
                    _threadManager.DoWork(() => ProcessPageAsync(_scheduler.GetNext()));
                    continue;
                }

                // Queue is empty but work is still running: wait before checking again.
                if (_threadManager.HasRunningThreads())
                {
                    _logger.LogDebug($"Waiting for links to be scheduled...");
                    await Task.Delay(2500);
                    continue;
                }

                // Nothing queued and nothing running.
                _crawlComplete = true;
            }
        }
예제 #7
0
 // Crawl loop: runs until _crawlComplete is set. Each pass takes the next scheduled item
 // and awaits the thread manager's DoWork call for it, waits 3.5 s while threads are
 // still reported as running, or marks the crawl complete.
 async Task CrawlSite()
 {
     while (!_crawlComplete)
     {
         RunPreWorkChecks();

         if (_scheduler.Count > 0)
         {
             // Dequeue before dispatching so the lambda captures a fixed item.
             var nextPage = _scheduler.GetNext();
             await _threadManager
                 .DoWork(async () =>
                 {
                     await ProcessPage(nextPage).ConfigureAwait(false);
                 })
                 .ConfigureAwait(false);
             continue;
         }

         if (_threadManager.HasRunningThreads())
         {
             await Task.Delay(3500).ConfigureAwait(false);
             continue;
         }

         // Nothing queued and nothing running.
         _crawlComplete = true;
     }
 }
예제 #8
0
        // Submits MAXTHREADS short work items to an instance created via GetInstance(1) and
        // asserts immediately afterwards that all of them have incremented the counter.
        public void DoWork_SingleThreaded_WorkIsCompletedSynchronously()
        {
            _unitUnderTest = GetInstance(1);

            int finishedItems = 0;

            for (int submitted = 0; submitted < MAXTHREADS; ++submitted)
            {
                // Each item sleeps briefly, then records that it ran.
                _unitUnderTest.DoWork(() =>
                {
                    System.Threading.Thread.Sleep(5);
                    Interlocked.Increment(ref finishedItems);
                });
            }

            // Checked without waiting, so every item must have run inside its DoWork call.
            Assert.AreEqual(MAXTHREADS, finishedItems);
        }