Beispiel #1
0
 /// <summary>
 /// Processes queued link jobs one at a time until the queue is empty, the elapsed time since
 /// <c>_timeStart</c> exceeds <c>MaxHourReload</c> hours, or cancellation is requested.
 /// </summary>
 /// <remarks>
 /// Records the reason the loop stopped in <c>_typeEnd</c>. <c>End()</c> is called before the
 /// rethrow on cancellation and after the loop on every other path. Exceptions other than
 /// <see cref="OperationCanceledException"/> are logged and not propagated.
 /// </remarks>
 /// <exception cref="OperationCanceledException">
 /// Rethrown after the company id has been published through <c>_producerPushCompanyReload</c>.
 /// </exception>
 private void Crawl()
 {
     try
     {
         LoadQueue();
         while (true)
         {
             Token.ThrowIfCancellationRequested();

             if (_linksQueue.Count == 0)
             {
                 _typeEnd = TypeEnd.Success;
                 break;
             }

             if ((DateTime.Now - _timeStart).TotalHours > MaxHourReload)
             {
                 _typeEnd = TypeEnd.OverTime;
                 break;
             }

             var job           = _linksQueue.Dequeue();
             int statusProcess = ProcessJob(job);

             // The original guarded _company for Domain but not for TotalProduct, so a null
             // _company threw while building the log line and aborted the crawl as an error.
             // Both values are now read through the same null guard.
             object totalProduct = (_company == null) ? (object)"" : _company.TotalProduct;
             var    domain       = (_company == null) ? "" : _company.Domain;

             string strLog = string.Format("ss: {0} cQ: {1} tP: {2} cV: {3} pt: {4} {5} sst: {6} cmp: {7} {8}",
                                           _session, _linksQueue.Count, totalProduct, _countVisited, job.ProductId,
                                           job.url, statusProcess, _companyId, domain);
             if (EventReportRun != null)
             {
                 EventReportRun(strLog);
             }
             _log.Info(strLog);

             // Try to refill before the next iteration's empty-queue check ends the crawl.
             if (_linksQueue.Count == 0)
             {
                 LoadQueue();
             }
         }
     }
     catch (OperationCanceledException)
     {
         _typeEnd = TypeEnd.Immediate;
         _producerPushCompanyReload.PublishString(_companyId.ToString(), true, 0);
         End();
         throw;
     }
     catch (Exception ex)
     {
         _typeEnd = TypeEnd.Error;
         _log.Error(ex);
     }
     End();
 }
Beispiel #2
0
 /// <summary>
 /// Decides whether the crawl loop should stop and records the reason in <c>_typeEnd</c>.
 /// </summary>
 /// <returns>
 /// <c>true</c> when cancellation was requested on <c>_tokenCrawler</c>, when <c>_linkQueue</c> is
 /// empty, or when the time elapsed since <c>_timeStart</c> has reached
 /// <c>_config.MaxHourFindNew</c> hours; otherwise <c>false</c> (and <c>_typeEnd</c> is untouched).
 /// </returns>
 private bool CheckEnd()
 {
     if (_tokenCrawler.IsCancellationRequested)
     {
         _typeEnd = TypeEnd.Immediate;
         return true;
     }

     if (_linkQueue.Count == 0)
     {
         _typeEnd = TypeEnd.Success;
         return true;
     }

     // TotalHours, not Hours: TimeSpan.Hours is only the 0-23 hour component, so a limit of
     // 24 or more could never be reached and the value wrapped back to 0 after each full day.
     if ((DateTime.Now - _timeStart).TotalHours >= _config.MaxHourFindNew)
     {
         _typeEnd = TypeEnd.OverTime;
         return true;
     }

     return false;
 }
Beispiel #3
0
        /// <summary>
        /// Runs the crawl: after <c>Init()</c> succeeds, seeds the queue with <c>AddRootQueue()</c>
        /// and dequeues jobs until <c>CheckEnd()</c> returns <c>true</c>, then calls <c>End()</c>.
        /// </summary>
        /// <remarks>
        /// Each dequeued job is logged and reported through <c>EventReportRun</c>. A job is visited
        /// only when <c>IsNoVisitUrl</c> returns <c>false</c> for its URL and the sum of
        /// <c>_crcProductOldGroup.Count</c> and <c>_countNewProduct</c> is below
        /// <c>_limitProductValid</c>. Visited jobs are published through
        /// <c>_producerVisitedLinkFindNew</c> and, when HTML was obtained, passed to <c>ProcessLink</c>.
        /// Exceptions other than <see cref="OperationCanceledException"/> are logged, published
        /// through <c>_producerReportError</c>, and not propagated.
        /// </remarks>
        /// <exception cref="OperationCanceledException">
        /// Rethrown after <c>_typeEnd</c> is set to <c>TypeEnd.Immediate</c> and <c>End()</c> has run.
        /// </exception>
        public void StartCrawler()
        {
            try
            {
                UpdateLastStart();
                if (Init())
                {
                    RunReportRunning();
                    AddRootQueue();
                    _log.Info(GetPrefixLog());
                    while (!CheckEnd())
                    {
                        var jobCrawl = _linkQueue.Dequeue();

                        // Format only the literal template. The original passed the finished
                        // message through string.Format a second time, so any '{' or '}' in the
                        // URL or prefix was parsed as a placeholder and threw FormatException.
                        string strLog = GetPrefixLog() + string.Format(" Url: {0} Deep: {1}", jobCrawl.Url, jobCrawl.Deep);
                        _log.Info(strLog);
                        if (EventReportRun != null)
                        {
                            EventReportRun(strLog);
                        }
                        DelayCrawler();

                        if (!IsNoVisitUrl(jobCrawl.Url) &&
                            (_crcProductOldGroup.Count + _countNewProduct < _limitProductValid))
                        {
                            _countVisited++;
                            _producerVisitedLinkFindNew.PublishString(
                                Newtonsoft.Json.JsonConvert.SerializeObject(new VisitedLinkFindNew()
                            {
                                CompanyId  = _companyId,
                                ProductId  = jobCrawl.Id,
                                Url        = jobCrawl.Url,
                                Session    = _session,
                                LastUpdate = DateTime.Now
                            }), false, 300);
                            var html = GetHtmlCode(jobCrawl.Url, _config.UseClearHtml);

                            // Skip null as well as empty results so ProcessLink never receives null.
                            if (!string.IsNullOrEmpty(html))
                            {
                                ProcessLink(jobCrawl, html);
                            }
                        }
                    }
                    End();
                }
            }
            catch (OperationCanceledException)
            {
                _typeEnd = TypeEnd.Immediate;
                _log.Info("Push job back queue");
                End();
                throw;
            }
            catch (Exception ex01)
            {
                // NOTE(review): unlike the normal and cancellation paths, this path neither sets
                // _typeEnd nor calls End() - confirm that is intended.
                _log.Error(ex01);
                _producerReportError.PublishString(
                    Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
                {
                    CompanyId = _companyId,
                    ProductId = 0,
                    TimeError = DateTime.Now,
                    Message   = ex01.Message + "\n" + ex01.StackTrace,
                    Url       = ""
                }), true, 0);
            }
        }