示例#1
0
        /// <summary>
        /// Prepares this crawler session: creates the RabbitMQ producers, loads the company
        /// and its configuration, obtains the cache handles, resets the in-memory counters
        /// and collections, then runs <c>ClearOldCache</c>, <c>LoadCrcOldProduct</c> and
        /// <c>LoadOldQueue</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when every step completed; <c>false</c> when any step threw (including
        /// a cancellation raised through <c>_tokenCrawler</c>). On failure the exception is
        /// logged and, for each producer that had already been created, an error report and
        /// an "Error Init" end-of-session record are published.
        /// </returns>
        public bool Init()
        {
            try
            {
                var rabbitMqCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
                _producerReportSessionRunning = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeSessionRunning, ConfigCrawler.RoutingkeySessionRunning);
                _producerReportError          = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
                _producerProductChange        = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
                // NOTE(review): the exchange name is passed as the routing key too, unlike every
                // other producer here — confirm this is intended and not a copy/paste slip.
                _producerDuplicateProduct     = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
                _producerEndCrawler           = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);
                _producerVisitedLinkFindNew   = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeVisitedLinkFindNew, ConfigCrawler.RoutingKeyVisitedLinkFindNew);

                // Company and configuration are loaded exactly once; nothing below mutates them.
                _company = new Company(_companyId);
                _config  = new Configuration(_companyId);

                // NOTE(review): only the "0" case is handled; a non-zero LimitProductValid is not
                // copied into _limitProductValid in this method — confirm it is applied elsewhere.
                if (_config.LimitProductValid == 0)
                {
                    this._limitProductValid = 1000000;
                }

                // This value is replaced further down by Common.GetUriFromUrl. The constructor
                // call is kept because new Uri(...) throws on a null or malformed Website, which
                // aborts Init through the catch block below.
                _rootUri = new Uri(_company.Website);

                _cacheCrcVisited          = RedisCrcVisitedFindNew.Instance();
                _cacheWaitCrawler         = RedisCompanyWaitCrawler.Instance();
                _cacheLastUpdateProduct   = RedisLastUpdateProduct.Instance();
                _cacheProductHash         = CacheProductHash.Instance();
                _cacheCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
                _cacheDuplicateProduct    = CacheDuplicateProduct.Instance();

                // Start from empty in-memory state.
                _visitedCrc         = new HashSet<long>();
                _linkQueue          = new Queue<JobFindNew>();
                _crcProductOldGroup = new HashSet<long>();
                _dicDuplicate       = new Dictionary<long, long>();
                _countVisited       = 0;
                _countNewProduct    = 0;

                _tokenCrawler.ThrowIfCancellationRequested();

                _visitRegexs      = _config.VisitUrlsRegex;
                _detailLinkRegexs = _config.ProductUrlsRegex;
                _noCrawlerRegexs  = _config.NoVisitUrlRegex ?? new List<string>();
                _noCrawlerRegexs.AddRange(UtilCrawlerProduct.NoCrawlerRegexDefault);

                _timeStart          = DateTime.Now;
                _rootUri            = Common.GetUriFromUrl(_company.Website);
                _hsDuplicateProduct = _cacheDuplicateProduct.GetHashDuplicate(_companyId);

                ClearOldCache();
                LoadCrcOldProduct();
                LoadOldQueue();

                return true;
            }
            catch (Exception ex)
            {
                _log.Error(string.Format("Company:{0} {1} {2}", _companyId, ex.Message, ex.StackTrace));

                // The failure may have happened before the producers were created (for example
                // while resolving the RabbitMQ server), so each one is checked before use.
                // Without this guard the handler itself would throw and Init would never
                // return false.
                if (_producerReportError != null)
                {
                    string mss = Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
                    {
                        CompanyId = _companyId,
                        ProductId = 0,
                        TimeError = DateTime.Now,
                        Message   = "Init" + ex.Message + ex.StackTrace
                    });
                    _producerReportError.PublishString(mss, true);
                }

                if (_producerEndCrawler != null)
                {
                    // NOTE(review): _timeStart is assigned late in the try block, so StartAt may
                    // still hold whatever value the field had before Init when the failure
                    // happened earlier.
                    _producerEndCrawler.PublishString(new CrawlerSessionLog()
                    {
                        CompanyId              = _companyId,
                        CountChange            = 0,
                        CountProduct           = 0,
                        CountVisited           = 0,
                        Domain                 = "",
                        EndAt                  = DateTime.Now,
                        Ip                     = Dns.GetHostName(),
                        NumberDuplicateProduct = 0,
                        Session                = this._session,
                        StartAt                = this._timeStart,
                        TotalProduct           = 0,
                        TypeCrawler            = 0,
                        TypeEnd                = "Error Init",
                        TypeRun                = "Auto"
                    }.ToJson());
                }

                return false;
            }
        }