private void InitData()
{
    _productAdapter = new ProductAdapter(new SqlDb(Server.ConnectionString));

    // Working state for the crawl: pending links, CRC bookkeeping, reloaded products.
    _linkQueue = new Queue<string>();
    _crcProductOldGroup = new Dictionary<long, bool>();
    _visitedCrc = new Dictionary<long, bool>();
    _productsReloaded = new List<long>();

    // Shared singletons for the Redis-backed queues/caches and the MQ loggers.
    _redisQueueFindNew = RedisQueueFindNew.Instance();
    _redisCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
    _redisLastUpdateProduct = RedisLastUpdateProduct.Instance();
    _redisWaitCrawler = RedisCompanyWaitCrawler.Instance();
    _redisCrcVisited = RedisCrcVisitedFindNew.Instance();
    _mqLogQueueVisit = MQLogQueueVisit.Instance();
    _mqLogWarning = MQLogWarningFindNew.Instance();
    _mqLogChangePrice = new MqLogChangePrice();
}
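// The Redis- and MQ-backed helpers above are all obtained through static
// Instance() accessors. Their implementations are not part of this file; the
// sketch below is an assumption showing the lazy, thread-safe singleton shape
// such an accessor typically has (System.Lazy<T> handles the locking), using
// an illustrative class name rather than the project's real code:
public sealed class RedisQueueFindNewSketch
{
    private static readonly Lazy<RedisQueueFindNewSketch> LazyInstance =
        new Lazy<RedisQueueFindNewSketch>(() => new RedisQueueFindNewSketch());

    // Private constructor: the Redis connection would be opened once here.
    private RedisQueueFindNewSketch() { }

    public static RedisQueueFindNewSketch Instance() => LazyInstance.Value;
}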
public bool Init()
{
    try
    {
        // Wire up one RabbitMQ producer per exchange used by this crawler.
        var rabbitMqCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
        _producerReportSessionRunning = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeSessionRunning, ConfigCrawler.RoutingkeySessionRunning);
        _producerReportError = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
        _producerProductChange = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
        // NOTE: the routing-key argument below repeats the exchange name; this looks
        // like a copy-paste slip (a RoutingKey* constant was probably intended) and is
        // kept as in the source.
        _producerDuplicateProduct = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
        _producerEndCrawler = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);
        _producerVisitedLinkFindNew = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeVisitedLinkFindNew, ConfigCrawler.RoutingKeyVisitedLinkFindNew);

        _company = new Company(_companyId);
        _config = new Configuration(_companyId);
        if (_config.LimitProductValid == 0)
        {
            _limitProductValid = 1000000; // no configured limit: fall back to an effectively unlimited cap
        }

        // Shared cache singletons.
        _cacheCrcVisited = RedisCrcVisitedFindNew.Instance();
        _cacheWaitCrawler = RedisCompanyWaitCrawler.Instance();
        _cacheLastUpdateProduct = RedisLastUpdateProduct.Instance();
        _cacheProductHash = CacheProductHash.Instance();
        _cacheCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
        _cacheDuplicateProduct = CacheDuplicateProduct.Instance();

        // Per-session working state and counters.
        _visitedCrc = new HashSet<long>();
        _linkQueue = new Queue<JobFindNew>();
        _crcProductOldGroup = new HashSet<long>();
        _dicDuplicate = new Dictionary<long, long>();
        _countVisited = 0;
        _countNewProduct = 0;

        _tokenCrawler.ThrowIfCancellationRequested();

        // URL filters: configured visit/detail patterns plus the default no-crawl list.
        _visitRegexs = _config.VisitUrlsRegex;
        _detailLinkRegexs = _config.ProductUrlsRegex;
        _noCrawlerRegexs = _config.NoVisitUrlRegex ?? new List<string>();
        _noCrawlerRegexs.AddRange(UtilCrawlerProduct.NoCrawlerRegexDefault);

        _timeStart = DateTime.Now;
        _rootUri = Common.GetUriFromUrl(_company.Website);
        _hsDuplicateProduct = _cacheDuplicateProduct.GetHashDuplicate(_companyId);

        ClearOldCache();
        LoadCrcOldProduct();
        LoadOldQueue();
        return true;
    }
    catch (Exception ex)
    {
        _log.Error(string.Format("Company:{0} {1} {2}", _companyId, ex.Message, ex.StackTrace));
        // Guard the producers: they are null if the exception occurred before they were created.
        if (_producerReportError != null)
        {
            string mss = Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
            {
                CompanyId = _companyId,
                ProductId = 0,
                TimeError = DateTime.Now,
                Message = "Init " + ex.Message + ex.StackTrace
            });
            _producerReportError.PublishString(mss, true);
        }
        if (_producerEndCrawler != null)
        {
            _producerEndCrawler.PublishString(new CrawlerSessionLog()
            {
                CompanyId = _companyId,
                CountChange = 0,
                CountProduct = 0,
                CountVisited = 0,
                Domain = "",
                EndAt = DateTime.Now,
                Ip = Dns.GetHostName(),
                NumberDuplicateProduct = 0,
                Session = _session,
                StartAt = _timeStart,
                TotalProduct = 0,
                TypeCrawler = 0,
                TypeEnd = "Error Init",
                TypeRun = "Auto"
            }.ToJson());
        }
        return false;
    }
}
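// Usage sketch: Init() returns false after publishing its own error report and
// end-of-session message, so a caller only needs to gate the crawl on the
// return value. The type and method names below (CrawlerFindNew, Run) are
// hypothetical, assumed for illustration:
//
//   var crawler = new CrawlerFindNew(companyId);
//   crawler.InitData();
//   if (!crawler.Init())
//   {
//       return; // failure already logged and reported inside Init()
//   }
//   crawler.Run();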