/// <summary>
/// Re-pushes the company info of every company id returned by
/// <c>ProductAdapter.GetAllCompanyIdCrawler</c> into the company crawler cache,
/// logging progress after each company.
/// </summary>
private static void ResetAllCopmanyInfo()
{
    var adapter = new ProductAdapter(new SqlDb(Server.ConnectionString));
    var companyCache = RedisCacheCompanyCrawler.Instance();
    List<long> companyIds = adapter.GetAllCompanyIdCrawler();

    // Progress is reported as "zero-based index / last index".
    int lastIndex = companyIds.Count - 1;
    int position = 0;
    foreach (long companyId in companyIds)
    {
        var company = new Company(companyId);
        companyCache.SetCompanyInfo(company.ID, company.Domain, 12, 12);
        log.Info(string.Format("Pushed companyInfo {0}/{1}", position, lastIndex));
        position++;
    }

    log.Info("Success push companyInfo");
}
/// <summary>
/// Writes a next-run value of -10 into the wait-crawler cache for every company in the
/// reload list (via <c>SetNexReload</c>) and for every company in the find-new list
/// (via <c>SetNexFindNew</c>), logging progress for each company.
/// </summary>
/// <remarks>
/// NOTE(review): -10 presumably marks the company as already due — confirm against
/// <c>RedisCompanyWaitCrawler</c>.
/// </remarks>
private static void SyncCompanyCrawler()
{
    // Always 0: the conditional per-company sync that used to increment it is disabled,
    // but the value is still part of the progress log line.
    int count = 0;
    RedisCompanyWaitCrawler redisCompanyWaitCrawler = RedisCompanyWaitCrawler.Instance();
    ProductAdapter productAdapter = new ProductAdapter(new SqlDb(Server.ConnectionString));

    List<long> lstCrawler = productAdapter.GetAllCompanyIdCrawlerReload();
    for (int i = 0; i < lstCrawler.Count; i++)
    {
        redisCompanyWaitCrawler.SetNexReload(lstCrawler[i], -10);
        log.Info(string.Format("sync company {0} {1}/{2}", count, i, lstCrawler.Count));
    }

    // Iterate the find-new list itself; the previous code fetched it but then looped
    // over the reload list again.
    List<long> lstCrawlerFindNew = productAdapter.GetAllCompanyIdCrawlerFindNew();
    for (int i = 0; i < lstCrawlerFindNew.Count; i++)
    {
        redisCompanyWaitCrawler.SetNexFindNew(lstCrawlerFindNew[i], -10);
        log.Info(string.Format("sync company {0} {1}/{2}", count, i, lstCrawlerFindNew.Count));
    }

    Console.WriteLine("Success sync company crawl!");
}
/// <summary>
/// Initializes this instance's working state: the product adapter, empty in-memory
/// crawl collections, and the cache / message-queue helpers obtained through their
/// <c>Instance()</c> accessors.
/// </summary>
private void InitData()
{
    // Database access.
    var database = new SqlDb(Server.ConnectionString);
    _productAdapter = new ProductAdapter(database);

    // In-memory collections, all starting empty.
    _linkQueue = new Queue<string>();
    _crcProductOldGroup = new Dictionary<long, bool>();
    _visitedCrc = new Dictionary<long, bool>();
    _productsReloaded = new List<long>();

    // Cache helpers.
    _redisQueueFindNew = RedisQueueFindNew.Instance();
    _redisCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
    _redisLastUpdateProduct = RedisLastUpdateProduct.Instance();
    _redisWaitCrawler = RedisCompanyWaitCrawler.Instance();
    _redisCrcVisited = RedisCrcVisitedFindNew.Instance();

    // Message-queue loggers.
    _mqLogQueueVisit = MQLogQueueVisit.Instance();
    _mqLogWarning = MQLogWarningFindNew.Instance();
    _mqLogChangePrice = new MqLogChangePrice();
}
/// <summary>
/// Prepares a crawl session for <c>_companyId</c>: creates the message producers, loads the
/// company and its crawl configuration, resets the in-memory counters and collections, and
/// then calls <c>ClearOldCache</c>, <c>LoadCrcOldProduct</c> and <c>LoadOldQueue</c>.
/// </summary>
/// <returns>
/// <c>true</c> when every step completed; <c>false</c> when any step threw. In that case the
/// error is logged and, for each producer that was already created, an error report and an
/// "Error Init" session-end message are published.
/// </returns>
public bool Init()
{
    try
    {
        var rabbitMqCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
        _producerReportSessionRunning = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeSessionRunning, ConfigCrawler.RoutingkeySessionRunning);
        _producerReportError = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
        _producerProductChange = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
        // NOTE(review): the exchange constant is passed twice here, whereas every other
        // producer passes a routing-key constant as the last argument — confirm this is intended.
        _producerDuplicateProduct = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
        _producerEndCrawler = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);
        _producerVisitedLinkFindNew = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeVisitedLinkFindNew, ConfigCrawler.RoutingKeyVisitedLinkFindNew);

        // Company and configuration are loaded once; they were previously constructed twice
        // with the same arguments.
        _company = new Company(_companyId);
        _config = new Configuration(_companyId);
        if (_config.LimitProductValid == 0)
        {
            this._limitProductValid = 1000000;
        }

        // NOTE(review): this value is overwritten by Common.GetUriFromUrl further down.
        // It is kept because new Uri(string) throws on a malformed Website, so removing it
        // could change which companies fail Init — confirm whether that check is wanted.
        _rootUri = new Uri(_company.Website);

        _cacheCrcVisited = RedisCrcVisitedFindNew.Instance();
        _cacheWaitCrawler = RedisCompanyWaitCrawler.Instance();
        _cacheLastUpdateProduct = RedisLastUpdateProduct.Instance();
        _cacheProductHash = CacheProductHash.Instance();
        _cacheCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
        _cacheDuplicateProduct = CacheDuplicateProduct.Instance();

        _visitedCrc = new HashSet<long>();
        _linkQueue = new Queue<JobFindNew>();
        _crcProductOldGroup = new HashSet<long>();
        _dicDuplicate = new Dictionary<long, long>();
        _countVisited = 0;
        _countNewProduct = 0;

        _tokenCrawler.ThrowIfCancellationRequested();

        _visitRegexs = _config.VisitUrlsRegex;
        _detailLinkRegexs = _config.ProductUrlsRegex;
        _noCrawlerRegexs = _config.NoVisitUrlRegex ?? new List<string>();
        _noCrawlerRegexs.AddRange(UtilCrawlerProduct.NoCrawlerRegexDefault);

        _timeStart = DateTime.Now;
        _rootUri = Common.GetUriFromUrl(_company.Website);
        _hsDuplicateProduct = _cacheDuplicateProduct.GetHashDuplicate(_companyId);

        ClearOldCache();
        LoadCrcOldProduct();
        LoadOldQueue();
        return true;
    }
    catch (Exception ex)
    {
        _log.Error(string.Format("Company:{0} {1} {2}", _companyId, ex.Message, ex.StackTrace));

        // The producers are assigned inside the try block, so either one may still be null
        // when the failure happened while creating them. Guard both, so that reporting the
        // error cannot itself throw a NullReferenceException out of this handler.
        if (_producerReportError != null)
        {
            string mss = Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
            {
                CompanyId = _companyId,
                ProductId = 0,
                TimeError = DateTime.Now,
                Message = "Init" + ex.Message + ex.StackTrace
            });
            _producerReportError.PublishString(mss, true);
        }

        if (_producerEndCrawler != null)
        {
            _producerEndCrawler.PublishString(new CrawlerSessionLog()
            {
                CompanyId = _companyId,
                CountChange = 0,
                CountProduct = 0,
                CountVisited = 0,
                Domain = "",
                EndAt = DateTime.Now,
                Ip = Dns.GetHostName(),
                NumberDuplicateProduct = 0,
                Session = this._session,
                StartAt = this._timeStart,
                TotalProduct = 0,
                TypeCrawler = 0,
                TypeEnd = "Error Init",
                TypeRun = "Auto"
            }.ToJson());
        }

        return false;
    }
}