private void Init()
{
    _redisDesHash = CacheProductDesciptioHash.Instance();
    _jobClientLogChangePrice = new MqLogChangePrice();
    _jobClientLogChangeProduct = MQLogChangeProduct.Instance();
    _dicDuplicate = new Dictionary<long, long>();
    _dicCacheProduct = new Dictionary<long, ProductCache>();
    _linksQueue = new Queue<NSCrawler.Job>();
    _productAdapter = new ProductAdapter(new SqlDb(Server.ConnectionString));
    _redisWaitCrawler = RedisCompanyWaitCrawler.Instance();
    _redisProduct = RedisCacheProductInfo.Instance();
    _redisProductLastUpdate = RedisLastUpdateProduct.Instance();
    _publiserDesciption = new PublisherDesciption();
}
private static void SyncCompanyCrawler()
{
    int count = 0; // only incremented in the commented-out branches below
    RedisCacheCompanyCrawler redisCompany = RedisCacheCompanyCrawler.Instance();
    RedisCompanyWaitCrawler redisCompanyWaitCrawler = RedisCompanyWaitCrawler.Instance();
    RedisLastUpdateProduct redisLstProduct = RedisLastUpdateProduct.Instance();
    CacheProductInfo cacheProductInfo = new CacheProductInfo(new SqlDb(Server.ConnectionString));
    ProductAdapter productAdapter = new ProductAdapter(new SqlDb(Server.ConnectionString));

    // Mark every reload-crawler company as due by pushing its next-reload marker into the past.
    List<long> lstCrawler = productAdapter.GetAllCompanyIdCrawlerReload();
    for (int i = 0; i < lstCrawler.Count; i++)
    {
        redisCompanyWaitCrawler.SetNexReload(lstCrawler[i], -10);
        //long companyID = lstCrawler[i];
        //if (!redisCompanyWaitCrawler.CheckHaveItemReload(companyID))
        //{
        //    count++;
        //    redisCompanyWaitCrawler.SetNexReload(companyID, 1);
        //    Company cmp = new Company(companyID);
        //    redisCompany.SetCompanyInfo(companyID, cmp.Domain, 1, 1);
        //    cacheProductInfo.RefeshCacheProductInfo(companyID);
        //    redisLstProduct.ResetLastUpdateForCompany(companyID, productAdapter.GetListProductIdOfCompany(companyID));
        //}
        log.Info(string.Format("sync company {0} {1}/{2}", count, i, lstCrawler.Count));
    }

    // Same pass for the find-new crawlers. (The original iterated lstCrawler here;
    // lstCrawlerFindNew is clearly the intended list.)
    List<long> lstCrawlerFindNew = productAdapter.GetAllCompanyIdCrawlerFindNew();
    for (int i = 0; i < lstCrawlerFindNew.Count; i++)
    {
        redisCompanyWaitCrawler.SetNexFindNew(lstCrawlerFindNew[i], -10);
        //long companyID = lstCrawlerFindNew[i];
        //if (!redisCompanyWaitCrawler.CheckHaveItemFindNew(companyID))
        //{
        //    ... (mirror of the commented-out reload branch above)
        //}
        log.Info(string.Format("sync company {0} {1}/{2}", count, i, lstCrawlerFindNew.Count));
    }

    Console.WriteLine("Success sync company crawl!");
}
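// The two loops in SyncCompanyCrawler() differ only in the list they walk and the
// setter they call, so they could share one helper. A minimal sketch follows; SyncList
// is illustrative and not part of the original source, and it assumes the
// SetNexReload/SetNexFindNew signature (long companyId, int offset) implied above.
private static void SyncList(List<long> companyIds, Action<long, int> setNext, string label)
{
    for (int i = 0; i < companyIds.Count; i++)
    {
        setNext(companyIds[i], -10); // push the next-run marker into the past: due immediately
        log.Info(string.Format("sync {0} {1}/{2}", label, i, companyIds.Count));
    }
}

// Usage, with the instances created in SyncCompanyCrawler():
//   SyncList(lstCrawler, redisCompanyWaitCrawler.SetNexReload, "reload");
//   SyncList(lstCrawlerFindNew, redisCompanyWaitCrawler.SetNexFindNew, "find-new");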
private void InitData()
{
    _productAdapter = new ProductAdapter(new SqlDb(Server.ConnectionString));
    _linkQueue = new Queue<string>();
    _crcProductOldGroup = new Dictionary<long, bool>();
    _visitedCrc = new Dictionary<long, bool>();
    _productsReloaded = new List<long>();
    _redisQueueFindNew = RedisQueueFindNew.Instance();
    _redisCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
    _redisLastUpdateProduct = RedisLastUpdateProduct.Instance();
    _redisWaitCrawler = RedisCompanyWaitCrawler.Instance();
    _redisCrcVisited = RedisCrcVisitedFindNew.Instance();
    _mqLogQueueVisit = MQLogQueueVisit.Instance();
    _mqLogWarning = MQLogWarningFindNew.Instance();
    _mqLogChangePrice = new MqLogChangePrice();
}
public bool Init()
{
    try
    {
        var rabbitMqCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
        _producerReportSessionRunning = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeSessionRunning, ConfigCrawler.RoutingkeySessionRunning);
        _producerReportError = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
        _producerProductChange = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
        // Note: the exchange name doubles as the routing key here, as in the original.
        _producerDuplicateProduct = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
        _producerEndCrawler = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);
        _producerVisitedLinkFindNew = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeVisitedLinkFindNew, ConfigCrawler.RoutingKeyVisitedLinkFindNew);

        // Company and config are loaded once (the original constructed both twice).
        _company = new Company(_companyId);
        _config = new Configuration(_companyId);
        if (_config.LimitProductValid == 0)
        {
            _limitProductValid = 1000000;
        }

        _cacheCrcVisited = RedisCrcVisitedFindNew.Instance();
        _cacheWaitCrawler = RedisCompanyWaitCrawler.Instance();
        _cacheLastUpdateProduct = RedisLastUpdateProduct.Instance();
        _cacheProductHash = CacheProductHash.Instance();
        _cacheCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
        _cacheDuplicateProduct = CacheDuplicateProduct.Instance();

        _visitedCrc = new HashSet<long>();
        _linkQueue = new Queue<JobFindNew>();
        _crcProductOldGroup = new HashSet<long>();
        _dicDuplicate = new Dictionary<long, long>();
        _countVisited = 0;
        _countNewProduct = 0;
        _tokenCrawler.ThrowIfCancellationRequested();

        // URL classification rules: pages to visit, product-detail pages, and exclusions.
        _visitRegexs = _config.VisitUrlsRegex;
        _detailLinkRegexs = _config.ProductUrlsRegex;
        _noCrawlerRegexs = _config.NoVisitUrlRegex ?? new List<string>();
        _noCrawlerRegexs.AddRange(UtilCrawlerProduct.NoCrawlerRegexDefault);

        _timeStart = DateTime.Now;
        // The original assigned _rootUri twice; the later Common.GetUriFromUrl form is kept.
        _rootUri = Common.GetUriFromUrl(_company.Website);
        _hsDuplicateProduct = _cacheDuplicateProduct.GetHashDuplicate(_companyId);

        ClearOldCache();
        LoadCrcOldProduct();
        LoadOldQueue();
        return true;
    }
    catch (Exception ex)
    {
        _log.Error(string.Format("Company:{0} {1} {2}", _companyId, ex.Message, ex.StackTrace));
        string mss = Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
        {
            CompanyId = _companyId,
            ProductId = 0,
            TimeError = DateTime.Now,
            Message = "Init " + ex.Message + ex.StackTrace
        });
        _producerReportError.PublishString(mss, true);
        if (_producerEndCrawler != null)
        {
            _producerEndCrawler.PublishString(new CrawlerSessionLog()
            {
                CompanyId = _companyId,
                CountChange = 0,
                CountProduct = 0,
                CountVisited = 0,
                Domain = "",
                EndAt = DateTime.Now,
                Ip = Dns.GetHostName(),
                NumberDuplicateProduct = 0,
                Session = this._session,
                StartAt = this._timeStart,
                TotalProduct = 0,
                TypeCrawler = 0,
                TypeEnd = "Error Init",
                TypeRun = "Auto"
            }.ToJson());
        }
        return false;
    }
}
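// A minimal sketch of how the regex lists loaded in Init() are typically applied when
// the find-new crawler classifies a discovered URL. ClassifyUrl and UrlKind are
// illustrative names, not part of the original source; only the three regex list
// fields come from Init() above.
private enum UrlKind { Skip, Detail, Visit }

private UrlKind ClassifyUrl(string url)
{
    // Exclusions win first, mirroring how _noCrawlerRegexs is extended with
    // UtilCrawlerProduct.NoCrawlerRegexDefault during Init().
    foreach (string pattern in _noCrawlerRegexs)
        if (System.Text.RegularExpressions.Regex.IsMatch(url, pattern))
            return UrlKind.Skip;

    // Product detail pages are parsed and hashed.
    foreach (string pattern in _detailLinkRegexs)
        if (System.Text.RegularExpressions.Regex.IsMatch(url, pattern))
            return UrlKind.Detail;

    // Listing/category pages are enqueued for further visiting.
    foreach (string pattern in _visitRegexs)
        if (System.Text.RegularExpressions.Regex.IsMatch(url, pattern))
            return UrlKind.Visit;

    return UrlKind.Skip;
}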
public bool Init()
{
    try
    {
        _cacheWaitCrawler = RedisCompanyWaitCrawler.Instance();
        _redisLastCrl = RedisLastUpdateProduct.Instance();
        _config = new Configuration(_companyId, true);
        _company = new Company(_companyId);
        _cacheDesHash = CacheProductDesciptioHash.Instance();

        var rabbitMQCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
        _producerReportError = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
        _producerProductChange = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
        // Note: the exchange name doubles as the routing key here, as in the original.
        _producerDuplicateProduct = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
        _producerPushCompanyReload = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeCompanyReload, ConfigCrawler.RoutingkeyCompanyReload);
        _producerEndCrawler = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);

        _cacheCheckDelete = CacheTrackDeleteProduct.Instance();
        _cacheProductHash = CacheProductHash.Instance();
        _dicTrackDie = _cacheCheckDelete.GetDicTrackOfCompany(_companyId);
        _dicDuplicate = new Dictionary<long, long>();
        _dicCacheProduct = new Dictionary<long, ProductHash>(); // the original initialized this twice
        _dicHashDesc = new Dictionary<long, long>();
        _linksQueue = new Queue<Job>();
        _timeStart = DateTime.Now;
        _countChange = 0;
        _countVisited = 0;
        return true;
    }
    catch (Exception ex)
    {
        _log.Error(ex);
        if (_producerEndCrawler != null)
        {
            _producerEndCrawler.PublishString(new CrawlerSessionLog()
            {
                CompanyId = _companyId,
                CountChange = 0,
                CountProduct = 0,
                CountVisited = 0,
                Domain = "",
                EndAt = DateTime.Now,
                Ip = Dns.GetHostName(),
                NumberDuplicateProduct = 0,
                Session = this._session,
                StartAt = this._timeStart,
                TotalProduct = 0,
                TypeCrawler = 0,
                TypeEnd = "Error Init",
                TypeRun = "Auto"
            }.ToJson());
        }
        string mss = Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
        {
            CompanyId = _companyId,
            ProductId = 0,
            TimeError = DateTime.Now,
            Message = "Init " + ex.Message + ex.StackTrace
        });
        _producerReportError.PublishString(mss, true, 20);
        return false;
    }
}
public void Run(System.Threading.CancellationToken token)
{
    // The original embedded a full connection string with credentials here; Server.ConnectionString,
    // used by every other method in this section, is assumed to point at the same database.
    ProductAdapter productAdapter = new ProductAdapter(new SqlDb(Server.ConnectionString));
    log.InfoFormat("Start run at {0}", DateTime.Now.ToString(CultureInfo.InvariantCulture));
    CacheProductHash cacheProductHash = CacheProductHash.Instance();
    RedisLastUpdateProduct cacheLastUpdateProduct = RedisLastUpdateProduct.Instance();
    int countCompany = 0;

    // Sync the find-new and reload company lists into the wait-crawler cache.
    try
    {
        var lstFn = productAdapter.GetAllCompanyIdCrawlerFindNew();
        var lstRl = productAdapter.GetAllCompanyIdCrawlerReload();
        RedisCompanyWaitCrawler redisCache = RedisCompanyWaitCrawler.Instance();
        redisCache.SyncCompanyFindNew(lstFn);
        redisCache.SyncCompanyReload(lstRl);
    }
    catch (Exception ex)
    {
        log.Error(ex);
    }

    // Rebuild the per-company product-hash cache from the database.
    var lst = new List<QT.Entities.CrawlerProduct.Cache.ProductHash>();
    var lstLastUpdate = new List<long>();
    var lstCompany = productAdapter.GetAllCompanyIdCrawler();
    foreach (var companyID in lstCompany)
    {
        Company cmp = new Company(companyID);
        productAdapter.DeleteProductUnvalidOfCOmpany(companyID);
        DataTable tbl = productAdapter.GetProductResetColumnDuplicateAndChange(companyID);
        foreach (DataRow rowProduct in tbl.Rows)
        {
            long productId = QT.Entities.Common.Obj2Int64(rowProduct["ID"]);
            long originPrice = QT.Entities.Common.Obj2Int64(rowProduct["OriginPrice"]);
            string name = rowProduct["Name"].ToString();
            long price = QT.Entities.Common.Obj2Int64(rowProduct["Price"]);
            string imageUrl = Convert.ToString(rowProduct["ImageUrls"]);
            string detailUrl = Convert.ToString(rowProduct["DetailUrl"]);
            int inStock = QT.Entities.Common.Obj2Int(rowProduct["InStock"]);
            bool valid = QT.Entities.Common.Obj2Bool(rowProduct["Valid"]);
            string shortDescription = QT.Entities.Common.CellToString(rowProduct["ShortDescription"], "");
            long categoryId = rowProduct["ClassificationID"] == DBNull.Value
                ? 0
                : QT.Entities.Common.Obj2Int64(rowProduct["ClassificationID"]);

            // Three independent hashes: change detection, duplicate detection, image identity.
            long hashChange = ProductEntity.GetHashChangeInfo(inStock, valid, price, name, imageUrl, categoryId, shortDescription, originPrice);
            long hashDuplicate = Product.GetHashDuplicate(cmp.Domain, price, name, imageUrl);
            long hashImage = Product.GetHashImageInfo(imageUrl);

            lst.Add(new QT.Entities.CrawlerProduct.Cache.ProductHash()
            {
                HashChange = hashChange,
                HashDuplicate = hashDuplicate,
                HashImage = hashImage,
                Id = productId,
                Price = price,
                url = detailUrl
            });
            lstLastUpdate.Add(productId);
        }

        cacheProductHash.SetCacheProductHash(companyID, lst, 100);
        cacheLastUpdateProduct.RemoveAllLstProduct(companyID);
        cacheLastUpdateProduct.UpdateBathLastUpdateProduct(companyID, lstLastUpdate, DateTime.Now.AddDays(-1));
        productAdapter.UpdateCountProductForCompany(companyID, lstLastUpdate.Count, lstLastUpdate.Count);
        lst.Clear();
        lstLastUpdate.Clear();
        log.Info(string.Format("Complete Company: {0} {1}/{2}", companyID, countCompany++, lstCompany.Count));
    }

    log.Info("Complete all company");
    NextRun = DateTime.Now.AddHours(MAX_HOUR_LOOP);
    log.InfoFormat("End at {0}", DateTime.Now.ToString(CultureInfo.InvariantCulture));
}
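// A minimal sketch of how the ProductHash entries rebuilt in Run() are meant to be
// consumed on the next crawl: recompute HashChange from the freshly crawled fields and
// compare it with the cached value. HasProductChanged and its parameter list are
// illustrative; only ProductEntity.GetHashChangeInfo comes from the original source.
private static bool HasProductChanged(
    QT.Entities.CrawlerProduct.Cache.ProductHash cached,
    int inStock, bool valid, long price, string name, string imageUrl,
    long categoryId, string shortDescription, long originPrice)
{
    long newHashChange = ProductEntity.GetHashChangeInfo(
        inStock, valid, price, name, imageUrl, categoryId, shortDescription, originPrice);
    return newHashChange != cached.HashChange;
}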