示例#1
0
 /// <summary>
 /// Assigns the cache clients, log publishers, working collections and
 /// product adapter held in this instance's fields.
 /// </summary>
 private void Init()
 {
     // Description-hash cache and the two change-log clients.
     _redisDesHash = CacheProductDesciptioHash.Instance();
     _jobClientLogChangePrice = new MqLogChangePrice();
     _jobClientLogChangeProduct = MQLogChangeProduct.Instance();

     // Fresh, empty in-memory collections.
     _dicDuplicate = new Dictionary<long, long>();
     _dicCacheProduct = new Dictionary<long, ProductCache>();
     _linksQueue = new Queue<NSCrawler.Job>();

     // Product adapter built on the connection string exposed by Server.
     var database = new SqlDb(Server.ConnectionString);
     _productAdapter = new ProductAdapter(database);

     // Remaining cache clients, obtained through their static Instance() accessors.
     _redisWaitCrawler = RedisCompanyWaitCrawler.Instance();
     _redisProduct = RedisCacheProductInfo.Instance();
     _redisProductLastUpdate = RedisLastUpdateProduct.Instance();

     _publiserDesciption = new PublisherDesciption();
 }
示例#2
0
        /// <summary>
        /// Calls <c>SetNexReload(id, -10)</c> for every company id returned by
        /// <c>GetAllCompanyIdCrawlerReload()</c>, then <c>SetNexFindNew(id, -10)</c> for every
        /// company id returned by <c>GetAllCompanyIdCrawlerFindNew()</c>, logging progress
        /// after each company and writing a completion message to the console.
        /// </summary>
        private static void SyncCompanyCrawler()
        {
            // No per-company counter is maintained by this method; the constant is kept
            // only so the log line keeps its existing "sync company {0} {1}/{2}" shape.
            const int count = 0;

            // NOTE(review): -10 is passed straight through to the wait-crawler cache;
            // presumably it schedules the next run in the past so the company is picked
            // up immediately - confirm against RedisCompanyWaitCrawler.
            const int nextRunOffset = -10;

            RedisCompanyWaitCrawler redisCompanyWaitCrawler = RedisCompanyWaitCrawler.Instance();
            ProductAdapter productAdapter = new ProductAdapter(new SqlDb(Server.ConnectionString));

            List<long> lstCrawler = productAdapter.GetAllCompanyIdCrawlerReload();
            for (int i = 0; i < lstCrawler.Count; i++)
            {
                redisCompanyWaitCrawler.SetNexReload(lstCrawler[i], nextRunOffset);
                log.Info(string.Format("sync company {0} {1}/{2}", count, i, lstCrawler.Count));
            }

            // The find-new pass must walk its own id list. Previously it iterated the
            // reload list again, so companies that are find-new-only were never scheduled
            // and reload-only companies were scheduled for find-new by mistake.
            List<long> lstCrawlerFindNew = productAdapter.GetAllCompanyIdCrawlerFindNew();
            for (int i = 0; i < lstCrawlerFindNew.Count; i++)
            {
                redisCompanyWaitCrawler.SetNexFindNew(lstCrawlerFindNew[i], nextRunOffset);
                log.Info(string.Format("sync company {0} {1}/{2}", count, i, lstCrawlerFindNew.Count));
            }

            Console.WriteLine("Success sync company crawl!");
        }
示例#3
0
        /// <summary>
        /// Assigns the product adapter, the working collections, the cache clients
        /// and the log publishers held in this instance's fields.
        /// </summary>
        private void InitData()
        {
            // Product adapter built on the connection string exposed by Server.
            var database = new SqlDb(Server.ConnectionString);
            _productAdapter = new ProductAdapter(database);

            // Fresh, empty in-memory collections.
            _linkQueue = new Queue<string>();
            _crcProductOldGroup = new Dictionary<long, bool>();
            _visitedCrc = new Dictionary<long, bool>();
            _productsReloaded = new List<long>();

            // Cache clients, obtained through their static Instance() accessors.
            _redisQueueFindNew = RedisQueueFindNew.Instance();
            _redisCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
            _redisLastUpdateProduct = RedisLastUpdateProduct.Instance();
            _redisWaitCrawler = RedisCompanyWaitCrawler.Instance();
            _redisCrcVisited = RedisCrcVisitedFindNew.Instance();

            // Log publishers.
            _mqLogQueueVisit = MQLogQueueVisit.Instance();
            _mqLogWarning = MQLogWarningFindNew.Instance();
            _mqLogChangePrice = new MqLogChangePrice();
        }
示例#4
0
        /// <summary>
        /// Prepares a crawl session for <c>_companyId</c>: creates the message producers,
        /// loads the company and its configuration, obtains the cache clients, resets the
        /// working collections and counters, then calls <c>ClearOldCache</c>,
        /// <c>LoadCrcOldProduct</c> and <c>LoadOldQueue</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when every step completed; <c>false</c> when any step threw. On failure
        /// the exception is logged and, for each producer that was already created, an error
        /// record and an "Error Init" end-of-session record are published.
        /// </returns>
        public bool Init()
        {
            try
            {
                var rabbitMqCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
                _producerReportSessionRunning = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeSessionRunning, ConfigCrawler.RoutingkeySessionRunning);
                _producerReportError          = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
                _producerProductChange        = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
                // NOTE(review): the exchange constant is also passed as the routing key here,
                // unlike every other producer - confirm this is intentional.
                _producerDuplicateProduct     = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
                _producerEndCrawler           = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);
                _producerVisitedLinkFindNew   = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeVisitedLinkFindNew, ConfigCrawler.RoutingKeyVisitedLinkFindNew);

                // Company and configuration are loaded exactly once; they were previously
                // constructed a second time further down with identical arguments.
                _company = new Company(_companyId);
                _config  = new Configuration(_companyId);
                if (_config.LimitProductValid == 0)
                {
                    // A configured limit of 0 is replaced by a fixed ceiling.
                    this._limitProductValid = 1000000;
                }

                // NOTE(review): this value is overwritten below by Common.GetUriFromUrl.
                // It is kept because new Uri(...) throws on a null or malformed Website,
                // which currently aborts Init - confirm before removing.
                _rootUri = new Uri(_company.Website);

                _cacheCrcVisited          = RedisCrcVisitedFindNew.Instance();
                _cacheWaitCrawler         = RedisCompanyWaitCrawler.Instance();
                _cacheLastUpdateProduct   = RedisLastUpdateProduct.Instance();
                _cacheProductHash         = CacheProductHash.Instance();
                _cacheCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
                _cacheDuplicateProduct    = CacheDuplicateProduct.Instance();

                _visitedCrc         = new HashSet<long>();
                _linkQueue          = new Queue<JobFindNew>();
                _crcProductOldGroup = new HashSet<long>();
                _dicDuplicate       = new Dictionary<long, long>();
                _countVisited       = 0;
                _countNewProduct    = 0;

                _tokenCrawler.ThrowIfCancellationRequested();

                _visitRegexs      = _config.VisitUrlsRegex;
                _detailLinkRegexs = _config.ProductUrlsRegex;
                _noCrawlerRegexs  = _config.NoVisitUrlRegex ?? new List<string>();
                _noCrawlerRegexs.AddRange(UtilCrawlerProduct.NoCrawlerRegexDefault);

                _timeStart          = DateTime.Now;
                _rootUri            = Common.GetUriFromUrl(_company.Website);
                _hsDuplicateProduct = _cacheDuplicateProduct.GetHashDuplicate(_companyId);

                ClearOldCache();
                LoadCrcOldProduct();
                LoadOldQueue();

                return(true);
            }
            catch (Exception ex)
            {
                _log.Error(string.Format("Company:{0} {1} {2}", _companyId, ex.Message, ex.StackTrace));

                // The failure may have happened before the producers were created (for
                // example while connecting to the message broker). Guard each producer so
                // the handler itself cannot throw NullReferenceException and always
                // reaches "return false".
                if (_producerReportError != null)
                {
                    string mss =
                        Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
                    {
                        CompanyId = _companyId, ProductId = 0, TimeError = DateTime.Now, Message = "Init" + ex.Message + ex.StackTrace
                    });
                    _producerReportError.PublishString(mss, true);
                }

                if (_producerEndCrawler != null)
                {
                    _producerEndCrawler.PublishString(new CrawlerSessionLog()
                    {
                        CompanyId              = _companyId,
                        CountChange            = 0,
                        CountProduct           = 0,
                        CountVisited           = 0,
                        Domain                 = "",
                        EndAt                  = DateTime.Now,
                        Ip                     = Dns.GetHostName(),
                        NumberDuplicateProduct = 0,
                        Session                = this._session,
                        StartAt                = this._timeStart,
                        TotalProduct           = 0,
                        TypeCrawler            = 0,
                        TypeEnd                = "Error Init",
                        TypeRun                = "Auto"
                    }.ToJson());
                }
                return(false);
            }
        }
示例#5
0
        /// <summary>
        /// Prepares a crawl session for <c>_companyId</c>: obtains the cache clients, loads
        /// the configuration and company, creates the message producers, loads the
        /// delete-tracking dictionary and resets the working collections and counters.
        /// </summary>
        /// <returns>
        /// <c>true</c> when every step completed; <c>false</c> when any step threw. On failure
        /// the exception is logged and, for each producer that was already created, an
        /// "Error Init" end-of-session record and an error record are published.
        /// </returns>
        public bool Init()
        {
            try
            {
                _cacheWaitCrawler = RedisCompanyWaitCrawler.Instance();
                _redisLastCrl     = RedisLastUpdateProduct.Instance();
                _config           = new Configuration(_companyId, true);
                _company          = new Company(_companyId);
                _cacheDesHash     = CacheProductDesciptioHash.Instance();

                var rabbitMQCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
                _producerReportError       = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
                _producerProductChange     = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
                // NOTE(review): the exchange constant is also passed as the routing key here,
                // unlike every other producer - confirm this is intentional.
                _producerDuplicateProduct  = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
                _producerPushCompanyReload = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeCompanyReload, ConfigCrawler.RoutingkeyCompanyReload);
                _producerEndCrawler        = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);

                _cacheCheckDelete = CacheTrackDeleteProduct.Instance();
                _cacheProductHash = CacheProductHash.Instance();
                _dicTrackDie      = _cacheCheckDelete.GetDicTrackOfCompany(_companyId);

                // Each working collection is created once; _dicCacheProduct was previously
                // assigned twice in a row.
                _dicDuplicate    = new Dictionary<long, long>();
                _dicCacheProduct = new Dictionary<long, ProductHash>();
                _dicHashDesc     = new Dictionary<long, long>();
                _linksQueue      = new Queue<Job>();

                _timeStart    = DateTime.Now;
                _countChange  = 0;
                _countVisited = 0;
                return(true);
            }
            catch (Exception ex)
            {
                _log.Error(ex);

                // Configuration and Company are loaded before any producer exists, so either
                // producer may still be null here. Guard both so the handler itself cannot
                // throw NullReferenceException and always reaches "return false".
                if (_producerEndCrawler != null)
                {
                    _producerEndCrawler.PublishString(new CrawlerSessionLog()
                    {
                        CompanyId              = _companyId,
                        CountChange            = 0,
                        CountProduct           = 0,
                        CountVisited           = 0,
                        Domain                 = "",
                        EndAt                  = DateTime.Now,
                        Ip                     = Dns.GetHostName(),
                        NumberDuplicateProduct = 0,
                        Session                = this._session,
                        StartAt                = this._timeStart,
                        TotalProduct           = 0,
                        TypeCrawler            = 0,
                        TypeEnd                = "Error Init",
                        TypeRun                = "Auto"
                    }.ToJson());
                }

                if (_producerReportError != null)
                {
                    string mss =
                        Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
                    {
                        CompanyId = _companyId, ProductId = 0, TimeError = DateTime.Now, Message = "Init" + ex.Message + ex.StackTrace
                    });
                    _producerReportError.PublishString(mss, true, 20);
                }
                return(false);
            }
        }
示例#6
0
        /// <summary>
        /// Synchronises the find-new and reload company lists into the wait-crawler cache,
        /// then, for every crawler company, deletes its invalid products, recomputes the
        /// change/duplicate/image hashes of its products, rewrites the product-hash cache
        /// and last-update entries, and updates the company's product count. Sets
        /// <c>NextRun</c> once all companies have been processed.
        /// </summary>
        /// <param name="token">
        /// Checked before each company is processed. When cancellation has been requested
        /// the method logs the fact and returns early without updating <c>NextRun</c>
        /// (the same state an interrupted run leaves behind).
        /// </param>
        public void Run(System.Threading.CancellationToken token)
        {
            // NOTE(review): server address and credentials are hard-coded in source. They
            // should come from configuration; left untouched here because the intended
            // configuration source is not visible from this method.
            ProductAdapter productAdapter = new ProductAdapter(new SqlDb("Data Source=42.112.28.93;Initial Catalog=QT_2;Persist Security Info=True;User ID=wss_price;Password=HzlRt4$$axzG-*UlpuL2gYDu;connection timeout=200"));

            log.InfoFormat("Start run at {0}", DateTime.Now.ToString(CultureInfo.InvariantCulture));
            CacheProductHash       cashProductHash        = CacheProductHash.Instance();
            RedisLastUpdateProduct cacheLastUpdateProduct = RedisLastUpdateProduct.Instance();

            // A failure while syncing the company lists is logged and does not stop the
            // per-company rebuild below.
            try
            {
                var lstFn = productAdapter.GetAllCompanyIdCrawlerFindNew();
                var lstRl = productAdapter.GetAllCompanyIdCrawlerReload();
                RedisCompanyWaitCrawler redisCache = RedisCompanyWaitCrawler.Instance();
                redisCache.SyncCompanyFindNew(lstFn);
                redisCache.SyncCompanyReload(lstRl);
            }
            catch (Exception ex)
            {
                log.Error(ex);
            }

            // Both lists are reused across companies and cleared at the end of each iteration.
            var lst           = new List<QT.Entities.CrawlerProduct.Cache.ProductHash>();
            var lstLastUpdate = new List<long>();
            var lstCompany    = productAdapter.GetAllCompanyIdCrawler();

            // Number of companies completed so far (logged zero-based, as before).
            int companyIndex = 0;

            foreach (var companyID in lstCompany)
            {
                // Honour the caller's cancellation request between companies so a company
                // is never left half-processed.
                if (token.IsCancellationRequested)
                {
                    log.Info(string.Format("Run cancelled after {0}/{1} companies", companyIndex, lstCompany.Count));
                    return;
                }

                Company cmp = new Company(companyID);
                productAdapter.DeleteProductUnvalidOfCOmpany(companyID);
                DataTable tbl = productAdapter.GetProductResetColumnDuplicateAndChange(companyID);
                foreach (DataRow rowProduct in tbl.Rows)
                {
                    long   productId        = QT.Entities.Common.Obj2Int64(rowProduct["ID"]);
                    long   originPrice      = QT.Entities.Common.Obj2Int64(rowProduct["OriginPrice"]);
                    string name             = rowProduct["Name"].ToString();
                    long   price            = QT.Entities.Common.Obj2Int64(rowProduct["Price"]);
                    string imageUrl         = Convert.ToString(rowProduct["ImageUrls"]);
                    string detailUrl        = Convert.ToString(rowProduct["DetailUrl"]);
                    int    inStock          = QT.Entities.Common.Obj2Int(rowProduct["InStock"]);
                    bool   valid            = QT.Entities.Common.Obj2Bool(rowProduct["Valid"]);
                    string shortDescription = QT.Entities.Common.CellToString(rowProduct["ShortDescription"], "");
                    // A NULL classification is treated as category 0.
                    long   categoryId       = rowProduct["ClassificationID"] == DBNull.Value ? 0 : QT.Entities.Common.Obj2Int64(rowProduct["ClassificationID"]);
                    long   hashChange       = ProductEntity.GetHashChangeInfo(inStock, valid, price, name, imageUrl, categoryId, shortDescription, originPrice);
                    long   hashDuplicate    = Product.GetHashDuplicate(cmp.Domain, price, name, imageUrl);
                    long   hashImage        = Product.GetHashImageInfo(imageUrl);
                    lst.Add(new QT.Entities.CrawlerProduct.Cache.ProductHash()
                    {
                        HashChange    = hashChange,
                        HashDuplicate = hashDuplicate,
                        HashImage     = hashImage,
                        Id            = productId,
                        Price         = price,
                        url           = detailUrl
                    });
                    lstLastUpdate.Add(productId);
                }

                cashProductHash.SetCacheProductHash(companyID, lst, 100);
                cacheLastUpdateProduct.RemoveAllLstProduct(companyID);
                // Every product is stamped as last updated one day ago.
                cacheLastUpdateProduct.UpdateBathLastUpdateProduct(companyID, lstLastUpdate, DateTime.Now.AddDays(-1));
                productAdapter.UpdateCountProductForCompany(companyID, lstLastUpdate.Count, lstLastUpdate.Count);
                lst.Clear();
                lstLastUpdate.Clear();
                log.Info(string.Format("Complete Company: {0} {1}/{2}", companyID, companyIndex++, lstCompany.Count));
            }

            log.Info("Complete all company");
            NextRun = DateTime.Now.AddHours(MAX_HOUR_LOOP);
            // Same culture as the start message so both timestamps share one format.
            log.InfoFormat("End at {0}", DateTime.Now.ToString(CultureInfo.InvariantCulture));
        }