Ejemplo n.º 1
0
        /// <summary>
        /// Re-pushes the cached info of every company returned by
        /// <c>ProductAdapter.GetAllCompanyIdCrawler</c> into the Redis company cache,
        /// logging the progress after each company.
        /// </summary>
        private static void ResetAllCopmanyInfo()
        {
            ProductAdapter adapter = new ProductAdapter(new SqlDb(Server.ConnectionString));
            RedisCacheCompanyCrawler companyCache = RedisCacheCompanyCrawler.Instance();
            List<long> companyIds = adapter.GetAllCompanyIdCrawler();

            // Progress is reported as "zero-based index / last index".
            int lastIndex = companyIds.Count - 1;
            int index = 0;

            foreach (long companyId in companyIds)
            {
                Company company = new Company(companyId);

                // NOTE(review): the meaning of the two trailing 12s is defined by
                // RedisCacheCompanyCrawler.SetCompanyInfo and is not visible here.
                companyCache.SetCompanyInfo(company.ID, company.Domain, 12, 12);

                log.Info(string.Format("Pushed companyInfo {0}/{1}", index, lastIndex));
                index++;
            }

            log.Info("Success push companyInfo");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Registers the next reload time for every company returned by
        /// <c>GetAllCompanyIdCrawlerReload</c> and the next find-new time for every
        /// company returned by <c>GetAllCompanyIdCrawlerFindNew</c>, logging progress
        /// for each company.
        /// </summary>
        private static void SyncCompanyCrawler()
        {
            // Always 0: the conditional per-company resync that used to increment it is
            // disabled. Kept so the log line format stays unchanged.
            int count = 0;

            RedisCompanyWaitCrawler redisCompanyWaitCrawler = RedisCompanyWaitCrawler.Instance();
            ProductAdapter          productAdapter          = new ProductAdapter(new SqlDb(Server.ConnectionString));

            // NOTE(review): the -10 argument is passed through as-is; its unit and meaning
            // are defined by RedisCompanyWaitCrawler and are not visible here.
            List<long> lstCrawler = productAdapter.GetAllCompanyIdCrawlerReload();
            for (int i = 0; i < lstCrawler.Count; i++)
            {
                redisCompanyWaitCrawler.SetNexReload(lstCrawler[i], -10);
                log.Info(string.Format("sync company {0} {1}/{2}", count, i, lstCrawler.Count));
            }

            // The find-new pass must walk the find-new list, not the reload list again.
            List<long> lstCrawlerFindNew = productAdapter.GetAllCompanyIdCrawlerFindNew();
            for (int i = 0; i < lstCrawlerFindNew.Count; i++)
            {
                redisCompanyWaitCrawler.SetNexFindNew(lstCrawlerFindNew[i], -10);
                log.Info(string.Format("sync company {0} {1}/{2}", count, i, lstCrawlerFindNew.Count));
            }

            Console.WriteLine("Success sync company crawl!");
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates the product adapter, the in-memory working collections and the
        /// Redis / message-queue helper instances used by this object.
        /// </summary>
        private void InitData()
        {
            // Database access.
            _productAdapter = new ProductAdapter(new SqlDb(Server.ConnectionString));

            // In-memory working state, all starting empty.
            _linkQueue = new Queue<string>();
            _crcProductOldGroup = new Dictionary<long, bool>();
            _visitedCrc = new Dictionary<long, bool>();
            _productsReloaded = new List<long>();

            // Redis-backed helpers, obtained through their Instance() accessors.
            _redisQueueFindNew = RedisQueueFindNew.Instance();
            _redisCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
            _redisLastUpdateProduct = RedisLastUpdateProduct.Instance();
            _redisWaitCrawler = RedisCompanyWaitCrawler.Instance();
            _redisCrcVisited = RedisCrcVisitedFindNew.Instance();

            // Message-queue loggers.
            _mqLogQueueVisit = MQLogQueueVisit.Instance();
            _mqLogWarning = MQLogWarningFindNew.Instance();
            _mqLogChangePrice = new MqLogChangePrice();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Prepares a find-new crawler session for <c>_companyId</c>: creates the RabbitMQ
        /// producers, loads the company and its crawler configuration, obtains the cache
        /// helpers, resets the per-session working state and loads the old cache/queue data.
        /// </summary>
        /// <returns>
        /// <c>true</c> when every step completed; <c>false</c> when any step threw. In that
        /// case the error is logged and, for each producer that was already created,
        /// an error report and an "Error Init" session log are published.
        /// </returns>
        public bool Init()
        {
            try
            {
                var rabbitMqCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
                _producerReportSessionRunning = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeSessionRunning, ConfigCrawler.RoutingkeySessionRunning);
                _producerReportError          = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
                _producerProductChange        = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
                // NOTE(review): the exchange constant is also passed as the routing key here,
                // while every other producer gets a dedicated routing-key constant — confirm
                // this is intentional.
                _producerDuplicateProduct     = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
                _producerEndCrawler           = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);
                _producerVisitedLinkFindNew   = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeVisitedLinkFindNew, ConfigCrawler.RoutingKeyVisitedLinkFindNew);

                // Company and configuration are loaded once and reused below.
                _company = new Company(_companyId);
                _config  = new Configuration(_companyId);
                if (_config.LimitProductValid == 0)
                {
                    // NOTE(review): _limitProductValid is only assigned when the configured
                    // limit is 0; a non-zero configured limit is not copied here — confirm it
                    // is applied elsewhere.
                    this._limitProductValid = 1000000;
                }

                // NOTE(review): this value is overwritten further down by
                // Common.GetUriFromUrl. The line is kept because new Uri(string) throws on a
                // null or malformed Website, which aborts Init — confirm whether that
                // validation is wanted.
                _rootUri                  = new Uri(_company.Website);
                _cacheCrcVisited          = RedisCrcVisitedFindNew.Instance();
                _cacheWaitCrawler         = RedisCompanyWaitCrawler.Instance();
                _cacheLastUpdateProduct   = RedisLastUpdateProduct.Instance();
                _cacheProductHash         = CacheProductHash.Instance();
                _cacheCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
                _cacheDuplicateProduct    = CacheDuplicateProduct.Instance();

                // Per-session working state starts empty.
                _visitedCrc         = new HashSet <long>();
                _linkQueue          = new Queue <JobFindNew>();
                _crcProductOldGroup = new HashSet <long>();
                _dicDuplicate       = new Dictionary <long, long>();
                _countVisited       = 0;
                _countNewProduct    = 0;

                // A requested cancellation throws here and is handled by the catch below
                // like any other initialization failure.
                _tokenCrawler.ThrowIfCancellationRequested();

                _visitRegexs      = _config.VisitUrlsRegex;
                _detailLinkRegexs = _config.ProductUrlsRegex;
                _noCrawlerRegexs  = _config.NoVisitUrlRegex ?? new List <string>();
                _noCrawlerRegexs.AddRange(UtilCrawlerProduct.NoCrawlerRegexDefault);

                // If a failure happens before this line, the StartAt reported from the catch
                // block carries whatever value _timeStart held before Init was called.
                _timeStart          = DateTime.Now;
                _rootUri            = Common.GetUriFromUrl(_company.Website);
                _hsDuplicateProduct = _cacheDuplicateProduct.GetHashDuplicate(_companyId);


                ClearOldCache();
                LoadCrcOldProduct();
                LoadOldQueue();

                return(true);
            }
            catch (Exception ex)
            {
                _log.Error(string.Format("Company:{0} {1} {2}", _companyId, ex.Message, ex.StackTrace));

                // The failure may have happened while the producers were being created, so
                // each producer is checked before use; otherwise a NullReferenceException
                // would escape Init instead of returning false.
                if (_producerReportError != null)
                {
                    string mss =
                        Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
                    {
                        CompanyId = _companyId, ProductId = 0, TimeError = DateTime.Now, Message = "Init" + ex.Message + ex.StackTrace
                    });
                    _producerReportError.PublishString(mss, true);
                }
                if (_producerEndCrawler != null)
                {
                    _producerEndCrawler.PublishString(new CrawlerSessionLog()
                    {
                        CompanyId              = _companyId,
                        CountChange            = 0,
                        CountProduct           = 0,
                        CountVisited           = 0,
                        Domain                 = "",
                        EndAt                  = DateTime.Now,
                        Ip                     = Dns.GetHostName(),
                        NumberDuplicateProduct = 0,
                        Session                = this._session,
                        StartAt                = this._timeStart,
                        TotalProduct           = 0,
                        TypeCrawler            = 0,
                        TypeEnd                = "Error Init",
                        TypeRun                = "Auto"
                    }.ToJson());
                }
                return(false);
            }
        }