Пример #1
0
        /// <summary>
        /// Reloads the grid with the product hashes cached for the current company.
        /// </summary>
        /// <remarks>
        /// The product ids are the keys returned by <c>redisLastUpdate.GetAllData</c>; the matching hashes are
        /// fetched through <c>CacheProductHash.GetAllProductHash</c> and bound to <c>gridControl1</c>.
        /// </remarks>
        private void RefreshData()
        {
            Dictionary<long, long> lstProductId = redisLastUpdate.GetAllData(this.companyID);
            CacheProductHash cacheProductHash = CacheProductHash.Instance();

            // Only the keys (product ids) of the last-update map are needed for the hash lookup.
            var lsthash = cacheProductHash.GetAllProductHash(this.companyID, lstProductId.Keys.ToList());

            this.gridControl1.DataSource = lsthash;
        }
Пример #2
0
        /// <summary>
        /// Loads every product id of one company from the database, asks the product-hash cache for the
        /// matching entries and asserts that at least one entry comes back.
        /// </summary>
        public void GetAllProductHashTest1()
        {
            // NOTE(review): real database credentials are embedded in source here. They should be moved to
            // configuration / a secret store and the password rotated; left untouched so the test keeps
            // pointing at the same server.
            Server.ConnectionString = @"Data Source=42.112.28.93;Initial Catalog=QT_2;Persist Security Info=True;User ID=wss_price;Password=HzlRt4$$axzG-*UlpuL2gYDu;connection timeout=200";

            // Single definition of the company under test, used for both the database and the cache lookup.
            const long companyId = 3722972174058063651;

            ProductAdapter pa = new ProductAdapter(new SqlDb(Server.ConnectionString));
            CacheProductHash cacheProductHash = CacheProductHash.Instance();
            List<long> productIds = pa.GetAllProductIDsByCompany(companyId).ToList();
            var products = cacheProductHash.GetAllProductHash(companyId, productIds);

            Assert.Greater(products.Count, 0);
        }
Пример #3
0
        /// <summary>
        /// Rebuilds the cached product hashes and last-update entries of one company from the rows
        /// returned by <c>ProductAdapter.GetProductResetColumnDuplicateAndChange</c>.
        /// </summary>
        /// <param name="companyID">Identifier of the company whose product cache is rebuilt.</param>
        private static void ResetCacheProduct(long companyID)
        {
            CacheProductHash hashCache = CacheProductHash.Instance();
            RedisLastUpdateProduct lastUpdateCache = RedisLastUpdateProduct.Instance();
            var hashes = new List<ProductHash>();
            var productIds = new List<long>();
            var adapter = new ProductAdapter(new SqlDb(Server.ConnectionString));
            var company = new Company(companyID);

            // Invalid products are deleted first, then the remaining rows are read back.
            adapter.DeleteProductUnvalidOfCOmpany(companyID);
            DataTable rows = adapter.GetProductResetColumnDuplicateAndChange(companyID);

            foreach (DataRow row in rows.Rows)
            {
                long id = Common.Obj2Int64(row["ID"]);
                string name = row["Name"].ToString();
                long price = Common.Obj2Int64(row["Price"]);
                string imageUrls = Convert.ToString(row["ImageUrls"]);
                string detailUrl = Convert.ToString(row["DetailUrl"]);
                int inStock = Common.Obj2Int(row["InStock"]);
                bool valid = Common.Obj2Bool(row["Valid"]);
                string shortDescription = Common.CellToString(row["ShortDescription"], "");

                // The IsDeal column is still read and converted, although its value is not used afterwards.
                Common.Obj2Bool(row["IsDeal"]);

                // A missing classification is represented as category 0.
                object classification = row["ClassificationID"];
                long categoryId;
                if (classification == DBNull.Value)
                {
                    categoryId = 0;
                }
                else
                {
                    categoryId = Common.Obj2Int64(classification);
                }

                long changeHash = Product.GetHashChangeInfo(inStock, valid, price, name, imageUrls, categoryId, shortDescription);
                long duplicateHash = Product.GetHashCheckDuplicate(company.Domain, price, name, imageUrls);
                long imageHash = Product.GetHashImageInfo(imageUrls);

                var entry = new ProductHash();
                entry.HashChange = changeHash;
                entry.HashDuplicate = duplicateHash;
                entry.HashImage = imageHash;
                entry.Id = id;
                entry.Price = price;
                entry.url = detailUrl;
                hashes.Add(entry);

                productIds.Add(id);
            }

            hashCache.SetCacheProductHash(companyID, hashes, 100);
            lastUpdateCache.RemoveAllLstProduct(companyID);
            // Every product is stamped with a last-update time one day in the past.
            lastUpdateCache.UpdateBathLastUpdateProduct(companyID, productIds, DateTime.Now.AddDays(-1));
            adapter.UpdateCountProductForCompany(companyID, productIds.Count, productIds.Count);

            hashes.Clear();
            productIds.Clear();
        }
Пример #4
0
        /// <summary>
        /// Prepares a crawler session for the company: creates the RabbitMQ producers, loads the company and
        /// its configuration, obtains the cache singletons, resets the in-memory counters and collections, and
        /// finally runs <c>ClearOldCache</c>, <c>LoadCrcOldProduct</c> and <c>LoadOldQueue</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when every step completed; <c>false</c> when any step threw. In the failure case the
        /// exception is logged and, for each producer that was already created, an error message and an
        /// "Error Init" session log are published.
        /// </returns>
        public bool Init()
        {
            try
            {
                var rabbitMqCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
                _producerReportSessionRunning = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeSessionRunning, ConfigCrawler.RoutingkeySessionRunning);
                _producerReportError          = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
                _producerProductChange        = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
                // NOTE(review): the exchange name is passed as the routing key as well — confirm this is intended.
                _producerDuplicateProduct     = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
                _producerEndCrawler           = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);
                _producerVisitedLinkFindNew   = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeVisitedLinkFindNew, ConfigCrawler.RoutingKeyVisitedLinkFindNew);

                // Company and configuration are loaded once and reused for the rest of the initialization.
                _company = new Company(_companyId);
                _config  = new Configuration(_companyId);
                if (_config.LimitProductValid == 0)
                {
                    // A configured limit of 0 is replaced by this large default.
                    // NOTE(review): a non-zero configured limit is not copied to _limitProductValid here — verify
                    // it is assigned elsewhere.
                    this._limitProductValid = 1000000;
                }

                // NOTE(review): _rootUri is assigned again further down from Common.GetUriFromUrl. This first
                // assignment is kept because the Uri constructor throws on a null or malformed address, which
                // makes Init fail early; confirm whether that is the intent.
                _rootUri = new Uri(_company.Website);

                _cacheCrcVisited          = RedisCrcVisitedFindNew.Instance();
                _cacheWaitCrawler         = RedisCompanyWaitCrawler.Instance();
                _cacheLastUpdateProduct   = RedisLastUpdateProduct.Instance();
                _cacheProductHash         = CacheProductHash.Instance();
                _cacheCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
                _cacheDuplicateProduct    = CacheDuplicateProduct.Instance();

                _visitedCrc         = new HashSet<long>();
                _linkQueue          = new Queue<JobFindNew>();
                _crcProductOldGroup = new HashSet<long>();
                _dicDuplicate       = new Dictionary<long, long>();
                _countVisited       = 0;
                _countNewProduct    = 0;

                // A cancellation requested at this point is thrown inside the try block, so it is handled by
                // the catch below like any other initialization failure.
                _tokenCrawler.ThrowIfCancellationRequested();

                _visitRegexs      = _config.VisitUrlsRegex;
                _detailLinkRegexs = _config.ProductUrlsRegex;
                _noCrawlerRegexs  = _config.NoVisitUrlRegex ?? new List<string>();
                _noCrawlerRegexs.AddRange(UtilCrawlerProduct.NoCrawlerRegexDefault);
                _timeStart          = DateTime.Now;
                _rootUri            = Common.GetUriFromUrl(_company.Website);
                _hsDuplicateProduct = _cacheDuplicateProduct.GetHashDuplicate(_companyId);

                ClearOldCache();
                LoadCrcOldProduct();
                LoadOldQueue();

                return(true);
            }
            catch (Exception ex)
            {
                _log.Error(string.Format("Company:{0} {1} {2}", _companyId, ex.Message, ex.StackTrace));

                // The failure may have happened before the producers were created. Publishing through a null
                // producer would throw from inside this handler, hide the original error and prevent the
                // method from returning false, so each producer is checked first.
                if (_producerReportError != null)
                {
                    string mss =
                        Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
                    {
                        CompanyId = _companyId, ProductId = 0, TimeError = DateTime.Now, Message = "Init" + ex.Message + ex.StackTrace
                    });
                    _producerReportError.PublishString(mss, true);
                }

                if (_producerEndCrawler != null)
                {
                    _producerEndCrawler.PublishString(new CrawlerSessionLog()
                    {
                        CompanyId              = _companyId,
                        CountChange            = 0,
                        CountProduct           = 0,
                        CountVisited           = 0,
                        Domain                 = "",
                        EndAt                  = DateTime.Now,
                        Ip                     = Dns.GetHostName(),
                        NumberDuplicateProduct = 0,
                        Session                = this._session,
                        StartAt                = this._timeStart,
                        TotalProduct           = 0,
                        TypeCrawler            = 0,
                        TypeEnd                = "Error Init",
                        TypeRun                = "Auto"
                    }.ToJson());
                }
                return(false);
            }
        }
Пример #5
0
        /// <summary>
        /// Prepares a crawler session for the company: obtains the cache singletons, loads the configuration
        /// and company, creates the RabbitMQ producers, and resets the in-memory dictionaries, queue and
        /// counters.
        /// </summary>
        /// <returns>
        /// <c>true</c> when every step completed; <c>false</c> when any step threw. In the failure case the
        /// exception is logged and, for each producer that was already created, an "Error Init" session log
        /// and an error message are published.
        /// </returns>
        public bool Init()
        {
            try
            {
                _cacheWaitCrawler = RedisCompanyWaitCrawler.Instance();
                _redisLastCrl     = RedisLastUpdateProduct.Instance();
                _config           = new Configuration(_companyId, true);
                _company          = new Company(_companyId);
                _cacheDesHash     = CacheProductDesciptioHash.Instance();

                var rabbitMQCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
                _producerReportError       = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
                _producerProductChange     = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
                // NOTE(review): the exchange name is passed as the routing key as well — confirm this is intended.
                _producerDuplicateProduct  = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
                _producerPushCompanyReload = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeCompanyReload, ConfigCrawler.RoutingkeyCompanyReload);
                _producerEndCrawler        = new ProducerBasic(rabbitMQCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);

                _cacheCheckDelete = CacheTrackDeleteProduct.Instance();
                _cacheProductHash = CacheProductHash.Instance();
                _dicTrackDie      = _cacheCheckDelete.GetDicTrackOfCompany(_companyId);

                _dicDuplicate    = new Dictionary<long, long>();
                _dicCacheProduct = new Dictionary<long, ProductHash>();
                _dicHashDesc     = new Dictionary<long, long>();
                _linksQueue      = new Queue<Job>();

                _timeStart    = DateTime.Now;
                _countChange  = 0;
                _countVisited = 0;
                return(true);
            }
            catch (Exception ex)
            {
                _log.Error(ex);

                if (_producerEndCrawler != null)
                {
                    _producerEndCrawler.PublishString(new CrawlerSessionLog()
                    {
                        CompanyId              = _companyId,
                        CountChange            = 0,
                        CountProduct           = 0,
                        CountVisited           = 0,
                        Domain                 = "",
                        EndAt                  = DateTime.Now,
                        Ip                     = Dns.GetHostName(),
                        NumberDuplicateProduct = 0,
                        Session                = this._session,
                        StartAt                = this._timeStart,
                        TotalProduct           = 0,
                        TypeCrawler            = 0,
                        TypeEnd                = "Error Init",
                        TypeRun                = "Auto"
                    }.ToJson());
                }

                // The cache, configuration and company lookups above run before the error producer is created.
                // If one of them fails, the producer is still unassigned and publishing through it would throw
                // from inside this handler instead of returning false, so it is checked first.
                if (_producerReportError != null)
                {
                    string mss =
                        Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
                    {
                        CompanyId = _companyId, ProductId = 0, TimeError = DateTime.Now, Message = "Init" + ex.Message + ex.StackTrace
                    });
                    _producerReportError.PublishString(mss, true, 20);
                }
                return(false);
            }
        }
Пример #6
0
 /// <summary>
 /// Requests all product hashes of one fixed company from the cache; the result is not inspected.
 /// </summary>
 public void GetAllProductHashTest2()
 {
     const long companyId = 4811496168179482404;
     CacheProductHash cache = CacheProductHash.Instance();

     cache.GetAllProductHash(companyId);
 }
Пример #7
0
 /// <summary>
 /// Requests all product hashes of one fixed company from the cache; the result is not inspected.
 /// </summary>
 public void GetAllProductHashTest()
 {
     const long companyId = 2365377961928198678;

     // The returned collection is kept in a local only, with no assertion made on it.
     var products = CacheProductHash.Instance().GetAllProductHash(companyId);
 }
Пример #8
0
        /// <summary>
        /// Synchronizes the find-new and reload company lists into Redis, then rebuilds the product-hash and
        /// last-update caches for every crawler company and schedules the next run.
        /// </summary>
        /// <param name="token">
        /// Cancellation token supplied by the caller.
        /// NOTE(review): the token is not observed anywhere in this method, so a run cannot currently be
        /// interrupted — decide whether the company loop should check it.
        /// </param>
        public void Run(System.Threading.CancellationToken token)
        {
            // NOTE(review): real database credentials are embedded in source here. They should be moved to
            // configuration / a secret store and the password rotated; left untouched so the job keeps
            // pointing at the same server.
            ProductAdapter productAdapter = new ProductAdapter(new SqlDb("Data Source=42.112.28.93;Initial Catalog=QT_2;Persist Security Info=True;User ID=wss_price;Password=HzlRt4$$axzG-*UlpuL2gYDu;connection timeout=200"));

            log.InfoFormat("Start run at {0}", DateTime.Now.ToString(CultureInfo.InvariantCulture));
            CacheProductHash       cashProductHash        = CacheProductHash.Instance();
            RedisLastUpdateProduct cacheLastUpdateProduct = RedisLastUpdateProduct.Instance();
            int completedCompanies = 0;

            // A failure while syncing the waiting lists is only logged; the cache rebuild below still runs.
            try
            {
                var lstFn = productAdapter.GetAllCompanyIdCrawlerFindNew();
                var lstRl = productAdapter.GetAllCompanyIdCrawlerReload();
                RedisCompanyWaitCrawler redisCache = RedisCompanyWaitCrawler.Instance();
                redisCache.SyncCompanyFindNew(lstFn);
                redisCache.SyncCompanyReload(lstRl);
            }
            catch (Exception ex)
            {
                log.Error(ex);
            }

            // Both lists are reused for every company and cleared at the end of each iteration.
            var lst           = new List<QT.Entities.CrawlerProduct.Cache.ProductHash>();
            var lstLastUpdate = new List<long>();
            var lstCompany    = productAdapter.GetAllCompanyIdCrawler();

            foreach (var companyID in lstCompany)
            {
                Company cmp = new Company(companyID);
                productAdapter.DeleteProductUnvalidOfCOmpany(companyID);
                DataTable tbl = productAdapter.GetProductResetColumnDuplicateAndChange(companyID);
                foreach (DataRow rowProduct in tbl.Rows)
                {
                    long   productId        = QT.Entities.Common.Obj2Int64(rowProduct["ID"]);
                    long   originPrice      = QT.Entities.Common.Obj2Int64(rowProduct["OriginPrice"]);
                    string name             = rowProduct["Name"].ToString();
                    long   price            = QT.Entities.Common.Obj2Int64(rowProduct["Price"]);
                    string imageUrl         = Convert.ToString(rowProduct["ImageUrls"]);
                    string detailUrl        = Convert.ToString(rowProduct["DetailUrl"]);
                    int    inStock          = QT.Entities.Common.Obj2Int(rowProduct["InStock"]);
                    bool   valid            = QT.Entities.Common.Obj2Bool(rowProduct["Valid"]);
                    string shortDescription = QT.Entities.Common.CellToString(rowProduct["ShortDescription"], "");
                    // A missing classification is represented as category 0.
                    long   categoryId       = rowProduct["ClassificationID"] == DBNull.Value ? 0 : QT.Entities.Common.Obj2Int64(rowProduct["ClassificationID"]);
                    long   hashChange       = ProductEntity.GetHashChangeInfo(inStock, valid, price, name, imageUrl, categoryId, shortDescription, originPrice);
                    long   hashDuplicate    = Product.GetHashDuplicate(cmp.Domain, price, name, imageUrl);
                    long   hashImage        = Product.GetHashImageInfo(imageUrl);
                    lst.Add(new QT.Entities.CrawlerProduct.Cache.ProductHash()
                    {
                        HashChange    = hashChange,
                        HashDuplicate = hashDuplicate,
                        HashImage     = hashImage,
                        Id            = productId,
                        Price         = price,
                        url           = detailUrl
                    });
                    lstLastUpdate.Add(productId);
                }

                cashProductHash.SetCacheProductHash(companyID, lst, 100);
                cacheLastUpdateProduct.RemoveAllLstProduct(companyID);
                // Every product is stamped with a last-update time one day in the past.
                cacheLastUpdateProduct.UpdateBathLastUpdateProduct(companyID, lstLastUpdate, DateTime.Now.AddDays(-1));
                productAdapter.UpdateCountProductForCompany(companyID, lstLastUpdate.Count, lstLastUpdate.Count);
                lst.Clear();
                lstLastUpdate.Clear();

                // Increment before logging so the progress reads 1/N after the first company and N/N after the last.
                log.Info(string.Format("Complete Company: {0} {1}/{2}", companyID, ++completedCompanies, lstCompany.Count));
            }
            log.Info("Complete all company");
            NextRun = DateTime.Now.AddHours(MAX_HOUR_LOOP);
            // Same culture as the start message so both timestamps are formatted alike.
            log.InfoFormat("End at {0}", DateTime.Now.ToString(CultureInfo.InvariantCulture));
        }