Пример #1
0
        /// <summary>
        /// Loads the company and its configuration for <paramref name="CompanyId"/>, runs
        /// <c>ProductParse.Analytics</c> against the configuration's test link, and makes
        /// <c>frmShow</c> visible.
        /// </summary>
        /// <param name="CompanyId">Identifier passed to the <c>Entities.Company</c> and <c>Configuration</c> constructors.</param>
        public static void ShowProduct(long CompanyId)
        {
            // NOTE(review): ownerCompany and testLink are never read afterwards; they are kept
            // because the constructor / property getter may have effects not visible from here.
            var    ownerCompany  = new Entities.Company(CompanyId);
            var    settings      = new Configuration(CompanyId);
            var    parser        = new ProductParse();
            var    parsedProduct = new ProductEntity();
            string testLink      = settings.LinkTest;

            // The document is handed to Analytics without any HTML being loaded into it here.
            var blankDocument = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
            parser.Analytics(parsedProduct, blankDocument, settings.LinkTest, settings, settings.Domain);

            // NOTE(review): the summary text is built but never handed to frmShow in this method.
            string summaryText = string.Concat("", string.Format("\r\n Name: {0}", parsedProduct.Name));

            frmShow.Visible = true;
            frmShow.Show();
        }
Пример #2
0
        /// <summary>
        /// Service start hook. Initializes the component, resolves the RabbitMQ server named by the
        /// "rabbitMQServerName" app setting, and starts <c>workerCount</c> workers that consume
        /// <c>AddProductToSqlJobName</c> jobs. Each job carries a URL (or several, split by
        /// <c>SqlDb.arSplit</c>) whose HTML is downloaded, analysed into a <c>Product</c> and then
        /// updated or inserted through <c>ProductAdapter</c>.
        /// </summary>
        /// <param name="args">Start arguments supplied by the service host; not read by this method.</param>
        /// <remarks>
        /// Any exception raised while setting up is logged with "Start error" and rethrown.
        /// </remarks>
        protected override void OnStart(string[] args)
        {
            log.Info("Start service");
            try
            {
                InitializeComponent();
                cancelTokenSource = new CancellationTokenSource();
                string rabbitMQServerName = ConfigurationManager.AppSettings["rabbitMQServerName"];
                workers        = new Worker[workerCount];
                rabbitMQServer = RabbitMQManager.GetRabbitMQServer(rabbitMQServerName);

                // SECURITY NOTE(review): both connection strings embed database credentials in source.
                // They should be moved to protected configuration (e.g. connectionStrings in app.config)
                // and the exposed passwords rotated. Left in place here so deployments keep working.
                string connectToSQL        = @"Data Source=172.22.30.86,1455;Initial Catalog=QT_2;Persist Security Info=True;User ID=qt_vn;Password=@F4sJ=l9/ryJt9MT;connection timeout=200";
                string connectToConnection = @"Data Source=42.112.28.93;Initial Catalog=QT_2;Persist Security Info=True;User ID=wss_price;Password=HzlRt4$$axzG-*UlpuL2gYDu;connection timeout=200";

                // NOTE(review): crawlerProductAdapter is never used below; it is kept because its
                // construction may have effects that are not visible from this method.
                CrawlerProductAdapter crawlerProductAdapter = new CrawlerProductAdapter(new SqlDb(connectToSQL));
                ProductAdapter        productAdapter        = new ProductAdapter(new SqlDb(connectToConnection));

                for (int i = 0; i < workerCount; i++)
                {
                    // Composite format items must be positional: "{i}" is not a valid placeholder.
                    log.InfoFormat("Start worker {0}", i);
                    var worker = new Worker(AddProductToSqlJobName, false, rabbitMQServer);
                    workers[i] = worker;
                    var  token      = this.cancelTokenSource.Token;
                    Task workerTask = new Task(() =>
                    {
                        // Handler returns true once the message has been processed and false when
                        // cancellation was requested. Other exceptions propagate to the worker.
                        worker.JobHandler = (downloadImageJob) =>
                        {
                            try
                            {
                                token.ThrowIfCancellationRequested();

                                QT.Entities.CrawlerProduct.RabbitMQ.MsSaveProduct Mss = QT.Entities.CrawlerProduct.RabbitMQ.MsSaveProduct.GetDataFromMessage(downloadImageJob.Data);
                                string Url     = Mss.Url;
                                string Domain  = QT.Entities.Common.GetDomainFromUrl(Url);
                                long CompanyID = QT.Entities.Common.GetIDCompany(Domain);
                                QT.Entities.Configuration config = new QT.Entities.Configuration(CompanyID);

                                // NOTE(review): _company is a field that this method never assigns, while
                                // config is loaded per message for CompanyID. Confirm that checking the
                                // field (rather than the company of the incoming URL) is intended.
                                if (_company.Status == Common.CompanyStatus.WEB_CRAWLERDOMAIN)
                                {
                                    // NOTE(review): this branch downloads and parses the page but does
                                    // nothing with the result; obj and doc are never read afterwards.
                                    QT.Entities.CrawlerDomain obj = new CrawlerDomain();
                                    string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(Url.Trim(), 15, 1);
                                    GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                                    html = html.Replace("<form", "<div");
                                    html = html.Replace("</form", "</div");
                                    doc.LoadHtml(html);
                                }
                                else
                                {
                                    string[] arLink = Url.Trim().Split(SqlDb.arSplit, StringSplitOptions.RemoveEmptyEntries);
                                    foreach (var item in arLink)
                                    {
                                        QT.Entities.Product _product = new Product();
                                        string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(item, 45, 2);
                                        if (config.ContentAnanyticXPath.Count >= 1)
                                        {
                                            // Cut the page down to the region between the first marker and
                                            // (when configured and found) the end of the second marker.
                                            int startIndex = html.IndexOf(config.ContentAnanyticXPath[0]);
                                            if (startIndex >= 0)
                                            {
                                                html = html.Substring(startIndex);
                                                if (config.ContentAnanyticXPath.Count >= 2)
                                                {
                                                    int endIndex = html.IndexOf(config.ContentAnanyticXPath[1]);
                                                    if (endIndex >= 0)
                                                    {
                                                        html = html.Substring(0, endIndex + config.ContentAnanyticXPath[1].Length);
                                                    }
                                                }
                                            }
                                            html = html.Replace("<form", "<div");
                                            html = html.Replace("</form", "</div");
                                            html = Common.TidyCleanR(html);
                                        }

                                        // NOTE(review): _htmlSource is a field written by every worker's
                                        // handler; confirm no reader relies on it being per-job.
                                        _htmlSource = html;
                                        GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                                        html = html.Replace("<form", "<div");
                                        html = html.Replace("</form", "</div");
                                        doc.LoadHtml(html);

                                        List <Product> lstUpdateProduct = new List <Product>();
                                        List <Product> lstInsertProduct = new List <Product>();

                                        _product.Analytics(doc, item, config, true, _company.Domain);

                                        if (_product.IsSuccessData(config.CheckPrice))
                                        {
                                            if (productAdapter.CheckExistInDb(_product.ID))
                                            {
                                                lstUpdateProduct.Add(_product);
                                            }
                                            else
                                            {
                                                lstInsertProduct.Add(_product);
                                            }

                                            // Exactly one of the two lists holds the product; the other is empty.
                                            productAdapter.UpdateProductsChangeToDb(lstUpdateProduct);
                                            productAdapter.InsertListProduct(lstInsertProduct);

                                            productAdapter.PushQueueIndexCompany(config.CompanyID);
                                            productAdapter.PushQueueChangeChangeImage(new MQChangeImage()
                                            {
                                                ProductID = _product.ID,
                                                Type      = 1
                                            });

                                            log.InfoFormat("Saved {0} item product!", _product.Name);
                                        }
                                    }
                                }

                                return true;
                            }
                            catch (OperationCanceledException)
                            {
                                log.Info("End worker");
                                return false;
                            }
                        };
                        worker.Start();
                    }, token);
                    workerTask.Start();
                    log.InfoFormat("Worker {0} started", i);
                }
            }
            catch (Exception ex)
            {
                log.Error("Start error", ex);
                throw;
            }
        }
Пример #3
0
        /// <summary>
        /// Prepares the crawler session for <c>_companyId</c>: creates the RabbitMQ producers, loads
        /// the company and its configuration, obtains the cache instances, resets the in-memory
        /// crawl state, and then calls <c>ClearOldCache</c>, <c>LoadCrcOldProduct</c> and
        /// <c>LoadOldQueue</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when every step completed; <c>false</c> when any step threw. On failure the
        /// error is logged and, for each producer that was already created, an error report and an
        /// "Error Init" end-of-session record are published.
        /// </returns>
        public bool Init()
        {
            try
            {
                var rabbitMqCrawler = RabbitMQManager.GetRabbitMQServer(ConfigCrawler.KeyRabbitMqCrawler);
                _producerReportSessionRunning = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeSessionRunning, ConfigCrawler.RoutingkeySessionRunning);
                _producerReportError          = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeErorrCrawler, ConfigCrawler.RoutingKeyErrorCrawler);
                _producerProductChange        = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeChangeProduct, ConfigCrawler.RoutingkeyChangeProduct);
                // NOTE(review): unlike the other producers, the exchange name is also passed as the
                // routing key here. Confirm this is intended and not a copy/paste slip.
                _producerDuplicateProduct     = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeDuplicateProductToCache, ConfigCrawler.ExchangeDuplicateProductToCache);
                _producerEndCrawler           = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeEndSession, ConfigCrawler.RoutingEndSession);
                _producerVisitedLinkFindNew   = new ProducerBasic(rabbitMqCrawler, ConfigCrawler.ExchangeVisitedLinkFindNew, ConfigCrawler.RoutingKeyVisitedLinkFindNew);

                // Company and configuration are loaded once; the original constructed both a second
                // time further down with the same id.
                _company = new Company(_companyId);
                _config  = new Configuration(_companyId);
                if (_config.LimitProductValid == 0)
                {
                    this._limitProductValid = 1000000;
                }

                // NOTE(review): this value is overwritten by Common.GetUriFromUrl below; the
                // assignment is kept because new Uri(...) throws on a malformed website URL.
                _rootUri                  = new Uri(_company.Website);
                _cacheCrcVisited          = RedisCrcVisitedFindNew.Instance();
                _cacheWaitCrawler         = RedisCompanyWaitCrawler.Instance();
                _cacheLastUpdateProduct   = RedisLastUpdateProduct.Instance();
                _cacheProductHash         = CacheProductHash.Instance();
                _cacheCacheCompanyCrawler = RedisCacheCompanyCrawler.Instance();
                _cacheDuplicateProduct    = CacheDuplicateProduct.Instance();

                // Reset per-session in-memory state.
                _visitedCrc         = new HashSet <long>();
                _linkQueue          = new Queue <JobFindNew>();
                _crcProductOldGroup = new HashSet <long>();
                _dicDuplicate       = new Dictionary <long, long>();
                _countVisited       = 0;
                _countNewProduct    = 0;
                _tokenCrawler.ThrowIfCancellationRequested();

                _visitRegexs      = _config.VisitUrlsRegex;
                _detailLinkRegexs = _config.ProductUrlsRegex;
                _noCrawlerRegexs  = _config.NoVisitUrlRegex ?? new List <string>();
                _noCrawlerRegexs.AddRange(UtilCrawlerProduct.NoCrawlerRegexDefault);
                _timeStart          = DateTime.Now;
                _rootUri            = Common.GetUriFromUrl(_company.Website);
                _hsDuplicateProduct = _cacheDuplicateProduct.GetHashDuplicate(_companyId);

                ClearOldCache();
                LoadCrcOldProduct();
                LoadOldQueue();

                return true;
            }
            catch (Exception ex)
            {
                _log.Error(string.Format("Company:{0} {1} {2}", _companyId, ex.Message, ex.StackTrace));

                // The failure may have happened before the producers were created (for example in
                // GetRabbitMQServer), so each producer is checked before use. Otherwise the handler
                // itself would throw and Init would never return false.
                if (_producerReportError != null)
                {
                    string mss =
                        Newtonsoft.Json.JsonConvert.SerializeObject(new ErrorCrawler()
                    {
                        CompanyId = _companyId, ProductId = 0, TimeError = DateTime.Now, Message = "Init" + ex.Message + ex.StackTrace
                    });
                    _producerReportError.PublishString(mss, true);
                }
                if (_producerEndCrawler != null)
                {
                    _producerEndCrawler.PublishString(new CrawlerSessionLog()
                    {
                        CompanyId              = _companyId,
                        CountChange            = 0,
                        CountProduct           = 0,
                        CountVisited           = 0,
                        Domain                 = "",
                        EndAt                  = DateTime.Now,
                        Ip                     = Dns.GetHostName(),
                        NumberDuplicateProduct = 0,
                        Session                = this._session,
                        StartAt                = this._timeStart,
                        TotalProduct           = 0,
                        TypeCrawler            = 0,
                        TypeEnd                = "Error Init",
                        TypeRun                = "Auto"
                    }.ToJson());
                }
                return false;
            }
        }