Beispiel #1
0
        /// <summary>
        /// Downloads a dienmaytantien.vn product page, runs it through
        /// <c>ProductParse.Analytics</c> with the configuration and domain of the
        /// hard-coded company id, and checks the parsed price and
        /// <c>IsSuccessData(false)</c>.
        /// </summary>
        /// <remarks>
        /// The test fetches a live URL and builds <c>Configuration</c>/<c>Company</c>
        /// from a company id after setting the static <c>Server</c> connection strings,
        /// so it presumably needs network and database access — confirm before running
        /// it in an isolated environment.
        /// </remarks>
        public void AnalyticsDienmayTantienTest()
        {
            Server.ConnectionString        = ConfigCrawler.ConnectProduct;
            Server.LogConnectionString     = ConfigCrawler.ConnectLog;
            Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

            string url       = @"http://dienmaytantien.vn/detail.asp?parent_id=336&id=2204";
            long   companyId = 8223820966383374348;

            ProductParse  productParse  = new ProductParse();
            ProductEntity productEntity = new ProductEntity();

            // NOTE(review): 45 and 2 look like a timeout and a retry count — confirm against getHTML.
            string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);

            // Fail with a readable message instead of an ArgumentNullException from LoadHtml.
            Assert.IsFalse(string.IsNullOrEmpty(html), "No HTML was downloaded from " + url);

            html = System.Web.HttpUtility.HtmlDecode(html);
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);

            var config  = new Configuration(companyId);
            var company = new Company(companyId);

            productParse.Analytics(productEntity, doc, url, config, company.Domain);
            bool isValid = productEntity.IsSuccessData(false);

            // Expected value goes first so that a failure reports "expected"/"actual" correctly.
            // NOTE(review): the expected price is a hard-coded snapshot of a live page — confirm it is still valid.
            Assert.AreEqual(10590000, productEntity.Price);
            Assert.IsTrue(isValid);
        }
Beispiel #2
0
        /// <summary>
        /// Walks every company id returned by <c>GetAllCompanyIdCrawler</c>, downloads the
        /// company's configured test link, parses it with <c>ProductParse.Analytics</c> and
        /// inserts a row into <c>Company_FailConfig</c> when the parsed entity fails
        /// <c>IsSuccessData</c> or when parsing throws.
        /// </summary>
        /// <remarks>
        /// Companies whose test link yields no HTML are skipped: no row is inserted and
        /// no progress line is logged for them.
        /// </remarks>
        public void Start()
        {
            var adapter = new ProductAdapter(new SqlDb(ConfigCrawler.ConnectProduct));

            QT.Entities.Server.ConnectionString = ConfigCrawler.ConnectProduct;

            List<long> ids = adapter.GetAllCompanyIdCrawler();

            for (int index = 0; index < ids.Count; index++)
            {
                long id       = ids[index];
                var  company  = new Company(id);
                var  settings = new Configuration(id);
                var  parser   = new ProductParse();
                var  entity   = new ProductEntity();
                var  page     = new HtmlDocument();

                string testLink = settings.LinkTest;
                string markup   = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(testLink, 45, 2);

                if (string.IsNullOrEmpty(markup))
                {
                    // Nothing was downloaded; move on to the next company.
                    continue;
                }

                page.LoadHtml(markup);
                try
                {
                    parser.Analytics(entity, page, settings.LinkTest, settings, company.Domain);

                    bool parsedOk = entity.IsSuccessData(settings.CheckPrice);
                    if (!parsedOk)
                    {
                        // Parsed without an exception but the data did not validate.
                        var db = adapter.GetSqlDb();
                        var failParameters = new SqlParameter[]
                        {
                            SqlDb.CreateParamteterSQL("@CompanyId", id, SqlDbType.BigInt)
                        };
                        db.RunQuery("insert into Company_FailConfig (CompanyId) Values (@CompanyId)",
                                    CommandType.Text,
                                    failParameters);
                    }
                }
                catch (Exception ex)
                {
                    // Parsing threw: store the message and stack trace next to the company id.
                    var db = adapter.GetSqlDb();
                    var errorParameters = new SqlParameter[]
                    {
                        SqlDb.CreateParamteterSQL("@CompanyId", id, SqlDbType.BigInt),
                        SqlDb.CreateParamteterSQL("@Error", ex.Message + "\n" + ex.StackTrace,
                                                  SqlDbType.NVarChar)
                    };
                    db.RunQuery("insert into Company_FailConfig (CompanyId, Error) Values (@CompanyId, @Error)",
                                CommandType.Text,
                                errorParameters);
                }

                _log.Info(string.Format("Run data {0}/  {1}", index, ids.Count));
            }
        }
Beispiel #3
0
        /// <summary>
        /// Parses a crawled page. For companies with status <c>TIN</c> the page is handed
        /// to <c>Product.Analytics</c>; otherwise it is parsed into a
        /// <c>ProductEntity</c> which, when valid and not already known, is either pushed
        /// as a new product or published as a duplicate.
        /// </summary>
        /// <param name="jobCrawl">Crawl job; only its <c>Url</c> is read here.</param>
        /// <param name="doc">The already loaded HTML document of that URL.</param>
        private void Analysic(JobFindNew jobCrawl, HtmlDocument doc)
        {
            if (_company.Status == Common.CompanyStatus.TIN)
            {
                // The parsed Product is not used any further in this method.
                new Product().Analytics(doc, jobCrawl.Url, _config, false, _company.Domain);
                return;
            }

            var entity = new ProductEntity();
            var parser = new ProductParse();
            parser.Analytics(entity, doc, jobCrawl.Url, _config, _company.Domain);

            if (!entity.IsSuccessData(_config.CheckPrice))
            {
                return;
            }

            entity.Valid = false;
            if (IsExistsProduct(entity.ID))
            {
                return;
            }

            if (_dicDuplicate.ContainsKey(entity.GetHashDuplicate()))
            {
                // Same duplicate hash as an earlier product: report the pair instead of pushing it.
                var duplicate = new ProductDuplicate()
                {
                    CId   = _companyId,
                    Id    = entity.ID,
                    Hash  = entity.GetHashDuplicate(),
                    IdDup = _dicDuplicate[entity.GetHashDuplicate()],
                    Url   = entity.DetailUrl
                };
                _producerDuplicateProduct.PublishString(
                    Newtonsoft.Json.JsonConvert.SerializeObject(duplicate), true);
                return;
            }

            // First product with this hash: push it as new and remember hash and id.
            entity.StatusChange.IsNew = true;
            PushChangeProduct(entity);
            _dicDuplicate.Add(entity.GetHashDuplicate(), entity.ID);
            _crcProductOldGroup.Add(entity.ID);
            _countNewProduct++;
        }
Beispiel #4
0
        /// <summary>
        /// Downloads a thegioicayxanh.vn product page, runs it through
        /// <c>ProductParse.Analytics</c> with the configuration of the hard-coded company
        /// id, and checks the parsed price and <c>IsSuccessData(false)</c>.
        /// </summary>
        /// <remarks>
        /// The test fetches a live URL and builds <c>Configuration</c> from a company id
        /// after setting the static <c>Server</c> connection strings, so it presumably
        /// needs network and database access — confirm before running it in isolation.
        /// </remarks>
        public void AnalyticsTHEGIOICAYXANHTest()
        {
            Server.ConnectionString        = ConfigCrawler.ConnectProduct;
            Server.LogConnectionString     = ConfigCrawler.ConnectLog;
            Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

            string url       = @"http://thegioicayxanh.vn/cay-van-phong/cay-chan-ret.html";
            long   companyId = 8153388634833285394;

            ProductParse  productParse  = new ProductParse();
            ProductEntity productEntity = new ProductEntity();

            // NOTE(review): 45 and 2 look like a timeout and a retry count — confirm against getHTML.
            string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);

            // Fail with a readable message instead of an ArgumentNullException from LoadHtml.
            Assert.IsFalse(string.IsNullOrEmpty(html), "No HTML was downloaded from " + url);

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);

            var config = new Configuration(companyId);

            productParse.Analytics(productEntity, doc, url, config, "thegioicayxanh.vn");
            bool isValid = productEntity.IsSuccessData(false);

            // Expected value goes first so that a failure reports "expected"/"actual" correctly.
            // NOTE(review): the expected price is a hard-coded snapshot of a live page — confirm it is still valid.
            Assert.AreEqual(10590000, productEntity.Price);
            Assert.IsTrue(isValid);
        }
Beispiel #5
0
        /// <summary>
        /// Downloads a giadungchinhhang.vn product page, runs it through
        /// <c>ProductParse.Analytics</c> with the configuration of the hard-coded company
        /// id, and checks the parsed price and <c>IsSuccessData(false)</c>.
        /// </summary>
        /// <remarks>
        /// The test fetches a live URL and builds <c>Configuration</c> from a company id
        /// after setting the static <c>Server</c> connection strings, so it presumably
        /// needs network and database access — confirm before running it in isolation.
        /// </remarks>
        public void AnalyticsTest()
        {
            Server.ConnectionString        = ConfigCrawler.ConnectProduct;
            Server.LogConnectionString     = ConfigCrawler.ConnectLog;
            Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

            string url       = @"http://giadungchinhhang.vn/san-pham-chi-tiet/am-sieu-toc-philips-hd-4646--15-l-15.aspx";
            long   companyId = 1793534743671200240;

            ProductParse  productParse  = new ProductParse();
            ProductEntity productEntity = new ProductEntity();

            // NOTE(review): 45 and 2 look like a timeout and a retry count — confirm against getHTML.
            string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);

            // Fail with a readable message instead of an ArgumentNullException from LoadHtml.
            Assert.IsFalse(string.IsNullOrEmpty(html), "No HTML was downloaded from " + url);

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);

            var config = new Configuration(companyId);

            productParse.Analytics(productEntity, doc, url, config, "giadungchinhhang.vn");
            bool isValid = productEntity.IsSuccessData(false);

            // Expected value goes first so that a failure reports "expected"/"actual" correctly.
            // NOTE(review): the expected price is a hard-coded snapshot of a live page — confirm it is still valid.
            Assert.AreEqual(10590000, productEntity.Price);
            Assert.IsTrue(isValid);
        }
Beispiel #6
0
        /// <summary>
        /// Downloads a dongho12h.vn product page, runs it through
        /// <c>ProductParse.Analytics</c> with the configuration and domain of the
        /// hard-coded company id, and checks the parsed price and
        /// <c>IsSuccessData(false)</c>.
        /// </summary>
        /// <remarks>
        /// The test fetches a live URL and builds <c>Configuration</c>/<c>Company</c>
        /// from a company id after setting the static <c>Server</c> connection strings,
        /// so it presumably needs network and database access — confirm before running
        /// it in an isolated environment.
        /// </remarks>
        public void AnalyticsDongho12hTest()
        {
            Server.ConnectionString        = ConfigCrawler.ConnectProduct;
            Server.LogConnectionString     = ConfigCrawler.ConnectLog;
            Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

            string url       = @"http://dongho12h.vn/product/fune2006w0.html";
            long   companyId = 297705792783058114;

            ProductParse  productParse  = new ProductParse();
            ProductEntity productEntity = new ProductEntity();

            // NOTE(review): 45 and 2 look like a timeout and a retry count — confirm against getHTML.
            string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);

            // Fail with a readable message instead of an ArgumentNullException from LoadHtml.
            Assert.IsFalse(string.IsNullOrEmpty(html), "No HTML was downloaded from " + url);

            html = System.Web.HttpUtility.HtmlDecode(html);
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);

            var config  = new Configuration(companyId);
            var company = new Company(companyId);

            productParse.Analytics(productEntity, doc, url, config, company.Domain);
            bool isValid = productEntity.IsSuccessData(false);

            // Expected value goes first so that a failure reports "expected"/"actual" correctly.
            // NOTE(review): the expected price is a hard-coded snapshot of a live page — confirm it is still valid.
            Assert.AreEqual(10590000, productEntity.Price);
            Assert.IsTrue(isValid);
        }
Beispiel #7
0
        /// <summary>
        /// Console entry point. Sets the static <c>Server</c> connection strings, parses
        /// the command line with <c>ParameterManager.Parse</c> and dispatches on
        /// <c>SubCmd</c>:
        /// <c>crlrl</c> starts a <c>WorkerReload</c> crawl for the company of the
        /// <c>dm</c> domain parameter, <c>svcudclss</c>/<c>svptccache</c>/<c>svudprsql</c>
        /// start the matching consumer, and <c>prpt</c> downloads and parses the single
        /// URL given in the <c>u</c> parameter.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; they are joined with spaces before parsing. When
        /// empty, one line is read from standard input instead.
        /// </param>
        private static void Main(string[] args)
        {
            Server.ConnectionString        = ConfigCrawler.ConnectProduct;
            Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;
            Server.LogConnectionString     = ConfigCrawler.ConnectLog;
            ProductAdapter productAdapter = new ProductAdapter(new SqlDb(ConfigCrawler.ConnectProduct));

            // The command line is no longer replaced by a hard-coded debug string, so the
            // arguments (or the line typed on stdin) actually select the sub-command.
            string strParaInput = (args.Length == 0) ? Console.ReadLine() : string.Join(" ", args);

            // Console.ReadLine returns null when standard input is closed.
            if (string.IsNullOrEmpty(strParaInput))
            {
                Console.Error.WriteLine("No command specified.");
                return;
            }

            ParameterManager p = ParameterManager.Parse(strParaInput);

            if (p.SubCmd == "crlrl")
            {
                string domain    = p.Parameters["dm"][0];
                long   idCOmpany = productAdapter.GetCompanyIDFromDomain(domain);
                using (var worker = new WorkerReload(idCOmpany, new CancellationToken(), "", true))
                {
                    worker.StartCrawler();
                }
            }
            else if (p.SubCmd == "crlfn")
            {
                // Recognised sub-command with no implementation.
            }
            else if (p.SubCmd == "svcudclss")
            {
                var c = new ConsumerClassificationToSql();
                c.StartConsume();
            }
            else if (p.SubCmd == "svptccache")
            {
                var c = new ConsumerProductChangeToCache();
                c.StartConsume();
            }
            else if (p.SubCmd == "svudprsql")
            {
                var c = new ConsumerProductChangeToSql();
                c.StartConsume();
            }
            else if (p.SubCmd == "prpt")
            {
                // The URL now comes from the "u" parameter only; it is no longer
                // overwritten by a hard-coded debug URL.
                string url    = p.Parameters["u"][0].ToString();
                Uri    uri    = new Uri(url);
                string domain = Common.GetDomainFromUrl(uri);

                // Reuses productAdapter: Server.ConnectionString was set to the same
                // ConfigCrawler.ConnectProduct value above, so a second adapter is redundant.
                long idCompanyId = productAdapter.GetCompanyIdByDomain(domain);

                IDownloadHtml downloader = new DownloadHtmlCrawler();

                // The looked-up company id was previously unused and the configuration was
                // built empty. NOTE(review): assumes Configuration(long) loads that
                // company's parsing configuration — confirm.
                Configuration config = new Configuration(idCompanyId);

                WebExceptionStatus ext;
                string html = downloader.GetHTML(url, 45, 2, out ext);
                if (string.IsNullOrEmpty(html))
                {
                    Console.Error.WriteLine("Download failed for " + url + " (" + ext + ")");
                    return;
                }

                HtmlDocument htmlDocument = new HtmlDocument();
                htmlDocument.LoadHtml(html);

                ProductEntity pte = new ProductEntity();
                ProductParse  ppr = new ProductParse();

                // NOTE(review): the parsed entity is not printed or stored here.
                ppr.Analytics(pte, htmlDocument, url, config, domain);
            }
        }