Beispiel #1
0
        /// <summary>
        /// Downloads one dienmaytantien.vn product detail page, parses it with
        /// <c>ProductParse.Analytics</c> using the configuration and company built from a fixed
        /// company id, and verifies the parsed price and the success flag.
        /// </summary>
        /// <remarks>
        /// The page is fetched from a live URL, so this test presumably needs network access and the
        /// connection strings assigned below — TODO confirm how it is meant to be run.
        /// </remarks>
        public void AnalyticsDienmayTantienTest()
        {
            Server.ConnectionString        = ConfigCrawler.ConnectProduct;
            Server.LogConnectionString     = ConfigCrawler.ConnectLog;
            Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

            string url       = @"http://dienmaytantien.vn/detail.asp?parent_id=336&id=2204";
            long   companyId = 8223820966383374348;

            ProductParse  productParse  = new ProductParse();
            ProductEntity productEntity = new ProductEntity();
            string        html          = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);

            // The markup is entity-decoded before it is handed to the HTML parser.
            html = System.Web.HttpUtility.HtmlDecode(html);
            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(html);
            var config  = new Configuration(companyId);
            var company = new Company(companyId);

            productParse.Analytics(productEntity, doc, url, config, company.Domain);
            bool bok = productEntity.IsSuccessData(false);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            // NOTE(review): the price comes from a live page and may drift — confirm 10590000.
            Assert.AreEqual(10590000, productEntity.Price);
            Assert.IsTrue(bok);
        }
Beispiel #2
0
        /// <summary>
        /// Runs the configured test link of every company returned by
        /// <c>GetAllCompanyIdCrawler</c> through the product parser and records companies whose
        /// test page does not yield a successful product in the <c>Company_FailConfig</c> table.
        /// </summary>
        /// <remarks>
        /// A company whose test page comes back null or empty is skipped without a record and
        /// without a progress log line. Exceptions thrown while parsing are stored together with
        /// their message and stack trace.
        /// </remarks>
        public void Start()
        {
            ProductAdapter adapter = new ProductAdapter(new SqlDb(ConfigCrawler.ConnectProduct));

            QT.Entities.Server.ConnectionString = ConfigCrawler.ConnectProduct;

            List<long> allCompanyIds = adapter.GetAllCompanyIdCrawler();

            for (int index = 0; index < allCompanyIds.Count; index++)
            {
                long currentId = allCompanyIds[index];

                var company       = new Company(currentId);
                var configuration = new Configuration(currentId);
                var parser        = new ProductParse();
                var parsedProduct = new ProductEntity();
                var document      = new HtmlDocument();

                string testLink = configuration.LinkTest;
                string pageHtml = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(testLink, 45, 2);

                // Nothing downloaded: move on to the next company.
                if (string.IsNullOrEmpty(pageHtml))
                {
                    continue;
                }

                document.LoadHtml(pageHtml);
                try
                {
                    parser.Analytics(parsedProduct, document, configuration.LinkTest, configuration, company.Domain);

                    bool parsedOk = parsedProduct.IsSuccessData(configuration.CheckPrice);
                    if (!parsedOk)
                    {
                        // Parsing ran but the result is not a valid product: flag the company.
                        var db         = adapter.GetSqlDb();
                        var parameters = new SqlParameter[]
                        {
                            SqlDb.CreateParamteterSQL("@CompanyId", currentId, SqlDbType.BigInt)
                        };
                        db.RunQuery("insert into Company_FailConfig (CompanyId) Values (@CompanyId)",
                                    CommandType.Text,
                                    parameters);
                    }
                }
                catch (Exception ex)
                {
                    // Parsing threw: flag the company and keep the error details.
                    var db         = adapter.GetSqlDb();
                    var parameters = new SqlParameter[]
                    {
                        SqlDb.CreateParamteterSQL("@CompanyId", currentId, SqlDbType.BigInt),
                        SqlDb.CreateParamteterSQL("@Error", ex.Message + "\n" + ex.StackTrace,
                                                  SqlDbType.NVarChar)
                    };
                    db.RunQuery("insert into Company_FailConfig (CompanyId, Error) Values (@CompanyId, @Error)",
                                CommandType.Text,
                                parameters);
                }

                _log.Info(string.Format("Run data {0}/  {1}", index, allCompanyIds.Count));
            }
        }
Beispiel #3
0
        /// <summary>
        /// Crawls every category link listed in <c>configurationHotProduct.HotProduct_Link</c>
        /// (separated by comma, newline or semicolon), follows the product links selected by
        /// <c>HotProduct_Xpath</c>, parses each product page and upserts the products that pass
        /// <c>IsSuccessData(true)</c>.
        /// </summary>
        /// <remarks>
        /// Errors raised while processing a single product link are logged and do not stop the
        /// run. Category pages or product pages that come back null or empty are skipped.
        /// </remarks>
        public void StartRun()
        {
            WebExceptionStatus outS = WebExceptionStatus.UnknownError;

            foreach (var linkExtraction in this.configurationHotProduct.HotProduct_Link.Split(new char[] { ',', '\n', ';' }, StringSplitOptions.RemoveEmptyEntries))
            {
                this.LogData("Get html of cat page");
                string html = downloadHtml.GetHTML(linkExtraction, 45, 2, out outS, this.configurationHotProduct.HotProduct_UseSelenium);
                if (string.IsNullOrEmpty(html))
                {
                    continue;
                }

                HtmlDocument htmlDocument = new HtmlDocument();
                htmlDocument.LoadHtml(html);
                var nodeLinks = htmlDocument.DocumentNode.SelectNodes(this.configurationHotProduct.HotProduct_Xpath);
                if (nodeLinks == null)
                {
                    continue;
                }

                foreach (var nodeLink in nodeLinks)
                {
                    try
                    {
                        string shortLink = nodeLink.GetAttributeValue("href", "");

                        string fullLink = Common.GetAbsoluteUrl(shortLink, new Uri(this.company.Website));
                        this.LogData(string.Format("Process link product {0}", fullLink));
                        string htmlLinkProduct = this.downloadHtml.GetHTML(fullLink, 45, 2, out outS);

                        // Check the download BEFORE loading it: LoadHtml rejects a null string, which
                        // previously surfaced as a logged exception instead of a plain skip.
                        if (string.IsNullOrEmpty(htmlLinkProduct))
                        {
                            this.LogData(string.Format("Skip link product {0}: empty html", fullLink));
                            continue;
                        }

                        HtmlDocument h = new HtmlDocument();
                        h.LoadHtml(htmlLinkProduct);

                        ProductEntity productEntity = new ProductEntity();
                        this.producerParser.Analytics(productEntity, h, fullLink, this.configuration, this.company.Domain);
                        if (productEntity.IsSuccessData(true))
                        {
                            this.productAdapter.UpsertProductHot(productEntity);
                            this.LogData(string.Format("Saved a product to database. {0} {1} {2}", productEntity.ID,
                                                       productEntity.Name, productEntity.Price));
                        }
                    }
                    catch (Exception ex)
                    {
                        // One bad link must not abort the remaining links of the page.
                        LogData(string.Format("Error: {0} {1}", ex.Message, ex.StackTrace));
                    }
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Parses the downloaded page of a crawl job and, for a page that yields a valid product
        /// not yet known to this crawler, either pushes it as a new product or reports it as a
        /// duplicate of a product with the same duplicate hash.
        /// </summary>
        /// <param name="jobCrawl">Crawl job whose <c>Url</c> identifies the page.</param>
        /// <param name="doc">Already loaded HTML document of that page.</param>
        private void Analysic(JobFindNew jobCrawl, HtmlDocument doc)
        {
            // Companies with status TIN only run the Product parser; nothing else happens here.
            if (_company.Status == Common.CompanyStatus.TIN)
            {
                var tinProduct = new Product();
                tinProduct.Analytics(doc, jobCrawl.Url, _config, false, _company.Domain);
                return;
            }

            var parsed = new ProductEntity();
            var parser = new ProductParse();
            parser.Analytics(parsed, doc, jobCrawl.Url, _config, _company.Domain);

            if (!parsed.IsSuccessData(_config.CheckPrice))
            {
                return;
            }

            parsed.Valid = false;
            if (IsExistsProduct(parsed.ID))
            {
                return;
            }

            if (_dicDuplicate.ContainsKey(parsed.GetHashDuplicate()))
            {
                // Same duplicate hash as an already registered product: publish a duplicate notice.
                var duplicate = new ProductDuplicate()
                {
                    CId   = _companyId,
                    Id    = parsed.ID,
                    Hash  = parsed.GetHashDuplicate(),
                    IdDup = _dicDuplicate[parsed.GetHashDuplicate()],
                    Url   = parsed.DetailUrl
                };
                _producerDuplicateProduct.PublishString(
                    Newtonsoft.Json.JsonConvert.SerializeObject(duplicate), true);
                return;
            }

            // First product with this hash: mark as new, push it and remember hash and id.
            parsed.StatusChange.IsNew = true;
            PushChangeProduct(parsed);
            _dicDuplicate.Add(parsed.GetHashDuplicate(), parsed.ID);
            _crcProductOldGroup.Add(parsed.ID);
            _countNewProduct++;
        }
Beispiel #5
0
        /// <summary>
        /// Downloads one thegioicayxanh.vn product page, parses it with
        /// <c>ProductParse.Analytics</c> using the configuration built from a fixed company id,
        /// and verifies the parsed price and the success flag.
        /// </summary>
        /// <remarks>
        /// The page is fetched from a live URL, so this test presumably needs network access and the
        /// connection strings assigned below — TODO confirm how it is meant to be run.
        /// </remarks>
        public void AnalyticsTHEGIOICAYXANHTest()
        {
            Server.ConnectionString        = ConfigCrawler.ConnectProduct;
            Server.LogConnectionString     = ConfigCrawler.ConnectLog;
            Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

            string        url           = @"http://thegioicayxanh.vn/cay-van-phong/cay-chan-ret.html";
            ProductParse  productParse  = new ProductParse();
            ProductEntity productEntity = new ProductEntity();
            string        html          = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);
            HtmlDocument  doc           = new HtmlDocument();

            doc.LoadHtml(html);
            var config = new Configuration(8153388634833285394);

            productParse.Analytics(productEntity, doc, url, config, "thegioicayxanh.vn");
            bool bok = productEntity.IsSuccessData(false);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            // NOTE(review): confirm 10590000 is really the price shown on this page.
            Assert.AreEqual(10590000, productEntity.Price);
            Assert.IsTrue(bok);
        }
Beispiel #6
0
        /// <summary>
        /// Downloads one giadungchinhhang.vn product page, parses it with
        /// <c>ProductParse.Analytics</c> using the configuration built from a fixed company id,
        /// and verifies the parsed price and the success flag.
        /// </summary>
        /// <remarks>
        /// The page is fetched from a live URL, so this test presumably needs network access and the
        /// connection strings assigned below — TODO confirm how it is meant to be run.
        /// </remarks>
        public void AnalyticsTest()
        {
            Server.ConnectionString        = ConfigCrawler.ConnectProduct;
            Server.LogConnectionString     = ConfigCrawler.ConnectLog;
            Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

            string        url           = @"http://giadungchinhhang.vn/san-pham-chi-tiet/am-sieu-toc-philips-hd-4646--15-l-15.aspx";
            ProductParse  productParse  = new ProductParse();
            ProductEntity productEntity = new ProductEntity();
            string        html          = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);
            HtmlDocument  doc           = new HtmlDocument();

            doc.LoadHtml(html);
            var config = new Configuration(1793534743671200240);

            productParse.Analytics(productEntity, doc, url, config, "giadungchinhhang.vn");
            bool bok = productEntity.IsSuccessData(false);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            // NOTE(review): confirm 10590000 is really the price shown on this page.
            Assert.AreEqual(10590000, productEntity.Price);
            Assert.IsTrue(bok);
        }
Beispiel #7
0
        /// <summary>
        /// Downloads one dongho12h.vn product page, parses it with <c>ProductParse.Analytics</c>
        /// using the configuration and company built from a fixed company id, and verifies the
        /// parsed price and the success flag.
        /// </summary>
        /// <remarks>
        /// The page is fetched from a live URL, so this test presumably needs network access and the
        /// connection strings assigned below — TODO confirm how it is meant to be run.
        /// </remarks>
        public void AnalyticsDongho12hTest()
        {
            Server.ConnectionString        = ConfigCrawler.ConnectProduct;
            Server.LogConnectionString     = ConfigCrawler.ConnectLog;
            Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;
            string        url           = @"http://dongho12h.vn/product/fune2006w0.html";
            long          companyId     = 297705792783058114;
            ProductParse  productParse  = new ProductParse();
            ProductEntity productEntity = new ProductEntity();
            string        html          = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);

            // The markup is entity-decoded before it is handed to the HTML parser.
            html = System.Web.HttpUtility.HtmlDecode(html);
            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(html);
            var config  = new Configuration(companyId);
            var company = new Company(companyId);

            productParse.Analytics(productEntity, doc, url, config, company.Domain);
            bool bok = productEntity.IsSuccessData(false);

            // Expected value goes first so that a failure message reports expected/actual correctly.
            // NOTE(review): confirm 10590000 is really the price shown on this page.
            Assert.AreEqual(10590000, productEntity.Price);
            Assert.IsTrue(bok);
        }
Beispiel #8
0
        /// <summary>
        /// Downloads the page of a job, parses it into <paramref name="product"/> and runs the
        /// change checks on it; a page that does not parse into a valid product is passed to
        /// <c>CheckDelete</c> instead.
        /// </summary>
        /// <param name="job">Job whose <c>url</c> is downloaded.</param>
        /// <param name="product">Product entity that receives the parsed data.</param>
        private void ParseProduct(Job job, ProductEntity product)
        {
            DelayTime();

            WebExceptionStatus downloadStatus;
            var pageHtml = GetHtmlCode(job.url, out downloadStatus);

            // The visit is counted whether or not the download succeeded.
            _countVisited++;

            bool downloadFailed = downloadStatus == WebExceptionStatus.Timeout
                                  || downloadStatus == WebExceptionStatus.ConnectFailure;
            if (downloadFailed)
            {
                _log.Info(string.Format("Fail download link: {0}", downloadStatus));
                return;
            }

            var document = new HtmlDocument();
            document.LoadHtml(pageHtml);
            _productParse.Analytics(product, document, job.url, _config, _company.Domain);

            if (!product.IsSuccessData(_config.CheckPrice))
            {
                CheckDelete(product);
                return;
            }

            CheckDuplicate(product);
            if (product.StatusChange.IsDuplicate)
            {
                // Duplicates skip all further change checks.
                return;
            }

            CheckDeleteProduct(product);
            CheckChangeBasic(product);
            CheckChangeDesc(product);
            CheckChangePrice(product);
            CheckChangeImg(product);
        }
Beispiel #9
0
        /// <summary>
        /// Fills <paramref name="pt"/> with the product data extracted from <paramref name="doc"/>
        /// using the XPath and regex settings of <paramref name="conf"/>, then stores the result
        /// of <c>pt.IsSuccessData(conf.CheckPrice)</c> in <c>pt.Valid</c>.
        /// </summary>
        /// <param name="pt">Entity to populate. Its ID is derived from <paramref name="detailUrl"/> only when it is still 0.</param>
        /// <param name="doc">Loaded HTML document of the product detail page.</param>
        /// <param name="detailUrl">URL of the product detail page.</param>
        /// <param name="conf">Configuration that supplies the company id, XPaths and price settings.</param>
        /// <param name="domain">Domain stored on the entity and used for the name hash and categories.</param>
        /// <exception cref="Exception">
        /// Any failure is rethrown as an <see cref="Exception"/> whose message carries company id,
        /// product id, URL, and the original message and stack trace; the original exception is
        /// available as <see cref="Exception.InnerException"/>.
        /// </exception>
        public void Analytics(ProductEntity pt, HtmlDocument doc, String detailUrl, Configuration conf, string domain)
        {
            try
            {
                if (pt.ID == 0)
                {
                    pt.ID = Common.GetIDProduct(detailUrl);
                }
                pt.CompanyId = conf.CompanyID;
                pt.Domain    = domain;
                pt.DetailUrl = detailUrl;

                pt.Name             = ParseName(doc, conf.ProductNameXPath);
                pt.HashName         = Common.GetHashNameProduct(domain, pt.Name);
                pt.ShortDescription = ParseShortDescription(doc, conf.ShortDescriptionXPath);
                pt.Categories       = ParseCategories(doc, conf.CategoryXPath, domain, conf.RemoveLastItemClassification, detailUrl);

                pt.OriginPrice = ParsePrice(doc, conf.OriginPriceXPath, conf.CheckPrice, new Uri(detailUrl), null);
                pt.Price       = ParsePrice(doc, conf.PriceXPath, conf.CheckPrice, new Uri(detailUrl), conf.RegexPrice.Split(Common.arSplitToList, StringSplitOptions.RemoveEmptyEntries).ToList());

                // The configured VAT status is the default; the VAT text on the page may override it.
                pt.VATStatus = conf.VATStatus;
                pt.VatInfo   = ParseVatInfo(doc, conf.VATInfoXPath);
                if (!string.IsNullOrEmpty(pt.VatInfo))
                {
                    string vatInfoLower = pt.VatInfo.ToLower();
                    if (vatInfoLower.Contains("chưa"))
                    {
                        pt.VATStatus = 0;
                    }
                    // Checked second on purpose: when both markers occur, status 1 wins.
                    if (vatInfoLower.Contains("đã"))
                    {
                        pt.VATStatus = 1;
                    }
                }
                pt.PromotionInfo = ParsePromotionInfo(doc, conf.PromotionInfoXPath);

                pt.StartDeal = ParseStartDeal(doc, conf.StartDealXPath);
                pt.EndDeal   = ParseEndDeal(doc, conf.EndDealXPath);

                pt.Warranty       = ParseWarranty(doc, conf.WarrantyXPath);
                pt.Status         = ParseStatus(doc, conf.StatusXPath);
                pt.Instock        = Common.GetProductInstockFormStatus(pt.Status);
                pt.ImageUrls      = ParseImages(doc, conf.ImageXPath, conf.AutoFixLinkImage, detailUrl);
                pt.Manufacture    = ParseManufacture(doc, conf.ManufactureXPath);
                pt.Origin         = ParseOrigin(doc, conf.OriginXPath);
                pt.Promotion      = ParsePromotion(doc, conf.PromotionXPath);
                pt.Summary        = ParseSummary(doc, conf.SummaryXPath);
                pt.ProductContent = ParseContent(doc, conf.ContentXPath);
                pt.VideoDescHtml  = GetDesc(doc, conf.VideoXpath.Split(Common.arSplitToList, StringSplitOptions.RemoveEmptyEntries).ToList());
                pt.SpecsDescHtml  = GetDesc(doc, conf.SpecsXPath.Split(Common.arSplitToList, StringSplitOptions.RemoveEmptyEntries).ToList());
                pt.FullDescHtml   = GetDesc(doc, conf.FullDescXPath.Split(Common.arSplitToList, StringSplitOptions.RemoveEmptyEntries).ToList());
                pt.ShortDescHtml  = GetDesc(doc, conf.ShortDescriptionXPath);


                pt.Valid = pt.IsSuccessData(conf.CheckPrice);
            }
            catch (Exception ex)
            {
                // Message format is unchanged; the original exception is additionally attached as
                // InnerException so its type and exception chain are not lost.
                throw new Exception(
                    string.Format("Company: {0} Product: {1} Url: {2}: {3}", pt.CompanyId, pt.ID, pt.DetailUrl, ex.Message + ex.StackTrace),
                    ex);
            }
        }