/// <summary>
/// Integration test: fetches a product page from dienmaytantien.vn, HTML-decodes it, runs it
/// through <c>ProductParse.Analytics</c> with the company's stored configuration and domain,
/// and checks the parsed price and that the entity passes <c>IsSuccessData</c>.
/// </summary>
/// <remarks>
/// Depends on the live page and on the connection strings exposed by <c>ConfigCrawler</c>.
/// </remarks>
public void AnalyticsDienmayTantienTest()
{
    // Arrange
    Server.ConnectionString = ConfigCrawler.ConnectProduct;
    Server.LogConnectionString = ConfigCrawler.ConnectLog;
    Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

    string url = @"http://dienmaytantien.vn/detail.asp?parent_id=336&id=2204";
    long companyId = 8223820966383374348;
    ProductParse productParse = new ProductParse();
    ProductEntity productEntity = new ProductEntity();

    string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);
    html = System.Web.HttpUtility.HtmlDecode(html);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    var config = new Configuration(companyId);
    var company = new Company(companyId);

    // Act
    productParse.Analytics(productEntity, doc, url, config, company.Domain);
    bool bok = productEntity.IsSuccessData(false);

    // Assert: expected value goes first so a failure message reports expected/actual correctly.
    // NOTE(review): the same expected price literal is used by the other Analytics*Test
    // methods in this file — confirm it is the right value for this product.
    Assert.AreEqual(10590000, productEntity.Price);
    Assert.IsTrue(bok);
}
/// <summary>
/// Checks the crawl configuration of every company returned by
/// <c>GetAllCompanyIdCrawler</c> by downloading and parsing its configured test link.
/// </summary>
/// <remarks>
/// A row is inserted into <c>Company_FailConfig</c> when the parsed entity does not pass
/// <c>IsSuccessData</c>, or (together with the exception text) when parsing throws.
/// Companies whose test page comes back null or empty are skipped, and no progress line
/// is logged for them.
/// </remarks>
public void Start()
{
    var adapter = new ProductAdapter(new SqlDb(ConfigCrawler.ConnectProduct));
    QT.Entities.Server.ConnectionString = ConfigCrawler.ConnectProduct;

    List<long> allCompanyIds = adapter.GetAllCompanyIdCrawler();
    int position = -1;
    foreach (long companyId in allCompanyIds)
    {
        position++;

        var company = new Company(companyId);
        var configuration = new Configuration(companyId);
        var parser = new ProductParse();
        var entity = new ProductEntity();
        var document = new HtmlDocument();

        string testLink = configuration.LinkTest;
        string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(testLink, 45, 2);
        if (string.IsNullOrEmpty(html))
        {
            // Nothing downloaded: move on to the next company.
            continue;
        }

        document.LoadHtml(html);
        try
        {
            parser.Analytics(entity, document, configuration.LinkTest, configuration, company.Domain);

            bool parsedOk = entity.IsSuccessData(configuration.CheckPrice);
            if (!parsedOk)
            {
                // Parsed without error but the data is not acceptable: record the company only.
                var failParameters = new SqlParameter[]
                {
                    SqlDb.CreateParamteterSQL("@CompanyId", companyId, SqlDbType.BigInt)
                };
                adapter.GetSqlDb().RunQuery(
                    "insert into Company_FailConfig (CompanyId) Values (@CompanyId)",
                    CommandType.Text,
                    failParameters);
            }
        }
        catch (Exception ex)
        {
            // Parsing threw: record the company together with message and stack trace.
            var errorParameters = new SqlParameter[]
            {
                SqlDb.CreateParamteterSQL("@CompanyId", companyId, SqlDbType.BigInt),
                SqlDb.CreateParamteterSQL("@Error", ex.Message + "\n" + ex.StackTrace, SqlDbType.NVarChar)
            };
            adapter.GetSqlDb().RunQuery(
                "insert into Company_FailConfig (CompanyId, Error) Values (@CompanyId, @Error)",
                CommandType.Text,
                errorParameters);
        }

        _log.Info(string.Format("Run data {0}/ {1}", position, allCompanyIds.Count));
    }
}
/// <summary>
/// Parses a crawled page and, when it yields a valid product that is not already known,
/// either pushes it as a new product or publishes a duplicate notification.
/// </summary>
/// <param name="jobCrawl">Job whose <c>Url</c> identifies the page being analysed.</param>
/// <param name="doc">HTML document already loaded for that page.</param>
private void Analysic(JobFindNew jobCrawl, HtmlDocument doc)
{
    if (_company.Status == Common.CompanyStatus.TIN)
    {
        // For this company status the page is parsed with Product instead of ProductEntity;
        // the parsed object is not used any further in this method.
        var tinProduct = new Product();
        tinProduct.Analytics(doc, jobCrawl.Url, _config, false, _company.Domain);
        return;
    }

    var product = new ProductEntity();
    var productParse = new ProductParse();
    productParse.Analytics(product, doc, jobCrawl.Url, _config, _company.Domain);

    if (!product.IsSuccessData(_config.CheckPrice))
    {
        return;
    }

    product.Valid = false;
    if (IsExistsProduct(product.ID))
    {
        return;
    }

    // Compute the duplicate hash once: the original recomputed it for every lookup/insert.
    // Assumes GetHashDuplicate() is deterministic for an unchanged product — TODO confirm.
    var duplicateHash = product.GetHashDuplicate();

    if (!_dicDuplicate.ContainsKey(duplicateHash))
    {
        // First product seen with this hash: treat it as new and remember it.
        product.StatusChange.IsNew = true;
        PushChangeProduct(product);
        _dicDuplicate.Add(duplicateHash, product.ID);
        _crcProductOldGroup.Add(product.ID);
        _countNewProduct++;
    }
    else
    {
        // Another product already owns this hash: report the pair instead of adding it.
        var duplicate = new ProductDuplicate()
        {
            CId = _companyId,
            Id = product.ID,
            Hash = duplicateHash,
            IdDup = _dicDuplicate[duplicateHash],
            Url = product.DetailUrl
        };
        _producerDuplicateProduct.PublishString(
            Newtonsoft.Json.JsonConvert.SerializeObject(duplicate),
            true);
    }
}
/// <summary>
/// Integration test: fetches a product page from thegioicayxanh.vn, runs it through
/// <c>ProductParse.Analytics</c> with the stored configuration for that company id,
/// and checks the parsed price and that the entity passes <c>IsSuccessData</c>.
/// </summary>
/// <remarks>
/// Depends on the live page and on the connection strings exposed by <c>ConfigCrawler</c>.
/// </remarks>
public void AnalyticsTHEGIOICAYXANHTest()
{
    // Arrange
    Server.ConnectionString = ConfigCrawler.ConnectProduct;
    Server.LogConnectionString = ConfigCrawler.ConnectLog;
    Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

    string url = @"http://thegioicayxanh.vn/cay-van-phong/cay-chan-ret.html";
    ProductParse productParse = new ProductParse();
    ProductEntity productEntity = new ProductEntity();

    string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    var config = new Configuration(8153388634833285394);

    // Act
    productParse.Analytics(productEntity, doc, url, config, "thegioicayxanh.vn");
    bool bok = productEntity.IsSuccessData(false);

    // Assert: expected value goes first so a failure message reports expected/actual correctly.
    // NOTE(review): the same expected price literal is used by the other Analytics*Test
    // methods in this file — confirm it is the right value for this product.
    Assert.AreEqual(10590000, productEntity.Price);
    Assert.IsTrue(bok);
}
/// <summary>
/// Integration test: fetches a product page from giadungchinhhang.vn, runs it through
/// <c>ProductParse.Analytics</c> with the stored configuration for that company id,
/// and checks the parsed price and that the entity passes <c>IsSuccessData</c>.
/// </summary>
/// <remarks>
/// Depends on the live page and on the connection strings exposed by <c>ConfigCrawler</c>.
/// </remarks>
public void AnalyticsTest()
{
    // Arrange
    Server.ConnectionString = ConfigCrawler.ConnectProduct;
    Server.LogConnectionString = ConfigCrawler.ConnectLog;
    Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

    string url = @"http://giadungchinhhang.vn/san-pham-chi-tiet/am-sieu-toc-philips-hd-4646--15-l-15.aspx";
    ProductParse productParse = new ProductParse();
    ProductEntity productEntity = new ProductEntity();

    string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    var config = new Configuration(1793534743671200240);

    // Act
    productParse.Analytics(productEntity, doc, url, config, "giadungchinhhang.vn");
    bool bok = productEntity.IsSuccessData(false);

    // Assert: expected value goes first so a failure message reports expected/actual correctly.
    // NOTE(review): the same expected price literal is used by the other Analytics*Test
    // methods in this file — confirm it is the right value for this product.
    Assert.AreEqual(10590000, productEntity.Price);
    Assert.IsTrue(bok);
}
/// <summary>
/// Integration test: fetches a product page from dongho12h.vn, HTML-decodes it, runs it
/// through <c>ProductParse.Analytics</c> with the company's stored configuration and domain,
/// and checks the parsed price and that the entity passes <c>IsSuccessData</c>.
/// </summary>
/// <remarks>
/// Depends on the live page and on the connection strings exposed by <c>ConfigCrawler</c>.
/// </remarks>
public void AnalyticsDongho12hTest()
{
    // Arrange
    Server.ConnectionString = ConfigCrawler.ConnectProduct;
    Server.LogConnectionString = ConfigCrawler.ConnectLog;
    Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;

    string url = @"http://dongho12h.vn/product/fune2006w0.html";
    long companyId = 297705792783058114;
    ProductParse productParse = new ProductParse();
    ProductEntity productEntity = new ProductEntity();

    string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(url, 45, 2);
    html = System.Web.HttpUtility.HtmlDecode(html);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    var config = new Configuration(companyId);
    var company = new Company(companyId);

    // Act
    productParse.Analytics(productEntity, doc, url, config, company.Domain);
    bool bok = productEntity.IsSuccessData(false);

    // Assert: expected value goes first so a failure message reports expected/actual correctly.
    // NOTE(review): the same expected price literal is used by the other Analytics*Test
    // methods in this file — confirm it is the right value for this product.
    Assert.AreEqual(10590000, productEntity.Price);
    Assert.IsTrue(bok);
}
/// <summary>
/// Downloads the page for <paramref name="job"/>, parses it into
/// <paramref name="product"/> and runs the duplicate/change checks on the result.
/// </summary>
/// <param name="job">Job whose <c>url</c> is downloaded and parsed.</param>
/// <param name="product">Entity that receives the parsed data and is passed to the checks.</param>
private void ParseProduct(Job job, ProductEntity product)
{
    DelayTime();

    WebExceptionStatus downloadStatus;
    var pageHtml = GetHtmlCode(job.url, out downloadStatus);
    _countVisited++;

    bool downloadFailed = downloadStatus == WebExceptionStatus.Timeout
                          || downloadStatus == WebExceptionStatus.ConnectFailure;
    if (downloadFailed)
    {
        // Timeout / connect failure: only log, the product is left untouched.
        _log.Info(string.Format("Fail download link: {0}", downloadStatus));
        return;
    }

    var document = new HtmlDocument();
    document.LoadHtml(pageHtml);
    _productParse.Analytics(product, document, job.url, _config, _company.Domain);

    if (!product.IsSuccessData(_config.CheckPrice))
    {
        // Page no longer yields acceptable product data.
        CheckDelete(product);
        return;
    }

    CheckDuplicate(product);
    if (product.StatusChange.IsDuplicate)
    {
        // Duplicates skip all change checks.
        return;
    }

    CheckDeleteProduct(product);
    CheckChangeBasic(product);
    CheckChangeDesc(product);
    CheckChangePrice(product);
    CheckChangeImg(product);
}
/// <summary>
/// Console entry point: sets the shared connection strings, parses the command line
/// (or one line read from standard input when no arguments are given) and runs the
/// sub-command named by <c>SubCmd</c>.
/// </summary>
/// <remarks>
/// Sub-commands handled here:
/// <c>crlrl</c> runs a <c>WorkerReload</c> for the company resolved from <c>Parameters["dm"]</c>;
/// <c>crlfn</c> does nothing;
/// <c>svcudclss</c>, <c>svptccache</c> and <c>svudprsql</c> each start the matching consumer;
/// <c>prpt</c> downloads and parses the single URL given in <c>Parameters["u"]</c>.
/// </remarks>
/// <param name="args">Command-line arguments; joined with single spaces before parsing.</param>
private static void Main(string[] args)
{
    Server.ConnectionString = ConfigCrawler.ConnectProduct;
    Server.ConnectionStringCrawler = ConfigCrawler.ConnectionCrawler;
    Server.LogConnectionString = ConfigCrawler.ConnectLog;

    ProductAdapter productAdapter = new ProductAdapter(new SqlDb(ConfigCrawler.ConnectProduct));

    // Console.ReadLine() returns null when standard input is closed; fall back to an empty line.
    // The previous hard-coded debug command line that overwrote this value has been removed,
    // so the arguments actually supplied by the caller are honoured.
    string strParaInput = (args.Length == 0)
        ? (Console.ReadLine() ?? string.Empty)
        : string.Join(" ", args);
    ParameterManager p = ParameterManager.Parse(strParaInput);

    switch (p.SubCmd)
    {
        case "crlrl":
        {
            string domain = p.Parameters["dm"][0];
            long idCompany = productAdapter.GetCompanyIDFromDomain(domain);
            using (var worker = new WorkerReload(idCompany, new CancellationToken(), "", true))
            {
                worker.StartCrawler();
            }
            break;
        }

        case "crlfn":
            // Recognised but intentionally performs no work.
            break;

        case "svcudclss":
        {
            var consumer = new ConsumerClassificationToSql();
            consumer.StartConsume();
            break;
        }

        case "svptccache":
        {
            var consumer = new ConsumerProductChangeToCache();
            consumer.StartConsume();
            break;
        }

        case "svudprsql":
        {
            var consumer = new ConsumerProductChangeToSql();
            consumer.StartConsume();
            break;
        }

        case "prpt":
        {
            ProductAdapter pta = new ProductAdapter(new SqlDb(Server.ConnectionString));

            // Use the URL passed on the command line; a hard-coded debug URL used to replace it.
            string url = p.Parameters["u"][0].ToString();
            Uri uri = new Uri(url);
            string domain = Common.GetDomainFromUrl(uri);
            long idCompanyId = pta.GetCompanyIdByDomain(domain);

            IDownloadHtml downloader = new DownloadHtmlCrawler();
            // Load the configuration of the resolved company (the id was previously looked up
            // but never used, and a default-constructed Configuration was parsed against).
            // NOTE(review): confirm Configuration(long) is the intended constructor here.
            Configuration config = new Configuration(idCompanyId);

            HtmlDocument htmlDocument = new HtmlDocument();
            WebExceptionStatus downloadStatus;
            string html = downloader.GetHTML(url, 45, 2, out downloadStatus);
            htmlDocument.LoadHtml(html);

            ProductEntity pte = new ProductEntity();
            ProductParse ppr = new ProductParse();
            ppr.Analytics(pte, htmlDocument, url, config, domain);
            break;
        }
    }
}