/// <summary>
/// Click handler that crawls http://bantoyota.com.vn for keywords.
/// It downloads the home page, walks the menu links matched by
/// <c>//ul[@class='sub-menu']//li//a</c>, and for every model whose
/// <c>"toyota->" + model</c> key makes <c>CheckExitFullLink</c> return a value &gt;= 0
/// it loads the model page and saves one keyword per keyword link through <c>SaveKeyWord</c>.
/// Failures are reported with message boxes; the final "Hoàn tất!" box is always shown.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnDuyetTang_Click(object sender, EventArgs e)
{
    ProductSaleNewDataAdapter productAdapter = new ProductSaleNewDataAdapter(new QT.Entities.Data.SqlDb(QT.Entities.Server.ConnectionStringCrawler));
    try
    {
        string root = "http://bantoyota.com.vn";
        Uri uriroot = new Uri(root);

        // NOTE(review): the 45 and 2 arguments look like a timeout and a retry count — confirm against HTMLTransmitter.
        string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(root, 45, 2);
        GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(html);

        // List of car models.
        var nodeMakers = doc.DocumentNode.SelectNodes(@"//ul[@class='sub-menu']//li//a");
        if (nodeMakers == null)
        {
            // SelectNodes is treated as nullable elsewhere in this method (see the keyword links below),
            // so report the empty menu explicitly instead of letting foreach fail on a null reference.
            MessageBox.Show("Không tìm thấy dòng xe nào.");
        }
        else
        {
            // Visit each car-model node.
            foreach (var nodeModelCar in nodeMakers)
            {
                string strModelCar = nodeModelCar.InnerText.Trim();

                // NOTE(review): a negative result presumably means "model not known" — confirm against the adapter.
                if (productAdapter.CheckExitFullLink("toyota->" + strModelCar) < 0)
                {
                    continue;
                }

                // An anchor without href cannot be followed; skip it rather than abort the whole crawl.
                var modelHref = nodeModelCar.Attributes["href"];
                if (modelHref == null)
                {
                    continue;
                }

                // Resolve against the site root. Root-relative links ("/path") give the same address as the
                // former Scheme + "://" + Host + href concatenation; absolute and path-relative links now work too.
                Uri modelUri;
                if (!Uri.TryCreate(uriroot, modelHref.Value, out modelUri))
                {
                    continue;
                }

                // Ignore non-web links such as "javascript:" or "mailto:".
                if (modelUri.Scheme != Uri.UriSchemeHttp && modelUri.Scheme != Uri.UriSchemeHttps)
                {
                    continue;
                }

                GABIZ.Base.HtmlAgilityPack.HtmlDocument docModel = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                docModel.LoadHtml(System.Web.HttpUtility.HtmlDecode(GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(modelUri.AbsoluteUri, 45, 2)));

                var nodesKeyWords = docModel.DocumentNode.SelectNodes("//div[@class='rightsearch classhot']//div[@class='item']//a");
                if (nodesKeyWords == null)
                {
                    continue;
                }

                foreach (var nodeKeyWord in nodesKeyWords)
                {
                    var titleAttribute = nodeKeyWord.Attributes["title"];
                    if (titleAttribute == null)
                    {
                        continue;
                    }

                    string keyWord = titleAttribute.Value.Trim().ToLower().Replace(" ", "");

                    // Description extraction is disabled (always empty). The keyword detail page used to be
                    // downloaded here, but its result was never read, so that request has been removed.
                    string description = "";

                    // Save the data.
                    try
                    {
                        // NOTE(review): keyWord has had its spaces stripped above, so Replace("bán xe", "")
                        // appears unable to match. Left unchanged so stored values do not shift — confirm intent.
                        productAdapter.SaveKeyWord("toyota", strModelCar, keyWord.Replace("bán xe", ""), keyWord, description);
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show(ex.Message);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }

    MessageBox.Show("Hoàn tất!");
}
/// <summary>
/// Click handler that crawls xe360.vn for keywords.
/// It downloads http://xe360.vn/ban-oto/toyota-corolla.html, walks the model links matched by
/// <c>//ul[@class='top-level']//li[@class='parent']/a</c>, and for every model whose normalised
/// title makes <c>CheckExitFullLink01</c> return a value &gt;= 0 it loads the model page,
/// splits each <c>meta[name=keywords]</c> content on commas and saves every fragment through
/// <c>SaveKeyWord</c>.
/// Failures are reported with message boxes; the final "Hoàn tất!" box is always shown.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnKeyWordXe360_Click(object sender, EventArgs e)
{
    ProductSaleNewDataAdapter productAdapter = new ProductSaleNewDataAdapter(new QT.Entities.Data.SqlDb(QT.Entities.Server.ConnectionStringCrawler));
    try
    {
        string root = "http://xe360.vn/ban-oto/toyota-corolla.html";
        Uri uriroot = new Uri(root);

        // NOTE(review): 45 / 2 / true look like timeout, retry count and an option flag — confirm against HTMLTransmitter.
        string html = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(root, 45, 2, true);
        GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(html);

        // List of car models.
        var nodeMakers = doc.DocumentNode.SelectNodes(@"//ul[@class='top-level']//li[@class='parent']/a");
        if (nodeMakers == null)
        {
            // SelectNodes is treated as nullable elsewhere in this method (see the keyword nodes below),
            // so report the empty menu explicitly instead of letting foreach fail on a null reference.
            MessageBox.Show("Không tìm thấy dòng xe nào.");
        }
        else
        {
            // Visit each car-model node.
            foreach (var nodeModelCar in nodeMakers)
            {
                // A link without a title gives no model name to look up; skip it.
                var titleAttribute = nodeModelCar.Attributes["title"];
                if (titleAttribute == null)
                {
                    continue;
                }

                string strModelCar = titleAttribute.Value.Replace("-", " ").ToLower().Trim();

                // NOTE(review): the result is passed on to SaveKeyWord and a negative value skips the model;
                // presumably it is a category id — confirm against the adapter.
                int iCategories = productAdapter.CheckExitFullLink01(strModelCar);
                if (iCategories < 0)
                {
                    continue;
                }

                var modelHref = nodeModelCar.Attributes["href"];
                if (modelHref == null)
                {
                    continue;
                }

                // Resolve against the start page. Root-relative links ("/path") give the same address as the
                // former Scheme + "://" + Host + href concatenation; absolute and path-relative links now work too.
                Uri modelUri;
                if (!Uri.TryCreate(uriroot, modelHref.Value, out modelUri))
                {
                    continue;
                }

                // Ignore non-web links such as "javascript:" or "mailto:".
                if (modelUri.Scheme != Uri.UriSchemeHttp && modelUri.Scheme != Uri.UriSchemeHttps)
                {
                    continue;
                }

                GABIZ.Base.HtmlAgilityPack.HtmlDocument docModel = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                docModel.LoadHtml(System.Web.HttpUtility.HtmlDecode(GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(modelUri.AbsoluteUri, 45, 2, true)));

                var nodesKeyWords = docModel.DocumentNode.SelectNodes(@"//meta[@name='keywords']");
                if (nodesKeyWords == null)
                {
                    continue;
                }

                foreach (var nodeKeyWord in nodesKeyWords)
                {
                    var contentAttribute = nodeKeyWord.Attributes["content"];
                    if (contentAttribute == null)
                    {
                        continue;
                    }

                    string keyWord = Common.RemoveDumplicateSpace(contentAttribute.Value.Trim().ToLower());

                    // No description is collected on this site.
                    string description = "";

                    // Save the data.
                    try
                    {
                        // No count limit on Split: the former cap of 1000 would have merged everything after
                        // the 999th comma into a single keyword.
                        foreach (var str in keyWord.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
                        {
                            // RemoveEmptyEntries does not drop whitespace-only fragments (", ,"); skip them here.
                            if (str.Trim().Length == 0)
                            {
                                continue;
                            }

                            // NOTE(review): the keyword argument is passed untrimmed while the name is trimmed;
                            // kept as in the original so stored values do not shift — confirm intent.
                            productAdapter.SaveKeyWord(iCategories, str.Trim().Replace("bán xe", ""), str, description);
                        }
                    }
                    catch (Exception ex)
                    {
                        MessageBox.Show(ex.Message);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }

    MessageBox.Show("Hoàn tất!");
}