예제 #1
0
        /// <summary>
        /// Click handler that crawls http://bantoyota.com.vn: reads the car-model links from the
        /// site's sub-menu and, for every model for which <c>CheckExitFullLink</c> returns a
        /// non-negative value, downloads the model page and saves each keyword link found in the
        /// "rightsearch classhot" block through <c>SaveKeyWord</c>.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnDuyetTang_Click(object sender, EventArgs e)
        {
            ProductSaleNewDataAdapter productAdapter = new ProductSaleNewDataAdapter(new QT.Entities.Data.SqlDb(QT.Entities.Server.ConnectionStringCrawler));

            try
            {
                string root    = "http://bantoyota.com.vn";
                Uri    uriroot = new Uri(root);
                string html    = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(root, 45, 2);
                GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                // List of car models. The result is null-checked the same way as the keyword
                // lookup below, so an unexpected page layout ends the crawl quietly instead of
                // surfacing as a NullReferenceException.
                var nodeMakers = doc.DocumentNode.SelectNodes(@"//ul[@class='sub-menu']//li//a");
                if (nodeMakers != null)
                {
                    foreach (var nodeModelCar in nodeMakers) // Visit each car-model node
                    {
                        string strModelCar = nodeModelCar.InnerText.Trim();
                        if (productAdapter.CheckExitFullLink("toyota->" + strModelCar) < 0)
                        {
                            continue;
                        }

                        // Resolve the link against the site root. This handles root-relative,
                        // relative and absolute hrefs; an anchor without a usable href is skipped
                        // rather than aborting the whole run.
                        var hrefAttribute = nodeModelCar.Attributes["href"];
                        Uri modelUri;
                        if (hrefAttribute == null || !Uri.TryCreate(uriroot, hrefAttribute.Value, out modelUri))
                        {
                            continue;
                        }
                        string urlToModel = modelUri.AbsoluteUri;

                        GABIZ.Base.HtmlAgilityPack.HtmlDocument docModel = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                        docModel.LoadHtml(System.Web.HttpUtility.HtmlDecode(GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(urlToModel, 45, 2)));
                        string xPathKeyWord  = "//div[@class='rightsearch classhot']//div[@class='item']//a";
                        var    nodesKeyWords = docModel.DocumentNode.SelectNodes(xPathKeyWord);
                        if (nodesKeyWords == null)
                        {
                            continue;
                        }

                        foreach (var nodeKeyWord in nodesKeyWords)
                        {
                            var titleAttribute = nodeKeyWord.Attributes["title"];
                            if (titleAttribute == null || titleAttribute.Value == null)
                            {
                                continue; // No title means there is no keyword text to store.
                            }

                            // NOTE(review): Replace("  ", "") removes double spaces entirely and so
                            // joins the neighbouring words; kept as-is because already-stored
                            // keywords may rely on it - confirm whether " " was intended.
                            string keyWord = titleAttribute.Value.Trim().ToLower().Replace("  ", "");

                            // The description is always saved empty. The keyword's detail page is
                            // therefore not downloaded: nothing was ever read from it.
                            string description = "";

                            // Save the data.
                            try
                            {
                                productAdapter.SaveKeyWord("toyota", strModelCar, keyWord.Replace("bán xe", ""), keyWord, description);
                            }
                            catch (Exception ex)
                            {
                                // A failed save is reported but does not stop the remaining keywords.
                                MessageBox.Show(ex.Message);
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            MessageBox.Show("Hoàn tất!");
        }
예제 #2
0
        /// <summary>
        /// Creates the XPath configuration form: runs the designer initialization, creates a
        /// <c>SqlDb</c> from the crawler connection string together with the two data adapters
        /// that share it, creates the HTML content handler and finally calls <c>InitData</c>.
        /// </summary>
        /// <remarks>
        /// Initialization is best-effort: no exception escapes the constructor. A failure is
        /// written to the trace listeners so that it can be diagnosed.
        /// </remarks>
        public FrmConfigXPath()
        {
            try
            {
                InitializeComponent();

                sqlDb            = new SqlDb(QT.Entities.Server.ConnectionStringCrawler);
                raovatSqlAdapter = new RaoVatSQLAdapter(sqlDb);
                productAdapter   = new ProductSaleNewDataAdapter(sqlDb);

                this.hanlerContentOfHtml = new QT.Entities.RaoVat.HandlerContentOfHtml();
                InitData();
            }
            catch (Exception ex)
            {
                // Keep the non-throwing behaviour, but leave a record of what went wrong
                // (message and stack trace) instead of discarding the exception.
                System.Diagnostics.Trace.TraceError("FrmConfigXPath initialization failed: " + ex);
            }
        }
예제 #3
0
        /// <summary>
        /// Click handler that crawls xe360.vn: reads the car-model links from the top-level menu
        /// of the start page and, for every model for which <c>CheckExitFullLink01</c> returns a
        /// non-negative category id, downloads the model page, splits the content of its
        /// <c>meta name="keywords"</c> tags on commas and saves every entry through
        /// <c>SaveKeyWord</c>.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnKeyWordXe360_Click(object sender, EventArgs e)
        {
            ProductSaleNewDataAdapter productAdapter = new ProductSaleNewDataAdapter(new QT.Entities.Data.SqlDb(QT.Entities.Server.ConnectionStringCrawler));

            try
            {
                string root    = "http://xe360.vn/ban-oto/toyota-corolla.html";
                Uri    uriroot = new Uri(root);
                string html    = GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(root, 45, 2, true);
                GABIZ.Base.HtmlAgilityPack.HtmlDocument doc = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(html);

                // List of car models. The result is null-checked the same way as the keyword
                // lookup below, so an unexpected page layout ends the crawl quietly instead of
                // surfacing as a NullReferenceException.
                var nodeMakers = doc.DocumentNode.SelectNodes(@"//ul[@class='top-level']//li[@class='parent']/a");
                if (nodeMakers != null)
                {
                    foreach (var nodeModelCar in nodeMakers) // Visit each car-model node
                    {
                        var titleAttribute = nodeModelCar.Attributes["title"];
                        if (titleAttribute == null || titleAttribute.Value == null)
                        {
                            continue; // Without a title there is no model name to look up.
                        }

                        string strModelCar = titleAttribute.Value.Replace("-", " ").ToLower().Trim();
                        int    iCategories = productAdapter.CheckExitFullLink01(strModelCar);
                        if (iCategories < 0)
                        {
                            continue;
                        }

                        // Resolve the link against the start page. This handles root-relative,
                        // relative and absolute hrefs; an anchor without a usable href is skipped
                        // rather than aborting the whole run.
                        var hrefAttribute = nodeModelCar.Attributes["href"];
                        Uri modelUri;
                        if (hrefAttribute == null || !Uri.TryCreate(uriroot, hrefAttribute.Value, out modelUri))
                        {
                            continue;
                        }
                        string urlToModel = modelUri.AbsoluteUri;

                        GABIZ.Base.HtmlAgilityPack.HtmlDocument docModel = new GABIZ.Base.HtmlAgilityPack.HtmlDocument();
                        docModel.LoadHtml(System.Web.HttpUtility.HtmlDecode(GABIZ.Base.HtmlUrl.HTMLTransmitter.getHTML(urlToModel, 45, 2, true)));
                        string xPathKeyWord  = @"//meta[@name='keywords']";
                        var    nodesKeyWords = docModel.DocumentNode.SelectNodes(xPathKeyWord);
                        if (nodesKeyWords == null)
                        {
                            continue;
                        }

                        foreach (var nodeKeyWord in nodesKeyWords)
                        {
                            var contentAttribute = nodeKeyWord.Attributes["content"];
                            if (contentAttribute == null || contentAttribute.Value == null)
                            {
                                continue; // A keywords tag without content has nothing to save.
                            }

                            string keyWord     = Common.RemoveDumplicateSpace(contentAttribute.Value.Trim().ToLower());
                            string description = ""; // No description is collected for these keywords.

                            // Save the data.
                            try
                            {
                                foreach (var rawEntry in keyWord.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
                                {
                                    // Entries after a comma usually start with a space. Trim once and
                                    // use the trimmed text for both arguments; RemoveEmptyEntries
                                    // does not drop whitespace-only entries, so skip them here.
                                    string entry = rawEntry.Trim();
                                    if (entry.Length == 0)
                                    {
                                        continue;
                                    }

                                    productAdapter.SaveKeyWord(iCategories, entry.Replace("bán xe", ""), entry, description);
                                }
                            }
                            catch (Exception ex)
                            {
                                // A failed save is reported; the crawl continues with the next tag.
                                MessageBox.Show(ex.Message);
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            MessageBox.Show("Hoàn tất!");
        }