Exemplo n.º 1
0
        /// <summary>
        /// Stores a crawl result: removes any existing <c>CRItemPrices</c> rows with the same
        /// site code and item code, inserts a fresh row built from <paramref name="CrawInfo"/>,
        /// and then logs the item code, price and brand.
        /// </summary>
        /// <param name="CrawInfo">Crawl result whose fields are copied into the new row.</param>
        void CrawSite_CrawInfoEvent(CrawInfoCls CrawInfo)
        {
            // The data context is disposed as soon as the row has been written;
            // the original left it undisposed on every call.
            using (dbModelDataContext db = new dbModelDataContext())
            {
                var existingRows = (from i in db.CRItemPrices
                                    where i.SiteCode == CrawInfo.SiteCode && i.ItemSiteCode == CrawInfo.ItemSiteCode
                                    select i).ToList();

                if (existingRows.Count > 0)
                {
                    db.CRItemPrices.DeleteAllOnSubmit(existingRows);
                    // Submitted on its own so the old rows are deleted before the insert below is sent.
                    db.SubmitChanges();
                }

                CRItemPrice iNew = new CRItemPrice();

                iNew.SiteCode      = CrawInfo.SiteCode;
                iNew.ItemSiteCode  = CrawInfo.ItemSiteCode;
                iNew.ItemBrand     = CrawInfo.ItemBrand;
                iNew.ItemSiteName  = CrawInfo.ItemSiteName;
                iNew.SiteItemGroup = CrawInfo.SiteItemGroup;
                iNew.SitePrice     = CrawInfo.SitePrice;
                iNew.UrlCheck      = CrawInfo.UrlCheck;
                iNew.CreateDate    = DateTime.Now;
                iNew.CreateBy      = "SoftWare";

                db.CRItemPrices.InsertOnSubmit(iNew);
                db.SubmitChanges();
            }

            cr_LogEvent(string.Format("Mã hàng: {0}, giá: {1}, Thương hiệu: {2}", CrawInfo.ItemSiteCode, CrawInfo.SitePrice, CrawInfo.ItemBrand));
        }
Exemplo n.º 2
0
 /// <summary>
 /// Invokes <c>CrawInfoEvent</c> with the given crawl result when it is non-null.
 /// </summary>
 /// <param name="CrawInfo">Crawl result passed to the event.</param>
 public void RaiseCrawInfo(CrawInfoCls CrawInfo)
 {
     // Nothing to call when the event has no value.
     if (CrawInfoEvent == null)
     {
         return;
     }

     CrawInfoEvent(CrawInfo);
 }
Exemplo n.º 3
0
        /// <summary>
        /// Loads <paramref name="url"/> with <c>?page=1</c> appended and raises one crawl result
        /// (via <c>RaiseCrawInfo</c>) for every <c>//ul[@class='cate ']/li</c> entry that has a price.
        /// </summary>
        /// <param name="url">Listing URL without the page query string.</param>
        public void LoadCrawSite(string url)
        {
            int    CurrentPage = 1;
            string baseUrl     = url + string.Format("?page={0}", CurrentPage.ToString());

            HtmlAgilityPack.HtmlDocument document = LoadPage(baseUrl);

            var listNodes = document.DocumentNode.SelectNodes("//ul[@class='cate ']/li");
            if (listNodes == null)
            {
                // NOTE(review): this path returns without resetting GlobalEnv.Waiting
                // (unchanged from the original) — confirm that is intended.
                return;
            }

            foreach (var ulItem in listNodes)
            {
                if (GlobalEnv.StopRuning)
                {
                    break;
                }

                // Each lookup is evaluated once and reused instead of being repeated per access.
                var nameNode  = ulItem.SelectSingleNode(ulItem.XPath + "//h3");
                var priceNode = ulItem.SelectSingleNode(ulItem.XPath + "/strong");
                var linkNode  = ulItem.SelectSingleNode(ulItem.XPath + "/div/a");

                string ItemSiteName = nameNode != null ? nameNode.InnerText : string.Empty;
                string SitePrice    = priceNode != null ? priceNode.InnerText : string.Empty;

                // A link without an href attribute is treated like a missing link.
                string linkItemCode = string.Empty;
                if (linkNode != null)
                {
                    var hrefAttribute = linkNode.Attributes["href"];
                    if (hrefAttribute != null)
                    {
                        linkItemCode = hrefAttribute.Value ?? string.Empty;
                    }
                }

                // Item code = text after the last '/' of the link (the whole link when it has no '/').
                string ItemSiteCode = linkItemCode.Substring(linkItemCode.LastIndexOf('/') + 1);

                // Brand = text before the first '-' of the item code; empty when there is no '-'
                // (the original threw ArgumentOutOfRangeException in that case).
                int    dashIndex = ItemSiteCode.IndexOf('-');
                string ItemBrand = dashIndex >= 0 ? ItemSiteCode.Substring(0, dashIndex) : string.Empty;

                SitePrice = SitePrice.Replace("₫", string.Empty);
                SitePrice = SitePrice.Replace(".", string.Empty);
                SitePrice = SitePrice.Trim();

                // Entries without a price are skipped; Convert.ToDouble would throw on an empty string.
                if (string.IsNullOrEmpty(SitePrice))
                {
                    continue;
                }

                CrawInfoCls CrawInfo = new CrawInfoCls();
                CrawInfo.SiteItemGroup = this.SiteItemGroup;
                CrawInfo.ItemSiteCode  = ItemSiteCode;
                CrawInfo.SiteCode      = this.SiteCode;
                CrawInfo.ItemBrand     = ItemBrand;
                CrawInfo.SitePrice     = Convert.ToDouble(SitePrice);
                CrawInfo.ItemSiteName  = ItemSiteName;
                CrawInfo.UrlCheck      = baseUrl;

                RaiseCrawInfo(CrawInfo);
            }

            GlobalEnv.Waiting = false;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Crawls a paged product listing addressed with <c>?p=N</c>. Starting at page 1, every
        /// <c>li[@class='item']</c> entry with a price is reported through <c>RaiseCrawInfo</c>;
        /// paging continues while the page counter is below the highest page number seen in the
        /// pager plus two.
        /// </summary>
        /// <remarks>
        /// Crawling stops early when <c>GlobalEnv.StopRuning</c> is set or when an item code equal
        /// to the first reported item code shows up again.
        /// </remarks>
        /// <param name="url">Listing URL without the page query string.</param>
        public void LoadCrawSite(string url)
        {
            const string pagerXPath        = "//div[@class='category-products']/div[@class='toolbar']/div[@class='pager']/div[@class='pages']/ol/li";
            const string productListXPath  = "//div[@class='category-products']/ol[@class='products-list']";
            const string itemXPath         = "//div[@class='category-products']/ol[@class='products-list']/li[@class='item']";
            const string nameXPath         = "div[@class='product-shop']/div/div/h2[@class='product-name']";
            const string specialPriceXPath = "div[@class='product-shop']/div/div/div[@class='price-box']/p[@class='special-price']/span[@class='price']";
            const string regularPriceXPath = "div[@class='product-shop']/div/div/div[@class='price-box']/span[@class='regular-price']/span[@class='price']";

            int    CurrentPage = 1;
            string baseUrl     = url + string.Format("?p={0}", CurrentPage.ToString());

            HtmlAgilityPack.HtmlDocument document = LoadPage(baseUrl);

            var pagerNodes = document.DocumentNode.SelectNodes(pagerXPath);
            if (pagerNodes == null)
            {
                // NOTE(review): a first page without pager entries is not crawled at all and
                // GlobalEnv.Waiting is left untouched (unchanged from the original) — confirm intended.
                return;
            }

            // Highest numeric pager label seen so far; non-numeric labels parse as 0 and are ignored.
            int MaxPage = 0;
            foreach (var pagerNode in pagerNodes)
            {
                int checkPage;
                if (int.TryParse(pagerNode.InnerText, out checkPage) && checkPage > MaxPage)
                {
                    MaxPage = checkPage;
                }
            }

            string fisrtItemCode = string.Empty;
            bool   stopCrawl     = false;

            while (CurrentPage < MaxPage + 2)
            {
                RaiseLog("Bắt đầu quét trang: " + baseUrl);

                // URL of the page currently held in 'document'; baseUrl moves on to the next page below.
                string pageUrl = baseUrl;
                CurrentPage += 1;
                baseUrl      = url + string.Format("?p={0}", CurrentPage.ToString());

                var itemNodes = document.DocumentNode.SelectNodes(itemXPath);
                if (itemNodes != null)
                {
                    foreach (var iNode in itemNodes)
                    {
                        if (GlobalEnv.StopRuning)
                        {
                            stopCrawl = true;
                            break;
                        }

                        var    nameNode     = iNode.SelectSingleNode(nameXPath);
                        string ItemSiteName = nameNode != null ? nameNode.InnerText.Trim() : string.Empty;

                        // Brand = second-to-last space-separated word of the name (empty for a single word).
                        string[] nameParts = ItemSiteName.Split(' ');
                        string   ItemBrand = nameParts.Length > 1 ? nameParts[nameParts.Length - 2] : string.Empty;

                        // Item code = name from the last occurrence of the brand onwards; otherwise the
                        // part from the last space onwards, or the whole name when it has no space
                        // (the original threw in that last case).
                        string ItemSiteCode;
                        int    brandIndex = ItemSiteName.ToLower().LastIndexOf(ItemBrand.ToLower());
                        if (brandIndex > 0)
                        {
                            ItemSiteCode = ItemSiteName.Substring(brandIndex).Trim();
                        }
                        else
                        {
                            int lastSpace = ItemSiteName.LastIndexOf(" ");
                            ItemSiteCode = lastSpace >= 0 ? ItemSiteName.Substring(lastSpace) : ItemSiteName;
                        }

                        // The special price wins; the regular price is the fallback.
                        var    priceNode = iNode.SelectSingleNode(specialPriceXPath);
                        string SitePrice = priceNode != null ? priceNode.InnerText.Trim() : string.Empty;
                        if (string.IsNullOrEmpty(SitePrice))
                        {
                            priceNode = iNode.SelectSingleNode(regularPriceXPath);
                            SitePrice = priceNode != null ? priceNode.InnerText.Trim() : string.Empty;
                        }

                        SitePrice = SitePrice.Replace("₫", string.Empty);
                        SitePrice = SitePrice.Replace(".", string.Empty);
                        SitePrice = SitePrice.Trim();

                        if (string.IsNullOrEmpty(SitePrice))
                        {
                            continue;
                        }

                        // Seeing the first reported code again ends the whole crawl.
                        // Presumably the site repeats its first page past the last one — confirm.
                        if (!string.IsNullOrEmpty(ItemSiteCode) && ItemSiteCode == fisrtItemCode)
                        {
                            stopCrawl = true;
                            break;
                        }

                        CrawInfoCls CrawInfo = new CrawInfoCls();
                        CrawInfo.SiteItemGroup = this.SiteItemGroup;
                        CrawInfo.ItemSiteCode  = ItemSiteCode;
                        CrawInfo.SiteCode      = this.SiteCode;
                        CrawInfo.ItemBrand     = ItemBrand;
                        CrawInfo.SitePrice     = Convert.ToDouble(SitePrice);
                        CrawInfo.ItemSiteName  = ItemSiteName;
                        // Record the page the item was read from, not the page that is loaded next.
                        CrawInfo.UrlCheck      = pageUrl;

                        // Remember the first reported item code for the repeat check above.
                        if (string.IsNullOrEmpty(fisrtItemCode))
                        {
                            fisrtItemCode = ItemSiteCode;
                        }

                        RaiseCrawInfo(CrawInfo);
                    }
                }

                // Stop paging as well; the original only left the item loop and kept fetching pages.
                if (stopCrawl)
                {
                    break;
                }

                // Load the next page for the following iteration.
                document = LoadPage(baseUrl);
                if (document.DocumentNode.SelectNodes(productListXPath) == null)
                {
                    continue;
                }

                // Re-read the pager of the new page. Iterating its own nodes avoids indexing it
                // with the first page's entry count, which could run past the end.
                pagerNodes = document.DocumentNode.SelectNodes(pagerXPath);
                if (pagerNodes != null)
                {
                    foreach (var pagerNode in pagerNodes)
                    {
                        int checkPage;
                        if (int.TryParse(pagerNode.InnerText, out checkPage) && checkPage > MaxPage)
                        {
                            MaxPage = checkPage;
                        }
                    }
                }
            }

            GlobalEnv.Waiting = false;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Crawls a paged product listing addressed with <c>?trang=N</c>. Starting at page 1, every
        /// <c>pl-item-ul</c> list entry with a price is reported through <c>RaiseCrawInfo</c>; paging
        /// continues until a page has no <c>pca-pl-l</c> container or no list entries.
        /// </summary>
        /// <remarks>
        /// Crawling stops early when <c>GlobalEnv.StopRuning</c> is set or when an item code equal
        /// to the first reported item code shows up again. The price is assembled from single
        /// characters embedded in the price markup and multiplied by 1000.
        /// </remarks>
        /// <param name="url">Listing URL without the page query string.</param>
        public void LoadCrawSite(string url)
        {
            const string listXPath  = "//div[@class='pca-pl-l']";
            const string itemXPath  = "//div[@class='pca-pl-l']/ul[@class='pl-item-ul']/li";
            const string priceXPath = "div/div[@class='pl-item-price']/div[@class='pl-item-pbuy']/div[@class='draw-price']/div[@class='draw-price-content']";
            const string getPrice   = "drw-pri-thumb-view-";

            // One rule for every page: add a '/' before the query only when the URL does not
            // already end with one. The original used a different, off-by-one test for page 1.
            bool   endsWithSlash = url.LastIndexOf('/') >= url.Length - 1;
            string pageFormat    = endsWithSlash ? "?trang={0}" : "/?trang={0}";

            int    CurrentPage = 1;
            string baseUrl     = url + string.Format(pageFormat, CurrentPage.ToString());

            HtmlAgilityPack.HtmlDocument document = LoadPage(baseUrl);
            var    listNodes     = document.DocumentNode.SelectNodes(listXPath);
            string fisrtItemCode = string.Empty;
            bool   stopCrawl     = false;

            while (listNodes != null && !stopCrawl)
            {
                // URL of the page currently held in 'document'; baseUrl moves on to the next page below.
                string pageUrl = baseUrl;
                CurrentPage += 1;
                baseUrl      = url + string.Format(pageFormat, CurrentPage.ToString());

                RaiseLog("Bắt đầu quét trang: " + pageUrl);

                var itemNodes = document.DocumentNode.SelectNodes(itemXPath);
                if (itemNodes == null)
                {
                    break;
                }

                foreach (var iNode in itemNodes)
                {
                    if (GlobalEnv.StopRuning)
                    {
                        stopCrawl = true;
                        break;
                    }

                    var    nameNode     = iNode.SelectSingleNode("div/p[@class='pl-item-name']");
                    string ItemSiteName = nameNode != null ? nameNode.InnerText.Trim() : string.Empty;

                    // A missing brand paragraph yields an empty brand instead of a null dereference.
                    var    brandNode = iNode.SelectSingleNode("div/p[@class='pl-item-brand']");
                    string ItemBrand = brandNode != null && brandNode.FirstChild != null
                                           ? brandNode.FirstChild.InnerText.Trim()
                                           : string.Empty;

                    // Item code = text following the last occurrence of the brand in the name.
                    // When the brand is empty or not part of the name, the whole name is used
                    // rather than slicing from a -1 index.
                    string ItemSiteCode = ItemSiteName;
                    if (ItemBrand.Length > 0)
                    {
                        int brandIndex = ItemSiteName.ToLower().LastIndexOf(ItemBrand.ToLower());
                        if (brandIndex >= 0)
                        {
                            ItemSiteCode = ItemSiteName.Substring(brandIndex + ItemBrand.Length).Trim();
                        }
                    }

                    var    priceNode         = iNode.SelectSingleNode(priceXPath);
                    string iProductPricehtml = priceNode != null ? priceNode.InnerHtml : string.Empty;

                    // Each "</span>"-delimited piece mentioning "drw-pri-thumb-view" contributes the
                    // character right after "drw-pri-thumb-view-", once per matching
                    // "drw-pri-thumb-stt-0" … "drw-pri-thumb-stt-7" marker, in document order.
                    string   SitePrice    = "";
                    string[] arrItemPrice = Regex.Split(iProductPricehtml, "</span>");
                    foreach (string iPrice in arrItemPrice)
                    {
                        if (!iPrice.Contains("drw-pri-thumb-view"))
                        {
                            continue;
                        }

                        int digitIndex = iPrice.IndexOf(getPrice);
                        if (digitIndex < 0 || digitIndex + getPrice.Length >= iPrice.Length)
                        {
                            // No character follows the marker; nothing to append.
                            continue;
                        }

                        string digit = iPrice.Substring(digitIndex + getPrice.Length, 1);
                        for (int slot = 0; slot <= 7; slot++)
                        {
                            if (iPrice.IndexOf("drw-pri-thumb-stt-" + slot.ToString()) > 0)
                            {
                                SitePrice += digit;
                            }
                        }
                    }

                    if (string.IsNullOrEmpty(SitePrice))
                    {
                        continue;
                    }

                    // Seeing the first reported code again ends the whole crawl.
                    // Presumably the site repeats its first page past the last one — confirm.
                    if (!string.IsNullOrEmpty(ItemSiteCode) && ItemSiteCode == fisrtItemCode)
                    {
                        stopCrawl = true;
                        break;
                    }

                    CrawInfoCls CrawInfo = new CrawInfoCls();
                    CrawInfo.ItemSiteCode  = ItemSiteCode;
                    CrawInfo.SiteCode      = this.SiteCode;
                    CrawInfo.ItemBrand     = ItemBrand;
                    CrawInfo.SitePrice     = Convert.ToDouble(SitePrice) * 1000;
                    CrawInfo.ItemSiteName  = ItemSiteName;
                    CrawInfo.SiteItemGroup = this.SiteItemGroup;
                    // Record the page the item was read from, not the page that is loaded next.
                    CrawInfo.UrlCheck      = pageUrl;

                    // Remember the first reported item code for the repeat check above.
                    if (string.IsNullOrEmpty(fisrtItemCode))
                    {
                        fisrtItemCode = ItemSiteCode;
                    }

                    RaiseCrawInfo(CrawInfo);
                }

                // Do not fetch another page once the crawl has been stopped.
                if (stopCrawl)
                {
                    break;
                }

                // Load the next page for the following iteration.
                document  = LoadPage(baseUrl);
                listNodes = document.DocumentNode.SelectNodes(listXPath);
            }

            GlobalEnv.Waiting = false;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Crawls a paged product listing addressed with <c>?&amp;pageIndex=N</c>. Starting at page 1,
        /// every <c>product-info</c> entry with a price is reported through <c>RaiseCrawInfo</c>;
        /// paging continues until a page contains no product nodes.
        /// </summary>
        /// <remarks>
        /// Crawling stops early when <c>GlobalEnv.StopRuning</c> is set or when an item code equal
        /// to the first reported item code shows up again.
        /// </remarks>
        /// <param name="url">Listing URL without the page query string.</param>
        public void LoadCrawSite(string url)
        {
            const string productXPath     = "//div[@class='row category-child']/div[@class='col-md-3 col-sm-4 col-xs-6 product']";
            const string productInfoXPath = "//div[@class='col-md-3 col-sm-4 col-xs-6 product']/div[@class='product-info']";
            const string pageFormat       = "?&pageIndex={0}";

            int    CurrentPage = 1;
            string baseUrl     = url + string.Format(pageFormat, CurrentPage.ToString());

            HtmlAgilityPack.HtmlDocument document = LoadPage(baseUrl);
            var    listNodes     = document.DocumentNode.SelectNodes(productXPath);
            string fisrtItemCode = string.Empty;
            bool   stopCrawl     = false;

            while (listNodes != null && !stopCrawl)
            {
                RaiseLog("Bắt đầu quét trang: " + baseUrl);

                // URL of the page currently held in 'document'; baseUrl moves on to the next page below.
                string pageUrl = baseUrl;
                CurrentPage += 1;
                baseUrl      = url + string.Format(pageFormat, CurrentPage.ToString());

                var itemNodes = document.DocumentNode.SelectNodes(productInfoXPath);
                if (itemNodes == null)
                {
                    // Product containers without any product-info block: nothing to read, stop paging
                    // (the original enumerated the null collection and threw).
                    break;
                }

                foreach (var iNode in itemNodes)
                {
                    if (GlobalEnv.StopRuning)
                    {
                        stopCrawl = true;
                        break;
                    }

                    var    nameNode     = iNode.SelectSingleNode("h6/a");
                    string ItemSiteName = nameNode != null ? nameNode.InnerText.Trim() : string.Empty;

                    // Brand = alt text of the entry's image; empty when the image or attribute is missing.
                    string ItemBrand = string.Empty;
                    var    brandNode = iNode.SelectSingleNode("div/img[@alt]");
                    if (brandNode != null)
                    {
                        var altAttribute = brandNode.Attributes["alt"];
                        if (altAttribute != null && altAttribute.Value != null)
                        {
                            ItemBrand = altAttribute.Value.Trim();
                        }
                    }

                    // Item code = name from the last occurrence of the brand onwards; otherwise the
                    // part from the last space onwards, or the whole name when it has no space
                    // (the original threw in that last case).
                    string ItemSiteCode;
                    int    brandIndex = ItemSiteName.ToLower().LastIndexOf(ItemBrand.ToLower());
                    if (brandIndex > 0)
                    {
                        ItemSiteCode = ItemSiteName.Substring(brandIndex).Trim();
                    }
                    else
                    {
                        int lastSpace = ItemSiteName.LastIndexOf(" ");
                        ItemSiteCode = lastSpace >= 0 ? ItemSiteName.Substring(lastSpace) : ItemSiteName;
                    }

                    var    priceNode = iNode.SelectSingleNode("div[@class='priceInfo']/span[@class='price']");
                    string SitePrice = priceNode != null ? priceNode.InnerText.Trim() : string.Empty;

                    SitePrice = SitePrice.Replace("₫", string.Empty);
                    SitePrice = SitePrice.Replace(".", string.Empty);

                    if (string.IsNullOrEmpty(SitePrice))
                    {
                        continue;
                    }

                    // Seeing the first reported code again ends the whole crawl.
                    // Presumably the site repeats its first page past the last one — confirm.
                    if (!string.IsNullOrEmpty(ItemSiteCode) && ItemSiteCode == fisrtItemCode)
                    {
                        stopCrawl = true;
                        break;
                    }

                    CrawInfoCls CrawInfo = new CrawInfoCls();
                    CrawInfo.SiteItemGroup = this.SiteItemGroup;
                    CrawInfo.ItemSiteCode  = ItemSiteCode;
                    CrawInfo.SiteCode      = this.SiteCode;
                    CrawInfo.ItemBrand     = ItemBrand;
                    CrawInfo.SitePrice     = Convert.ToDouble(SitePrice);
                    CrawInfo.ItemSiteName  = ItemSiteName;
                    // Record the page the item was read from, not the page that is loaded next.
                    CrawInfo.UrlCheck      = pageUrl;

                    // Remember the first reported item code for the repeat check above.
                    if (string.IsNullOrEmpty(fisrtItemCode))
                    {
                        fisrtItemCode = ItemSiteCode;
                    }

                    RaiseCrawInfo(CrawInfo);
                }

                // Do not fetch another page once the crawl has been stopped.
                if (stopCrawl)
                {
                    break;
                }

                // Load the next page for the following iteration.
                document  = LoadPage(baseUrl);
                listNodes = document.DocumentNode.SelectNodes(productXPath);
            }

            GlobalEnv.Waiting = false;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Loads <paramref name="url"/>, takes the text of the <c>//html/body/div/div/script</c>
        /// node from the word "products" up to the last "]", splits it on '}' and tries to parse
        /// each piece as a JSON object. Every parsed object that carries the expected properties
        /// is reported through <c>RaiseCrawInfo</c>.
        /// </summary>
        /// <param name="url">Page URL; loaded as is and stored as <c>UrlCheck</c>.</param>
        public void LoadCrawSite(string url)
        {
            const string priceMarker = "class=price>";
            string       baseUrl     = url;

            HtmlAgilityPack.HtmlDocument document = LoadPage(baseUrl);
            var scriptNode = document.DocumentNode.SelectSingleNode("//html/body/div/div/script");
            if (scriptNode == null)
            {
                // NOTE(review): this path returns without resetting GlobalEnv.Waiting
                // (unchanged from the original) — confirm that is intended.
                return;
            }

            string htmlScript    = scriptNode.InnerText.Trim();
            int    productsStart = htmlScript.IndexOf("products");
            int    productsEnd   = htmlScript.LastIndexOf("]");

            // Without both markers there is nothing to slice; the original threw from Substring here.
            if (productsStart >= 0 && productsEnd >= productsStart)
            {
                string jsScrip = htmlScript.Substring(productsStart, productsEnd + 1 - productsStart);

                foreach (var iStr in jsScrip.Split('}'))
                {
                    if (GlobalEnv.StopRuning)
                    {
                        break;
                    }

                    // Keep what follows the first '{' (the whole piece when it has none).
                    string  iCheck  = iStr.Substring(iStr.IndexOf('{') + 1).Trim();
                    JObject jObject = null;
                    try
                    {
                        jObject = JObject.Parse("{" + iCheck + "}");
                    }
                    catch
                    {
                        // Pieces that are not valid JSON objects are skipped on purpose.
                        continue;
                    }

                    var nameToken  = jObject["productname"];
                    var priceToken = jObject["price"];
                    var brandToken = jObject["brand"];
                    var linkToken  = jObject["link"];
                    var idToken    = jObject["id"];

                    // An object lacking any of these properties (e.g. an empty "{}" piece) is not
                    // a product entry; the original dereferenced the missing value and crashed.
                    if (nameToken == null || priceToken == null || brandToken == null || linkToken == null || idToken == null)
                    {
                        continue;
                    }

                    string ItemSiteName = nameToken.ToString();
                    string ItemBrand    = brandToken.ToString();
                    string link         = linkToken.ToString();
                    string id           = idToken.ToString();

                    // Price = text between "class=price>" and the following "</span>", with ',' and
                    // '.' removed. Anything that does not match this shape is reported as "0".
                    string SitePrice   = priceToken.ToString();
                    int    markerIndex = SitePrice.IndexOf(priceMarker);
                    if (markerIndex > 0)
                    {
                        SitePrice = SitePrice.Substring(markerIndex + priceMarker.Length);
                        int spanEnd = SitePrice.IndexOf("</span>");
                        if (spanEnd >= 0)
                        {
                            SitePrice = SitePrice.Substring(0, spanEnd);
                            SitePrice = SitePrice.Replace(",", string.Empty);
                            SitePrice = SitePrice.Replace(".", string.Empty);
                            SitePrice = SitePrice.Trim();
                        }
                        else
                        {
                            SitePrice = string.Empty;
                        }

                        if (string.IsNullOrEmpty(SitePrice))
                        {
                            SitePrice = "0";
                        }
                    }
                    else
                    {
                        SitePrice = "0";
                    }

                    // Item code = the part of the link between the brand and the id, dashes removed.
                    // Left empty unless both are found after position 0 and the id follows the brand.
                    string ItemSiteCode = "";
                    string lowerLink    = link.ToLower();
                    int    brandIndex   = lowerLink.IndexOf(ItemBrand.ToLower());
                    int    idIndex      = lowerLink.IndexOf(id.ToLower());
                    int    codeStart    = brandIndex + ItemBrand.Length;

                    if (brandIndex > 0 && idIndex > 0 && idIndex >= codeStart)
                    {
                        ItemSiteCode = link.Substring(codeStart, idIndex - codeStart).Replace("-", string.Empty);
                    }

                    CrawInfoCls CrawInfo = new CrawInfoCls();
                    CrawInfo.SiteItemGroup = this.SiteItemGroup;
                    CrawInfo.ItemSiteCode  = ItemSiteCode;
                    CrawInfo.SiteCode      = this.SiteCode;
                    CrawInfo.ItemBrand     = ItemBrand;
                    CrawInfo.SitePrice     = Convert.ToDouble(SitePrice);
                    CrawInfo.ItemSiteName  = ItemSiteName;
                    CrawInfo.UrlCheck      = baseUrl;

                    RaiseCrawInfo(CrawInfo);
                }
            }

            GlobalEnv.Waiting = false;
        }