/// <summary>
/// Stores one crawled item: removes every existing <c>CRItemPrices</c> row
/// with the same site code and item code, inserts a fresh row built from
/// <paramref name="CrawInfo"/>, then writes a summary line via <c>cr_LogEvent</c>.
/// </summary>
/// <param name="CrawInfo">The crawled item to persist.</param>
void CrawSite_CrawInfoEvent(CrawInfoCls CrawInfo)
{
    // The data context is created per call, so it must also be released per call.
    // Assumes dbModelDataContext derives from System.Data.Linq.DataContext
    // (it exposes SubmitChanges / InsertOnSubmit), which is IDisposable.
    using (dbModelDataContext db = new dbModelDataContext())
    {
        var iCheck = (from i in db.CRItemPrices
                      where i.SiteCode == CrawInfo.SiteCode && i.ItemSiteCode == CrawInfo.ItemSiteCode
                      select i).ToList();
        if (iCheck.Count > 0)
        {
            // The delete is submitted on its own, before the insert below.
            // NOTE(review): the two submits are not wrapped in one transaction here.
            db.CRItemPrices.DeleteAllOnSubmit(iCheck);
            db.SubmitChanges();
        }

        CRItemPrice iNew = new CRItemPrice();
        iNew.SiteCode = CrawInfo.SiteCode;
        iNew.ItemSiteCode = CrawInfo.ItemSiteCode;
        iNew.ItemBrand = CrawInfo.ItemBrand;
        iNew.ItemSiteName = CrawInfo.ItemSiteName;
        iNew.SiteItemGroup = CrawInfo.SiteItemGroup;
        iNew.SitePrice = CrawInfo.SitePrice;
        iNew.UrlCheck = CrawInfo.UrlCheck;
        iNew.CreateDate = DateTime.Now;
        iNew.CreateBy = "SoftWare";
        db.CRItemPrices.InsertOnSubmit(iNew);
        db.SubmitChanges();
    }

    cr_LogEvent(string.Format("Mã hàng: {0}, giá: {1}, Thương hiệu: {2}", CrawInfo.ItemSiteCode, CrawInfo.SitePrice, CrawInfo.ItemBrand));
}
/// <summary>
/// Invokes <c>CrawInfoEvent</c> with the given item; does nothing when
/// <c>CrawInfoEvent</c> is null.
/// </summary>
/// <param name="CrawInfo">The crawled item passed to the handler(s).</param>
public void RaiseCrawInfo(CrawInfoCls CrawInfo)
{
    if (CrawInfoEvent == null)
    {
        return;
    }

    CrawInfoEvent(CrawInfo);
}
/// <summary>
/// Loads <c>url + "?page=1"</c>, reads every <c>li</c> under the
/// <c>ul[@class='cate ']</c> list and raises one crawl event per product.
/// Clears <c>GlobalEnv.Waiting</c> when done.
/// </summary>
/// <param name="url">Listing URL without the paging query string.</param>
public void LoadCrawSite(string url)
{
    int CurrentPage = 1;
    string baseUrl = url + string.Format("?page={0}", CurrentPage.ToString());
    HtmlAgilityPack.HtmlDocument document = LoadPage(baseUrl);
    var listNodes = document.DocumentNode.SelectNodes("//ul[@class='cate ']/li");
    if (listNodes == null)
    {
        // Clear the flag on this exit path too, exactly as the normal path does,
        // so an empty listing does not leave it set.
        GlobalEnv.Waiting = false;
        return;
    }

    foreach (var ulItem in listNodes)
    {
        if (GlobalEnv.StopRuning)
        {
            break;
        }

        // Each lookup is done once and reused (the node's own XPath is the prefix).
        string itemPath = ulItem.XPath;
        var nameNode = ulItem.SelectSingleNode(itemPath + "//h3");
        var priceNode = ulItem.SelectSingleNode(itemPath + "/strong");
        var linkNode = ulItem.SelectSingleNode(itemPath + "/div/a");

        string ItemSiteName = nameNode != null ? nameNode.InnerText : string.Empty;
        string SitePrice = priceNode != null ? priceNode.InnerText : string.Empty;
        // GetAttributeValue returns the default instead of failing when href is absent.
        string linkItemCode = linkNode != null ? linkNode.GetAttributeValue("href", string.Empty) : string.Empty;

        // The item code is the last path segment of the product link.
        string ItemSiteCode = linkItemCode.Substring(linkItemCode.LastIndexOf('/') + 1);
        if (string.IsNullOrEmpty(ItemSiteCode))
        {
            // No link / no code: the item cannot be identified, so skip it.
            continue;
        }

        // The brand is the part of the code before the first '-'; empty when there is no dash.
        int dashIndex = ItemSiteCode.IndexOf('-');
        string ItemBrand = dashIndex >= 0 ? ItemSiteCode.Substring(0, dashIndex) : string.Empty;

        SitePrice = SitePrice.Replace("₫", string.Empty);
        SitePrice = SitePrice.Replace(".", string.Empty);
        if (string.IsNullOrEmpty(SitePrice.Trim()))
        {
            // Items without a price are skipped instead of failing the number conversion.
            continue;
        }

        CrawInfoCls CrawInfo = new CrawInfoCls();
        CrawInfo.SiteItemGroup = this.SiteItemGroup;
        CrawInfo.ItemSiteCode = ItemSiteCode;
        CrawInfo.SiteCode = this.SiteCode;
        CrawInfo.ItemBrand = ItemBrand;
        CrawInfo.SitePrice = Convert.ToDouble(SitePrice);
        CrawInfo.ItemSiteName = ItemSiteName;
        CrawInfo.UrlCheck = baseUrl;
        RaiseCrawInfo(CrawInfo);
    }

    GlobalEnv.Waiting = false;
}
/// <summary>
/// Crawls a paged product listing addressed as <c>url + "?p=N"</c>.
/// The highest page number is read from the pager and refreshed after every
/// page load; every product on each page is reported through
/// <c>RaiseCrawInfo</c>. Crawling stops when the page limit is reached, when
/// the first reported item code shows up again, or when
/// <c>GlobalEnv.StopRuning</c> is set. Clears <c>GlobalEnv.Waiting</c> when done.
/// </summary>
/// <param name="url">Listing URL without the paging query string.</param>
public void LoadCrawSite(string url)
{
    const string listXPath = "//div[@class='category-products']/ol[@class='products-list']";
    const string itemXPath = "//div[@class='category-products']/ol[@class='products-list']/li[@class='item']";
    const string pagerXPath = "//div[@class='category-products']/div[@class='toolbar']/div[@class='pager']/div[@class='pages']/ol/li";
    const string nameXPath = "div[@class='product-shop']/div/div/h2[@class='product-name']";
    const string specialPriceXPath = "div[@class='product-shop']/div/div/div[@class='price-box']/p[@class='special-price']/span[@class='price']";
    const string regularPriceXPath = "div[@class='product-shop']/div/div/div[@class='price-box']/span[@class='regular-price']/span[@class='price']";

    int CurrentPage = 1;
    string baseUrl = url + string.Format("?p={0}", CurrentPage.ToString());
    HtmlAgilityPack.HtmlDocument document = LoadPage(baseUrl);

    var pagerNodes = document.DocumentNode.SelectNodes(pagerXPath);
    if (pagerNodes == null)
    {
        // NOTE(review): a listing without a pager is not crawled at all (original
        // behaviour, kept) - confirm single-page listings cannot occur.
        // Clear the flag on this exit path too, exactly as the normal path does.
        GlobalEnv.Waiting = false;
        return;
    }

    // Highest numeric pager entry seen so far; non-numeric entries are ignored.
    int MaxPage = 0;
    foreach (var pageNode in pagerNodes)
    {
        int checkPage;
        if (int.TryParse(pageNode.InnerText, out checkPage) && checkPage > MaxPage)
        {
            MaxPage = checkPage;
        }
    }

    string fisrtItemCode = string.Empty;
    bool breakLoop = false;
    // The bound MaxPage + 2 is kept from the original: one page past the last
    // pager number is still scanned.
    while (CurrentPage < MaxPage + 2 && !breakLoop && !GlobalEnv.StopRuning)
    {
        RaiseLog("Bắt đầu quét trang: " + baseUrl);

        // URL of the document scanned in this iteration; baseUrl moves on to the next page.
        string pageUrl = baseUrl;
        CurrentPage += 1;
        baseUrl = url + string.Format("?p={0}", CurrentPage.ToString());

        var itemNodes = document.DocumentNode.SelectNodes(itemXPath);
        if (itemNodes != null)
        {
            foreach (var iNode in itemNodes)
            {
                if (GlobalEnv.StopRuning)
                {
                    break;
                }

                var nameNode = iNode.SelectSingleNode(nameXPath);
                string ItemSiteName = nameNode != null ? nameNode.InnerText.Trim() : string.Empty;
                if (string.IsNullOrEmpty(ItemSiteName))
                {
                    // Without a name neither brand nor code can be derived.
                    continue;
                }

                // Brand = second-to-last word of the product name.
                string[] nameWords = ItemSiteName.Split(' ');
                string ItemBrand = nameWords.Length > 1 ? nameWords[nameWords.Length - 2] : string.Empty;

                // Code = the name from the last occurrence of the brand onwards; when the
                // brand is empty or sits at position 0, fall back to the text from the last
                // space on (untrimmed, as before, so already-stored codes keep matching).
                int brandIndex = ItemBrand.Length > 0
                    ? ItemSiteName.ToLower().LastIndexOf(ItemBrand.ToLower())
                    : -1;
                string ItemSiteCode;
                if (brandIndex > 0)
                {
                    ItemSiteCode = ItemSiteName.Substring(brandIndex).Trim();
                }
                else
                {
                    int lastSpace = ItemSiteName.LastIndexOf(" ");
                    ItemSiteCode = lastSpace >= 0 ? ItemSiteName.Substring(lastSpace) : ItemSiteName;
                }

                // Prefer the special price; fall back to the regular price.
                var priceNode = iNode.SelectSingleNode(specialPriceXPath);
                string SitePrice = priceNode != null ? priceNode.InnerText.Trim() : string.Empty;
                if (string.IsNullOrEmpty(SitePrice))
                {
                    priceNode = iNode.SelectSingleNode(regularPriceXPath);
                    SitePrice = priceNode != null ? priceNode.InnerText.Trim() : string.Empty;
                }

                SitePrice = SitePrice.Replace("₫", string.Empty);
                SitePrice = SitePrice.Replace(".", string.Empty);
                SitePrice = SitePrice.Trim();
                if (string.IsNullOrEmpty(SitePrice))
                {
                    continue;
                }

                // Seeing the first reported code again means the listing has wrapped around.
                if (!string.IsNullOrEmpty(ItemSiteCode) && ItemSiteCode == fisrtItemCode)
                {
                    breakLoop = true;
                    break;
                }

                CrawInfoCls CrawInfo = new CrawInfoCls();
                CrawInfo.SiteItemGroup = this.SiteItemGroup;
                CrawInfo.ItemSiteCode = ItemSiteCode;
                CrawInfo.SiteCode = this.SiteCode;
                CrawInfo.ItemBrand = ItemBrand;
                CrawInfo.SitePrice = Convert.ToDouble(SitePrice);
                CrawInfo.ItemSiteName = ItemSiteName;
                // Record the page the item was actually found on.
                CrawInfo.UrlCheck = pageUrl;

                // Remember the first item code so a repeat can be detected.
                if (string.IsNullOrEmpty(fisrtItemCode))
                {
                    fisrtItemCode = ItemSiteCode;
                }

                RaiseCrawInfo(CrawInfo);
            }
        }

        if (breakLoop || GlobalEnv.StopRuning)
        {
            // Finished or asked to stop: do not fetch another page.
            break;
        }

        // Load the next page and refresh the page limit from its pager.
        document = LoadPage(baseUrl);
        if (document.DocumentNode.SelectNodes(listXPath) == null)
        {
            continue;
        }

        // Iterate the pager of the page just loaded; it may have fewer entries
        // than the first page's pager.
        var nextPagerNodes = document.DocumentNode.SelectNodes(pagerXPath);
        if (nextPagerNodes == null)
        {
            continue;
        }

        foreach (var pageNode in nextPagerNodes)
        {
            int checkPage;
            if (int.TryParse(pageNode.InnerText, out checkPage) && checkPage > MaxPage)
            {
                MaxPage = checkPage;
            }
        }
    }

    GlobalEnv.Waiting = false;
}
/// <summary>
/// Crawls a paged product listing addressed as <c>url + "/?trang=N"</c>.
/// Pages are loaded one after another until a page has no
/// <c>div[@class='pca-pl-l']</c> container or no items, the first reported
/// item code shows up again, or <c>GlobalEnv.StopRuning</c> is set. The price
/// is rebuilt from single characters embedded in the class names of the
/// price markup. Clears <c>GlobalEnv.Waiting</c> when done.
/// </summary>
/// <param name="url">Listing URL, with or without a trailing slash.</param>
public void LoadCrawSite(string url)
{
    const string listXPath = "//div[@class='pca-pl-l']";
    const string itemXPath = "//div[@class='pca-pl-l']/ul[@class='pl-item-ul']/li";
    const string priceXPath = "div/div[@class='pl-item-price']/div[@class='pl-item-pbuy']/div[@class='draw-price']/div[@class='draw-price-content']";

    // One rule for every page: add the separating slash only when url lacks it.
    // (The first page previously used a different, off-by-one test.)
    string pageFormat = url.EndsWith("/") ? "?trang={0}" : "/?trang={0}";

    int CurrentPage = 1;
    string baseUrl = url + string.Format(pageFormat, CurrentPage.ToString());
    HtmlAgilityPack.HtmlDocument document = LoadPage(baseUrl);
    var listNodes = document.DocumentNode.SelectNodes(listXPath);
    string fisrtItemCode = string.Empty;
    bool breakLoop = false;

    while (listNodes != null && !breakLoop && !GlobalEnv.StopRuning)
    {
        // URL of the document scanned in this iteration; baseUrl moves on to the next page.
        string pageUrl = baseUrl;
        RaiseLog("Bắt đầu quét trang: " + pageUrl);
        CurrentPage += 1;
        baseUrl = url + string.Format(pageFormat, CurrentPage.ToString());

        var itemNodes = document.DocumentNode.SelectNodes(itemXPath);
        if (itemNodes == null)
        {
            break;
        }

        foreach (var iNode in itemNodes)
        {
            if (GlobalEnv.StopRuning)
            {
                break;
            }

            var nameNode = iNode.SelectSingleNode("div/p[@class='pl-item-name']");
            string ItemSiteName = nameNode != null ? nameNode.InnerText.Trim() : string.Empty;

            // The brand text is the first child of the brand paragraph; both may be absent.
            var brandNode = iNode.SelectSingleNode("div/p[@class='pl-item-brand']");
            string ItemBrand = brandNode != null && brandNode.FirstChild != null
                ? brandNode.FirstChild.InnerText.Trim()
                : string.Empty;

            // Code = whatever follows the last occurrence of the brand in the name.
            // When the brand is empty or does not occur in the name, fall back to
            // the last word of the name.
            int brandIndex = ItemBrand.Length > 0
                ? ItemSiteName.ToLower().LastIndexOf(ItemBrand.ToLower())
                : -1;
            string ItemSiteCode;
            if (brandIndex >= 0)
            {
                ItemSiteCode = ItemSiteName.Substring(brandIndex + ItemBrand.Length).Trim();
            }
            else
            {
                int lastSpace = ItemSiteName.LastIndexOf(" ");
                ItemSiteCode = lastSpace >= 0 ? ItemSiteName.Substring(lastSpace).Trim() : ItemSiteName;
            }

            var priceNode = iNode.SelectSingleNode(priceXPath);
            string iProductPricehtml = priceNode != null ? priceNode.InnerHtml : string.Empty;

            // Each "</span>"-terminated fragment that mentions "drw-pri-thumb-view"
            // contributes the single character following "drw-pri-thumb-view-",
            // once for every "drw-pri-thumb-stt-0" .. "-7" marker it contains.
            // Characters are appended in document order.
            string SitePrice = "";
            string getPrice = "drw-pri-thumb-view-";
            foreach (string iPrice in Regex.Split(iProductPricehtml, "</span>"))
            {
                if (!iPrice.Contains("drw-pri-thumb-view"))
                {
                    continue;
                }

                int markerIndex = iPrice.IndexOf(getPrice);
                int digitIndex = markerIndex + getPrice.Length;
                if (markerIndex < 0 || digitIndex >= iPrice.Length)
                {
                    // No character after the marker: nothing to read from this fragment.
                    continue;
                }

                string digit = iPrice.Substring(digitIndex, 1);
                for (int position = 0; position <= 7; position++)
                {
                    if (iPrice.IndexOf("drw-pri-thumb-stt-" + position.ToString()) > 0)
                    {
                        SitePrice += digit;
                    }
                }
            }

            if (string.IsNullOrEmpty(SitePrice))
            {
                continue;
            }

            // Seeing the first reported code again means the listing has wrapped around.
            if (!string.IsNullOrEmpty(ItemSiteCode) && ItemSiteCode == fisrtItemCode)
            {
                breakLoop = true;
                break;
            }

            CrawInfoCls CrawInfo = new CrawInfoCls();
            CrawInfo.ItemSiteCode = ItemSiteCode;
            CrawInfo.SiteCode = this.SiteCode;
            CrawInfo.ItemBrand = ItemBrand;
            // The decoded number is multiplied by 1000 before it is stored.
            CrawInfo.SitePrice = Convert.ToDouble(SitePrice) * 1000;
            CrawInfo.ItemSiteName = ItemSiteName;
            CrawInfo.SiteItemGroup = this.SiteItemGroup;
            // Record the page the item was actually found on.
            CrawInfo.UrlCheck = pageUrl;

            // Remember the first item code so a repeat can be detected.
            if (string.IsNullOrEmpty(fisrtItemCode))
            {
                fisrtItemCode = ItemSiteCode;
            }

            RaiseCrawInfo(CrawInfo);
        }

        if (breakLoop || GlobalEnv.StopRuning)
        {
            // Finished or asked to stop: do not fetch another page.
            break;
        }

        // Load the next page; the loop ends when it has no product container.
        document = LoadPage(baseUrl);
        listNodes = document.DocumentNode.SelectNodes(listXPath);
    }

    GlobalEnv.Waiting = false;
}
/// <summary>
/// Crawls a paged product listing addressed as <c>url + "?&amp;pageIndex=N"</c>.
/// Pages are loaded one after another until a page has no product tiles,
/// the first reported item code shows up again, or
/// <c>GlobalEnv.StopRuning</c> is set. Clears <c>GlobalEnv.Waiting</c> when done.
/// </summary>
/// <param name="url">Listing URL without the paging query string.</param>
public void LoadCrawSite(string url)
{
    const string listXPath = "//div[@class='row category-child']/div[@class='col-md-3 col-sm-4 col-xs-6 product']";
    const string itemXPath = "//div[@class='col-md-3 col-sm-4 col-xs-6 product']/div[@class='product-info']";

    int CurrentPage = 1;
    string baseUrl = url + string.Format("?&pageIndex={0}", CurrentPage.ToString());
    HtmlAgilityPack.HtmlDocument document = LoadPage(baseUrl);
    var listNodes = document.DocumentNode.SelectNodes(listXPath);
    string fisrtItemCode = string.Empty;
    bool breakLoop = false;

    while (listNodes != null && !breakLoop && !GlobalEnv.StopRuning)
    {
        RaiseLog("Bắt đầu quét trang: " + baseUrl);

        // URL of the document scanned in this iteration; baseUrl moves on to the next page.
        string pageUrl = baseUrl;
        CurrentPage += 1;
        baseUrl = url + string.Format("?&pageIndex={0}", CurrentPage.ToString());

        var itemNodes = document.DocumentNode.SelectNodes(itemXPath);
        if (itemNodes == null)
        {
            // Tiles without a product-info block: nothing to read, stop paging.
            break;
        }

        foreach (var iNode in itemNodes)
        {
            if (GlobalEnv.StopRuning)
            {
                break;
            }

            var nameNode = iNode.SelectSingleNode("h6/a");
            string ItemSiteName = nameNode != null ? nameNode.InnerText.Trim() : string.Empty;
            if (string.IsNullOrEmpty(ItemSiteName))
            {
                // Without a name no item code can be derived.
                continue;
            }

            // The brand is taken from the alt text of the image; the image may be absent.
            var brandImage = iNode.SelectSingleNode("div/img[@alt]");
            string brandText = brandImage != null ? brandImage.Attributes["alt"].Value : null;
            string ItemBrand = brandText != null ? brandText.Trim() : string.Empty;

            // Code = the name from the last occurrence of the brand onwards; when the
            // brand is empty or sits at position 0, fall back to the text from the last
            // space on (untrimmed, as before, so already-stored codes keep matching).
            int brandIndex = ItemBrand.Length > 0
                ? ItemSiteName.ToLower().LastIndexOf(ItemBrand.ToLower())
                : -1;
            string ItemSiteCode;
            if (brandIndex > 0)
            {
                ItemSiteCode = ItemSiteName.Substring(brandIndex).Trim();
            }
            else
            {
                int lastSpace = ItemSiteName.LastIndexOf(" ");
                ItemSiteCode = lastSpace >= 0 ? ItemSiteName.Substring(lastSpace) : ItemSiteName;
            }

            var priceNode = iNode.SelectSingleNode("div[@class='priceInfo']/span[@class='price']");
            string SitePrice = priceNode != null ? priceNode.InnerText.Trim() : string.Empty;
            SitePrice = SitePrice.Replace("₫", string.Empty);
            SitePrice = SitePrice.Replace(".", string.Empty);
            // Trim again: removing the currency sign can leave only whitespace behind.
            SitePrice = SitePrice.Trim();
            if (string.IsNullOrEmpty(SitePrice))
            {
                continue;
            }

            // Seeing the first reported code again means the listing has wrapped around.
            if (!string.IsNullOrEmpty(ItemSiteCode) && ItemSiteCode == fisrtItemCode)
            {
                breakLoop = true;
                break;
            }

            CrawInfoCls CrawInfo = new CrawInfoCls();
            CrawInfo.SiteItemGroup = this.SiteItemGroup;
            CrawInfo.ItemSiteCode = ItemSiteCode;
            CrawInfo.SiteCode = this.SiteCode;
            CrawInfo.ItemBrand = ItemBrand;
            CrawInfo.SitePrice = Convert.ToDouble(SitePrice);
            CrawInfo.ItemSiteName = ItemSiteName;
            // Record the page the item was actually found on.
            CrawInfo.UrlCheck = pageUrl;

            // Remember the first item code so a repeat can be detected.
            if (string.IsNullOrEmpty(fisrtItemCode))
            {
                fisrtItemCode = ItemSiteCode;
            }

            RaiseCrawInfo(CrawInfo);
        }

        if (breakLoop || GlobalEnv.StopRuning)
        {
            // Finished or asked to stop: do not fetch another page.
            break;
        }

        // Load the next page; the loop ends when it has no product tiles.
        document = LoadPage(baseUrl);
        listNodes = document.DocumentNode.SelectNodes(listXPath);
    }

    GlobalEnv.Waiting = false;
}
/// <summary>
/// Crawls a page whose product data is embedded in a script block at
/// <c>//html/body/div/div/script</c>. The text between the first
/// <c>products</c> and the last <c>]</c> is split on <c>}</c>; every fragment
/// that parses as a JSON object with the expected keys is reported through
/// <c>RaiseCrawInfo</c>. Clears <c>GlobalEnv.Waiting</c> when done.
/// </summary>
/// <param name="url">URL of the page to crawl (used as-is).</param>
public void LoadCrawSite(string url)
{
    const string priceMarker = "class=price>";

    string baseUrl = url;
    HtmlAgilityPack.HtmlDocument document = LoadPage(baseUrl);
    var scriptNode = document.DocumentNode.SelectSingleNode("//html/body/div/div/script");
    if (scriptNode == null)
    {
        // Clear the flag on this exit path too, exactly as the normal path does.
        GlobalEnv.Waiting = false;
        return;
    }

    string htmlScript = scriptNode.InnerText.Trim();
    int productsStart = htmlScript.IndexOf("products");
    int productsEnd = htmlScript.LastIndexOf("]");
    if (productsStart < 0 || productsEnd < productsStart)
    {
        // The script holds no product list: nothing to crawl.
        GlobalEnv.Waiting = false;
        return;
    }

    string jsScrip = htmlScript.Substring(productsStart, productsEnd + 1 - productsStart);
    foreach (var iStr in jsScrip.Split('}'))
    {
        if (GlobalEnv.StopRuning)
        {
            break;
        }

        // Keep what follows the fragment's first '{' and re-wrap it as one object.
        string iCheck = iStr.Substring(iStr.IndexOf('{') + 1).Trim();
        JObject jObject = null;
        try
        {
            jObject = JObject.Parse("{" + iCheck + "}");
        }
        catch
        {
            // Best effort: fragments that are not valid JSON are skipped.
            continue;
        }

        // The indexer yields null for an absent key; skip incomplete entries.
        var nameToken = jObject["productname"];
        var priceToken = jObject["price"];
        var brandToken = jObject["brand"];
        var linkToken = jObject["link"];
        var idToken = jObject["id"];
        if (nameToken == null || priceToken == null || brandToken == null || linkToken == null || idToken == null)
        {
            continue;
        }

        string ItemSiteName = nameToken.ToString();
        string ItemBrand = brandToken.ToString();
        string link = linkToken.ToString();
        string id = idToken.ToString();

        // The price field is markup; the number sits between "class=price>" and "</span>".
        // Anything that does not match this shape is reported with price 0.
        string SitePrice = "0";
        string priceHtml = priceToken.ToString();
        int markerIndex = priceHtml.IndexOf(priceMarker);
        if (markerIndex > 0)
        {
            string priceTail = priceHtml.Substring(markerIndex + priceMarker.Length);
            int closeIndex = priceTail.IndexOf("</span>");
            if (closeIndex >= 0)
            {
                string digits = priceTail.Substring(0, closeIndex);
                digits = digits.Replace(",", string.Empty);
                digits = digits.Replace(".", string.Empty);
                digits = digits.Trim();
                if (!string.IsNullOrEmpty(digits))
                {
                    SitePrice = digits;
                }
            }
        }

        // Code = the part of the link between the end of the brand and the start
        // of the id, with dashes removed; left empty when the link lacks that shape.
        string ItemSiteCode = "";
        string lowerLink = link.ToLower();
        int brandIndex = lowerLink.IndexOf(ItemBrand.ToLower());
        int idIndex = lowerLink.IndexOf(id.ToLower());
        int codeStart = brandIndex + ItemBrand.Length;
        if (brandIndex > 0 && idIndex > 0 && idIndex >= codeStart)
        {
            ItemSiteCode = link.Substring(codeStart, idIndex - codeStart).Replace("-", string.Empty);
        }

        CrawInfoCls CrawInfo = new CrawInfoCls();
        CrawInfo.SiteItemGroup = this.SiteItemGroup;
        CrawInfo.ItemSiteCode = ItemSiteCode;
        CrawInfo.SiteCode = this.SiteCode;
        CrawInfo.ItemBrand = ItemBrand;
        CrawInfo.SitePrice = Convert.ToDouble(SitePrice);
        CrawInfo.ItemSiteName = ItemSiteName;
        CrawInfo.UrlCheck = baseUrl;
        RaiseCrawInfo(CrawInfo);
    }

    GlobalEnv.Waiting = false;
}