/// <summary>
/// Downloads the listing page at <paramref name="link"/>, builds a <c>Sneaker</c> from every
/// <c>div.item</c> element, hands it to <c>catalog.AddUniqueSneaker</c>, and then recursively
/// follows the pager links whose class is exactly <c>arrow r</c>.
/// </summary>
/// <param name="catalog">Catalog that receives the parsed sneakers.</param>
/// <param name="link">Absolute URL of the listing page to parse.</param>
public void ParseSneakersFromPage(Catalog catalog, string link)
{
    Console.WriteLine("Парсим " + link);

    // Dispose the client even if the download throws.
    string source;
    using (var webClient = new WebClient())
    {
        source = webClient.DownloadString(new Uri(link));
    }

    var document = new HtmlParser().Parse(source);

    foreach (var item in document.QuerySelectorAll("div.item"))
    {
        var sneaker = new Sneaker();
        sneaker.sku = item.QuerySelector("div.art").InnerHtml;

        // price: a span.ssale inside div.price means the item is discounted and the
        // previous price is read from the <s> element.
        var priceDiv = item.QuerySelector("div.price");
        var salePriceSpan = priceDiv.QuerySelector("span.ssale");
        if (salePriceSpan != null)
        {
            sneaker.price = Int32.Parse(salePriceSpan.InnerHtml.Replace("р.", ""));
            sneaker.oldPrice = Int32.Parse(priceDiv.QuerySelector("s").InnerHtml);
        }
        else
        {
            sneaker.price = Int32.Parse(priceDiv.InnerHtml.Replace("р.", ""));
        }

        // TODO (from the original author): the type still has to be extracted from the
        // title and "nike" removed.
        sneaker.title = item.QuerySelector("span.name").InnerHtml;
        sneaker.ParseTitle();
        sneaker.link = SITEURL + item.QuerySelector("a").GetAttribute("href");
        sneaker.brand = this.brand;

        // sizes: comma-separated list inside div.item_sizes
        string[] sizeTokens = item.QuerySelector("div.item_sizes").InnerHtml.Split(',');
        foreach (var sizeToken in sizeTokens)
        {
            sneaker.sizes.Add(new SneakerSize(sneaker, sizeToken.Trim()));
        }

        catalog.AddUniqueSneaker(sneaker);
    }

    // next page: a page without a pager block simply ends the crawl
    var pager = document.QuerySelector("div.pages");
    if (pager == null)
    {
        return;
    }

    foreach (var arrow in pager.QuerySelectorAll("a.arrow"))
    {
        if (arrow.ClassName == "arrow r")
        {
            string nextPageLink = arrow.GetAttribute("href");
            Thread.Sleep(100); // short pause between page requests
            ParseSneakersFromPage(catalog, SITEURL + nextPageLink);
        }
    }
}
/// <summary>
/// Downloads the listing page at <paramref name="link"/> as UTF-8, builds a <c>Sneaker</c>
/// from every <c>div.ms2_product</c> element and hands it to <c>catalog.AddUniqueSneaker</c>.
/// This overload parses a single page; it does not follow pagination.
/// </summary>
/// <param name="catalog">Catalog that receives the parsed sneakers.</param>
/// <param name="link">Absolute URL of the listing page to parse.</param>
/// <param name="sex">Value copied into <c>sneaker.sex</c> for every item on the page.</param>
/// <param name="category">Value copied into <c>sneaker.category</c> for every item on the page.</param>
public void ParseSneakersFromPage(Catalog catalog, string link, string sex, string category)
{
    Console.WriteLine("Парсим " + link);

    // using guarantees disposal even when DownloadString throws.
    string source;
    using (WebClient webClient = new WebClient())
    {
        webClient.Encoding = Encoding.UTF8;
        source = webClient.DownloadString(new Uri(link));
    }

    var document = new HtmlParser().Parse(source);

    foreach (var item in document.QuerySelectorAll("div.ms2_product"))
    {
        var sneaker = new Sneaker();
        sneaker.sex = sex;
        sneaker.category = category;
        sneaker.link = SITEURL + "/" + item.QuerySelector("a").GetAttribute("href");

        // title: normalise the doubled Jordan prefix after ParseTitle has run
        sneaker.title = item.QuerySelector("span.item-txt").InnerHtml;
        sneaker.ParseTitle();
        sneaker.title = sneaker.title.Replace("Jordan AIR JORDAN", "AIR JORDAN");
        sneaker.title = sneaker.title.Replace("Jordan JORDAN", "AIR JORDAN");

        // price: strip the currency markers and spaces, then parse
        string priceString = item.QuerySelector("div.item-price").InnerHtml
            .Replace("<!--h5>RUB</h5-->", "")
            .Replace("RUB", "")
            .Trim()
            .Replace(" ", "");
        // NOTE(review): Double.Parse uses the current culture here; confirm the site never
        // emits a decimal separator before relying on this on differently-configured hosts.
        sneaker.price = Double.Parse(priceString);

        // sizes: one size per line; "UK" is stripped as well because one SKU uses UK instead of US
        string sizeString = item.QuerySelector("div.size-box").InnerHtml.Replace("US", "");
        sizeString = sizeString.Replace("UK", "");
        string[] lineSeparators = new string[] { "\n" };
        string[] sizeLines = sizeString.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);
        foreach (var sizeLine in sizeLines)
        {
            // RemoveEmptyEntries does not drop whitespace-only lines, so filter them here.
            if (!String.IsNullOrWhiteSpace(sizeLine))
            {
                sneaker.sizes.Add(new SneakerSize(sizeLine.Trim()));
            }
        }
        sneaker.DeleteDuplicateSizes();

        catalog.AddUniqueSneaker(sneaker);
    }
}
/// <summary>
/// Loads the listing page at <paramref name="link"/> through <c>GetHtmlPageCrawlera5Try</c>,
/// builds a <c>Sneaker</c> from every <c>li</c> inside <c>ul.products-list</c>, adds the ones
/// not yet present to <paramref name="catalog"/>, and recursively follows the link titled
/// "Next page". Returns without doing anything when the page could not be loaded.
/// </summary>
/// <param name="catalog">Catalog that receives the parsed sneakers.</param>
/// <param name="brand">Brand assigned to every sneaker; its upper-cased form is prefixed to titles that lack it.</param>
/// <param name="link">URL of the listing page to parse.</param>
public void ParseSneakersFromPage(Catalog catalog, string brand, string link)
{
    _logger.Info("Парсим " + link);

    var document = GetHtmlPageCrawlera5Try(link);
    if (document == null)
    {
        return;
    }

    // Prices are parsed with the invariant culture so the result does not depend on the
    // host's regional settings; a decimal comma is normalised to a dot first.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    //items
    var items = document.QuerySelector("ul.products-list").QuerySelectorAll("li");
    foreach (var item in items)
    {
        var sneaker = new Sneaker();

        //brand
        sneaker.brand = brand;

        //link: the third anchor of the item carries the product URL
        var links = item.QuerySelectorAll("a");
        sneaker.link = links[2].GetAttribute("href");

        //prices
        var priceHTML = item.QuerySelector("div.price");
        var salePriceHTML = item.QuerySelector("div.sale-price");

        if (salePriceHTML != null)
        {
            // The item is on sale: div.sale-price holds the current price,
            // div.price holds the previous one.
            sneaker.price = double.Parse(
                salePriceHTML.QuerySelector("b").InnerHtml.Replace(",", ".").Trim(), invariant);
            sneaker.oldPrice = double.Parse(
                priceHTML.QuerySelector("b").InnerHtml.Replace(",", ".").Trim(), invariant);
        }
        else
        {
            sneaker.price = double.Parse(
                priceHTML.QuerySelector("b").InnerHtml.Replace(",", ".").Trim(), invariant);
        }

        //title
        sneaker.title = item.QuerySelector("span.model").InnerHtml;
        if (!sneaker.title.Contains(brand.ToUpper()))
        {
            sneaker.title = brand.ToUpper() + " " + sneaker.title.ToUpper();
        }
        sneaker.title = WebUtility.HtmlDecode(sneaker.title);
        sneaker.ParseTitle();

        if (!catalog.isExistSneakerInCatalog(sneaker))
        {
            catalog.sneakers.Add(sneaker);
        }
    }

    //next page: a page without the pager list simply ends the crawl
    var pager = document.QuerySelector("ol.left");
    if (pager == null)
    {
        return;
    }

    foreach (var pagerLink in pager.QuerySelectorAll("a"))
    {
        if (pagerLink.GetAttribute("title") == "Next page")
        {
            string nextPageLink = pagerLink.GetAttribute("href");
            ParseSneakersFromPage(catalog, brand, nextPageLink);
        }
    }
}
/// <summary>
/// Downloads the listing page at <paramref name="link"/> as UTF-8 with a browser User-Agent,
/// builds a <c>Sneaker</c> from every <c>li.item</c> that has no <c>p.out-of-stock</c> marker,
/// adds the ones not yet present to <paramref name="catalog"/>, and recursively follows the
/// <c>a.next</c> link when the page has one.
/// </summary>
/// <param name="catalog">Catalog that receives the parsed sneakers.</param>
/// <param name="brand">Brand assigned to every sneaker and, except for "Jordan", prefixed to the title.</param>
/// <param name="link">Absolute URL of the listing page to parse.</param>
public void ParseSneakersFromPage(Catalog catalog, string brand, string link)
{
    Console.WriteLine("Парсим " + link);

    // using guarantees disposal even when DownloadString throws.
    string source;
    using (WebClient webClient = new WebClient())
    {
        webClient.Encoding = Encoding.UTF8;
        webClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36");
        source = webClient.DownloadString(new Uri(link));
    }

    var document = new HtmlParser().Parse(source);

    // Prices are parsed with the invariant culture so the result does not depend on the
    // host's regional settings; a decimal comma is normalised to a dot first.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    //items
    foreach (var item in document.QuerySelectorAll("li.item"))
    {
        // Items flagged as out of stock are not added to the catalog.
        if (item.QuerySelector("p.out-of-stock") != null)
        {
            continue;
        }

        var sneaker = new Sneaker();

        //brand
        sneaker.brand = brand;

        //link
        sneaker.link = item.QuerySelector("a.product-image").GetAttribute("href");

        //prices
        var priceHTML = item.QuerySelector("div.price-box");
        var salePriceHTML = item.QuerySelector("p.special-price");
        string priceString;

        if (salePriceHTML != null)
        {
            // The item is on sale: p.special-price holds the current price,
            // p.old-price holds the previous one.
            priceString = salePriceHTML.QuerySelector("span.price").InnerHtml.Replace("CHF", "");
            sneaker.price = double.Parse(priceString.Replace(",", ".").Trim(), invariant);

            priceString = priceHTML.QuerySelector("p.old-price").QuerySelector("span.price").InnerHtml.Replace("CHF", "");
            sneaker.oldPrice = double.Parse(priceString.Replace(",", ".").Trim(), invariant);
        }
        else
        {
            priceString = priceHTML.QuerySelector("span.price").InnerHtml.Replace("CHF", "");
            sneaker.price = double.Parse(priceString.Replace(",", ".").Trim(), invariant);
        }

        //title
        sneaker.title = String.Empty;
        if (sneaker.brand != "Jordan")
        {
            // Nike and Nike SB titles come without the brand, Jordan titles already include it.
            sneaker.title = sneaker.brand + " ";
        }
        sneaker.title += item.QuerySelector("span.name").InnerHtml;
        sneaker.title = sneaker.title.ToUpper();
        sneaker.ParseTitle();

        //add to catalog
        if (!catalog.isExistSneakerInCatalog(sneaker))
        {
            catalog.sneakers.Add(sneaker);
        }
    }

    //next page
    var nextPage = document.QuerySelector("a.next");
    if (nextPage != null)
    {
        string nextPageLink = nextPage.GetAttribute("href");
        ParseSneakersFromPage(catalog, brand, nextPageLink);
    }
}
/// <summary>
/// Builds a new <c>Catalog</c> from the listings in the <c>_json</c> field. Listings without
/// any sizes are skipped; a listing equal to one already in the catalog (per
/// <c>isExistSneakerInCatalog</c>) is not added again.
/// </summary>
/// <returns>The catalog populated from <c>_json.listings</c>.</returns>
/// <exception cref="Exception">
/// Thrown with the message "wrong category" when a listing that has sizes carries a category
/// other than "men", "women" or "kids".
/// </exception>
private Catalog ParseCatalogFromJson()
{
    Catalog catalog = new Catalog();

    foreach (var item in _json.listings)
    {
        var sneaker = new Sneaker();
        sneaker.brand = item.brand;
        sneaker.link = item.url;
        sneaker.price = item.price;
        sneaker.oldPrice = item.old_price;
        sneaker.sku = item.sku;
        sneaker.images = item.images;

        // Listings without sizes are ignored.
        if (item.sizes == null || item.sizes.Count == 0)
        {
            continue;
        }

        //sizes: only the US value of every size entry is used
        foreach (var sizeitem in item.sizes)
        {
            string sizeUS = sizeitem.us;
            sneaker.sizes.Add(new SneakerSize(sizeUS));
        }

        //title: prefix the upper-cased brand when the title does not mention it
        sneaker.title = item.title;
        if (!sneaker.title.ToUpper().Contains(item.brand.ToUpper()))
        {
            sneaker.title = item.brand.ToUpper() + " " + sneaker.title;
        }
        sneaker.ParseTitle();

        //category
        if (item.category == "men")
        {
            sneaker.category = Settings.CATEGORY_MEN;
        }
        else if (item.category == "women")
        {
            sneaker.category = Settings.CATEGORY_WOMEN;
        }
        else if (item.category == "kids")
        {
            sneaker.category = Settings.CATEGORY_KIDS;
        }
        else
        {
            throw new Exception("wrong category");
        }

        //sex: any value other than "men"/"women"/null leaves sneaker.sex untouched
        if (item.sex == "men")
        {
            sneaker.sex = Settings.GENDER_MAN;
        }
        else if (item.sex == "women")
        {
            sneaker.sex = Settings.GENDER_WOMAN;
        }
        else if (item.sex == null)
        {
            sneaker.sex = null;
        }

        sneaker.color = item.colorbrand;

        //add to catalog
        if (!catalog.isExistSneakerInCatalog(sneaker))
        {
            catalog.sneakers.Add(sneaker);
        }
    }

    return catalog;
}
/// <summary>
/// Downloads the listing page at <paramref name="link"/> as UTF-8, builds a <c>Sneaker</c>
/// from every <c>li</c> inside <c>ul.products-list</c>, adds the ones not yet present to
/// <paramref name="catalog"/>, and recursively follows the link titled "Next page".
/// </summary>
/// <param name="catalog">Catalog that receives the parsed sneakers.</param>
/// <param name="brand">Brand assigned to every sneaker; its upper-cased form is prefixed to titles that lack it.</param>
/// <param name="link">Absolute URL of the listing page to parse.</param>
public void ParseSneakersFromPage(Catalog catalog, string brand, string link)
{
    Console.WriteLine("Парсим " + link);

    // using guarantees disposal even when DownloadString throws.
    string source;
    using (WebClient webClient = new WebClient())
    {
        webClient.Encoding = Encoding.UTF8;
        source = webClient.DownloadString(new Uri(link));
    }

    var document = new HtmlParser().Parse(source);

    // Prices are parsed with the invariant culture so the result does not depend on the
    // host's regional settings; a decimal comma is normalised to a dot first.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    //items
    var items = document.QuerySelector("ul.products-list").QuerySelectorAll("li");
    foreach (var item in items)
    {
        var sneaker = new Sneaker();

        //brand
        sneaker.brand = brand;

        //link: the third anchor of the item carries the product URL
        var links = item.QuerySelectorAll("a");
        sneaker.link = links[2].GetAttribute("href");

        //prices
        var priceHTML = item.QuerySelector("div.price");
        var salePriceHTML = item.QuerySelector("div.sale-price");

        if (salePriceHTML != null)
        {
            // The item is on sale: div.sale-price holds the current price,
            // div.price holds the previous one.
            sneaker.price = double.Parse(
                salePriceHTML.QuerySelector("b").InnerHtml.Replace(",", ".").Trim(), invariant);
            sneaker.oldPrice = double.Parse(
                priceHTML.QuerySelector("b").InnerHtml.Replace(",", ".").Trim(), invariant);
        }
        else
        {
            sneaker.price = double.Parse(
                priceHTML.QuerySelector("b").InnerHtml.Replace(",", ".").Trim(), invariant);
        }

        //title
        sneaker.title = item.QuerySelector("span.model").InnerHtml;
        if (!sneaker.title.Contains(brand.ToUpper()))
        {
            sneaker.title = brand.ToUpper() + " " + sneaker.title;
        }
        sneaker.ParseTitle();

        if (!catalog.isExistSneakerInCatalog(sneaker))
        {
            catalog.sneakers.Add(sneaker);
        }
    }

    //next page: a page without the pager list simply ends the crawl
    var pager = document.QuerySelector("ol.left");
    if (pager == null)
    {
        return;
    }

    foreach (var pagerLink in pager.QuerySelectorAll("a"))
    {
        if (pagerLink.GetAttribute("title") == "Next page")
        {
            string nextPageLink = pagerLink.GetAttribute("href");
            ParseSneakersFromPage(catalog, brand, nextPageLink);
        }
    }
}
/// <summary>
/// Builds a new <c>Catalog</c> from <c>json.listings</c>. For every listing that has sizes it
/// resolves a category (from the listing, then from <c>fullCatalog</c> by SKU, then from the
/// first size entry), converts each size entry to a US size, and adds the sneaker unless
/// <c>isExistSneakerInCatalog</c> reports it as already present. Listings without sizes are
/// skipped; listings whose category cannot be resolved are logged and skipped.
/// </summary>
/// <param name="json">Parsed listings document to convert.</param>
/// <returns>The catalog populated from <paramref name="json"/>.</returns>
/// <exception cref="Exception">
/// Thrown with the message "wrong size" when a size entry has none of us/eu/uk/cm filled in.
/// </exception>
public Catalog ParseCatalogFromJson(RootParsingObject json)
{
    Catalog catalog = new Catalog();

    foreach (var item in json.listings)
    {
        var sneaker = new Sneaker();
        sneaker.brand = item.brand;
        sneaker.link = item.url;
        sneaker.price = item.price;
        sneaker.oldPrice = item.old_price;
        sneaker.sku = item.sku;
        sneaker.images = item.images;
        sneaker.color = item.colorbrand;

        // Listings without sizes are ignored.
        if (item.sizes == null || item.sizes.Count == 0)
        {
            continue;
        }

        //title: prefix the upper-cased brand when the title does not mention it
        sneaker.title = item.title;
        if (!sneaker.title.ToUpper().Contains(item.brand.ToUpper()))
        {
            sneaker.title = item.brand.ToUpper() + " " + sneaker.title;
        }
        sneaker.ParseTitle();

        //category, step 1: taken directly from the listing
        if (item.category == "men")
        {
            sneaker.category = Settings.CATEGORY_MEN;
        }
        else if (item.category == "women")
        {
            sneaker.category = Settings.CATEGORY_WOMEN;
        }
        else if (item.category == "kids")
        {
            sneaker.category = Settings.CATEGORY_KIDS;
        }

        //category, step 2: the listing has none, so look the SKU up in the full catalog
        //and reuse its category when that one is filled in
        if (String.IsNullOrWhiteSpace(item.category))
        {
            var fullCatalogSneaker = fullCatalog.GetSneakerFromSKU(item.sku);
            if (fullCatalogSneaker != null && !String.IsNullOrWhiteSpace(fullCatalogSneaker.category))
            {
                sneaker.category = fullCatalogSneaker.category;
            }
        }

        //category, step 3: still unknown, so try to derive it from the first size entry
        if (String.IsNullOrWhiteSpace(sneaker.category))
        {
            var firstSize = item.sizes[0];
            sneaker.category = SizeConverters.SizeConverter.GetCategory(firstSize.us, firstSize.eu, firstSize.uk, firstSize.cm);
        }

        // Without a category there is no point in continuing with this listing.
        if (String.IsNullOrWhiteSpace(sneaker.category))
        {
            Program.Logger.Warn("wrong category: " + item.sku);
            continue;
        }

        //sizes: prefer the US value; otherwise look up the chart row that matches the
        //eu, uk or cm value (in that order) together with the sneaker's category
        foreach (var sizeitem in item.sizes)
        {
            string sizeUS = String.Empty;

            if (!String.IsNullOrWhiteSpace(sizeitem.us))
            {
                sizeUS = sizeitem.us;
            }
            else if (!String.IsNullOrWhiteSpace(sizeitem.eu))
            {
                var engCategory = Helper.ConvertCategoryRusToEng(sneaker.category);
                var chartSize = sizeConverter.sizeChart.sizes.Find(x => x.eu == sizeitem.eu && x.category == engCategory);
                if (chartSize != null)
                {
                    sizeUS = chartSize.us;
                }
            }
            else if (!String.IsNullOrWhiteSpace(sizeitem.uk))
            {
                var engCategory = Helper.ConvertCategoryRusToEng(sneaker.category);
                var chartSize = sizeConverter.sizeChart.sizes.Find(x => x.uk == sizeitem.uk && x.category == engCategory);
                if (chartSize != null)
                {
                    sizeUS = chartSize.us;
                }
            }
            else if (!String.IsNullOrWhiteSpace(sizeitem.cm))
            {
                var engCategory = Helper.ConvertCategoryRusToEng(sneaker.category);
                var chartSize = sizeConverter.sizeChart.sizes.Find(x => x.cm == sizeitem.cm && x.category == engCategory);
                if (chartSize != null)
                {
                    sizeUS = chartSize.us;
                }
            }
            else
            {
                throw new Exception("wrong size");
            }

            if (!String.IsNullOrWhiteSpace(sizeUS))
            {
                sneaker.sizes.Add(new SneakerSize(sizeUS));
            }
            else
            {
                // No chart row matched: log the entry and carry on with the remaining sizes.
                Program.Logger.Warn("Wrong size or category. SKU:" + item.sku + " category:" + sneaker.category + " Size: us:" + sizeitem.us + " eu:" + sizeitem.eu + " uk:" + sizeitem.uk + " cm:" + sizeitem.cm);
            }
        }

        //sex: an unrecognised value is logged and leaves sneaker.sex untouched
        if (item.sex == "men")
        {
            sneaker.sex = Settings.GENDER_MAN;
        }
        else if (item.sex == "women")
        {
            sneaker.sex = Settings.GENDER_WOMAN;
        }
        else if (item.sex == null)
        {
            sneaker.sex = null;
        }
        else
        {
            Program.Logger.Warn("wrong sex: " + item.sku);
        }

        //add to catalog
        if (!catalog.isExistSneakerInCatalog(sneaker))
        {
            catalog.sneakers.Add(sneaker);
        }
    }

    return catalog;
}
/// <summary>
/// Downloads the listing page at <paramref name="link"/> using <c>SITE_ENCODING</c>, builds a
/// <c>Sneaker</c> (brand fixed to "Nike") from every <c>div.product</c> element, hands it to
/// <c>catalog.AddUniqueSneaker</c>, and recursively follows the pager link whose text is
/// "далее". Sizes are not read from this listing.
/// </summary>
/// <param name="catalog">Catalog that receives the parsed sneakers.</param>
/// <param name="link">Absolute URL of the listing page to parse.</param>
public void ParseSneakersFromPage(Catalog catalog, string link)
{
    Console.WriteLine("Парсим " + link);

    // Dispose the client even if the download throws.
    string source;
    using (WebClient webClient = new WebClient())
    {
        webClient.Encoding = SITE_ENCODING;
        source = webClient.DownloadString(new Uri(link));
    }

    var document = new HtmlParser().Parse(source);

    foreach (var item in document.QuerySelectorAll("div.product"))
    {
        var sneaker = new Sneaker();
        sneaker.link = item.QuerySelector("a").GetAttribute("href");
        sneaker.brand = "Nike";

        //price: the number is the text in front of the first "<em" tag, with spaces removed
        var priceDiv = item.QuerySelector("div.price");
        var oldPriceTag = priceDiv.QuerySelector("del");
        if (oldPriceTag != null)
        {
            // A <del> element carries the previous price of a discounted item.
            string oldPriceStr = oldPriceTag.InnerHtml;
            oldPriceStr = oldPriceStr.Substring(0, oldPriceStr.IndexOf("<em")).Replace(" ", "");
            sneaker.oldPrice = Int32.Parse(oldPriceStr);
        }

        // The current price is read the same way whether or not the item is discounted.
        string priceStr = priceDiv.InnerHtml;
        priceStr = priceStr.Substring(0, priceStr.IndexOf("<em"));
        priceStr = priceStr.Replace(" ", "");
        sneaker.price = Int32.Parse(priceStr);

        //title: taken from the anchor's title attribute with the audience words and the
        //word for "sneakers" stripped
        sneaker.title = item.QuerySelector("a").GetAttribute("title");
        sneaker.title = sneaker.title.Replace("Мужские", "").Replace("Женские", "").Replace("Детские", "").Replace("Подростковые", "").Trim();
        sneaker.title = sneaker.title.Replace("Кроссовки", "").Replace("кроссовки", "").Trim();
        sneaker.ParseTitle();

        catalog.AddUniqueSneaker(sneaker);
    }

    //next page: a page without the pager block simply ends the crawl
    var pager = document.QuerySelector("div.pn");
    if (pager == null)
    {
        return;
    }

    foreach (var pagerLink in pager.QuerySelectorAll("a.textlink"))
    {
        if (pagerLink.InnerHtml == "далее")
        {
            string nextPageLink = pagerLink.GetAttribute("href");
            Thread.Sleep(100); // short pause between page requests
            ParseSneakersFromPage(catalog, nextPageLink);
        }
    }
}