/// <summary>
/// Downloads search-result pages for <paramref name="searchText"/> one page at a time and
/// converts each listing node into an <c>Item</c> until <paramref name="itemsCount"/> items
/// have been collected or the pagination list has no entry for the following page.
/// </summary>
/// <param name="searchText">Search phrase; spaces are replaced with '+' before it is inserted into the URL.</param>
/// <param name="itemsCount">Maximum number of items to collect.</param>
/// <param name="minPrice">Lower price bound; the price filter is appended when either bound is non-zero.</param>
/// <param name="maxPrice">Upper price bound; the price filter is appended when either bound is non-zero.</param>
/// <returns>The collected items, or <c>null</c> when the page document is unavailable.</returns>
/// <remarks>
/// <c>StartPageNum</c> is incremented for every page consumed and is not reset by this method.
/// Any field that cannot be located inside a listing is stored as "\n".
/// </remarks>
public override async Task<List<Item>> StartScraping(string searchText, int itemsCount = 1, int minPrice = 0, int maxPrice = 0)
{
    int collected = 0;
    var searchResults = new List<Item>();

    while (true)
    {
        string url;
        if (minPrice != 0 || maxPrice != 0)
        {
            url = SimpleUrl.Insert(SimpleUrl.IndexOf("&_sacat"), searchText.Replace(' ', '+'));
            url += FilterChanger.Insert(FilterChanger.IndexOf("&_udhi"), minPrice.ToString()) + maxPrice.ToString() + PageChanger + StartPageNum;
        }
        else
        {
            url = SimpleUrl.Insert(SimpleUrl.IndexOf("&_sacat"), searchText.Replace(' ', '+')) + PageChanger + StartPageNum;
            Console.WriteLine($"PageNumber: {StartPageNum}");
        }

        HtmlDocument htmlDoc = await GetHtmlDocument(url);

        // Explicit check instead of catching NullReferenceException for control flow.
        if (htmlDoc == null || htmlDoc.DocumentNode == null)
        {
            Console.WriteLine("Your search did not match any postings.");
            return null;
        }

        List<HtmlNode> resultLists = htmlDoc.DocumentNode.Descendants("ul")
            .Where(node => node.GetAttributeValue("class", "").Contains("srp-results"))
            .ToList();

        var productNodes = new List<HtmlNode>();
        foreach (HtmlNode resultList in resultLists)
        {
            productNodes.AddRange(resultList.Descendants("li")
                .Where(node => node.GetAttributeValue("id", "").Contains("listing")));
        }

        Console.WriteLine($"LENGTH IS: {productNodes.Count}");

        foreach (HtmlNode item in productNodes)
        {
            Console.WriteLine($"Item: {collected}");
            if (collected >= itemsCount)
            {
                return searchResults;
            }

            // Both the title and the price live under the same info container; look it up once.
            HtmlNode infoDiv = item.Descendants("div")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__info clearfix"));

            // Null-conditional access: a listing without the expected structure yields "\n"
            // instead of throwing and aborting the whole scrape.
            HtmlNode nameTag = infoDiv?
                .Descendants("a").FirstOrDefault()?
                .Descendants("h3").FirstOrDefault();
            string name = nameTag == null ? "\n" : nameTag.InnerText;

            HtmlNode priceTag = infoDiv?
                .Descendants("div")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__details clearfix"))?
                .Descendants("span")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__price"));
            string price = priceTag == null ? "\n" : priceTag.InnerText;

            HtmlNode hrefTag = item.Descendants("a")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__link"));
            string realLink = hrefTag == null ? "\n" : hrefTag.GetAttributeValue("href", "");

            HtmlNode imgTag = item.Descendants("img")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__image-img"));
            string imgLink = imgTag == null ? "\n" : imgTag.GetAttributeValue("src", "");

            searchResults.Add(new Item(realLink, imgLink, name, price));
            Console.WriteLine();
            collected++;
        }

        // Continue only when the pagination list contains an entry labelled with the next page number.
        HtmlNode pagination = htmlDoc.DocumentNode.Descendants("ol")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("x-pagination__ol"));
        if (pagination == null)
        {
            return searchResults;
        }

        string nextPageLabel = (StartPageNum + 1).ToString();
        HtmlNode nextPage = pagination.Descendants("li")
            .FirstOrDefault(node => node.InnerText == nextPageLabel);
        if (nextPage == null)
        {
            return searchResults;
        }

        StartPageNum++;
    }
}
/// <summary>
/// Downloads search-result pages for <paramref name="searchText"/> one page at a time and
/// converts every listing anchor into an <c>Item</c> until <paramref name="itemsCount"/>
/// items have been collected or the pager has no link for the following page.
/// </summary>
/// <param name="searchText">Search phrase; spaces are replaced with '+' before it is appended to the URL.</param>
/// <param name="itemsCount">Maximum number of items to collect.</param>
/// <param name="minPrice">Lower price bound; sent as <c>price1</c> when either bound is non-zero.</param>
/// <param name="maxPrice">Upper price bound; sent as <c>price2</c> when either bound is non-zero.</param>
/// <returns>The collected items, or <c>null</c> when the page document is unavailable.</returns>
/// <remarks>
/// <c>StartPageNum</c> is incremented for every page consumed and is not reset by this method.
/// A name, image link or price that cannot be located is stored as "\n".
/// </remarks>
public override async Task<List<Item>> StartScraping(string searchText, int itemsCount = 1, int minPrice = 0, int maxPrice = 0)
{
    int collected = 0;
    var searchResults = new List<Item>();

    while (true)
    {
        string url;
        if (minPrice != 0 || maxPrice != 0)
        {
            url = SimpleUrl.Insert(SimpleUrl.IndexOf('?') + 1, $"price1={minPrice}&price2={maxPrice}{FilterChanger}") + searchText.Replace(' ', '+') + PageChanger + StartPageNum;
        }
        else
        {
            url = SimpleUrl + searchText.Replace(' ', '+') + PageChanger + StartPageNum;
        }

        HtmlDocument htmlDoc = await GetHtmlDocument(url);

        // Explicit check instead of catching NullReferenceException for control flow.
        if (htmlDoc == null || htmlDoc.DocumentNode == null)
        {
            Console.WriteLine("Your search did not match any postings.");
            return null;
        }

        List<HtmlNode> listBlocks = htmlDoc.DocumentNode.Descendants("div")
            .Where(node => node.GetAttributeValue("class", "").Equals("dl"))
            .ToList();

        var productNodes = new List<HtmlNode>();
        foreach (HtmlNode listBlock in listBlocks)
        {
            productNodes.AddRange(listBlock.Descendants("a")
                .Where(node => node.GetAttributeValue("href", "").Contains("item")));
        }

        Console.WriteLine($"LENGTH IS: {productNodes.Count}");

        foreach (HtmlNode item in productNodes)
        {
            if (collected >= itemsCount)
            {
                return searchResults;
            }

            // The name is read from the first div nested inside the anchor's first div.
            // Null-conditional access keeps an anchor without any div from throwing.
            HtmlNode nameDiv = item.Descendants("div").FirstOrDefault()?
                .Descendants("div").FirstOrDefault();
            string name = nameDiv == null ? "\n" : nameDiv.InnerText;

            string href = item.GetAttributeValue("href", "");
            Console.WriteLine($"Href: {href}");
            string realLink = DomainPart + href;

            // Prefer the "data-original" attribute and fall back to "src" when it is absent.
            HtmlNode imgNode = item.Descendants("img").FirstOrDefault();
            string imgLink = "\n";
            if (imgNode != null)
            {
                HtmlAttributeCollection attributes = imgNode.Attributes;
                if (attributes.Contains("data-original"))
                {
                    imgLink = imgNode.GetAttributeValue("data-original", "");
                }
                else if (attributes.Contains("src"))
                {
                    imgLink = imgNode.GetAttributeValue("src", "");
                }
            }
            Console.WriteLine($"ImgLink: {imgLink}");

            HtmlNode priceNode = item.Descendants("div")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("p"));
            string price = priceNode == null ? "\n" : priceNode.InnerText;
            Console.WriteLine($"Price: {price}");

            searchResults.Add(new Item(realLink, imgLink, name, price));
            Console.WriteLine();
            collected++;
        }

        // Continue only when the pager contains a link labelled with the next page number.
        HtmlNode pager = htmlDoc.DocumentNode.Descendants("span")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("pp"));
        if (pager == null)
        {
            return searchResults;
        }

        string nextPageLabel = (StartPageNum + 1).ToString();
        HtmlNode nextPage = pager.Descendants("a")
            .FirstOrDefault(node => node.InnerText == nextPageLabel);
        if (nextPage == null)
        {
            return searchResults;
        }

        StartPageNum++;
    }
}
/// <summary>
/// Walks the paged search results for <paramref name="searchText"/>, turning each product
/// list entry into an <c>Item</c>, and stops once <paramref name="itemsCount"/> items are
/// gathered or the page has no "next" paginator button.
/// </summary>
/// <param name="searchText">Search phrase; spaces are replaced with "%20" before it is appended to the URL.</param>
/// <param name="itemsCount">Maximum number of items to collect. With the default of 0 the method returns as soon as the first product is reached.</param>
/// <param name="minPrice">Value inserted after "[from]=" in the filter when either bound is non-zero.</param>
/// <param name="maxPrice">Value inserted after "[to]=" in the filter when either bound is non-zero.</param>
/// <returns>The collected items, or <c>null</c> when reading the document raises a <see cref="NullReferenceException"/>.</returns>
/// <remarks><c>StartPageNum</c> is incremented for every page consumed and is not reset by this method.</remarks>
public override async Task<List<Item>> StartScraping(string searchText, int itemsCount = 0, int minPrice = 0, int maxPrice = 0)
{
    var collected = new List<Item>();
    int taken = 0;

    for (;;)
    {
        string pageUrl;
        if (minPrice == 0 && maxPrice == 0)
        {
            pageUrl = SimpleUrl + searchText.Replace(" ", "%20") + PageChanger + StartPageNum;
        }
        else
        {
            // Build the plain search URL, splice the filter right after '?', then fill in both bounds.
            string withQuery = SimpleUrl + searchText.Replace(" ", "%20");
            string withFilter = withQuery.Insert(
                withQuery.IndexOf("?") + 1,
                FilterChanger.Insert(FilterChanger.IndexOf("[from]=") + "[from]=".Length, minPrice.ToString()));
            pageUrl = withFilter.Insert(withFilter.IndexOf("[to]=") + "[to]=".Length, maxPrice.ToString()) + PageChanger + StartPageNum;
        }

        Console.WriteLine($"PageNumber: {StartPageNum}");
        HtmlDocument page = await GetHtmlDocument(pageUrl);

        List<HtmlNode> productLists;
        try
        {
            productLists = page.DocumentNode.Descendants("ul")
                .Where(ul => ul.GetAttributeValue("class", "").Contains("product_list"))
                .ToList();
        }
        catch (NullReferenceException)
        {
            Console.WriteLine("Your search did not match any postings.");
            return null;
        }

        List<HtmlNode> products = productLists
            .SelectMany(ul => ul.Descendants("li")
                .Where(li => li.GetAttributeValue("class", "").Contains("product_item")))
            .ToList();
        Console.WriteLine($"LENGTH IS: {products.Count}");

        foreach (HtmlNode product in products)
        {
            Console.WriteLine($"Item: {taken}");
            if (taken >= itemsCount)
            {
                return collected;
            }

            HtmlNode titleDiv = product.Descendants("div")
                .FirstOrDefault(div => div.GetAttributeValue("class", "") == "product_item__title");
            string name = titleDiv != null ? titleDiv.InnerText : "\n";

            // The class value carries a trailing space in the matched markup.
            HtmlNode descriptionDiv = product.Descendants("div")
                .FirstOrDefault(div => div.GetAttributeValue("class", "") == "product_item__description ");
            HtmlNode priceDiv = descriptionDiv?.Descendants("div").FirstOrDefault();
            string price = priceDiv != null ? priceDiv.InnerText.Split('&')[0] : "\n";

            HtmlNode anchor = product.Descendants("a").FirstOrDefault();
            string realLink = anchor != null ? DomainPart + anchor.GetAttributeValue("href", "") : "\n";

            HtmlNode imageHolder = product.Descendants("div")
                .FirstOrDefault(div => div.GetAttributeValue("class", "") == "product_item__image");
            HtmlNode imageNode = imageHolder?.Descendants("image").FirstOrDefault();
            string imgLink = imageNode != null ? imageNode.GetAttributeValue("xlink:href", "") : "\n";

            collected.Add(new Item(realLink, imgLink, name, price, SiteName));
            taken++;
        }

        bool hasNextButton = page.DocumentNode.Descendants("a")
            .Any(a => a.GetAttributeValue("class", "").Contains("_paginator_next_button"));
        if (!hasNextButton)
        {
            return collected;
        }

        StartPageNum++;
    }
}
/// <summary>
/// Loads a single search-result page for <paramref name="searchText"/> and converts up to
/// <paramref name="itemsCount"/> "listitem" entries into <c>Item</c> objects.
/// </summary>
/// <param name="searchText">Search phrase; spaces are replaced with '+' before it is appended to the URL.</param>
/// <param name="itemsCount">Maximum number of items to collect. With the default of 0 no item is collected.</param>
/// <param name="minPrice">Not used by this implementation.</param>
/// <param name="maxPrice">Not used by this implementation.</param>
/// <returns>The collected items, or <c>null</c> when reading the document raises a <see cref="NullReferenceException"/>.</returns>
public override async Task<List<Item>> StartScraping(string searchText, int itemsCount = 0, int minPrice = 0, int maxPrice = 0)
{
    var results = new List<Item>();

    string url = SimpleUrl + searchText.Replace(' ', '+');
    HtmlDocument document = await GetHtmlDocument(url);

    List<HtmlNode> resultBlocks;
    try
    {
        resultBlocks = document.DocumentNode.Descendants("div")
            .Where(div => div.GetAttributeValue("class", "").Contains("search-result"))
            .ToList();
    }
    catch (NullReferenceException)
    {
        Console.WriteLine("Your search did not match any postings.");
        return null;
    }

    List<HtmlNode> entries = resultBlocks
        .SelectMany(block => block.Descendants("div")
            .Where(div => div.GetAttributeValue("class", "") == "listitem"))
        .ToList();
    Console.WriteLine($"LENGTH IS: {entries.Count}");

    foreach (HtmlNode entry in entries)
    {
        if (results.Count >= itemsCount)
        {
            break;
        }

        Console.WriteLine($"Item: {results.Count + 1}");

        // Title and price both hang off the entry's "item-body" div.
        HtmlNode body = entry.Descendants("div")
            .FirstOrDefault(div => div.GetAttributeValue("class", "") == "item-body");

        HtmlNode heading = body?.Descendants("h3").FirstOrDefault();
        string name = heading != null ? heading.InnerText : "\n";

        HtmlNode imageAnchor = entry.Descendants("a")
            .FirstOrDefault(a => a.GetAttributeValue("class", "") == "prod-item-img");
        string realLink = imageAnchor != null ? imageAnchor.GetAttributeValue("href", "") : "\n";

        // Prefer the "data-src" attribute and fall back to "src" when it is absent.
        HtmlNode image = imageAnchor?.Descendants("img").FirstOrDefault();
        string imgLink = "\n";
        if (image != null)
        {
            if (image.Attributes.Contains("data-src"))
            {
                imgLink = image.GetAttributeValue("data-src", "");
            }
            else if (image.Attributes.Contains("src"))
            {
                imgLink = image.GetAttributeValue("src", "");
            }
        }

        HtmlNode priceSpan = body?
            .Descendants("div")
            .FirstOrDefault(div => div.GetAttributeValue("class", "") == "price")?
            .Descendants("span")
            .FirstOrDefault();
        string price = priceSpan != null ? priceSpan.InnerText : "\n";

        results.Add(new Item(realLink, imgLink, name, price, SiteName));
        Console.WriteLine();
    }

    return results;
}
/// <summary>
/// Walks the paged search results for <paramref name="searchText"/>, turning each product
/// card into an <c>Item</c>, and stops once <paramref name="itemsCount"/> items are gathered
/// or the pager has no link for the following page.
/// </summary>
/// <param name="searchText">Search phrase; spaces are replaced with "%20" before it is appended to the URL.</param>
/// <param name="itemsCount">Maximum number of items to collect. With the default of 0 the method returns as soon as the first card is reached.</param>
/// <param name="minPrice">Value inserted before the ';' of the filter when either bound is non-zero.</param>
/// <param name="maxPrice">Value inserted before the '&amp;' of the filter when either bound is non-zero.</param>
/// <returns>The collected items, or <c>null</c> when reading the document raises a <see cref="NullReferenceException"/>.</returns>
/// <remarks><c>StartPageNum</c> is incremented for every page consumed and is not reset by this method.</remarks>
public override async Task<List<Item>> StartScraping(string searchText, int itemsCount = 0, int minPrice = 0, int maxPrice = 0)
{
    var collected = new List<Item>();
    int taken = 0;

    for (;;)
    {
        string pageUrl;
        if (minPrice == 0 && maxPrice == 0)
        {
            pageUrl = SimpleUrl + searchText.Replace(" ", "%20") + PageChanger + StartPageNum;
        }
        else
        {
            // Fill the filter template with both bounds, then splice it in front of "search=".
            string filterWithMin = FilterChanger.Insert(FilterChanger.IndexOf(";"), minPrice.ToString());
            string priceFilter = filterWithMin.Insert(filterWithMin.IndexOf("&"), maxPrice.ToString());
            string withQuery = SimpleUrl + searchText.Replace(" ", "%20");
            pageUrl = withQuery.Insert(withQuery.IndexOf("search="), priceFilter) + PageChanger + StartPageNum;
        }

        HtmlDocument page = await GetHtmlDocument(pageUrl);

        List<HtmlNode> catalogs;
        try
        {
            catalogs = page.DocumentNode.Descendants("div")
                .Where(div => div.GetAttributeValue("class", "").Contains("catalog_main_table j-products-container"))
                .ToList();
        }
        catch (NullReferenceException)
        {
            Console.WriteLine("Your search did not match any postings.");
            return null;
        }

        List<HtmlNode> cards = catalogs
            .SelectMany(catalog => catalog.Descendants("div")
                .Where(div => div.GetAttributeValue("class", "").Contains("j-card-item")))
            .ToList();
        Console.WriteLine($"LENGTH IS: {cards.Count}");

        foreach (HtmlNode card in cards)
        {
            Console.WriteLine($"Item: {taken}");
            if (taken >= itemsCount)
            {
                return collected;
            }

            HtmlNode nameSpan = card.Descendants("span")
                .FirstOrDefault(span => span.GetAttributeValue("class", "") == "goods-name");
            string name = nameSpan != null ? nameSpan.InnerText : "\n";

            // The price is looked up in an <ins class="lower-price"> first, then in a <span> with the same class.
            HtmlNode priceNode =
                card.Descendants("ins").FirstOrDefault(ins => ins.GetAttributeValue("class", "") == "lower-price")
                ?? card.Descendants("span").FirstOrDefault(span => span.GetAttributeValue("class", "") == "lower-price");
            string price = priceNode != null ? priceNode.InnerText : "\n";

            HtmlNode anchor = card.Descendants("a")
                .FirstOrDefault(a => a.GetAttributeValue("class", "").Contains("ref_goods_n_p"));
            string realLink = anchor != null ? anchor.GetAttributeValue("href", "") : "\n";

            // Prefer the "data-original" attribute and fall back to "src" when it is absent.
            HtmlNode thumbnail = card.Descendants("img")
                .FirstOrDefault(img => img.GetAttributeValue("class", "") == "thumbnail");
            string imgLink = "\n";
            if (thumbnail != null)
            {
                if (thumbnail.Attributes.Contains("data-original"))
                {
                    imgLink = thumbnail.GetAttributeValue("data-original", "");
                }
                else if (thumbnail.Attributes.Contains("src"))
                {
                    imgLink = thumbnail.GetAttributeValue("src", "");
                }
            }

            collected.Add(new Item(realLink, imgLink, name, price, SiteName));
            Console.WriteLine();
            taken++;
        }

        HtmlNode pager = page.DocumentNode.Descendants("div")
            .FirstOrDefault(div => div.GetAttributeValue("class", "") == "pageToInsert");
        if (pager == null)
        {
            return collected;
        }

        bool hasNextPage = pager.Descendants("a")
            .Any(a => a.InnerText == (StartPageNum + 1).ToString());
        if (!hasNextPage)
        {
            return collected;
        }

        StartPageNum++;
    }
}
/// <summary>
/// Loads a single search-result page for <paramref name="searchText"/> and converts up to
/// <paramref name="itemsCount"/> "card hover-shadow" entries into <c>Item</c> objects whose
/// description combines the entry's first paragraph with the text found next to the money icon.
/// </summary>
/// <param name="searchText">Search phrase; spaces are replaced with '+' before it is appended to the URL.</param>
/// <param name="itemsCount">Maximum number of items to collect. With the default of 0 no item is collected.</param>
/// <param name="minPrice">Not used by this implementation.</param>
/// <param name="maxPrice">Not used by this implementation.</param>
/// <returns>The collected items, or <c>null</c> when reading the document raises a <see cref="NullReferenceException"/>.</returns>
public override async Task<List<Item>> StartScraping(string searchText, int itemsCount = 0, int minPrice = 0, int maxPrice = 0)
{
    var results = new List<Item>();

    string url = SimpleUrl + searchText.Replace(' ', '+');
    HtmlDocument document = await GetHtmlDocument(url);

    List<HtmlNode> listViews;
    try
    {
        listViews = document.DocumentNode.Descendants("div")
            .Where(div => div.GetAttributeValue("class", "").Contains("listview--bordered job-box"))
            .ToList();
    }
    catch (NullReferenceException)
    {
        Console.WriteLine("Your search did not match any postings.");
        return null;
    }

    List<HtmlNode> cards = listViews
        .SelectMany(view => view.Descendants("div")
            .Where(div => div.GetAttributeValue("class", "") == "card hover-shadow"))
        .ToList();
    Console.WriteLine($"LENGTH IS: {cards.Count}");

    foreach (HtmlNode card in cards)
    {
        if (results.Count >= itemsCount)
        {
            break;
        }

        Console.WriteLine($"Item: {results.Count + 1}");

        // Heading link and description paragraph both live under "listview__content".
        HtmlNode content = card.Descendants("div")
            .FirstOrDefault(div => div.GetAttributeValue("class", "") == "listview__content");

        HtmlNode headingLink = content?
            .Descendants("div")
            .FirstOrDefault(div => div.GetAttributeValue("class", "") == "listview__heading")?
            .Descendants("a")
            .FirstOrDefault();

        string name = "\n";
        string realLink = "\n";
        if (headingLink != null)
        {
            name = headingLink.InnerText;
            realLink = DomainPart + headingLink.GetAttributeValue("href", "");
        }

        // No image is extracted for these entries.
        string imgLink = "\n";

        // The price text is taken from the parent of the "fa fa-money" icon.
        HtmlNode priceNode = card.Descendants("div")
            .FirstOrDefault(div => div.GetAttributeValue("class", "").Contains("listview__attrs"))?
            .Descendants("i")
            .FirstOrDefault(icon => icon.GetAttributeValue("class", "") == "fa fa-money")?
            .ParentNode;

        HtmlNode paragraph = content?.Descendants("p").FirstOrDefault();
        string summary = paragraph != null ? paragraph.InnerText : "\n";

        string details;
        if (priceNode != null)
        {
            details = summary + ":" + "Աշխատավարձ - " + priceNode.InnerText;
        }
        else
        {
            details = summary + ":" + " ";
        }

        results.Add(new Item(realLink, imgLink, name, details, SiteName));
        Console.WriteLine();
    }

    return results;
}
/// <summary>
/// Walks the paged search results for <paramref name="searchText"/>, turning each
/// "row result" entry into an <c>Item</c> whose name joins title, company and location and
/// whose description joins summary, salary and posting date. Stops once the item limit is
/// reached or the pagination list offers neither the next page number nor a "Next" link.
/// </summary>
/// <param name="searchText">Search phrase; spaces are replaced with '+' before it is inserted in front of "&amp;limit".</param>
/// <param name="itemsCount">
/// Maximum number of items to collect; when it equals twice <c>ItemsPerPage</c> the limit is
/// lowered by one. With the default of 0 the method returns as soon as the first entry is reached.
/// </param>
/// <param name="minPrice">Not used by this implementation.</param>
/// <param name="maxPrice">Not used by this implementation.</param>
/// <returns>The collected items, or <c>null</c> when reading the document raises a <see cref="NullReferenceException"/>.</returns>
/// <remarks><c>StartPageNum</c> is incremented for every page consumed and is not reset by this method.</remarks>
public override async Task<List<Item>> StartScraping(string searchText, int itemsCount = 0, int minPrice = 0, int maxPrice = 0)
{
    var collected = new List<Item>();
    int taken = 0;

    for (;;)
    {
        // The value appended after PageChanger is the zero-based offset of the first item on the requested page.
        string pageUrl = SimpleUrl.Insert(SimpleUrl.IndexOf("&limit"), searchText.Replace(' ', '+'))
            + ItemsPerPage.ToString()
            + PageChanger
            + ((StartPageNum - 1) * ItemsPerPage).ToString();

        HtmlDocument page = await GetHtmlDocument(pageUrl);

        List<HtmlNode> resultColumns;
        try
        {
            resultColumns = page.DocumentNode.Descendants("td")
                .Where(td => td.GetAttributeValue("id", "") == "resultsCol")
                .ToList();
        }
        catch (NullReferenceException)
        {
            Console.WriteLine("Your search did not match any postings.");
            return null;
        }

        List<HtmlNode> rows = resultColumns
            .SelectMany(column => column.Descendants("div")
                .Where(div => div.GetAttributeValue("class", "").Contains("row result")))
            .ToList();
        Console.WriteLine($"LENGTH IS: {rows.Count}");

        foreach (HtmlNode row in rows)
        {
            int limit = itemsCount == 2 * ItemsPerPage ? itemsCount - 1 : itemsCount;
            if (taken >= limit)
            {
                return collected;
            }

            Console.WriteLine($"Item: {taken}");

            HtmlNode titleHeading = row.Descendants("h2")
                .FirstOrDefault(h2 => h2.GetAttributeValue("class", "") == "title");
            string title = titleHeading != null ? titleHeading.InnerText : "\n";

            // Company and location are spans inside the "sjcl" div; either may come back null
            // when the div exists but the span does not.
            HtmlNode companyBlock = row.Descendants("div")
                .FirstOrDefault(div => div.GetAttributeValue("class", "") == "sjcl");
            string company = "\n";
            string location = "\n";
            if (companyBlock != null)
            {
                company = companyBlock.Descendants("span")
                    .FirstOrDefault(span => span.GetAttributeValue("class", "") == "company")?.InnerText;
                location = companyBlock.Descendants("span")
                    .FirstOrDefault(span => span.GetAttributeValue("class", "").Contains("location"))?.InnerText;
            }

            string displayName = title + ", " + company + ", " + location;

            HtmlNode titleLink = titleHeading?.Descendants("a").FirstOrDefault();
            string realLink = titleLink != null ? DomainPart + titleLink.GetAttributeValue("href", "") : "\n";
            string imgLink = "\n";

            HtmlNode summaryDiv = row.Descendants("div")
                .FirstOrDefault(div => div.GetAttributeValue("class", "") == "summary");
            HtmlNode salarySpan = row.Descendants("span")
                .FirstOrDefault(span => span.GetAttributeValue("class", "") == "salaryText");
            HtmlNode dateSpan = row.Descendants("span")
                .FirstOrDefault(span => span.GetAttributeValue("class", "").Contains("date"));

            string summary = summaryDiv != null ? summaryDiv.InnerText : "\n";
            string salaryLine = salarySpan != null ? "Salary - " + salarySpan.InnerText + "." : " ";
            string postedLine = dateSpan != null ? "Posted - " + dateSpan.InnerText : " ";
            string details = summary + "\n" + salaryLine + "\n" + postedLine;

            collected.Add(new Item(realLink, imgLink, displayName, details, SiteName));
            Console.WriteLine();
            taken++;
        }

        HtmlNode pagination = page.DocumentNode.Descendants("ul")
            .FirstOrDefault(ul => ul.GetAttributeValue("class", "") == "pagination-list");
        if (pagination == null)
        {
            return collected;
        }

        // Advance when a link is labelled with the next page number or, failing that, with "Next".
        string nextPageLabel = (StartPageNum + 1).ToString();
        bool canAdvance = pagination.Descendants("a")
                .Any(a => a.GetAttributeValue("aria-label", "") == nextPageLabel)
            || pagination.Descendants("a")
                .Any(a => a.GetAttributeValue("aria-label", "") == "Next");
        if (!canAdvance)
        {
            return collected;
        }

        StartPageNum++;
    }
}