/// <summary>
/// Downloads a fixed Craigslist (Miami) real-estate search page and writes the
/// title of every result row to the console, followed by one blank line.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>, so callers can neither await it nor catch
/// its exceptions; a failed download still propagates as an unhandled exception.
/// </remarks>
private static async void GetHtmlAsync()
{
    var url = "https://miami.craigslist.org/search/reb?min_price=2500&max_price=250000&availabilityMode=0&housing_type=6&sale_date=all+dates&lang=en&cc=gb";

    string html;
    using (var httpClient = new HttpClient())
    {
        html = await httpClient.GetStringAsync(url);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // Results are expected inside <ul class="rows">. If the markup changes and the
    // list is missing, treat it as "no results" instead of indexing an empty list.
    var resultList = htmlDocument.DocumentNode.Descendants("ul")
        .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("rows"));

    var resultRows = resultList == null
        ? Enumerable.Empty<HtmlNode>()
        : resultList.Descendants("li")
            .Where(node => node.GetAttributeValue("class", "").Equals("result-row"));

    foreach (var row in resultRows)
    {
        var titleLink = row.Descendants("a")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Contains("result-title"));

        // A row without a title link has nothing to print; skip it rather than dereference null.
        if (titleLink == null)
        {
            continue;
        }

        Console.WriteLine(titleLink.InnerText);
    }

    Console.WriteLine();
}
/// <summary>
/// Searches the Daily Mail site for <paramref name="user_input"/> and stores the
/// title and absolute URL of each result in <c>Articles</c>.
/// </summary>
/// <param name="user_input">Search phrase; it is URL-encoded before being placed in the query string.</param>
/// <param name="ResultNum">Page size requested from the site and the maximum number of articles stored.</param>
/// <remarks>
/// <c>Articles</c> receives one slot per result block found (at most <paramref name="ResultNum"/>).
/// A slot stays <c>null</c> when its block has no linked title heading.
/// </remarks>
public async void GetNews(string user_input, int ResultNum)
{
    // Getting news from DailyMail.
    // Strings are immutable, so calling Replace without using its result left the
    // phrase unencoded. UrlEncode turns spaces into '+' and escapes other reserved characters.
    var searchPhrase = System.Net.WebUtility.UrlEncode(user_input);
    var url = "https://www.dailymail.co.uk/home/search.html?sel=site&size=" + ResultNum + "&searchPhrase=" + searchPhrase + "&sort=relevant";

    string html;
    using (var httpclient = new HttpClient())
    {
        html = await httpclient.GetStringAsync(url);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    var resultBlocks = htmlDocument.DocumentNode.Descendants("div")
        .Where(node => node.GetAttributeValue("class", "").Equals("sch-res-content"))
        .ToList();

    // The page may hold fewer results than requested; never index past what was found.
    int count = System.Math.Max(0, System.Math.Min(ResultNum, resultBlocks.Count));
    Article[] article = new Article[count];

    for (int i = 0; i < count; i++)
    {
        var headings = resultBlocks[i].Descendants("h3")
            .Where(node => node.GetAttributeValue("class", "").Contains("sch-res-title"));

        // When a block has several matching headings the last one wins.
        foreach (var heading in headings)
        {
            var anchor = heading.Descendants("a").FirstOrDefault();
            if (anchor == null)
            {
                continue;
            }

            article[i] = new Article
            {
                Title = System.Net.WebUtility.HtmlDecode(heading.InnerText),
                URL = "https://www.dailymail.co.uk" + anchor.GetAttributeValue("href", "")
            };
        }
    }

    Articles = article;
}
/// <summary>
/// Downloads a fixed eBay search page ("xbox one") and prints, for every listing,
/// its <c>data-view</c> id, title, price text, the numeric part of the price and its URL.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>; callers cannot await it or observe its exceptions.
/// </remarks>
private static async void GetHtmlAsync()
{
    var url = "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p2322090.m570.l1313&_nkw=xbox+one&_sacat=0";

    string html;
    using (var httpClient = new HttpClient())
    {
        html = await httpClient.GetStringAsync(url);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // If the results container is missing, report zero listings instead of
    // throwing on an empty-list index.
    var resultList = htmlDocument.DocumentNode.Descendants("ul")
        .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("srp-results srp-list clearfix"));

    var listings = (resultList == null
            ? Enumerable.Empty<HtmlNode>()
            : resultList.Descendants("li")
                .Where(node => node.GetAttributeValue("data-view", "").Contains("mi:1686|iid:")))
        .ToList();

    Console.WriteLine(listings.Count);

    foreach (var listing in listings)
    {
        // id
        Console.WriteLine(listing.GetAttributeValue("data-view", ""));

        // Product name: the heading carries one of two class values depending on tags.
        var plainTitle = listing.Descendants("h3")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__title"))
            ?.InnerText.Trim('\r', '\n', '\t');
        var taggedTitle = listing.Descendants("h3")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__title s-item__title--has-tags"))
            ?.InnerText.Trim('\r', '\n', '\t');
        Console.WriteLine(plainTitle ?? taggedTitle);

        // Price: look the node up once and reuse the text for both outputs.
        var priceText = listing.Descendants("span")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__price"))
            ?.InnerText;
        Console.WriteLine(priceText);

        // Price using regex. Regex.Match rejects a null input, so fall back to an
        // empty string, which yields an empty match.
        Console.WriteLine(Regex.Match(priceText ?? string.Empty, @"\d+.\d+"));

        // url
        var productUrl = listing.Descendants("a")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__link"))
            ?.GetAttributeValue("href", "");
        Console.WriteLine(productUrl);

        Console.WriteLine();
    }
}
/// <summary>
/// Downloads the page at <paramref name="link"/> and prints the listing id, title and
/// price of every item found under the element with id <c>ListViewInner</c>, then a
/// closing credit line.
/// </summary>
/// <param name="link">Address of the page to scrape.</param>
/// <remarks>
/// The method is <c>async void</c>; callers cannot await it or observe its exceptions.
/// </remarks>
private static async void GetHTMLAsync(string link)
{
    string html;
    using (var httpClient = new HttpClient())
    {
        html = await httpClient.GetStringAsync(link);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // A page without the list container simply has no listings to print.
    var listContainer = htmlDocument.DocumentNode.Descendants("ul")
        .FirstOrDefault(node => node.GetAttributeValue("id", "").Equals("ListViewInner"));

    var listings = listContainer == null
        ? Enumerable.Empty<HtmlNode>()
        : listContainer.Descendants("li")
            .Where(node => node.GetAttributeValue("id", "").Contains("item"));

    foreach (var listing in listings)
    {
        Console.WriteLine(listing.GetAttributeValue("listingid", ""));

        // Missing title/price nodes print as blank lines instead of throwing.
        var titleNode = listing.Descendants("h3")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"));
        Console.WriteLine(titleNode?.InnerText.Trim('\r', '\t', '\n'));

        var priceNode = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"));
        Console.WriteLine(priceNode?.InnerText.Trim('\r', '\t', '\n'));

        Console.WriteLine();
    }

    Console.WriteLine("This is made by RTG, all rights reserved!");
}
/// <summary>
/// Downloads a fixed eBay search page ("nintendo switch") and prints the number of
/// listings followed by each listing's id, name, price, listing type and URL.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>; callers cannot await it or observe its exceptions.
/// </remarks>
private static async void GetHtmlAsync()
{
    var url = "https://www.ebay.com/sch/i.html?_nkw=nintendo+switch&_in_kw=1&_ex_kw=&_sacat=0&_udlo=&_udhi=&_ftrt=901&_ftrv=1&_sabdlo=&_sabdhi=&_samilow=&_samihi=&_sadis=15&_stpos=94040&_sargn=-1%26saslc%3D1&_salic=1&_sop=12&_dmd=1&_ipg=200";

    string html;
    using (var httpClient = new HttpClient())
    {
        html = await httpClient.GetStringAsync(url);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // With no list container the count is reported as zero instead of throwing.
    var listContainer = htmlDocument.DocumentNode.Descendants("ul")
        .FirstOrDefault(node => node.GetAttributeValue("id", "").Equals("ListViewInner"));

    var listings = (listContainer == null
            ? Enumerable.Empty<HtmlNode>()
            : listContainer.Descendants("li")
                .Where(node => node.GetAttributeValue("id", "").Contains("item")))
        .ToList();

    Console.WriteLine(listings.Count + " Listings");
    Console.WriteLine();

    foreach (var listing in listings)
    {
        // Listing ID
        Console.WriteLine("Listing ID: " + listing.GetAttributeValue("listingid", ""));

        // Product Name, with the "New listing" badge text and layout whitespace removed.
        var productName = listing.Descendants("h3")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"))
            ?.InnerText
            .Replace("New listing", String.Empty)
            .Replace("\n", String.Empty)
            .Replace("\r", String.Empty)
            .Replace("\t", String.Empty);
        Console.WriteLine("Product Name: " + productName);

        // Product Price. Regex.Match rejects null, so a missing node yields an empty match.
        var priceText = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"))
            ?.InnerText;
        Console.WriteLine("Price: " + Regex.Match(priceText ?? String.Empty, @"\d+.\d+"));

        // Listing Type
        var listingType = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"))
            ?.InnerText
            .Replace("\n", String.Empty)
            .Replace("\r", String.Empty)
            .Replace("\t", String.Empty);
        Console.WriteLine("# of Bids or Buy it Now: " + listingType);

        // Product URL
        var productUrl = listing.Descendants("a")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("vip"))
            ?.GetAttributeValue("href", "");
        Console.WriteLine("Product URL: " + productUrl);

        Console.WriteLine();
    }
}
/// <summary>
/// Runs an eBay sold-listings search and fills <c>itemList</c> and the <c>lvData</c>
/// list view with one entry per listing, then updates the two result labels.
/// </summary>
/// <param name="product">Search keywords, inserted into the URL as given.</param>
/// <param name="catagory">Category id, inserted into the URL as given.</param>
/// <param name="minPrice">Lower price bound, inserted into the URL as given.</param>
/// <param name="maxPrice">Upper price bound, inserted into the URL as given.</param>
/// <param name="results">Items-per-page value, inserted into the URL as given.</param>
/// <param name="condition">Zero or more item-condition codes.</param>
private void ebayQuery(string product, string catagory, string minPrice, string maxPrice, string results, params string[] condition)
{
    itemList = new List<Item>();

    // Concatenating the array itself would put its type name ("System.String[]") in
    // the URL; join the individual codes instead. A single code passes through unchanged.
    // NOTE(review): '|' is assumed to be eBay's separator for multiple codes - confirm.
    string conditionFilter = Uri.EscapeDataString(string.Join("|", condition ?? new string[0]));

    string url = "https://www.ebay.com/sch/i.html?_udlo=" + minPrice + "&_udhi=" + maxPrice + "&LH_ItemCondition=" + conditionFilter + "&_mPrRngCbx=1&_ipg=" + results + "&_from=R40&_trksid=p2045573.m570.l1313.TR0.TRC0.H0.TRS0&_nkw=" + product + "&_sacat=" + catagory + "&LH_Sold=1";

    HtmlWeb page = new HtmlWeb();
    var doc = page.Load(url);

    // Every lookup below tolerates missing markup: an absent node becomes an empty
    // string rather than a NullReferenceException.
    var listContainer = doc.DocumentNode.Descendants("ul")
        .FirstOrDefault(node => node.GetAttributeValue("id", "").Equals("ListViewInner"));

    var listings = listContainer == null
        ? Enumerable.Empty<HtmlNode>()
        : listContainer.Descendants("li")
            .Where(node => node.GetAttributeValue("id", "").Contains("item"));

    string itemResults = doc.DocumentNode.SelectSingleNode("//*[@id='cbelm']/div[3]/h1/span[1]")?.InnerText ?? "";

    foreach (var listing in listings)
    {
        // ID
        string id = listing.GetAttributeValue("listingid", "");

        // Title
        string title = listing.Descendants("h3")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"))
            ?.InnerText.Trim('\r', '\n', '\t').Replace("New listing", "").Replace("\r", "").Replace("\t", "") ?? "";

        // Price
        string priceText = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"))
            ?.InnerText.Trim('\r', '\n', '\t') ?? "";
        string price = Regex.Match(priceText, @"\d+.\d+").ToString();

        // Bids: strip the format labels so only the bid count (if any) remains.
        string bids = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"))
            ?.InnerText.Trim('\r', '\n', '\t').Replace("bids", "").Replace("Buy It Now", "").Replace("or Best Offer", "").Replace("bid", "") ?? "";

        // Links
        string link = listing.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", "") ?? "";

        // Image
        string imgLink = listing.Descendants("img").FirstOrDefault()?.GetAttributeValue("src", "") ?? "";

        // Create a new Item and add it to the item list.
        itemList.Add(new Item(title, id, price, link, bids, imgLink));

        ListViewItem lvi = new ListViewItem(id);
        lvi.SubItems.Add(title);
        lvi.SubItems.Add(price);
        lvi.SubItems.Add(bids);
        lvi.SubItems.Add(link);
        lvData.Items.Add(lvi);
    }

    label1.Text = lvData.Items.Count.ToString();
    lblItemResults.Text = "Total Results : " + itemResults;
}
/// <summary>
/// Downloads a fixed eBay UK search page ("xbox one") and prints each listing's id,
/// name, numeric price, listing type and URL.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>; callers cannot await it or observe its exceptions.
/// </remarks>
private static async void GetHtmlAsync()
{
    var url = "https://www.ebay.co.uk/sch/i.html?_nkw=xbox+one&_in_kw=1&_ex_kw=&_sacat=0&_udlo=&_udhi=&_ftrt=901&_ftrv=1&_sabdlo=&_sabdhi=&_samilow=&_samihi=&_sadis=15&_stpos=CF434TP&_sargn=-1%26saslc%3D1&_salic=3&_sop=12&_dmd=1&_ipg=200";

    string html;
    using (var httpClient = new HttpClient())
    {
        html = await httpClient.GetStringAsync(url);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // The <ul id="ListViewInner"> element holds the listings; without it there is nothing to print.
    var listContainer = htmlDocument.DocumentNode.Descendants("ul")
        .FirstOrDefault(node => node.GetAttributeValue("id", "").Equals("ListViewInner"));
    if (listContainer == null)
    {
        return;
    }

    // Narrow down to the <li> elements whose id marks them as individual items.
    var listings = listContainer.Descendants("li")
        .Where(node => node.GetAttributeValue("id", "").Contains("item"));

    foreach (var listing in listings)
    {
        // id
        Console.WriteLine(listing.GetAttributeValue("listingid", ""));

        // Product name (inner text of the title heading).
        var titleNode = listing.Descendants("h3")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"));
        Console.WriteLine(titleNode?.InnerText.Trim('\r', '\n', '\t'));

        // Price: trim carriage return, newline and tab, then extract the number.
        // Regex.Match rejects null, so a missing price node yields an empty match.
        var priceText = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"))
            ?.InnerText.Trim('\r', '\n', '\t');
        Console.WriteLine(Regex.Match(priceText ?? string.Empty, @"\d+.\d+"));

        // Listing type (lvformat).
        var formatNode = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"));
        Console.WriteLine(formatNode?.InnerText.Trim('\r', '\n', '\t'));

        // url
        Console.WriteLine(listing.Descendants("a").FirstOrDefault()
            ?.GetAttributeValue("href", "").Trim('\r', '\n', '\t'));

        Console.WriteLine();
    }
}
/// <summary>
/// Downloads the eBay home page and prints the id, name, listing type and URL of
/// every item found under the element with id <c>ListViewInner</c>, followed by a blank line.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>; callers cannot await it or observe its exceptions.
/// </remarks>
private static async void GetHtmlAsync()
{
    var url = "https://www.ebay.com/";

    // GetAsync returns the response object, and calling ToString() on it gives a
    // status/header summary rather than the page. GetStringAsync returns the body,
    // which is what the HTML parser needs.
    string html;
    using (var httpClient = new HttpClient())
    {
        html = await httpClient.GetStringAsync(url);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // Treat a page without the list container as having no listings.
    var listContainer = htmlDocument.DocumentNode.Descendants("ul")
        .FirstOrDefault(node => node.GetAttributeValue("id", "").Equals("ListViewInner"));

    var listings = listContainer == null
        ? Enumerable.Empty<HtmlNode>()
        : listContainer.Descendants("li")
            .Where(node => node.GetAttributeValue("id", "").Contains("item"));

    foreach (var listing in listings)
    {
        // id
        Console.WriteLine(listing.GetAttributeValue("listingid", ""));

        // ProductName, without the "New listing" badge text.
        var titleNode = listing.Descendants("h3")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"));
        Console.WriteLine(titleNode?.InnerText.Trim('\r', '\n', '\t').Replace("New listing", "").TrimStart());

        // ListingType
        var formatNode = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"));
        Console.WriteLine(formatNode?.InnerText.Trim('\r', '\n', '\t').Replace("or Best Offer", "0 bids").TrimEnd());

        // Url
        Console.WriteLine(listing.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", ""));

        Console.WriteLine();
    }

    Console.WriteLine();
}
/// <summary>
/// Downloads a fixed eBay completed-listings search page ("xbox one") and prints the
/// listing count followed by each listing's id, name, price, listing type and URL.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>; callers cannot await it or observe its exceptions.
/// </remarks>
static async void GetHtmlAsync()
{
    var url = "https://www.ebay.com/sch/i.html?_nkw=xbox+one&_in_kw=1&_ex_kw=&_sacat=0&LH_Complete=1&_udlo=&_udhi=&_samilow=&_samihi=&_sadis=15&_stpos=&_sargn=-1%26saslc%3D1&_salic=1&_sop=12&_dmd=1&_ipg=50";

    string html;
    using (var httpclient = new HttpClient())
    {
        html = await httpclient.GetStringAsync(url);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // With no list container the count is reported as zero instead of throwing.
    var listContainer = htmlDocument.DocumentNode.Descendants("ul")
        .FirstOrDefault(node => node.GetAttributeValue("id", "").Equals("ListViewInner"));

    var listings = (listContainer == null
            ? Enumerable.Empty<HtmlNode>()
            : listContainer.Descendants("li")
                .Where(node => node.GetAttributeValue("id", "").Contains("item")))
        .ToList();

    Console.WriteLine(listings.Count);
    Console.WriteLine();

    foreach (var listing in listings)
    {
        // ID
        Console.WriteLine(listing.GetAttributeValue("listingid", ""));

        // ProductName (a missing node prints as a blank line)
        var titleNode = listing.Descendants("h3")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"));
        Console.WriteLine(titleNode?.InnerText.Trim('\r', '\n', '\t'));

        // Price
        var priceNode = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"));
        Console.WriteLine(priceNode?.InnerText.Trim('\r', '\n', '\t'));

        // ListingType
        var formatNode = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"));
        Console.WriteLine(formatNode?.InnerText.Trim('\r', '\n', '\t'));

        // URL
        Console.WriteLine(listing.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", ""));

        Console.WriteLine();
    }
}
/// <summary>
/// Asks the user for a product name, queries OLX for it, and prints the title and
/// price of the offers in two product lists (promoted offers first, then regular ones).
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Console.OutputEncoding = Encoding.UTF8;
    Console.WriteLine("Введите название товара, который вас интересует: \n");
    string input = Console.ReadLine();

    Parser parser = new Parser($"https://www.olx.ua/list/q-{input}/");
    // The returned document is not used here; only the call itself is kept.
    parser.LoadAndGetHtmlDocument();

    parser.SetProductList("fixed offers breakword offers--top redesigned");
    Console.WriteLine("У меня есть всё, если у тебя есть достаточно руппи\n");
    PrintOffers(parser);

    parser.SetProductList("fixed offers breakword redesigned");
    PrintOffers(parser);
}

/// <summary>
/// Prints the title and price of every item in the parser's current product list.
/// An offer without a price element gets the "price not specified" message and the
/// listing continues, instead of one missing element aborting the whole list.
/// </summary>
private static void PrintOffers(Parser parser)
{
    if (parser.ProductListItems == null)
    {
        return;
    }

    foreach (var offer in parser.ProductListItems)
    {
        var titleNode = offer.Descendants("h3")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lheight22 margintop5"));
        if (titleNode == null)
        {
            // Not an offer row this scraper understands; nothing to print.
            continue;
        }

        Console.WriteLine(titleNode.InnerText);

        var priceNode = offer.Descendants("p")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("price"));
        Console.WriteLine(priceNode != null ? priceNode.InnerText : "Цена не указана");
    }
}
/// <summary>
/// Scrapes individual product URLs from an eBay category URL.
/// </summary>
/// <param name="urlToCheck">Address of the category page to download.</param>
/// <returns>
/// The <c>href</c> of the first anchor inside every <c>li</c> whose class contains
/// <c>s-item</c>. An anchor without an <c>href</c> contributes an empty string;
/// list items with no anchor at all are skipped.
/// </returns>
public async Task<List<string>> GetProductLinksFromeBayAsync(string urlToCheck)
{
    var html = await _httpClient.GetStringAsync(urlToCheck);

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    return htmlDocument.DocumentNode.Descendants("li")
        .Where(node => node.GetAttributeValue("class", "").Contains("s-item"))
        .Select(item => item.Descendants("a").FirstOrDefault())
        // Not every matching <li> contains a link; dereferencing null here used to throw.
        .Where(anchor => anchor != null)
        .Select(anchor => anchor.GetAttributeValue("href", ""))
        .ToList();
}
/// <summary>
/// Downloads the page whose address is in <c>TextBoxURL</c>, and for every listing
/// found inserts one row into <c>User_Data</c> (id, name, cost, info, link) and one
/// row into <c>Refer</c> linking the current session user to that listing id.
/// The text box is cleared afterwards.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>; callers cannot await it or observe its exceptions.
/// </remarks>
async void GetHtmlAsync()
{
    var url = TextBoxURL.Text;

    string html;
    using (var httpclient = new HttpClient())
    {
        html = await httpclient.GetStringAsync(url);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    var listContainer = htmlDocument.DocumentNode.Descendants("ul")
        .FirstOrDefault(node => node.GetAttributeValue("id", "").Equals("ListViewInner"));
    if (listContainer == null)
    {
        // Nothing recognisable on the page; leave the URL in place so it can be corrected.
        return;
    }

    var listings = listContainer.Descendants("li")
        .Where(node => node.GetAttributeValue("id", "").Contains("item"))
        .ToList();

    // One connection serves the whole batch and is released even if an insert fails.
    // Both commands always ran on the same connection, so a second one is not needed.
    using (SqlConnection con = new SqlConnection(@"Data Source=(LocalDB)\MSSQLLocalDB;AttachDbFilename=|DataDirectory|Database2.mdf;Integrated Security=True"))
    {
        con.Open();

        foreach (var listing in listings)
        {
            // Missing nodes are stored as empty strings, matching the default used
            // for missing attributes.
            string listingId = listing.GetAttributeValue("listingid", "");
            string productName = listing.Descendants("h3")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"))
                ?.InnerText.Trim('\r', '\n', '\t') ?? "";
            string cost = listing.Descendants("li")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"))
                ?.InnerText.Trim('\r', '\n', '\t') ?? "";
            string info = listing.Descendants("li")
                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"))
                ?.InnerText.Trim('\r', '\n', '\t') ?? "";
            string link = listing.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", "") ?? "";

            using (SqlCommand cmd = new SqlCommand("insert into User_Data values(@Id,@Product_Name,@Cost,@Info,@Link)", con))
            using (SqlCommand cmduser = new SqlCommand("insert into Refer values (@EMAIL_ID,@Id)", con))
            {
                cmd.Parameters.AddWithValue("@Id", listingId);            // ID
                cmd.Parameters.AddWithValue("@Product_Name", productName); // ProductName
                cmd.Parameters.AddWithValue("@Cost", cost);               // Price
                cmd.Parameters.AddWithValue("@Info", info);               // ListingType
                cmd.Parameters.AddWithValue("@Link", link);               // URL

                cmduser.Parameters.AddWithValue("@EMAIL_ID", Session["User"]);
                cmduser.Parameters.AddWithValue("@Id", listingId);        // ID

                cmd.ExecuteNonQuery();
                cmduser.ExecuteNonQuery();
            }
        }
    }

    TextBoxURL.Text = "";
}
/// <summary>
/// Downloads a fixed otodom.pl search page (flats for sale, Warsaw) and prints the
/// number of offers followed by each offer's id, title, description line, area,
/// price per square metre, price, offer URL and image URL.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>; callers cannot await it or observe its exceptions.
/// </remarks>
static async void GetHtmlAsync()
{
    var url = "https://www.otodom.pl/sprzedaz/mieszkanie/warszawa/?search%5Bregion_id%5D=7&search%5Bsubregion_id%5D=197&search%5Bcity_id%5D=26&nrAdsPerPage=72";

    string html;
    using (var httpclient = new HttpClient())
    {
        html = await httpclient.GetStringAsync(url);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    var listingContainer = htmlDocument.DocumentNode.Descendants("div")
        .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("listing"));

    var offers = new List<HtmlNode>();
    if (listingContainer == null)
    {
        // Same message as before, but reached by an explicit check rather than by
        // catching the exception from indexing an empty list.
        Console.WriteLine("No ni działa");
    }
    else
    {
        // GetAttributeValue returns the supplied default ("") for a missing attribute,
        // never null, so a "!= null" test accepts every article. Check for a non-empty
        // value so only articles that carry an item id are kept.
        offers = listingContainer.Descendants("article")
            .Where(node => !string.IsNullOrEmpty(node.GetAttributeValue("data-item-id", "")))
            .ToList();
    }

    Console.WriteLine();
    Console.WriteLine("Liczba ofert: " + offers.Count);
    Console.WriteLine("Wyniki:");
    Console.WriteLine();

    foreach (var offer in offers)
    {
        Console.WriteLine();

        // ID
        Console.WriteLine("ID: " + offer.GetAttributeValue("data-item-id", ""));

        // Title (missing nodes print as blank lines instead of throwing)
        Console.WriteLine(offer.Descendants("span")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("offer-item-title"))
            ?.InnerText.Trim('\r', '\n', '\t'));

        // Flat/house + buy/sell + district
        Console.WriteLine(offer.Descendants("p")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("text-nowrap"))
            ?.InnerText.Trim('\r', '\n', '\t'));

        // Floor area
        Console.WriteLine(offer.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("hidden-xs offer-item-area"))
            ?.InnerText.Trim('\r', '\n', '\t'));

        // Cost: price per square metre, then total price
        Console.WriteLine(offer.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("hidden-xs offer-item-price-per-m"))
            ?.InnerText.Trim('\r', '\n', '\t'));
        Console.WriteLine(offer.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("offer-item-price"))
            ?.InnerText.Trim(' ', '\r', '\n', '\t'));

        // Offer URL
        Console.WriteLine("URL OFERTY: " + offer.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", ""));

        // Image URL
        Console.WriteLine("URL zdjęcia: " + offer.Descendants("span")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("img-cover lazy"))
            ?.GetAttributeValue("data-src", ""));

        Console.WriteLine();
    }
}
/// <summary>
/// Downloads a fixed eBay Canada search page ("laptop") and prints the listing count,
/// then for each listing its id, name and price (each group printed twice, as before)
/// followed by its URL.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>; callers cannot await it or observe its exceptions.
/// </remarks>
static async void GetHtmlAsync()
{
    var url = "https://www.ebay.ca/sch/i.html?_odkw=xbox+one&_osacat=0&_from=R40&_trksid=p2045573.m570.l1313.TR12.TRC2.A0.H0.Xlaptop.TRS0&_nkw=laptop&_sacat=200";

    string html;
    using (var httpclient = new HttpClient())
    {
        html = await httpclient.GetStringAsync(url);
    }

    // Parse the HTML document.
    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // With no list container the count is reported as zero instead of throwing.
    var listContainer = htmlDocument.DocumentNode.Descendants("ul")
        .FirstOrDefault(node => node.GetAttributeValue("id", "").Equals("ListViewInner"));

    var listings = (listContainer == null
            ? Enumerable.Empty<HtmlNode>()
            : listContainer.Descendants("li")
                .Where(node => node.GetAttributeValue("id", "").Contains("item")))
        .ToList();

    Console.WriteLine(listings.Count);
    Console.WriteLine();

    foreach (var listing in listings)
    {
        // Each value is looked up once and reused; missing nodes print as blank lines.
        string pid = listing.GetAttributeValue("listingid", "");
        string productName = listing.Descendants("h3")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"))
            ?.InnerText.Trim('\r', '\n', '\t');
        string price = listing.Descendants("li")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"))
            ?.InnerText.Trim('\r', '\n', '\t');

        // The id/name/price group is written twice to keep the console output unchanged.
        for (int pass = 0; pass < 2; pass++)
        {
            Console.WriteLine(pid);         // ID
            Console.WriteLine(productName); // ProductName
            Console.WriteLine(price);       // Price
        }

        // URL
        string purl = listing.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", "");
        Console.WriteLine(purl);

        Console.WriteLine();
    }
}
/// <summary>
/// Downloads a fixed Amazon search page ("doom patrol omnibus") and prints, for every
/// result whose <c>data-asin</c> contains "1", its index, product name, subtitle,
/// dollar price and book type.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>; callers cannot await it or observe its exceptions.
/// </remarks>
private static async void GetHtmlAsync()
{
    var url = "https://www.amazon.com/s?k=doom+patrol+omnibus&crid=2HBTXIE5Z803G&sprefix=Doom+pat%2Caps%2C176&ref=nb_sb_ss_i_4_8";

    string html;
    using (var httpClient = new HttpClient())
    {
        html = await httpClient.GetStringAsync(url);
    }

    var htmlDocument = new HtmlDocument();
    htmlDocument.LoadHtml(html);

    // Gets the results list; a page without it has nothing to print.
    var resultList = htmlDocument.DocumentNode.Descendants("div")
        .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-main-slot s-result-list s-search-results sg-row"));
    if (resultList == null)
    {
        return;
    }

    var results = resultList.Descendants("div")
        .Where(node => node.GetAttributeValue("data-asin", "").Contains("1"));

    foreach (var result in results)
    {
        // id
        Console.WriteLine(result.GetAttributeValue("data-index", ""));

        // ProductName (missing nodes print as blank lines instead of throwing)
        Console.WriteLine(result.Descendants("a")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("a-link-normal a-text-normal"))
            ?.InnerText.Trim('\r', '\n', '\t'));

        // Subtitle: first span whose class contains "a-"
        Console.WriteLine(result.Descendants("span")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Contains("a-"))
            ?.InnerText.Trim('\r', '\n', '\t'));

        // Price. Regex.Match rejects null, so a missing node yields an empty match.
        var priceText = result.Descendants("a")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("a-size-base a-link-normal a-text-normal"))
            ?.InnerText;
        Console.WriteLine(Regex.Match(priceText ?? string.Empty, @"\$\d+.\d+"));

        // Book Type
        Console.WriteLine(result.Descendants("a")
            .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("a-size-base a-link-normal a-text-bold"))
            ?.InnerText.Trim('\r', '\n', '\t'));
    }
}
/// <summary>
/// Scrapes OLX search-result pages (computers, Katowice) for <paramref name="str"/>,
/// fetching pages 1 to 29 in parallel, and returns the offers with distinct titles
/// ordered by their <c>Price</c> value.
/// </summary>
/// <param name="productsList">Ignored on input; a new list is always created and returned.</param>
/// <param name="str">Search phrase. All spaces are stripped from the final URL, so a
/// multi-word phrase is sent with its words run together.</param>
/// <returns>A new <c>ProductsList</c> whose <c>MyList</c> holds the scraped offers.</returns>
public static ProductsList GetProductListFromOlx(ProductsList productsList, string str)
{
    List<string> urlList = new List<string>();
    for (int i = 1; i < 30; i++)
    {
        urlList.Add(($"https://www.olx.pl/elektronika/komputery/katowice/q- {str} /?search%5Border%5D=filter_float_price%3Aasc&search%5Bdist%5D=5&page = {i}").Replace(" ", ""));
    }

    productsList = new ProductsList();
    productsList.MyList = new List<Product>();

    // List<T> does not support concurrent writers, and the duplicate check followed
    // by Add must happen as one step; every access from the parallel body is
    // serialised through this gate.
    object listGate = new object();

    using (var httpClient = new HttpClient())
    {
        Parallel.ForEach(urlList, (urlAdress, state) =>
        {
            // This method is synchronous, so the download is waited on here.
            var html = httpClient.GetStringAsync(urlAdress).Result;

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // The first <link> href is used to tell whether this is still a search page.
            // Past the last page it is not: stop scheduling further pages and do not
            // parse this one.
            var firstLinkHref = htmlDocument.DocumentNode.Descendants("link")
                .Select(node => node.GetAttributeValue("href", ""))
                .FirstOrDefault();
            if (firstLinkHref == null || !firstLinkHref.Contains("search"))
            {
                state.Break();
                return;
            }

            var offers = htmlDocument.DocumentNode.Descendants(0)
                .Where(n => n.HasClass("offer"));

            foreach (var offer in offers)
            {
                // Offers missing any of the four fields are skipped, via explicit
                // checks rather than caught exceptions.
                var titleNode = offer.Descendants("a")
                    .FirstOrDefault(node => node.GetAttributeValue("data-cy", "").Equals("listing-ad-title"));
                var priceNode = offer.Descendants("p")
                    .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("price"));
                var link = offer.Descendants("a")
                    .Select(node => node.GetAttributeValue("href", ""))
                    .FirstOrDefault();
                var image = offer.Descendants("img")
                    .Select(node => node.GetAttributeValue("src", ""))
                    .FirstOrDefault();

                if (titleNode == null || priceNode == null || link == null || image == null)
                {
                    continue;
                }

                // InnerHtml keeps markup and entities, so the <strong> tags and the
                // &quot; entity are stripped from the title text.
                var newProduct = new Product()
                {
                    Title = titleNode.InnerHtml.Trim('\r', '\t', '\n').Replace("<strong>", "").Replace("</strong>", "").Replace("&quot;", ""),
                    Price = priceNode.InnerHtml.Trim('\r', '\t', '\n').Replace("<strong>", "").Replace("</strong>", ""),
                    Link = link.Trim(),
                    Image = image.Trim()
                };

                lock (listGate)
                {
                    if (!productsList.MyList.Any(x => x.Title == newProduct.Title))
                    {
                        productsList.MyList.Add(newProduct);
                    }
                }
            }
        });
    }

    //TODO: Order by price or search engine
    productsList.MyList = productsList.MyList.OrderBy(p => p.Price).ToList();
    return productsList;
}