Example #1
0
        /// <summary>
        /// Downloads a Craigslist (Miami) real-estate search page and writes the title of
        /// every result row to the console, followed by one blank line.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so exceptions raised by the download cannot be
        /// observed by the caller. A page without the expected result list, or a row without
        /// a title link, no longer throws: the row is printed as an empty line instead.
        /// </remarks>
        private static async void GetHtmlAsync()
        {
            var url = "https://miami.craigslist.org/search/reb?min_price=2500&max_price=250000&availabilityMode=0&housing_type=6&sale_date=all+dates&lang=en&cc=gb";

            string html;
            using (var httpClient = new HttpClient())
            {
                html = await httpClient.GetStringAsync(url);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // Take(1) keeps the "first matching <ul> only" behaviour while yielding an
            // empty sequence (instead of an out-of-range index) when the list is absent.
            var resultRows = htmlDocument.DocumentNode.Descendants("ul")
                             .Where(node => node.GetAttributeValue("class", "").Equals("rows"))
                             .Take(1)
                             .SelectMany(list => list.Descendants("li"))
                             .Where(node => node.GetAttributeValue("class", "").Equals("result-row"))
                             .ToList();

            foreach (var row in resultRows)
            {
                var titleLink = row.Descendants("a")
                                .FirstOrDefault(node => node.GetAttributeValue("class", "").Contains("result-title"));

                // A row without a title link prints an empty line rather than crashing.
                Console.WriteLine(titleLink?.InnerText);
            }

            Console.WriteLine();
        }
Example #2
0
        /// <summary>
        /// Runs a Daily Mail site search for <paramref name="user_input"/> and stores the
        /// title and absolute URL of each hit in <c>Articles</c>.
        /// </summary>
        /// <param name="user_input">Free-text search phrase; it is URL-encoded before use.</param>
        /// <param name="ResultNum">
        /// Number of results requested from the site. The array assigned to <c>Articles</c>
        /// always has this length; slots for which the page returned no result stay null.
        /// </param>
        /// <remarks>
        /// The method is <c>async void</c>, so download failures cannot be observed by the caller.
        /// </remarks>
        public async void GetNews(string user_input, int ResultNum)
        {
            // Getting news from DailyMail
            // Strings are immutable: the previous user_input.Replace(" ", "+") threw its result
            // away, so spaces were never encoded. UrlEncode turns spaces into '+' and also
            // escapes characters such as '&' that would otherwise break the query string.
            var searchPhrase = System.Net.WebUtility.UrlEncode(user_input);
            var url          = "https://www.dailymail.co.uk/home/search.html?sel=site&size=" + ResultNum + "&searchPhrase=" + searchPhrase + "&sort=relevant";

            string html;
            using (var httpclient = new HttpClient())
            {
                html = await httpclient.GetStringAsync(url);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            var resultBlocks = htmlDocument.DocumentNode.Descendants("div")
                               .Where(node => node.GetAttributeValue("class", "").Equals("sch-res-content"))
                               .ToList();

            Article[] article = new Article[ResultNum];

            // The page may contain fewer hits than requested; stop at whichever bound comes first.
            for (int i = 0; i < ResultNum && i < resultBlocks.Count; i++)
            {
                var headings = resultBlocks[i].Descendants("h3")
                               .Where(node => node.GetAttributeValue("class", "").Contains("sch-res-title"));

                // If a block holds several matching headings the last one wins, as before.
                foreach (var heading in headings)
                {
                    var anchor = heading.Descendants("a").FirstOrDefault();
                    var href   = anchor == null ? "" : anchor.GetAttributeValue("href", "");

                    article[i] = new Article
                    {
                        Title = System.Net.WebUtility.HtmlDecode(heading.InnerText),
                        URL   = "https://www.dailymail.co.uk" + href
                    };
                }
            }

            Articles = article;
        }
Example #3
0
        /// <summary>
        /// Downloads an eBay search page for "xbox one" and prints, for every listing,
        /// its <c>data-view</c> id, title, raw price text, numeric price and URL.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so download failures cannot be observed by the caller.
        /// Missing page elements are printed as empty lines instead of throwing.
        /// </remarks>
        private static async void GetHtmlAsync()
        {
            var url = "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p2322090.m570.l1313&_nkw=xbox+one&_sacat=0";

            string html;
            using (var httpClient = new HttpClient())
            {
                html = await httpClient.GetStringAsync(url);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // Take(1) = "first matching <ul> only", but safe when the page has none.
            var listings = htmlDocument.DocumentNode.Descendants("ul")
                           .Where(node => node.GetAttributeValue("class", "").Equals("srp-results srp-list clearfix"))
                           .Take(1)
                           .SelectMany(list => list.Descendants("li"))
                           .Where(node => node.GetAttributeValue("data-view", "").Contains("mi:1686|iid:"))
                           .ToList();

            Console.WriteLine(listings.Count);

            foreach (var listing in listings)
            {
                //id
                Console.WriteLine(listing.GetAttributeValue("data-view", ""));

                //ProductName: the heading carries one of two class values depending on tags
                var plainTitle = listing.Descendants("h3")
                                 .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__title"))
                                 ?.InnerText.Trim('\r', '\n', '\t');
                var taggedTitle = listing.Descendants("h3")
                                  .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__title s-item__title--has-tags"))
                                  ?.InnerText.Trim('\r', '\n', '\t');
                Console.WriteLine(plainTitle ?? taggedTitle);

                //Price (looked up once and reused for both outputs)
                var priceText = listing.Descendants("span")
                                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__price"))
                                ?.InnerText;
                Console.WriteLine(priceText);

                //Price Using regex
                // Regex.Match throws on a null input, so fall back to an empty string.
                // The old pattern \d+.\d+ used an unescaped '.', which cut "1,249.99" down
                // to "1,249"; this pattern keeps digit groups joined by '.' or ','.
                Console.WriteLine(Regex.Match(priceText ?? string.Empty, @"\d+(?:[.,]\d+)*").Value);

                //url
                var productUrl = listing.Descendants("a")
                                 .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("s-item__link"))
                                 ?.GetAttributeValue("href", "");
                Console.WriteLine(productUrl);

                Console.WriteLine();
            }
        }
Example #4
0
        /// <summary>
        /// Downloads the eBay search page at <paramref name="link"/> and prints the listing id,
        /// title and price of every item in the <c>ListViewInner</c> list, then a footer line.
        /// </summary>
        /// <param name="link">Absolute URL of the search results page to scrape.</param>
        /// <remarks>
        /// The method is <c>async void</c>, so download failures cannot be observed by the caller.
        /// Missing page elements are printed as empty lines instead of throwing.
        /// </remarks>
        private static async void GetHTMLAsync(string link)
        {
            string html;
            using (var httpClient = new HttpClient())
            {
                html = await httpClient.GetStringAsync(link);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // Take(1) = "first matching <ul> only", but safe when the page has none.
            var listings = htmlDocument.DocumentNode.Descendants("ul")
                           .Where(node => node.GetAttributeValue("id", "").Equals("ListViewInner"))
                           .Take(1)
                           .SelectMany(list => list.Descendants("li"))
                           .Where(node => node.GetAttributeValue("id", "").Contains("item"))
                           .ToList();

            foreach (var listing in listings)
            {
                var titleNode = listing.Descendants("h3")
                                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"));
                var priceNode = listing.Descendants("li")
                                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"));

                Console.WriteLine(listing.GetAttributeValue("listingid", ""));
                Console.WriteLine(titleNode?.InnerText.Trim('\r', '\t', '\n'));
                Console.WriteLine(priceNode?.InnerText.Trim('\r', '\t', '\n'));

                Console.WriteLine();
            }

            Console.WriteLine("This is made by RTG, all rights reserved!");
        }
Example #5
0
        /// <summary>
        /// Downloads an eBay search page for "nintendo switch" and prints the number of
        /// listings followed by id, name, price, listing type and URL of each one.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so download failures cannot be observed by the caller.
        /// Missing page elements yield an empty value after the label instead of throwing.
        /// </remarks>
        private static async void GetHtmlAsync()
        {
            var url = "https://www.ebay.com/sch/i.html?_nkw=nintendo+switch&_in_kw=1&_ex_kw=&_sacat=0&_udlo=&_udhi=&_ftrt=901&_ftrv=1&_sabdlo=&_sabdhi=&_samilow=&_samihi=&_sadis=15&_stpos=94040&_sargn=-1%26saslc%3D1&_salic=1&_sop=12&_dmd=1&_ipg=200";

            string html;
            using (var httpClient = new HttpClient())
            {
                html = await httpClient.GetStringAsync(url);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // Take(1) = "first matching <ul> only", but safe when the page has none.
            var listings = htmlDocument.DocumentNode.Descendants("ul")
                           .Where(node => node.GetAttributeValue("id", "").Equals("ListViewInner"))
                           .Take(1)
                           .SelectMany(list => list.Descendants("li"))
                           .Where(node => node.GetAttributeValue("id", "").Contains("item"))
                           .ToList();

            Console.WriteLine(listings.Count + " Listings");
            Console.WriteLine();

            foreach (var listing in listings)
            {
                //Listing ID
                Console.WriteLine("Listing ID: " + listing.GetAttributeValue("listingid", ""));

                //Product Name
                var productName = listing.Descendants("h3")
                                  .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"))
                                  ?.InnerText
                                  .Replace("New listing", String.Empty)
                                  .Replace("\n", String.Empty)
                                  .Replace("\r", String.Empty)
                                  .Replace("\t", String.Empty);
                Console.WriteLine("Product Name: " + productName);

                //Product Price
                // The old pattern \d+.\d+ used an unescaped '.', which cut "1,249.99" down
                // to "1,249"; this pattern keeps digit groups joined by '.' or ','.
                var priceText = listing.Descendants("li")
                                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"))
                                ?.InnerText;
                Console.WriteLine("Price: " + Regex.Match(priceText ?? string.Empty, @"\d+(?:[.,]\d+)*").Value);

                //Listing Type
                var listingType = listing.Descendants("li")
                                  .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"))
                                  ?.InnerText
                                  .Replace("\n", String.Empty)
                                  .Replace("\r", String.Empty)
                                  .Replace("\t", String.Empty);
                Console.WriteLine("# of Bids or Buy it Now: " + listingType);

                //Product URL
                var productUrl = listing.Descendants("a")
                                 .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("vip"))
                                 ?.GetAttributeValue("href", "");
                Console.WriteLine("Product URL: " + productUrl);

                Console.WriteLine();
            }
        }
Example #6
0
        /// <summary>
        /// Queries eBay sold listings and fills <c>itemList</c>, the <c>lvData</c> list view
        /// and the two result labels with what the page returned.
        /// </summary>
        /// <param name="product">Search keywords, inserted into the URL as given.</param>
        /// <param name="catagory">Value for the <c>_sacat</c> query parameter.</param>
        /// <param name="minPrice">Value for the <c>_udlo</c> query parameter.</param>
        /// <param name="maxPrice">Value for the <c>_udhi</c> query parameter.</param>
        /// <param name="results">Value for the <c>_ipg</c> query parameter.</param>
        /// <param name="condition">Zero or more values for <c>LH_ItemCondition</c>.</param>
        private void ebayQuery(string product, string catagory, string minPrice, string maxPrice, string results, params string[] condition)
        {
            itemList = new List <Item>();

            // Concatenating the array itself put the literal text "System.String[]" into
            // the URL. Join the values instead, separated by an encoded '|' (%7C).
            // NOTE(review): '|' is assumed to be eBay's multi-value separator; confirm.
            string conditionFilter = string.Join("%7C", condition ?? new string[0]);

            string url = "https://www.ebay.com/sch/i.html?_udlo=" + minPrice + "&_udhi=" + maxPrice + "&LH_ItemCondition=" +
                         conditionFilter + "&_mPrRngCbx=1&_ipg=" + results + "&_from=R40&_trksid=p2045573.m570.l1313.TR0.TRC0.H0.TRS0&_nkw=" +
                         product + "&_sacat=" + catagory + "&LH_Sold=1";

            HtmlWeb page = new HtmlWeb();
            var     doc  = page.Load(url);

            // Take(1) = "first matching <ul> only", but safe when the page has none.
            var listings = doc.DocumentNode.Descendants("ul")
                           .Where(node => node.GetAttributeValue("id", "").Equals("ListViewInner"))
                           .Take(1)
                           .SelectMany(list => list.Descendants("li"))
                           .Where(node => node.GetAttributeValue("id", "").Contains("item"))
                           .ToList();

            // SelectSingleNode returns null when the XPath matches nothing.
            string itemResults = doc.DocumentNode.SelectSingleNode("//*[@id='cbelm']/div[3]/h1/span[1]")?.InnerText ?? "";

            foreach (var listing in listings)
            {
                //ID
                string id = listing.GetAttributeValue("listingid", "");

                //Title
                string title = listing.Descendants("h3")
                               .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"))
                               ?.InnerText.Trim('\r', '\n', '\t').Replace("New listing", "").Replace("\r", "").Replace("\t", "")
                               ?? "";

                //Price
                // The old pattern \d+.\d+ used an unescaped '.', which cut "1,249.99" down
                // to "1,249"; this pattern keeps digit groups joined by '.' or ','.
                string priceText = listing.Descendants("li")
                                   .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"))
                                   ?.InnerText.Trim('\r', '\n', '\t')
                                   ?? "";
                string price = Regex.Match(priceText, @"\d+(?:[.,]\d+)*").Value;

                //Bids
                string bids = listing.Descendants("li")
                              .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"))
                              ?.InnerText.Trim('\r', '\n', '\t').Replace("bids", "").Replace("Buy It Now", "").Replace("or Best Offer", "").Replace("bid", "")
                              ?? "";

                //Links
                string link = listing.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", "") ?? "";

                //Image
                string imgLink = listing.Descendants("img").FirstOrDefault()?.GetAttributeValue("src", "") ?? "";

                //create a new Item and add to the item list
                itemList.Add(new Item(title, id, price, link, bids, imgLink));

                ListViewItem lvi = new ListViewItem(id);
                lvi.SubItems.Add(title);
                lvi.SubItems.Add(price);
                lvi.SubItems.Add(bids);
                lvi.SubItems.Add(link);
                lvData.Items.Add(lvi);
            }

            label1.Text         = lvData.Items.Count.ToString();
            lblItemResults.Text = "Total Results : " + itemResults;
        }
Example #7
0
        /// <summary>
        /// Downloads an ebay.co.uk search page for "xbox one" and prints id, name, numeric
        /// price, listing type and URL of every item in the <c>ListViewInner</c> list.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so download failures cannot be observed by the caller.
        /// Missing page elements are printed as empty lines instead of throwing.
        /// </remarks>
        private static async void GetHtmlAsync()
        {
            var url = "https://www.ebay.co.uk/sch/i.html?_nkw=xbox+one&_in_kw=1&_ex_kw=&_sacat=0&_udlo=&_udhi=&_ftrt=901&_ftrv=1&_sabdlo=&_sabdhi=&_samilow=&_samihi=&_sadis=15&_stpos=CF434TP&_sargn=-1%26saslc%3D1&_salic=3&_sop=12&_dmd=1&_ipg=200";

            string html;
            using (var httpClient = new HttpClient())
            {
                html = await httpClient.GetStringAsync(url);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // First <ul id="ListViewInner"> only (Take(1)), narrowed to the <li> nodes whose
            // id contains "item"; an absent list simply yields no listings.
            var listings = htmlDocument.DocumentNode.Descendants("ul")
                           .Where(node => node.GetAttributeValue("id", "").Equals("ListViewInner"))
                           .Take(1)
                           .SelectMany(list => list.Descendants("li"))
                           .Where(node => node.GetAttributeValue("id", "").Contains("item"))
                           .ToList();

            foreach (var listing in listings)
            {
                //id
                Console.WriteLine(listing.GetAttributeValue("listingid", ""));

                //Product name
                var titleNode = listing.Descendants("h3")
                                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"));
                Console.WriteLine(titleNode?.InnerText.Trim('\r', '\n', '\t'));

                //Price
                // The old pattern \d+.\d+ used an unescaped '.', which cut "1,249.99" down
                // to "1,249"; this pattern keeps digit groups joined by '.' or ','.
                var priceText = listing.Descendants("li")
                                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"))
                                ?.InnerText.Trim('\r', '\n', '\t');
                Console.WriteLine(Regex.Match(priceText ?? string.Empty, @"\d+(?:[.,]\d+)*").Value);

                //Listing Type lvformat
                var formatNode = listing.Descendants("li")
                                 .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"));
                Console.WriteLine(formatNode?.InnerText.Trim('\r', '\n', '\t'));

                //url
                Console.WriteLine(
                    listing.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", "").Trim('\r', '\n', '\t'));
                Console.WriteLine();
            }
        }
Example #8
0
        /// <summary>
        /// Downloads the eBay home page and prints id, name, listing type and URL of every
        /// item found in a <c>ListViewInner</c> list, followed by one blank line.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so download failures cannot be observed by the caller.
        /// Missing page elements are printed as empty lines instead of throwing.
        /// </remarks>
        private static async void GetHtmlAsync()
        {
            var url = "https://www.ebay.com/";

            // The previous code called GetAsync(...).ToString(), which yields a summary of the
            // response (status line and headers) rather than the page body, so the parser
            // never saw any HTML. GetStringAsync returns the body itself.
            string html;
            using (var httpClient = new HttpClient())
            {
                html = await httpClient.GetStringAsync(url);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // Take(1) = "first matching <ul> only", but safe when the page has none.
            var listings = htmlDocument.DocumentNode.Descendants("ul")
                           .Where(node => node.GetAttributeValue("id", "").Equals("ListViewInner"))
                           .Take(1)
                           .SelectMany(list => list.Descendants("li"))
                           .Where(node => node.GetAttributeValue("id", "").Contains("item"))
                           .ToList();

            foreach (var listing in listings)
            {
                //id
                Console.WriteLine(listing.GetAttributeValue("listingid", ""));

                //ProductName
                var titleNode = listing.Descendants("h3")
                                .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"));
                Console.WriteLine(titleNode?.InnerText.Trim('\r', '\n', '\t').Replace("New listing", "").TrimStart());

                //ListingType
                var formatNode = listing.Descendants("li")
                                 .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"));
                Console.WriteLine(formatNode?.InnerText.Trim('\r', '\n', '\t').Replace("or Best Offer", "0 bids").TrimEnd());

                //Url
                Console.WriteLine(listing.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", ""));

                Console.WriteLine();
            }

            Console.WriteLine();
        }
Example #9
0
        /// <summary>
        /// Downloads an eBay completed-listings search page for "xbox one" and prints the
        /// number of listings followed by id, name, price, listing type and URL of each one.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so download failures cannot be observed by the caller.
        /// Missing page elements are printed as empty lines instead of throwing.
        /// </remarks>
        static async void GetHtmlAsync()
        {
            var url = "https://www.ebay.com/sch/i.html?_nkw=xbox+one&_in_kw=1&_ex_kw=&_sacat=0&LH_Complete=1&_udlo=&_udhi=&_samilow=&_samihi=&_sadis=15&_stpos=&_sargn=-1%26saslc%3D1&_salic=1&_sop=12&_dmd=1&_ipg=50";

            string html;
            using (var httpclient = new HttpClient())
            {
                html = await httpclient.GetStringAsync(url);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // Take(1) = "first matching <ul> only", but safe when the page has none.
            var listings = htmlDocument.DocumentNode.Descendants("ul")
                           .Where(node => node.GetAttributeValue("id", "").Equals("ListViewInner"))
                           .Take(1)
                           .SelectMany(list => list.Descendants("li"))
                           .Where(node => node.GetAttributeValue("id", "").Contains("item"))
                           .ToList();

            Console.WriteLine(listings.Count);
            Console.WriteLine();

            foreach (var listing in listings)
            {
                var titleNode  = listing.Descendants("h3")
                                 .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"));
                var priceNode  = listing.Descendants("li")
                                 .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"));
                var formatNode = listing.Descendants("li")
                                 .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"));

                //ID
                Console.WriteLine(listing.GetAttributeValue("listingid", ""));

                //ProductName
                Console.WriteLine(titleNode?.InnerText.Trim('\r', '\n', '\t'));

                //Price
                Console.WriteLine(priceNode?.InnerText.Trim('\r', '\n', '\t'));

                //ListingType
                Console.WriteLine(formatNode?.InnerText.Trim('\r', '\n', '\t'));

                //URL
                Console.WriteLine(listing.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", ""));
                Console.WriteLine();
            }
        }
Example #10
0
        /// <summary>
        /// Asks the user for a product name, loads the matching OLX listing page through
        /// <c>Parser</c> and prints title and price of the promoted offers, then of the
        /// regular offers.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Console.OutputEncoding = Encoding.UTF8;

            Console.WriteLine("Введите название товара, который вас интересует: \n");
            string input = Console.ReadLine();

            // Escape the user's text so characters such as '/', '?' or '#' cannot alter the
            // URL structure. ReadLine returns null at end of input, hence the fallback.
            Parser parser = new Parser($"https://www.olx.ua/list/q-{Uri.EscapeDataString(input ?? string.Empty)}/");

            // The returned document was never used; the call is kept for its loading effect.
            parser.LoadAndGetHtmlDocument();

            // Prints every offer currently selected in parser.ProductListItems.
            // Previously one try/catch wrapped the whole loop, so the first offer without
            // a price stopped the listing. Now only that offer's price is replaced.
            Action printOffers = () =>
            {
                try
                {
                    foreach (var offer in parser.ProductListItems)
                    {
                        var titleNode = offer.Descendants("h3")
                                        .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lheight22 margintop5"));
                        var priceNode = offer.Descendants("p")
                                        .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("price"));

                        Console.WriteLine(titleNode?.InnerText);
                        Console.WriteLine(priceNode?.InnerText ?? "Цена не указана");
                    }
                }
                catch (Exception ex)
                {
                    // Still best-effort, but report the real cause instead of labelling
                    // every failure as a missing price.
                    Console.Error.WriteLine(ex.Message);
                }
            };

            parser.SetProductList("fixed offers breakword offers--top redesigned");

            Console.WriteLine("У меня есть всё, если у тебя есть достаточно руппи\n");

            printOffers();

            parser.SetProductList("fixed offers breakword redesigned");

            printOffers();
        }
        /// <summary>
        /// Scrapes individual product URLs from an eBay category/search page.
        /// </summary>
        /// <param name="urlToCheck">Absolute URL of the page to download.</param>
        /// <returns>
        /// The <c>href</c> of the first anchor inside every <c>li</c> whose class contains
        /// "s-item", in document order. An anchor without an <c>href</c> contributes an
        /// empty string; list items with no anchor at all are skipped.
        /// </returns>
        public async Task <List <string> > GetProductLinksFromeBayAsync(string urlToCheck)
        {
            var html = await _httpClient.GetStringAsync(urlToCheck);

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            return htmlDocument.DocumentNode.Descendants("li")
                   .Where(node => node.GetAttributeValue("class", "").Contains("s-item"))
                   .Select(item => item.Descendants("a").FirstOrDefault())
                   // Items without any anchor used to cause a NullReferenceException.
                   .Where(anchor => anchor != null)
                   .Select(anchor => anchor.GetAttributeValue("href", ""))
                   .ToList();
        }
Example #12
0
        /// <summary>
        /// Downloads the eBay search page whose URL is in <c>TextBoxURL</c> and, for every
        /// listing, inserts one row into <c>User_Data</c> and one row into <c>Refer</c>
        /// (linking the listing id to the value stored in <c>Session["User"]</c>).
        /// The text box is cleared once at least one listing has been stored.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so download or database failures cannot be
        /// observed by the caller. Page elements that are missing are stored as empty strings.
        /// </remarks>
        async void GetHtmlAsync()
        {
            const string connectionString = @"Data Source=(LocalDB)\MSSQLLocalDB;AttachDbFilename=|DataDirectory|Database2.mdf;Integrated Security=True";

            var url = TextBoxURL.Text;

            string html;
            using (var httpclient = new HttpClient())
            {
                html = await httpclient.GetStringAsync(url);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // Take(1) = "first matching <ul> only", but safe when the page has none.
            var listings = htmlDocument.DocumentNode.Descendants("ul")
                           .Where(node => node.GetAttributeValue("id", "").Equals("ListViewInner"))
                           .Take(1)
                           .SelectMany(list => list.Descendants("li"))
                           .Where(node => node.GetAttributeValue("id", "").Contains("item"))
                           .ToList();

            if (listings.Count == 0)
            {
                return;
            }

            // One connection for the whole batch. The previous code opened two connections
            // per listing, ran both commands on the first one, and leaked both whenever an
            // insert threw. The using blocks guarantee disposal.
            using (var con = new SqlConnection(connectionString))
            {
                con.Open();

                foreach (var listing in listings)
                {
                    var listingId = listing.GetAttributeValue("listingid", "");//ID

                    var productName = listing.Descendants("h3")
                                      .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvtitle"))
                                      ?.InnerText.Trim('\r', '\n', '\t') ?? "";//ProductName

                    var cost = listing.Descendants("li")
                               .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvprice prc"))
                               ?.InnerText.Trim('\r', '\n', '\t') ?? "";//Price

                    var info = listing.Descendants("li")
                               .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("lvformat"))
                               ?.InnerText.Trim('\r', '\n', '\t') ?? "";//ListingType

                    var link = listing.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", "") ?? "";//URL

                    using (var cmd = new SqlCommand("insert into User_Data values(@Id,@Product_Name,@Cost,@Info,@Link)", con))
                    using (var cmduser = new SqlCommand("insert into Refer values (@EMAIL_ID,@Id)", con))
                    {
                        cmd.Parameters.AddWithValue("@Id", listingId);
                        cmd.Parameters.AddWithValue("@Product_Name", productName);
                        cmd.Parameters.AddWithValue("@Cost", cost);
                        cmd.Parameters.AddWithValue("@Info", info);
                        cmd.Parameters.AddWithValue("@Link", link);

                        // A null parameter value is treated as "parameter not supplied";
                        // send an explicit database NULL when there is no session user.
                        cmduser.Parameters.AddWithValue("@EMAIL_ID", Session["User"] ?? System.DBNull.Value);
                        cmduser.Parameters.AddWithValue("@Id", listingId);

                        cmd.ExecuteNonQuery();
                        cmduser.ExecuteNonQuery();
                    }
                }
            }

            TextBoxURL.Text = "";
        }
Example #13
0
        /// <summary>
        /// Downloads an otodom.pl Warsaw flat-sale search page and prints, for every offer,
        /// its id, title, type/location line, area, price per square metre, price, offer URL
        /// and image URL.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so download failures cannot be observed by the caller.
        /// When the page has no <c>div.listing</c> container the message "No ni działa" is
        /// printed and zero offers are reported. Missing fields print as empty lines.
        /// </remarks>
        static async void GetHtmlAsync()
        {
            var url = "https://www.otodom.pl/sprzedaz/mieszkanie/warszawa/?search%5Bregion_id%5D=7&search%5Bsubregion_id%5D=197&search%5Bcity_id%5D=26&nrAdsPerPage=72";

            string html;
            using (var httpclient = new HttpClient())
            {
                html = await httpclient.GetStringAsync(url);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            var listing = htmlDocument.DocumentNode.Descendants("div")
                          .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("listing"));

            var offers = new List <HtmlNode>();

            if (listing == null)
            {
                // Explicit check instead of catching the out-of-range exception.
                Console.WriteLine("No ni działa");
            }
            else
            {
                // GetAttributeValue never returns null when given a "" default, so the
                // previous "!= null" test accepted every <article>. Test for a real id.
                offers = listing.Descendants("article")
                         .Where(node => !string.IsNullOrEmpty(node.GetAttributeValue("data-item-id", "")))
                         .ToList();
            }

            Console.WriteLine();
            Console.WriteLine("Liczba ofert: " + offers.Count);
            Console.WriteLine("Wyniki:");
            Console.WriteLine();

            foreach (var offer in offers)
            {
                Console.WriteLine();

                //ID
                Console.WriteLine("ID: " + offer.GetAttributeValue("data-item-id", ""));

                //Title
                Console.WriteLine(offer.Descendants("span")
                                  .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("offer-item-title"))
                                  ?.InnerText.Trim('\r', '\n', '\t'));

                //Flat/house + buy/sell + district
                Console.WriteLine(offer.Descendants("p")
                                  .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("text-nowrap"))
                                  ?.InnerText.Trim('\r', '\n', '\t'));

                //Floor area
                Console.WriteLine(offer.Descendants("li")
                                  .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("hidden-xs offer-item-area"))
                                  ?.InnerText.Trim('\r', '\n', '\t'));

                //Cost: price per square metre, then total price
                Console.WriteLine(offer.Descendants("li")
                                  .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("hidden-xs offer-item-price-per-m"))
                                  ?.InnerText.Trim('\r', '\n', '\t'));
                Console.WriteLine(offer.Descendants("li")
                                  .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("offer-item-price"))
                                  ?.InnerText.Trim(' ', '\r', '\n', '\t'));

                //Offer URL
                Console.WriteLine("URL OFERTY:   " +
                                  offer.Descendants("a").FirstOrDefault()?.GetAttributeValue("href", ""));

                //Image URL
                Console.WriteLine("URL zdjęcia:   " +
                                  offer.Descendants("span")
                                  .FirstOrDefault(node => node.GetAttributeValue("class", "").Equals("img-cover lazy"))
                                  ?.GetAttributeValue("data-src", ""));

                Console.WriteLine();
            }
        }
Example #14
0
        /// <summary>
        /// Downloads a fixed eBay search-results page and prints the listings it contains.
        /// The number of listings is printed first; then, for every listing, its id, title
        /// and price (each of these three is printed twice), its URL and a blank line.
        /// </summary>
        /// <remarks>
        /// A listing that lacks a title, price or link node is printed with an empty value
        /// for that field instead of throwing. If the results container cannot be found a
        /// diagnostic is written to standard error and nothing else is printed.
        /// NOTE(review): the method is <c>async void</c>, so callers cannot await it and an
        /// exception (for example a failed HTTP request) cannot be caught by the caller.
        /// Returning <c>Task</c> would be preferable but changes the signature.
        /// </remarks>
        static async void GetHtmlAsync()
        {
            var url = "https://www.ebay.ca/sch/i.html?_odkw=xbox+one&_osacat=0&_from=R40&_trksid=p2045573.m570.l1313.TR12.TRC2.A0.H0.Xlaptop.TRS0&_nkw=laptop&_sacat=200";

            string html;
            // Dispose the client once the page has been downloaded.
            using (var httpclient = new HttpClient())
            {
                html = await httpclient.GetStringAsync(url);
            }

            // Parse the downloaded markup.
            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // The <ul id="ListViewInner"> element holds the search results.
            var listContainer = htmlDocument.DocumentNode.Descendants("ul")
                                .FirstOrDefault(node => node.GetAttributeValue("id", "")
                                                .Equals("ListViewInner"));
            if (listContainer == null)
            {
                // Indexing an empty result list used to throw here; report the problem instead.
                Console.Error.WriteLine("No <ul id=\"ListViewInner\"> element found in the downloaded page.");
                return;
            }

            var listingNodes = listContainer.Descendants("li")
                               .Where(node => node.GetAttributeValue("id", "")
                                      .Contains("item")).ToList();

            Console.WriteLine(listingNodes.Count);
            Console.WriteLine();

            char[] trimChars = { '\r', '\n', '\t' };

            foreach (var listing in listingNodes)
            {
                // ID
                string pid = listing.GetAttributeValue("listingid", "");

                // ProductName: first <h3 class="lvtitle">, empty when absent.
                string productName = listing.Descendants("h3")
                                     .FirstOrDefault(node => node.GetAttributeValue("class", "")
                                                     .Equals("lvtitle"))
                                     ?.InnerText.Trim(trimChars) ?? "";

                // Price: first <li class="lvprice prc">, empty when absent.
                string price = listing.Descendants("li")
                               .FirstOrDefault(node => node.GetAttributeValue("class", "")
                                               .Equals("lvprice prc"))
                               ?.InnerText.Trim(trimChars) ?? "";

                // URL: href of the first anchor inside the listing, empty when absent.
                string purl = listing.Descendants("a").FirstOrDefault()
                              ?.GetAttributeValue("href", "") ?? "";

                // Id, title and price were printed twice by the original code; the output
                // is kept identical, but the values are no longer queried a second time.
                Console.WriteLine(pid);
                Console.WriteLine(productName);
                Console.WriteLine(price);

                Console.WriteLine(pid);
                Console.WriteLine(productName);
                Console.WriteLine(price);

                Console.WriteLine(purl);

                Console.WriteLine();
            }
        }
Example #15
0
        /// <summary>
        /// Downloads a fixed Amazon search-results page and prints, for every selected result,
        /// its <c>data-index</c>, product name, subtitle, price and book type, one per line.
        /// </summary>
        /// <remarks>
        /// A result that lacks one of the expected nodes is printed with an empty value for
        /// that field instead of throwing. If the results container cannot be found a
        /// diagnostic is written to standard error and nothing else is printed.
        /// NOTE(review): the method is <c>async void</c>, so callers cannot await it and an
        /// exception (for example a failed HTTP request) cannot be caught by the caller.
        /// Returning <c>Task</c> would be preferable but changes the signature.
        /// </remarks>
        private static async void GetHtmlAsync()
        {
            var url = "https://www.amazon.com/s?k=doom+patrol+omnibus&crid=2HBTXIE5Z803G&sprefix=Doom+pat%2Caps%2C176&ref=nb_sb_ss_i_4_8";

            string html;
            // Dispose the client once the page has been downloaded.
            using (var httpClient = new HttpClient())
            {
                html = await httpClient.GetStringAsync(url);
            }

            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // Gets List
            var resultsContainer = htmlDocument.DocumentNode.Descendants("div")
                                   .FirstOrDefault(node => node.GetAttributeValue("class", "")
                                                   .Equals("s-main-slot s-result-list s-search-results sg-row"));
            if (resultsContainer == null)
            {
                // Indexing an empty result list used to throw here; report the problem instead.
                Console.Error.WriteLine("No search-results container found in the downloaded page.");
                return;
            }

            // NOTE(review): this keeps only the divs whose data-asin value contains the
            // character '1'. Presumably "has a non-empty data-asin" was intended - confirm.
            var resultItems = resultsContainer.Descendants("div")
                              .Where(node => node.GetAttributeValue("data-asin", "")
                                     .Contains("1")).ToList();

            char[] trimChars = { '\r', '\n', '\t' };

            foreach (var item in resultItems)
            {
                // id
                Console.WriteLine(item.GetAttributeValue("data-index", ""));

                // ProductName
                Console.WriteLine(
                    item.Descendants("a")
                    .FirstOrDefault(node => node.GetAttributeValue("class", "")
                                    .Equals("a-link-normal a-text-normal"))
                    ?.InnerText.Trim(trimChars) ?? "");

                // Subtitle: text of the first span whose class contains "a-".
                Console.WriteLine(
                    item.Descendants("span")
                    .FirstOrDefault(node => node.GetAttributeValue("class", "")
                                    .Contains("a-"))
                    ?.InnerText.Trim(trimChars) ?? "");

                // Price
                string priceText = item.Descendants("a")
                                   .FirstOrDefault(node => node.GetAttributeValue("class", "")
                                                   .Equals("a-size-base a-link-normal a-text-normal"))
                                   ?.InnerText ?? "";
                // The previous pattern used an unescaped '.', which matches any character
                // and cut "$1,234.56" down to "$1,234". The dot is now a literal decimal
                // point and thousands separators are accepted. An empty line is printed
                // when no price is found.
                Console.WriteLine(Regex.Match(priceText, @"\$\d+(?:,\d{3})*(?:\.\d+)?").Value);

                // Book Type
                Console.WriteLine(
                    item.Descendants("a")
                    .FirstOrDefault(node => node.GetAttributeValue("class", "")
                                    .Equals("a-size-base a-link-normal a-text-bold"))
                    ?.InnerText.Trim(trimChars) ?? "");
            }
        }
Example #16
0
        /// <summary>
        /// Scrapes the OLX search results (electronics / computers, Katowice) for
        /// <paramref name="str"/> from result pages 1 to 29 in parallel and returns the
        /// offers found, de-duplicated by title and ordered by their <c>Price</c> value.
        /// </summary>
        /// <param name="productsList">
        /// Not read: a new <c>ProductsList</c> is always created, filled and returned.
        /// The parameter is kept only so existing callers keep compiling.
        /// </param>
        /// <param name="str">
        /// Search phrase. All spaces are removed before it is placed in the URL;
        /// <c>null</c> is treated as an empty phrase.
        /// </param>
        /// <returns>A new <c>ProductsList</c> whose <c>MyList</c> holds the collected offers.</returns>
        /// <remarks>
        /// Offers that lack a title, price, link or image are skipped. When several offers
        /// share a title only one is kept; which one depends on the order in which the
        /// parallel page downloads complete. A failed download surfaces as an
        /// <see cref="AggregateException"/> thrown by <c>Parallel.ForEach</c>.
        /// </remarks>
        public static ProductsList GetProductListFromOlx(ProductsList productsList, string str)
        {
            // Same URLs as before: every space (including those inside the phrase) is dropped.
            string term = (str ?? "").Replace(" ", "");

            var urlList = new List<string>();
            for (int i = 1; i < 30; i++)
            {
                urlList.Add($"https://www.olx.pl/elektronika/komputery/katowice/q-{term}/?search%5Border%5D=filter_float_price%3Aasc&search%5Bdist%5D=5&page={i}");
            }

            productsList = new ProductsList();

            // State shared by the parallel workers. List<T> and HashSet<T> are not safe for
            // concurrent writes, so every access to them happens under the lock on "gate".
            var collected  = new List<Product>();
            var seenTitles = new HashSet<string>();
            var gate       = new object();

            char[] trimChars = { '\r', '\t', '\n' };

            // One client is shared by all page downloads and disposed when they finish.
            using (var httpClient = new HttpClient())
            {
                Parallel.ForEach(urlList, (urlAdress, state) =>
                {
                    // The method is synchronous by contract, so the download is awaited by blocking.
                    string html = httpClient.GetStringAsync(urlAdress).Result;

                    var htmlDocument = new HtmlDocument();
                    htmlDocument.LoadHtml(html);

                    // A page whose first <link> href does not mention "search" is not treated as
                    // a search-result page: later pages are cancelled and this page is ignored.
                    var firstLinkHref = htmlDocument.DocumentNode.Descendants("link")
                                        .Select(node => node.GetAttributeValue("href", ""))
                                        .FirstOrDefault();

                    if (firstLinkHref == null || !firstLinkHref.Contains("search"))
                    {
                        state.Break();
                        return;
                    }

                    var offers = htmlDocument.DocumentNode.Descendants(0)
                                 .Where(n => n.HasClass("offer"));

                    foreach (var offer in offers)
                    {
                        string rawTitle = offer.Descendants("a")
                                          .FirstOrDefault(node => node.GetAttributeValue("data-cy", "")
                                                          .Equals("listing-ad-title"))
                                          ?.InnerHtml;

                        string rawPrice = offer.Descendants("p")
                                          .FirstOrDefault(node => node.GetAttributeValue("class", "")
                                                          .Equals("price"))
                                          ?.InnerHtml;

                        string href = offer.Descendants("a").FirstOrDefault()
                                      ?.GetAttributeValue("href", "");

                        string src = offer.Descendants("img").FirstOrDefault()
                                     ?.GetAttributeValue("src", "");

                        // Incomplete offers are skipped (previously done by catching the
                        // NullReferenceException raised on the missing node).
                        if (rawTitle == null || rawPrice == null || href == null || src == null)
                        {
                            continue;
                        }

                        string cleanTitle = rawTitle.Trim(trimChars)
                                            .Replace("<strong>", "").Replace("</strong>", "").Replace("&quot;", "");

                        var newProduct = new Product()
                        {
                            Title = cleanTitle,
                            Price = rawPrice.Trim(trimChars).Replace("<strong>", "").Replace("</strong>", ""),
                            Link  = href.Trim(),
                            Image = src.Trim()
                        };

                        // The duplicate check and the insert must be one atomic step. Without
                        // the lock, concurrent workers corrupted the list or lost products.
                        lock (gate)
                        {
                            if (seenTitles.Add(cleanTitle))
                            {
                                collected.Add(newProduct);
                            }
                        }
                    }
                });
            }

            //TODO: Order by price or search engine
            // NOTE(review): Price is filled from text above, so this presumably orders the
            // offers as strings rather than by numeric value - confirm against Product.
            productsList.MyList = collected.OrderBy(p => p.Price).ToList();

            return productsList;
        }