/// <summary>
/// Registers result pages 1 to 9 of a fixed hotel search on <c>Site</c>, asks each
/// page for the hotel name node, and then hands the site to <c>Scraper</c>.
/// </summary>
/// <param name="args">
/// Not read by this method: every search criterion below is hard-coded.
/// </param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>Site</c> is null.
/// </exception>
public void RegisterPages(RequestArgs args)
{
    if (Site == null)
    {
        throw new InvalidOperationException("Site was not initialised in class with a valid url");
    }

    UriBuilder uriBuilder = new UriBuilder(Site.URL);
    uriBuilder.Path = "hotels/results/"; // same for every page, so set once

    var param = HttpUtility.ParseQueryString(String.Empty);
    param["rooms"] = "1";
    param["adults"] = "2";
    param["city_id"] = "800013148"; // Travel.com "London's" City ID
    param["currency"] = "GBP";
    param["check_in"] = "22/04/2019";
    // Was "23/04/2018", i.e. a check-out a year before the check-in.
    param["check_out"] = "23/04/2019";

    for (int page = 1; page < 10; page++)
    {
        param["page"] = page.ToString();
        uriBuilder.Query = param.ToString();

        // Substring(1) drops the leading '/' of PathAndQuery.
        var layout = Site.AddPage(uriBuilder.Uri.PathAndQuery.Substring(1), By.Id("rs_main_css"));
        layout.AddNode(new NodeRequest
        {
            Property = "Name",
            XPath = "//div/article/div[2]/div[1]/div[2]/a"
        });
    }

    Scraper.AddSite(Site);
}
/// <summary>
/// Builds the hotel-list search URL from <paramref name="args"/>, registers it as a
/// page on <c>Site</c> together with the per-hotel fields to extract, and hands
/// the site to <c>Scraper</c>. Does nothing when <c>Site</c> is null.
/// </summary>
/// <param name="args">
/// Search criteria; <c>Rooms</c>, <c>People</c>, <c>City</c>, <c>CheckIn</c> and
/// <c>CheckOut</c> are read.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when <c>Site</c> is set but <paramref name="args"/> is null.
/// </exception>
public void RegisterPages(RequestArgs args)
{
    if (Site == null)
    {
        return;
    }

    // The original read args.Rooms before its own "args != null" check, so a null
    // argument always ended in a NullReferenceException. Fail explicitly instead.
    if (args == null)
    {
        throw new ArgumentNullException("args");
    }

    // ConvertDate is defined elsewhere; used here as [0] = day, [1] = month/year.
    string[] checkin = ConvertDate(args.CheckIn);
    string[] checkout = ConvertDate(args.CheckOut);

    UriBuilder uriBuilder = new UriBuilder(Site.URL);
    var param = HttpUtility.ParseQueryString(String.Empty);
    param["qRms"] = args.Rooms;
    param["qAdlt"] = args.People;
    param["qDest"] = args.City;
    param["qRad"] = "300";      // Radius to search
    param["qRdU"] = "mi";       // Distance units
    param["qCiD"] = checkin[0];     // Check in Date
    param["qCiMy"] = checkin[1];    // Check in Month/Year (Month - 1)
    param["qCoD"] = checkout[0];    // Check out Date
    param["qCoMy"] = checkout[1];   // Check out Month/Year (Month - 1)

    uriBuilder.Path = "hotels/gb/en/find-hotels/hotel/list";
    uriBuilder.Query = param.ToString();

    // Calls hotelList.bottomInView() on the Angular scope of the list element;
    // the statement is repeated 60 times in the script handed to AddPage.
    var js = "angular.element(document.evaluate('//*[@id=\"applicationWrapper\"]/div[2]/div/div/div[9]/div[2]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue).scope().hotelList.bottomInView();";

    // Substring(1) drops the leading '/' of PathAndQuery.
    var layout = Site.AddPage(
        uriBuilder.Uri.PathAndQuery.Substring(1),
        By.ClassName("infoSummary"),
        String.Concat(Enumerable.Repeat(js, 60)),
        "//div/hotel-row",
        850);

    layout.AddNode(new NodeRequest
    {
        Property = "Name",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[1]/a"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "HotelURL",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[1]/a",
        Attribute = "href",
    });
    layout.AddNode(new NodeRequest
    {
        Property = "City",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[2]/a"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Address",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[1]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Postcode",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[3]/span[1]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Country",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[3]/span[2]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Phonenumber",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-phone-number/div[2]/div/div/a"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "PriceL",
        XPath = "//div[1]/div/div/div[3]/div[1]/div/div/span[1]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "PriceS",
        XPath = "//div[1]/div/div/div[3]/div[1]/div/div/span[2]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Available",
        XPath = "//div[1]/div/div/div[3]/div/div/div[1]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Currency",
        XPath = "//div[1]/div/div/div[3]/div[1]/div/span[2]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Extras",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[2]/div[2]/ul"
    });

    Scraper.AddSite(Site);
}
/// <summary>
/// Builds the IHG hotel-list search URL from <paramref name="args"/>, registers it
/// as a page on <c>Site</c> together with the per-hotel fields to extract, and
/// hands the site to <c>Scraper</c>. Does nothing when <c>Site</c> is null.
/// </summary>
/// <param name="args">
/// Search criteria; <c>Rooms</c>, <c>People</c>, <c>City</c>, <c>CheckIn</c> and
/// <c>CheckOut</c> are read.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when <c>Site</c> is set but <paramref name="args"/> is null.
/// </exception>
public void RegisterPages(RequestArgs args)
{
    if (Site == null)
    {
        return;
    }

    // The original read args.Rooms before its own "args != null" check, so a null
    // argument always ended in a NullReferenceException. Fail explicitly instead.
    if (args == null)
    {
        throw new ArgumentNullException("args");
    }

    // Site root: https://www.ihg.com/
    // Example query:
    //   https://www.ihg.com/hotels/gb/en/find-hotels/hotel/list?qDest=London,%20United%20Kingdom&
    //   qCiMy=32019
    //   qCiD=21
    //   qCoMy=32019
    //   qCoD=22
    //   qAdlt=2
    //   qChld=0
    //   qRms=1
    //   qRtP=6CBARC
    //   qAkamaiCC=GB
    //   qSrt=sDD
    //   qBrs=re.ic.in.vn.cp.vx.hi.ex.rs.cv.sb.cw.ma.ul.ki.va
    //   srb_u=0
    //   qRad=30
    //   qRdU=mi

    // ConvertDate is defined elsewhere; used here as [0] = day, [1] = month/year.
    string[] checkin = ConvertDate(args.CheckIn);
    string[] checkout = ConvertDate(args.CheckOut);

    UriBuilder uriBuilder = new UriBuilder(Site.URL);
    var param = HttpUtility.ParseQueryString(String.Empty);
    param["qRms"] = args.Rooms;
    param["qAdlt"] = args.People;
    param["qDest"] = args.City;     // Destination text, e.g. "London, United Kingdom"
    param["qRad"] = "300";          // Radius to search
    param["qRdU"] = "mi";           // Distance units
    param["qCiD"] = checkin[0];     // Check in Date
    param["qCiMy"] = checkin[1];    // Check in Month/Year (Month - 1)
    param["qCoD"] = checkout[0];    // Check out Date
    param["qCoMy"] = checkout[1];   // Check out Month/Year (Month - 1)

    uriBuilder.Path = "hotels/gb/en/find-hotels/hotel/list";
    uriBuilder.Query = param.ToString();

    // Calls hotelList.bottomInView() on the Angular scope of the list element;
    // the statement is repeated 60 times in the script handed to AddPage.
    var js = "angular.element(document.evaluate('//*[@id=\"applicationWrapper\"]/div[2]/div/div/div[9]/div[2]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue).scope().hotelList.bottomInView();";

    // Substring(1) drops the leading '/' of PathAndQuery.
    var layout = Site.AddPage(
        uriBuilder.Uri.PathAndQuery.Substring(1),
        By.Id("topOfPage"),
        String.Concat(Enumerable.Repeat(js, 60)),
        "//div/hotel-row",
        850);

    layout.AddNode(new NodeRequest
    {
        Property = "Name",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[1]/a"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "HotelURL",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[1]/a",
        Attribute = "href",
    });
    layout.AddNode(new NodeRequest
    {
        Property = "City",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[2]/a"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Address",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[1]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Postcode",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[3]/span[1]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Country",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[3]/span[2]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Phonenumber",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-phone-number/div[2]/div/div/a"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "PriceL",
        XPath = "//div[1]/div/div/div[3]/div[1]/div/div/span[1]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "PriceS",
        XPath = "//div[1]/div/div/div[3]/div[1]/div/div/span[2]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Available",
        XPath = "//div[1]/div/div/div[3]/div/div/div[1]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Currency",
        XPath = "//div[1]/div/div/div[3]/div[1]/div/span[2]"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Extras",
        XPath = "//div[1]/div/div/hotel-details/div/div/div[2]/div[2]/ul"
    });

    Scraper.AddSite(Site);
}
/// <summary>
/// Registers result pages 1 to 4 of an Expedia "Hotel-Search" query built from
/// <paramref name="args"/>, together with the per-hotel fields to extract, and
/// hands <c>Site</c> to <c>Scraper</c>.
/// </summary>
/// <param name="args">
/// Search criteria; <c>City</c>, <c>CheckIn</c>, <c>CheckOut</c> and <c>People</c>
/// are read. The parameter defaults to null for signature compatibility, but a
/// value is required.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="args"/> is null.
/// </exception>
public void RegisterPages(RequestArgs args = null)
{
    // Example query:
    //   https://www.expedia.co.uk/Hotel-Search
    //   ?destination=London+(and+vicinity),+England,+United+Kingdom&
    //   startDate=24/04/2019&
    //   endDate=25/04/2019&
    //   adults=1&
    //   regionId=178279&
    //   lodging=hotel&
    //   sort=distance

    // args is dereferenced below; without this guard the default value produced
    // a NullReferenceException.
    if (args == null)
    {
        throw new ArgumentNullException("args");
    }

    // The example URL uses dd/MM/yyyy. ToShortDateString() follows the current
    // culture (e.g. M/d/yyyy on en-US), so the format is pinned explicitly.
    const string dateFormat = "dd/MM/yyyy";
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    UriBuilder uriBuilder = new UriBuilder(Site.URL);
    uriBuilder.Path = "Hotel-Search"; // same for every page, so set once

    var param = HttpUtility.ParseQueryString(String.Empty);
    param["destination"] = args.City;
    param["startDate"] = args.CheckIn.ToString(dateFormat, invariant);
    param["endDate"] = args.CheckOut.ToString(dateFormat, invariant);
    param["adults"] = args.People;
    param["lodging"] = "hotel";
    param["sort"] = "distance";

    for (int pageNumber = 1; pageNumber < 5; pageNumber++)
    {
        param["page"] = pageNumber.ToString();
        uriBuilder.Query = param.ToString();

        // Substring(1) drops the leading '/' of PathAndQuery.
        var page = Site.AddPage(
            uriBuilder.Uri.PathAndQuery.Substring(1),
            By.Id("resultsContainer"),
            xPathFilter: "//article",
            pageDelay: 50);

        page.AddNode(new NodeRequest
        {
            Property = "Name",
            XPath = "//h3"
        });
        page.AddNode(new NodeRequest
        {
            Property = "HotelURL",
            XPath = "//div[2]/div/a",
            Attribute = "href"
        });
        page.AddNode(new NodeRequest
        {
            Property = "Address",
            XPath = "//div[2]/div/a",
            Attribute = "href",
            Recursive = true,
            RecursiveXPath = "//section/div[1]/div/a/span[1]"
        });
        page.AddNode(new NodeRequest
        {
            Property = "City",
            // Matches elements whose class attribute contains " neighborhood secondary ".
            XPath = "//*[contains(concat(' ',normalize-space(@class),' '),' neighborhood secondary ')]"
        });
        page.AddNode(new NodeRequest
        {
            Property = "Phonenumber",
            XPath = "//div[2]/div/div[1]/div[2]/ul[1]/li[12]/span"
        });
        page.AddNode(new NodeRequest
        {
            Property = "PriceL",
            XPath = "//*[contains(concat(' ',normalize-space(@class),' '),' actualPrice ')]"
        });
        page.AddNode(new NodeRequest
        {
            Property = "Extras",
            XPath = "//div[2]/div/a",
            Attribute = "href",
            Recursive = true,
            RecursiveXPath = "/html/body/div[3]/div[9]/section/div[15]/div[2]/div[1]/div"
        });
    }

    Scraper.AddSite(Site);
}
/// <summary>
/// Registers seven Booking.com search-result pages (50 rows each, offsets 0 to 300)
/// built from <paramref name="args"/>, together with the per-hotel fields to
/// extract, and hands <c>Site</c> to <c>Scraper</c>.
/// </summary>
/// <param name="args">
/// Search criteria; <c>CheckIn</c>, <c>CheckOut</c>, <c>People</c>, <c>Rooms</c>
/// and <c>City</c> are read. The parameter defaults to null for signature
/// compatibility, but a value is required.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="args"/> is null.
/// </exception>
public void RegisterPages(RequestArgs args = null)
{
    // Example query:
    //   https://www.booking.com/searchresults.en-gb.html?
    //   checkin_month=4&
    //   checkin_monthday=26&
    //   checkin_year=2019&
    //   checkout_month=4&
    //   checkout_monthday=27&
    //   checkout_year=2019&
    //   class_interval=1&
    //   group_adults=1&
    //   group_children=0&
    //   no_rooms=1&
    //   order=distance_from_search&
    //   room1=A&
    //   ss=Central%20London%2C%20London%2C%20Greater%20London%2C%20United%20Kingdom&
    //   rows=50&    (amount of rows on page)
    //   offset=60   (offset for the page)

    // args is dereferenced below; without this guard the default value produced
    // a NullReferenceException.
    if (args == null)
    {
        throw new ArgumentNullException("args");
    }

    const int rowsPerPage = 50;
    const int pages = 7;

    // ConvertDate is defined elsewhere; used here as [0] = day, [1] = month, [2] = year.
    string[] checkin = ConvertDate(args.CheckIn);
    string[] checkout = ConvertDate(args.CheckOut);

    UriBuilder uriBuilder = new UriBuilder(Site.URL);
    uriBuilder.Path = "searchresults.en-gb.html"; // same for every page, so set once

    var param = HttpUtility.ParseQueryString(String.Empty);
    param["checkin_month"] = checkin[1];
    param["checkin_monthday"] = checkin[0];
    param["checkin_year"] = checkin[2];
    param["checkout_month"] = checkout[1];
    param["checkout_monthday"] = checkout[0];
    param["checkout_year"] = checkout[2];
    param["group_adults"] = args.People;
    param["group_children"] = "0";
    param["no_rooms"] = args.Rooms;
    param["map"] = "0";
    param["ss"] = args.City;
    param["rows"] = rowsPerPage.ToString();
    param["order"] = "distance_from_search";
    param["percent_htype_hotel"] = "1";
    param["shw_aparth"] = "0";
    // Stored unencoded: param.ToString() URL-encodes every value, so the previous
    // pre-encoded "ht_id%3D204%3B" went out double-encoded (%253D / %253B).
    param["nflt"] = "ht_id=204;";

    for (int pageIndex = 0; pageIndex < pages; pageIndex++)
    {
        param["offset"] = (pageIndex * rowsPerPage).ToString();
        uriBuilder.Query = param.ToString();

        // Substring(1) drops the leading '/' of PathAndQuery.
        var layout = Site.AddPage(
            uriBuilder.Uri.PathAndQuery.Substring(1),
            By.Id("hotellist_inner"),
            xPathFilter: "//*[@id='hotellist_inner']/div/div[2]");

        layout.AddNode(new NodeRequest
        {
            Property = "Name",
            XPath = "//div[1]/div[1]/h3/a/span[1]",
        });
        layout.AddNode(new NodeRequest
        {
            Property = "HotelURL",
            XPath = "//div[1]/div[1]/h3/a",
            Attribute = "href",
        });
        layout.AddNode(new NodeRequest
        {
            Property = "Address",
            XPath = "//div[1]/div[1]/a",
            Attribute = "href",
            Recursive = true,
            RecursiveXPath = "//*[@id='showMap2']/span[2]",
        });
        layout.AddNode(new NodeRequest
        {
            Property = "City",
            // NOTE(review): identical to the "Name" XPath - confirm this is intended.
            XPath = "//div[1]/div[1]/h3/a/span[1]"
        });
        layout.AddNode(new NodeRequest
        {
            Property = "PriceL",
            // Previous positional form: '//div[2]/div/div/div[1]/div/div[2]/div[2]/strong/b'
            XPath = "//div[contains(concat(' ',normalize-space(@class),' '),' roomPrice ')]"
        });
        layout.AddNode(new NodeRequest
        {
            Property = "Extras",
            XPath = "//div[1]/div[1]/h3/a",
            Attribute = "href",
            Recursive = true,
            RecursiveXPath = "//*[@id='hp_facilities_box']/div[4]/div[11]/ul"
        });
    }

    Scraper.AddSite(Site);
}
/// <summary>
/// Registers the Hotels.com "search.do" results page built from
/// <paramref name="args"/>, together with a script that repeatedly requests more
/// infinite-scroll content and the per-hotel fields to extract, then hands
/// <c>Site</c> to <c>Scraper</c>.
/// </summary>
/// <param name="args">
/// Search criteria; <c>City</c>, <c>CheckIn</c>, <c>CheckOut</c>, <c>Rooms</c> and
/// <c>People</c> are read. The parameter defaults to null for signature
/// compatibility, but a value is required.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="args"/> is null.
/// </exception>
public void RegisterPages(RequestArgs args = null)
{
    // Example query:
    //   https://uk.hotels.com/search.do?
    //   q-destination=London,%20England,%20United%20Kingdom&
    //   q-check-in=2019-04-26&
    //   q-check-out=2019-04-27&
    //   q-rooms=1&
    //   q-room-0-adults=1&
    //   q-room-0-children=0&
    //   sort-order=DISTANCE_FROM_LANDMARK&

    // args is dereferenced below; without this guard the default value produced
    // a NullReferenceException.
    if (args == null)
    {
        throw new ArgumentNullException("args");
    }

    UriBuilder uriBuilder = new UriBuilder(Site.URL);
    var param = HttpUtility.ParseQueryString(String.Empty);
    param["q-destination"] = args.City;
    param["q-check-in"] = ConvertDate(args.CheckIn);   // ConvertDate is defined elsewhere
    param["q-check-out"] = ConvertDate(args.CheckOut);
    param["q-rooms"] = args.Rooms;
    param["q-room-0-adults"] = args.People;
    param["q-room-0-children"] = "0";
    param["sort-order"] = "DISTANCE_FROM_LANDMARK";

    uriBuilder.Path = "search.do";
    uriBuilder.Query = param.ToString();

    // Same script as the former hand-unrolled literal: 32 nested timeouts of
    // 750 ms (24 s in total), below the 30000 passed to AddPage
    // (presumably the page delay in ms - TODO confirm).
    string js = BuildInfiniteScrollScript(32, 750);

    // Substring(1) drops the leading '/' of PathAndQuery.
    var layout = Site.AddPage(
        uriBuilder.Uri.PathAndQuery.Substring(1),
        By.ClassName("hotel"),
        js,
        "//*[@id='listings']/ol/li/article/section",
        30000);

    layout.AddNode(new NodeRequest
    {
        Property = "Name",
        XPath = "//div/h3/a"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "HotelURL",
        XPath = "//div/h3/a",
        Attribute = "href"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Address",
        XPath = "//div/address/span"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "City",
        XPath = "//div/div/div[1]/div[1]/a"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "PriceL",
        XPath = "//aside/div[1]/a/strong"
    });
    layout.AddNode(new NodeRequest
    {
        Property = "Extras",
        XPath = "//div/h3/a",
        Attribute = "href",
        Recursive = true,
        // Was 'overview - section - 4': an HTML id cannot contain spaces, so that
        // predicate could never match. NOTE(review): confirm against the live page.
        RecursiveXPath = "//*[@id='overview-section-4']/ul[1]"
    });

    Scraper.AddSite(Site);
}

/// <summary>
/// Builds a script of <paramref name="requests"/> nested <c>setTimeout</c> calls,
/// each invoking <c>dio.widget.InfiniteScroll.instances[0].requestContent()</c>
/// and then scheduling the next one <paramref name="delayMs"/> milliseconds later.
/// </summary>
/// <param name="requests">Number of nested requestContent() calls to emit.</param>
/// <param name="delayMs">Delay passed to every <c>setTimeout</c>.</param>
/// <returns>The JavaScript source, terminated with ';'.</returns>
private static string BuildInfiniteScrollScript(int requests, int delayMs)
{
    const string open = "setTimeout(function(){dio.widget.InfiniteScroll.instances[0].requestContent()";
    string close = "}," + delayMs + ")";

    var script = new System.Text.StringBuilder();
    for (int i = 0; i < requests; i++)
    {
        if (i > 0)
        {
            // Comma operator: run requestContent(), then schedule the next level.
            script.Append(',');
        }
        script.Append(open);
    }
    for (int i = 0; i < requests; i++)
    {
        script.Append(close);
    }
    script.Append(';');
    return script.ToString();
}