Exemple #1
0
        /// <summary>
        /// Registers result pages 1 through 9 of the hotel search with <c>Site</c>
        /// and then hands the site to the scraper. The search itself is a fixed
        /// sample query (1 room, 2 adults, hard-coded city ID, GBP, one night).
        /// </summary>
        /// <param name="args">Not read by this implementation; every query value is hard-coded.</param>
        /// <exception cref="InvalidOperationException">Thrown when <c>Site</c> is null.</exception>
        public void RegisterPages(RequestArgs args)
        {
            if (Site == null)
            {
                throw new InvalidOperationException("Site was not initialised in class with a valid url");
            }

            UriBuilder uriBuilder = new UriBuilder(Site.URL);
            uriBuilder.Path = "hotels/results/"; // identical for every page, so set once

            var param = HttpUtility.ParseQueryString(String.Empty);
            param["rooms"]     = "1";
            param["adults"]    = "2";
            param["city_id"]   = "800013148"; // Travel.com "Londons" City ID
            param["currency"]  = "GBP";
            param["check_in"]  = "22/04/2019";
            param["check_out"] = "23/04/2019"; // was 2018, i.e. a check-out a year before the check-in

            for (int pageNumber = 1; pageNumber < 10; pageNumber++)
            {
                param["page"]    = pageNumber.ToString();
                uriBuilder.Query = param.ToString();

                // Substring(1) drops the leading '/' from the path-and-query.
                var layout = Site.AddPage(uriBuilder.Uri.PathAndQuery.Substring(1), By.Id("rs_main_css"));

                layout.AddNode(new NodeRequest
                {
                    Property = "Name",
                    XPath    = "//div/article/div[2]/div[1]/div[2]/a"
                });
            }

            Scraper.AddSite(Site);
        }
Exemple #2
0
        /// <summary>
        /// Builds the hotel-list search URL from <paramref name="args"/>, registers it
        /// as a page on <c>Site</c> together with the nodes to extract for each hotel
        /// row, and then hands the site to the scraper. Does nothing when <c>Site</c>
        /// is null.
        /// </summary>
        /// <param name="args">Search criteria; Rooms, People, City, CheckIn and CheckOut are read.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <c>Site</c> is set but <paramref name="args"/> is null. Previously this
        /// surfaced as a NullReferenceException because the null check came after the first use.
        /// </exception>
        public void RegisterPages(RequestArgs args)
        {
            if (Site == null)
            {
                return;
            }

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            UriBuilder uriBuilder = new UriBuilder(Site.URL);

            var param = HttpUtility.ParseQueryString(String.Empty);
            param["qRms"]  = args.Rooms;
            param["qAdlt"] = args.People;
            param["qDest"] = args.City;
            param["qRad"]  = "300"; // Radius to search
            param["qRdU"]  = "mi";  // Distance Units

            // ConvertDate is defined elsewhere; index 0 is used as the day and
            // index 1 as the combined month/year value.
            string[] checkin  = ConvertDate(args.CheckIn);
            string[] checkout = ConvertDate(args.CheckOut);

            param["qCiD"]  = checkin[0];  // Check in Date
            param["qCiMy"] = checkin[1];  // Check in Month/Year (Month - 1)
            param["qCoD"]  = checkout[0]; // Check out Date
            param["qCoMy"] = checkout[1]; // Check out Month/Year (Month - 1)

            uriBuilder.Path  = "hotels/gb/en/find-hotels/hotel/list";
            uriBuilder.Query = param.ToString();

            // Calls hotelList.bottomInView() on the Angular scope of the list element;
            // the statement is repeated 60 times in the script handed to AddPage.
            var js =
                "angular.element(document.evaluate('//*[@id=\"applicationWrapper\"]/div[2]/div/div/div[9]/div[2]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue).scope().hotelList.bottomInView();";

            // Substring(1) drops the leading '/' from the path-and-query.
            var layout = Site.AddPage(uriBuilder.Uri.PathAndQuery.Substring(1), By.ClassName("infoSummary"), String.Concat(Enumerable.Repeat(js, 60)), "//div/hotel-row", 850);

            // Nodes are registered in the same order as before.
            var nodes = new[]
            {
                new NodeRequest
                {
                    Property = "Name",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[1]/a"
                },
                new NodeRequest
                {
                    Property  = "HotelURL",
                    XPath     = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[1]/a",
                    Attribute = "href"
                },
                new NodeRequest
                {
                    Property = "City",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[2]/a"
                },
                new NodeRequest
                {
                    Property = "Address",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[1]"
                },
                new NodeRequest
                {
                    Property = "Postcode",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[3]/span[1]"
                },
                new NodeRequest
                {
                    Property = "Country",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[3]/span[2]"
                },
                new NodeRequest
                {
                    Property = "Phonenumber",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-phone-number/div[2]/div/div/a"
                },
                new NodeRequest
                {
                    Property = "PriceL",
                    XPath    = "//div[1]/div/div/div[3]/div[1]/div/div/span[1]"
                },
                new NodeRequest
                {
                    Property = "PriceS",
                    XPath    = "//div[1]/div/div/div[3]/div[1]/div/div/span[2]"
                },
                new NodeRequest
                {
                    Property = "Available",
                    XPath    = "//div[1]/div/div/div[3]/div/div/div[1]"
                },
                new NodeRequest
                {
                    Property = "Currency",
                    XPath    = "//div[1]/div/div/div[3]/div[1]/div/span[2]"
                },
                new NodeRequest
                {
                    Property = "Extras",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[2]/div[2]/ul"
                }
            };

            foreach (var node in nodes)
            {
                layout.AddNode(node);
            }

            Scraper.AddSite(Site);
        }
Exemple #3
0
        /// <summary>
        /// Builds the IHG hotel-list search URL from <paramref name="args"/>, registers
        /// it as a page on <c>Site</c> together with the nodes to extract for each
        /// hotel row, and then hands the site to the scraper. Does nothing when
        /// <c>Site</c> is null.
        /// </summary>
        /// <param name="args">Search criteria; Rooms, People, City, CheckIn and CheckOut are read.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <c>Site</c> is set but <paramref name="args"/> is null. Previously this
        /// surfaced as a NullReferenceException because the null check came after the first use.
        /// </exception>
        public void RegisterPages(RequestArgs args)
        {
            if (Site == null)
            {
                return;
            }

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            UriBuilder uriBuilder = new UriBuilder(Site.URL);

            // https://www.ihg.com/
            // e.g. query: https://www.ihg.com/hotels/gb/en/find-hotels/hotel/list?qDest=London,%20United%20Kingdom&
            // qCiMy=32019
            // qCiD=21
            // qCoMy=32019
            // qCoD=22
            // qAdlt=2
            // qChld=0
            // qRms=1
            // qRtP=6CBARC
            // qAkamaiCC=GB
            // qSrt=sDD
            // qBrs=re.ic.in.vn.cp.vx.hi.ex.rs.cv.sb.cw.ma.ul.ki.va
            // srb_u=0
            // qRad=30
            // qRdU=mi

            var param = HttpUtility.ParseQueryString(String.Empty);
            param["qRms"]  = args.Rooms;
            param["qAdlt"] = args.People;
            param["qDest"] = args.City; // Destination text, e.g. "London, United Kingdom"
            param["qRad"]  = "300";     // Radius to search
            param["qRdU"]  = "mi";      // Distance Units

            // ConvertDate is defined elsewhere; index 0 is used as the day and
            // index 1 as the combined month/year value (see qCiMy/qCiD above).
            string[] checkin  = ConvertDate(args.CheckIn);
            string[] checkout = ConvertDate(args.CheckOut);

            param["qCiD"]  = checkin[0];  // Check in Date
            param["qCiMy"] = checkin[1];  // Check in Month/Year (Month - 1)
            param["qCoD"]  = checkout[0]; // Check out Date
            param["qCoMy"] = checkout[1]; // Check out Month/Year (Month - 1)

            uriBuilder.Path  = "hotels/gb/en/find-hotels/hotel/list";
            uriBuilder.Query = param.ToString();

            // Calls hotelList.bottomInView() on the Angular scope of the list element;
            // the statement is repeated 60 times in the script handed to AddPage.
            var js =
                "angular.element(document.evaluate('//*[@id=\"applicationWrapper\"]/div[2]/div/div/div[9]/div[2]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue).scope().hotelList.bottomInView();";

            // Substring(1) drops the leading '/' from the path-and-query.
            var layout = Site.AddPage(uriBuilder.Uri.PathAndQuery.Substring(1), By.Id("topOfPage"), String.Concat(Enumerable.Repeat(js, 60)), "//div/hotel-row", 850);

            // Nodes are registered in the same order as before.
            var nodes = new[]
            {
                new NodeRequest
                {
                    Property = "Name",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[1]/a"
                },
                new NodeRequest
                {
                    Property  = "HotelURL",
                    XPath     = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[1]/a",
                    Attribute = "href"
                },
                new NodeRequest
                {
                    Property = "City",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[1]/div/div/span[2]/a"
                },
                new NodeRequest
                {
                    Property = "Address",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[1]"
                },
                new NodeRequest
                {
                    Property = "Postcode",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[3]/span[1]"
                },
                new NodeRequest
                {
                    Property = "Country",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-address/div/div/span[3]/span[2]"
                },
                new NodeRequest
                {
                    Property = "Phonenumber",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[2]/div[1]/hotel-phone-number/div[2]/div/div/a"
                },
                new NodeRequest
                {
                    Property = "PriceL",
                    XPath    = "//div[1]/div/div/div[3]/div[1]/div/div/span[1]"
                },
                new NodeRequest
                {
                    Property = "PriceS",
                    XPath    = "//div[1]/div/div/div[3]/div[1]/div/div/span[2]"
                },
                new NodeRequest
                {
                    Property = "Available",
                    XPath    = "//div[1]/div/div/div[3]/div/div/div[1]"
                },
                new NodeRequest
                {
                    Property = "Currency",
                    XPath    = "//div[1]/div/div/div[3]/div[1]/div/span[2]"
                },
                new NodeRequest
                {
                    Property = "Extras",
                    XPath    = "//div[1]/div/div/hotel-details/div/div/div[2]/div[2]/ul"
                }
            };

            foreach (var node in nodes)
            {
                layout.AddNode(node);
            }

            Scraper.AddSite(Site);
        }
Exemple #4
0
        /// <summary>
        /// Registers result pages 1 through 4 of the Expedia hotel search built from
        /// <paramref name="args"/>, each with the nodes to extract per result article,
        /// and then hands the site to the scraper.
        /// </summary>
        /// <param name="args">
        /// Search criteria; City, CheckIn, CheckOut and People are read. The parameter keeps
        /// its null default for signature compatibility, but a value is required.
        /// </param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="args"/> is null.</exception>
        /// <exception cref="InvalidOperationException">Thrown when <c>Site</c> is null.</exception>
        public void RegisterPages(RequestArgs args = null)
        {
            //https://www.expedia.co.uk/Hotel-Search
            //?destination=London+(and+vicinity),+England,+United+Kingdom&s
            //startDate=24/04/2019&
            //endDate=25/04/2019&
            //adults=1&
            //regionId=178279&
            //lodging=hotel&
            //sort=distance

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            if (Site == null)
            {
                throw new InvalidOperationException("Site was not initialised in class with a valid url");
            }

            // The query above uses day/month/year. ToShortDateString() follows the
            // machine's culture (e.g. month/day/year on en-US), so format explicitly.
            const string dateFormat = "dd/MM/yyyy";
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            UriBuilder uriBuilder = new UriBuilder(Site.URL);
            uriBuilder.Path = "Hotel-Search"; // identical for every page, so set once

            var param = HttpUtility.ParseQueryString(String.Empty);

            param["destination"] = args.City;
            param["startDate"]   = args.CheckIn.ToString(dateFormat, invariant);
            param["endDate"]     = args.CheckOut.ToString(dateFormat, invariant);
            param["adults"]      = args.People;
            param["lodging"]     = "hotel";
            param["sort"]        = "distance";

            for (int pageNumber = 1; pageNumber < 5; pageNumber++)
            {
                param["page"]    = pageNumber.ToString();
                uriBuilder.Query = param.ToString();

                // Substring(1) drops the leading '/' from the path-and-query.
                var page = Site.AddPage(uriBuilder.Uri.PathAndQuery.Substring(1), By.Id("resultsContainer"), xPathFilter: "//article", pageDelay: 50);

                page.AddNode(new NodeRequest
                {
                    Property = "Name",
                    XPath    = "//h3"
                });

                page.AddNode(new NodeRequest
                {
                    Property  = "HotelURL",
                    XPath     = "//div[2]/div/a",
                    Attribute = "href"
                });

                // Recursive nodes take the href found by XPath and evaluate
                // RecursiveXPath against it (presumably on the linked hotel page).
                page.AddNode(new NodeRequest
                {
                    Property       = "Address",
                    XPath          = "//div[2]/div/a",
                    Attribute      = "href",
                    Recursive      = true,
                    RecursiveXPath = "//section/div[1]/div/a/span[1]"
                });

                page.AddNode(new NodeRequest
                {
                    Property = "City",
                    XPath    = "//*[contains(concat(' ',normalize-space(@class),' '),' neighborhood secondary ')]" // Matches elements whose class attribute contains this value
                });

                page.AddNode(new NodeRequest
                {
                    Property = "Phonenumber",
                    XPath    = "//div[2]/div/div[1]/div[2]/ul[1]/li[12]/span"
                });

                page.AddNode(new NodeRequest
                {
                    Property = "PriceL",
                    XPath    = "//*[contains(concat(' ',normalize-space(@class),' '),' actualPrice ')]"
                });

                page.AddNode(new NodeRequest
                {
                    Property       = "Extras",
                    XPath          = "//div[2]/div/a",
                    Attribute      = "href",
                    Recursive      = true,
                    RecursiveXPath = "/html/body/div[3]/div[9]/section/div[15]/div[2]/div[1]/div"
                });
            }

            Scraper.AddSite(Site);
        }
Exemple #5
0
        /// <summary>
        /// Registers seven Booking.com search result pages (50 rows each, addressed by
        /// offset) built from <paramref name="args"/>, each with the nodes to extract
        /// per hotel entry, and then hands the site to the scraper.
        /// </summary>
        /// <param name="args">
        /// Search criteria; CheckIn, CheckOut, People, Rooms and City are read. The parameter
        /// keeps its null default for signature compatibility, but a value is required.
        /// </param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="args"/> is null.</exception>
        /// <exception cref="InvalidOperationException">Thrown when <c>Site</c> is null.</exception>
        public void RegisterPages(RequestArgs args = null)
        {
            //https://www.booking.com/searchresults.en-gb.html?
            //checkin_month=4&
            //checkin_monthday=26&
            //checkin_year=2019&
            //checkout_month=4&
            //checkout_monthday=27&
            //checkout_year=2019&
            //class_interval=1&
            //group_adults=1&
            //group_children=0&
            //no_rooms=1&
            //order=distance_from_search&
            //room1=A&
            //ss=Central%20London%2C%20London%2C%20Greater%20London%2C%20United%20Kingdom&
            //rows=50& (Amount of Rows on page)
            //offset=60 (Offset for the page)

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            if (Site == null)
            {
                throw new InvalidOperationException("Site was not initialised in class with a valid url");
            }

            const int rowsPerPage = 50; // drives both the "rows" value and the offset step
            const int pageCount   = 7;

            UriBuilder uriBuilder = new UriBuilder(Site.URL);
            uriBuilder.Path = "searchresults.en-gb.html"; // identical for every page, so set once

            var param = HttpUtility.ParseQueryString(String.Empty);

            // ConvertDate is defined elsewhere; here index 0 is used as the day,
            // index 1 as the month and index 2 as the year.
            string[] checkin  = ConvertDate(args.CheckIn);
            string[] checkout = ConvertDate(args.CheckOut);
            param["checkin_month"]     = checkin[1];
            param["checkin_monthday"]  = checkin[0];
            param["checkin_year"]      = checkin[2];
            param["checkout_month"]    = checkout[1];
            param["checkout_monthday"] = checkout[0];
            param["checkout_year"]     = checkout[2];

            param["group_adults"]        = args.People;
            param["group_children"]      = "0";
            param["no_rooms"]            = args.Rooms;
            param["map"]                 = "0";
            param["ss"]                  = args.City;
            param["rows"]                = rowsPerPage.ToString();
            param["order"]               = "distance_from_search";
            param["percent_htype_hotel"] = "1";
            param["shw_aparth"]          = "0";
            // Store the raw value: param.ToString() percent-encodes it. The previous
            // pre-encoded "ht_id%3D204%3B" was encoded a second time (%253D / %253B).
            param["nflt"]                = "ht_id=204;";

            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++)
            {
                param["offset"]  = (pageIndex * rowsPerPage).ToString();
                uriBuilder.Query = param.ToString();

                // Substring(1) drops the leading '/' from the path-and-query.
                var layout = Site.AddPage(uriBuilder.Uri.PathAndQuery.Substring(1), By.Id("hotellist_inner"), xPathFilter: "//*[@id='hotellist_inner']/div/div[2]");

                layout.AddNode(new NodeRequest
                {
                    Property = "Name",
                    XPath    = "//div[1]/div[1]/h3/a/span[1]"
                });

                layout.AddNode(new NodeRequest
                {
                    Property  = "HotelURL",
                    XPath     = "//div[1]/div[1]/h3/a",
                    Attribute = "href"
                });

                // Recursive nodes take the href found by XPath and evaluate
                // RecursiveXPath against it (presumably on the linked hotel page).
                layout.AddNode(new NodeRequest
                {
                    Property       = "Address",
                    XPath          = "//div[1]/div[1]/a",
                    Attribute      = "href",
                    Recursive      = true,
                    RecursiveXPath = "//*[@id='showMap2']/span[2]"
                });

                // NOTE(review): same XPath as "Name" above - confirm this is intended.
                layout.AddNode(new NodeRequest
                {
                    Property = "City",
                    XPath    = "//div[1]/div[1]/h3/a/span[1]"
                });

                layout.AddNode(new NodeRequest
                {
                    Property = "PriceL",
                    XPath    = "//div[contains(concat(' ',normalize-space(@class),' '),' roomPrice ')]"
                });

                layout.AddNode(new NodeRequest
                {
                    Property       = "Extras",
                    XPath          = "//div[1]/div[1]/h3/a",
                    Attribute      = "href",
                    Recursive      = true,
                    RecursiveXPath = "//*[@id='hp_facilities_box']/div[4]/div[11]/ul"
                });
            }

            Scraper.AddSite(Site);
        }
Exemple #6
0
        /// <summary>
        /// Registers the Hotels.com search page built from <paramref name="args"/>,
        /// together with a script that repeatedly asks the page's infinite-scroll
        /// widget for more content and the nodes to extract per listing, and then
        /// hands the site to the scraper.
        /// </summary>
        /// <param name="args">
        /// Search criteria; City, CheckIn, CheckOut, Rooms and People are read. The parameter
        /// keeps its null default for signature compatibility, but a value is required.
        /// </param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="args"/> is null.</exception>
        /// <exception cref="InvalidOperationException">Thrown when <c>Site</c> is null.</exception>
        public void RegisterPages(RequestArgs args = null)
        {
            //https://uk.hotels.com/search.do?
            //q-destination=London,%20England,%20United%20Kingdom&
            //q-check-in=2019-04-26&
            //q-check-out=2019-04-27&
            //q-rooms=1&
            //q-room-0-adults=1&
            //q-room-0-children=0&
            //sort-order=DISTANCE_FROM_LANDMARK&

            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            if (Site == null)
            {
                throw new InvalidOperationException("Site was not initialised in class with a valid url");
            }

            UriBuilder uriBuilder = new UriBuilder(Site.URL);

            var param = HttpUtility.ParseQueryString(String.Empty);

            param["q-destination"]     = args.City;
            param["q-check-in"]        = ConvertDate(args.CheckIn);
            param["q-check-out"]       = ConvertDate(args.CheckOut);
            param["q-rooms"]           = args.Rooms;
            param["q-room-0-adults"]   = args.People;
            param["q-room-0-children"] = "0";
            param["sort-order"]        = "DISTANCE_FROM_LANDMARK";

            uriBuilder.Path  = "search.do";
            uriBuilder.Query = param.ToString();

            // 32 chained requests, 750 ms apart, matching the script that used to be
            // written out by hand as a single literal (which had been split across
            // two source lines and could not compile).
            string js = BuildInfiniteScrollScript(32, 750);

            // Substring(1) drops the leading '/' from the path-and-query.
            var layout = Site.AddPage(uriBuilder.Uri.PathAndQuery.Substring(1), By.ClassName("hotel"),
                                      js, "//*[@id='listings']/ol/li/article/section", 30000);

            layout.AddNode(new NodeRequest
            {
                Property = "Name",
                XPath    = "//div/h3/a"
            });

            layout.AddNode(new NodeRequest
            {
                Property  = "HotelURL",
                XPath     = "//div/h3/a",
                Attribute = "href"
            });

            layout.AddNode(new NodeRequest
            {
                Property = "Address",
                XPath    = "//div/address/span"
            });

            layout.AddNode(new NodeRequest
            {
                Property = "City",
                XPath    = "//div/div/div[1]/div[1]/a"
            });

            layout.AddNode(new NodeRequest
            {
                Property = "PriceL",
                XPath    = "//aside/div[1]/a/strong"
            });

            // NOTE(review): the id below contains spaces around the hyphens
            // ('overview - section - 4'); this looks like formatter damage to
            // 'overview-section-4' - confirm against the live page before changing.
            layout.AddNode(new NodeRequest
            {
                Property       = "Extras",
                XPath          = "//div/h3/a",
                Attribute      = "href",
                Recursive      = true,
                RecursiveXPath = "//*[@id='overview - section - 4']/ul[1]"
            });

            Scraper.AddSite(Site);
        }

        /// <summary>
        /// Builds a JavaScript statement made of nested <c>setTimeout</c> calls. Each timer
        /// calls <c>dio.widget.InfiniteScroll.instances[0].requestContent()</c> and, except
        /// for the innermost one, schedules the next timer.
        /// </summary>
        /// <param name="requestCount">Number of nested timers; values below 1 still produce one timer.</param>
        /// <param name="delayMs">Delay passed to every <c>setTimeout</c>, in milliseconds.</param>
        /// <returns>The script text, terminated with a semicolon.</returns>
        private static string BuildInfiniteScrollScript(int requestCount, int delayMs)
        {
            const string openTimer      = "setTimeout(function(){";
            const string requestContent = "dio.widget.InfiniteScroll.instances[0].requestContent()";
            string closeTimer = "}," + delayMs.ToString(System.Globalization.CultureInfo.InvariantCulture) + ")";

            // Build from the inside out: start with the innermost timer, then wrap it.
            string script = openTimer + requestContent + closeTimer;
            for (int depth = 1; depth < requestCount; depth++)
            {
                script = openTimer + requestContent + "," + script + closeTimer;
            }

            return script + ";";
        }