示例#1
0
        public ChickenMenuRequestResponse GetPlaceMenu(string id)
        {
            logger.Info(string.Format("Fetching place menu: {0}", id));
            var returner = new List<ChickenMenu>();
            var page = new HtmlDocument();

            var client = new TimeoutWebClient();
            client.SetTimeout(4);
            client.Headers[HttpRequestHeader.UserAgent] = IOS_USER_AGENT;
            try
            {
                logger.Info(string.Format("Downloading JustEat page {0}", HOST + id));
                page.Load(client.OpenRead(HOST + id));

                logger.Debug("Downloaded page");
            }
            catch (Exception ex)
            {
                logger.Error("Could not fetch JustEat page", ex);
                return new ChickenMenuRequestResponse() { Result = null };
            }

            // Loop through each category
            var product_nodes = page.DocumentNode.SelectNodes(".//li[contains(@class, 'cat')]");
            if (product_nodes == null)
            {
                logger.Error("Could not select product_nodes");
                return new ChickenMenuRequestResponse() { Result = null };
            }

            foreach (var node in product_nodes)
            {
                // Check if the title contains chicken
                if (node.SelectSingleNode(".//h2[@class='trigger']").InnerText.ToLower().Contains("chicken"))
                {
                    // Go through each item and build our ChickenMenu list
                    foreach (var product_node in node.SelectNodes(".//li[contains(@class,'multi') or contains(@class,'single')]"))
                    {
                        // This is pretty f***ing fragile. A small change could break it.
                        // Solution? Wrap it in a catch-all :)
                        try
                        {
                            // Some products are an umbrella with multiple sub-products. This is handled here:
                            if (product_node.Attributes["class"].Value.ToLower().Contains("multi"))
                            {
                                string name = product_node.SelectSingleNode(".//h2").InnerText;
                                foreach (var sub_node in product_node.SelectNodes(".//li[contains(@class, 'product-row')]"))
                                {
                                    ChickenMenu item = new ChickenMenu();

                                    item.Name = string.Format("{0}: {1}", name, sub_node.SelectSingleNode(".//span[contains(@class,'vname')").InnerText);
                                    item.Price = Convert.ToDecimal(product_node.SelectSingleNode(".//span[contains(@class,'vprice')]").InnerText.Split(' ')[1]);
                                    returner.Add(item);
                                }
                            }
                            else
                            {
                                string name = product_node.SelectSingleNode(".//h3").InnerText;
                                ChickenMenu item = new ChickenMenu();
                                item.Name = name;
                                item.Price = Convert.ToDecimal(product_node.SelectSingleNode(".//span[contains(@class,'vprice')]").InnerText.Split(' ')[1]);
                                if (product_node.SelectSingleNode(".//div[@class='description']") != null)
                                {
                                    item.Description = product_node.SelectSingleNode(".//div[@class='description']").InnerText;
                                }
                                returner.Add(item);
                            }
                        }
                        catch (Exception ex) {
                            logger.Error("Error parsing ChickenMenuItem", ex);
                        }
                    }
                    // Break out of the loop if we have found our chicken header
                    break;
                }
            }

            return new ChickenMenuRequestResponse() { Result = returner };
        }
示例#2
0
        public List<ChickenPlace> GetAvailablePlaces(Location loc)
        {
            List<ChickenPlace> possible_places = new List<ChickenPlace>();
            List<ChickenPlace> found_places = new List<ChickenPlace>();

            var client = new TimeoutWebClient();
            client.SetTimeout(4);
            client.Headers[HttpRequestHeader.UserAgent] = IOS_USER_AGENT;

            var doc = new HtmlDocument();
            string query_url = string.Format(BASE_URL, loc.FirstPostCode);
            logger.Info(string.Format("Downloading page {0}", query_url));
            try
            {
                doc.Load(client.OpenRead(query_url));
            }
            catch (Exception ex)
            {
                logger.Error("Failed to download page", ex);
                return found_places;
            }

            var OpenSections = doc.GetElementbyId("OpenRestaurants");
            var OpenPlaceNodes = OpenSections.SelectNodes(".//li");
            if (OpenPlaceNodes == null)
            {
                logger.Info("Found no OpenPlaceNodes, returning");
                return found_places;
            }

            // Go through each takeaway and discard some based on their cuisine types.
            foreach (var TakeAway in OpenPlaceNodes)
            {
                var place = new ChickenPlace();
                place.Source = SOURCE_NAME;
                place.MenuAvaiable = this.SupportsMenu();
                // Get the <a> tag containing the name and the link to the place
                var name_link = TakeAway.SelectSingleNode("a[contains(@class,'name')]");
                place.Id = name_link.Attributes["href"].Value;

                // Extract the title of the place. Some titles have <span> elements with "sponsored" in them, if this is the case then skip it.
                var takeaway_name_base = name_link.SelectSingleNode("h2");
                var name_node = takeaway_name_base;
                if (takeaway_name_base.SelectSingleNode("span") != null) name_node = takeaway_name_base.SelectSingleNode("span");

                place.Name = RemoveUselessCharacters(takeaway_name_base.InnerText).Replace("sponsored", string.Empty).Replace("Sponsored", string.Empty);

                // Get the <div> that contains the rating and cuisine types
                var place_details = name_link.SelectSingleNode("./div[@class='restaurantDetails']");
                // Get a list of cuisine types
                string cuisine_string = place_details.SelectSingleNode("./p[@class='cuisineTypeList']").InnerText.Trim();
                string[] cuisine_list = (from cuisine in cuisine_string.Split(',') select RemoveUselessCharacters(cuisine).ToLower()).ToArray();
                // Take the list of cuisines and intersect it with the allowed cuisine list
                var cuisine_intersection = ALLOWED_CUISINES.Intersect(cuisine_list);
                if (cuisine_intersection.Count() == 0) continue;

                // Get the rating. Its a bit of a hack, but w/e
                var rating_node = place_details.SelectSingleNode("./p[contains(@class,'rating')]");
                // The rating itself is in the class.
                //place.Rating = NumberScale.ScaleNumber(Convert.ToInt32(rating_node.Attributes["class"].Value.Split('-')[1]), 0, 60, 100, 0);

                if (!DB.DoesChickenPlaceNotHaveChicken(place))
                {
                    var saved_results = DB.GetChickenPlaceById(SOURCE_NAME, new string[] { place.Id });
                    if (saved_results.Count() == 0)
                    {
                        possible_places.Add(place);
                    }
                    else
                    {
                        found_places.Add(saved_results[0]);
                    }
                };
            };

            Parallel.ForEach(possible_places, place =>
            {
                // Ok. Now we have to fetch the actual menu page
                var menu_doc = new HtmlDocument();
                var menu_client = new TimeoutWebClient();
                //menu_client.SetTimeout(3);
                try
                {
                    menu_doc.Load(menu_client.OpenRead(HOST + place.Id));
                }
                catch (Exception ex)
                {
                    logger.Error(string.Format("Could not download JustEat page for place {0}", place.Id), ex);
                    return;
                }

                // Check if they actually serve fried chicken
                // XPath has now lower-case function (for some insane reason), hence the use of the rather ugly translate hack.
                var has_chicken = menu_doc.DocumentNode.SelectSingleNode(@".//h2[@class='H2MC' and
                                                                contains(translate(text(),
                                                                                   'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
                                                                                   'abcdefghijklmnopqrstuvwxyz'),
                                                                         'chicken')]");
                if (has_chicken == null)
                {
                    // No chicken here. Create a tombstone
                    place.HasChicken = false;
                    DB.AddChickenPlace(place);
                    return;
                };

                // Get the address (used for geolocating)
                var address_node = menu_doc.DocumentNode.SelectSingleNode(".//span[@itemtype='http://schema.org/PostalAddress']");

                if (address_node == null) {
                    logger.Error(string.Format("Could not find address for {0}", place.Id));
                    return;
                }

                var address_street = address_node.SelectSingleNode(".//span[@itemprop='streetAddress']").InnerText;
                var address_place = address_node.SelectSingleNode(".//span[@itemprop='addressLocality']").InnerText;
                var address_postcode = address_node.SelectSingleNode(".//span[@itemprop='postalCode']").InnerText;
                place.Address = string.Format("{0}, {1}, {2}", address_street, address_place, address_postcode).Trim();

                lock (found_places) found_places.Add(place);
                place.HasChicken = true;
                DB.AddChickenPlace(place);

            });

            // Lets GeoCode the *shit* out of our places using YQL
            YQL.GeoLocatePlaces(ref found_places);

            return found_places;
        }
        public ChickenMenuRequestResponse GetPlaceMenu(string id)
        {
            this.logger.Debug("Got ChickenMenu request");
            var returner = new List<ChickenMenu>();

            TimeoutWebClient client = new TimeoutWebClient();
            client.SetTimeout(2);
            client.Headers[HttpRequestHeader.UserAgent] = CHROME_USER_AGENT;
            HtmlDocument doc = new HtmlDocument();
            string menu_url = string.Format(MENU_URL, id);
            logger.Error(string.Format("Downloading page {0}", menu_url));

            string page_html;
            try
            {
                page_html = client.DownloadString(menu_url);
                logger.Debug("Downloaded page");
            }
            catch (Exception e)
            {
                logger.Error(string.Format("Could not fetch menu page: {0}", menu_url), e);
                return new ChickenMenuRequestResponse() { Result = null };
            }

            doc.LoadHtml(page_html);

            foreach (var item_node in doc.DocumentNode.SelectNodes(".//tr[contains(@class, 'menuItem')]"))
            {
                string item_name;
                decimal price;

                try
                {
                    item_name = item_node.SelectSingleNode(".//div[@class='menuItemName']/a").InnerText;
                }
                catch (Exception e)
                {
                    logger.Error("Could not get menuItemName from item_node", e);
                    continue;
                }

                try
                {
                    // http://stackoverflow.com/questions/3575331/how-do-extract-decimal-number-from-string-in-c-sharp
                    string to_extract = item_node.SelectSingleNode(".//div[@class='menuItemPrice']").InnerText;
                    var match = Regex.Split(to_extract, @"[^0-9\.]+").Where(c => c != "." && c.Trim() != "").ToArray();
                    if (match.Count() == 0)
                    {
                        logger.Error("Got 0 matches for price");
                        continue;
                    }
                    price = Convert.ToDecimal(match[0]);
                }
                catch (FormatException ex)
                {
                    logger.Error("Could not convert price to decimal", ex);
                    continue;
                }

                returner.Add(new ChickenMenu() { Name = item_name, Price = price });
            }

            return new ChickenMenuRequestResponse() { Result = returner };
        }