/// <summary>
/// Checks whether a tombstone exists in the database saying that this place does
/// NOT have any chicken.
/// </summary>
/// <remarks>
/// A tombstone is a stored <c>ChickenPlace</c> row with the same <c>Id</c> and <c>Source</c>
/// whose <c>HasChicken</c> flag is false. No expiry is applied: once a tombstone has been
/// written, the place keeps being reported as chicken-free.
/// </remarks>
/// <param name="place">The place to look up; only its <c>Id</c> and <c>Source</c> are used.</param>
/// <returns>
/// <c>true</c> if we are sure this place doesn't have chicken (a tombstone exists);
/// <c>false</c> otherwise, which means the caller should check the place itself
/// (and create a tombstone if it turns out not to serve chicken).
/// </returns>
public static bool DoesChickenPlaceNotHaveChicken(ChickenPlace place)
{
    using (var conn = GetConnection())
    {
        // NOTE(review): the predicate is handed to conn.Select rather than evaluated here,
        // so the explicit "== false" comparison is deliberately kept exactly as written.
        var result = conn.Select<ChickenPlace>(q => q.Id == place.Id && q.Source == place.Source && q.HasChicken == false);
        return result.Any();
    }
}
/// <summary>
/// Downloads the JSON store list for the given coordinates and converts every entry
/// into a <c>ChickenPlace</c>.
/// </summary>
/// <param name="loc">Location whose <c>Lat</c> and <c>Long</c> are substituted into <c>BASE_URL</c>.</param>
/// <returns>
/// The places that could be parsed. The list is empty when the download fails or the
/// response cannot be deserialized; individual entries that fail to parse are logged
/// and skipped.
/// </returns>
public List<ChickenPlace> GetAvailablePlaces(Location loc)
{
    List<ChickenPlace> places = new List<ChickenPlace>();

    // Build the URL with the invariant culture so the coordinates never pick up a
    // culture-specific decimal separator.
    string url = string.Format(CultureInfo.InvariantCulture, BASE_URL, loc.Lat, loc.Long);

    string data;
    try
    {
        // WebClient is IDisposable; release it as soon as the payload has been read.
        using (var client = new WebClient())
        {
            data = client.DownloadString(url);
        }
    }
    catch (WebException ex)
    {
        // A network failure yields an empty result instead of escaping to the caller.
        logger.Error("Could not download store list", ex);
        return places;
    }

    List<dynamic> converted_data;
    try
    {
        converted_data = JsonConvert.DeserializeObject<List<dynamic>>(data);
    }
    catch (Exception ex)
    {
        logger.Error("Could not deserialize JSON", ex);
        return places;
    }

    // An empty or literal "null" payload deserializes to null rather than an empty list.
    if (converted_data == null)
    {
        return places;
    }

    foreach (dynamic entry in converted_data)
    {
        try
        {
            string[] possible_addresses = new string[]
            {
                entry.address1.ToString(),
                entry.address2.ToString(),
                entry.address3.ToString(),
                entry.postcode.ToString()
            };

            // Join only the address parts that actually carry text, one per line.
            string addresses = string.Join(
                "\n",
                possible_addresses.Where(address => !string.IsNullOrWhiteSpace(address)));

            ChickenPlace place = new ChickenPlace()
            {
                Id = entry.id.Value.ToString(),
                Source = SOURCE_NAME,
                Name = "KFC:" + CultureInfo.CurrentCulture.TextInfo.ToTitleCase(entry.storeName.Value.ToLower()),
                Address = addresses,
                TelephoneNumber = entry.telno,
                MenuAvaiable = false,
                Location = new Location() { Lat = entry.latitude, Long = entry.longitude },
            };

            places.Add(place);
        }
        catch (Exception ex)
        {
            // The entries are dynamic, so a missing or oddly typed field only shows up
            // at runtime; skip the broken entry and carry on with the rest.
            logger.Error("Failed to add KFC entry", ex);
        }
    }

    return places;
}
/// <summary>
/// Inserts the given place into the database.
/// </summary>
/// <remarks>
/// A failing insert is logged and swallowed, so callers are never interrupted by it.
/// Failures while obtaining the connection are not caught here.
/// </remarks>
/// <param name="place">The place to store.</param>
public static void AddChickenPlace(ChickenPlace place)
{
    using (var connection = GetConnection())
    {
        try
        {
            connection.Insert<ChickenPlace>(place);
        }
        catch (Exception insertError)
        {
            logger.Error("Could not insert place", insertError);
        }
    }
}
/// <summary>
/// Scrapes the open-restaurant listing for the first part of the location's postcode and
/// returns the takeaways that serve chicken.
/// </summary>
/// <remarks>
/// The work happens in two stages:
/// 1. The listing page is parsed and entries whose cuisines do not intersect
///    <c>ALLOWED_CUISINES</c> are discarded. Places with a "no chicken" tombstone are
///    dropped, places already stored in the database are reused as-is, and everything else
///    becomes a candidate.
/// 2. Each candidate's menu page is downloaded in parallel. Places without a "chicken"
///    menu heading are stored as tombstones; the others get their address filled in,
///    are stored and are added to the result.
/// Finally the result list is passed to <c>YQL.GeoLocatePlaces</c>.
/// </remarks>
/// <param name="loc">Location whose <c>FirstPostCode</c> is substituted into <c>BASE_URL</c>.</param>
/// <returns>
/// The chicken places found. The list is empty when the listing page cannot be downloaded
/// or contains no open restaurants.
/// </returns>
public List<ChickenPlace> GetAvailablePlaces(Location loc)
{
    List<ChickenPlace> possible_places = new List<ChickenPlace>();
    List<ChickenPlace> found_places = new List<ChickenPlace>();

    var doc = new HtmlDocument();

    using (var client = new TimeoutWebClient())
    {
        client.SetTimeout(4);
        client.Headers[HttpRequestHeader.UserAgent] = IOS_USER_AGENT;

        string query_url = string.Format(BASE_URL, loc.FirstPostCode);
        logger.Info(string.Format("Downloading page {0}", query_url));

        try
        {
            // Dispose the response stream once parsed so the connection is released.
            using (var listing_stream = client.OpenRead(query_url))
            {
                doc.Load(listing_stream);
            }
        }
        catch (Exception ex)
        {
            logger.Error("Failed to download page", ex);
            return found_places;
        }
    }

    // GetElementbyId returns null when the page has no such element.
    var open_section = doc.GetElementbyId("OpenRestaurants");
    if (open_section == null)
    {
        logger.Info("Found no OpenRestaurants section, returning");
        return found_places;
    }

    var open_place_nodes = open_section.SelectNodes(".//li");
    if (open_place_nodes == null)
    {
        logger.Info("Found no OpenPlaceNodes, returning");
        return found_places;
    }

    // Go through each takeaway and discard some based on their cuisine types.
    foreach (var takeaway in open_place_nodes)
    {
        var place = new ChickenPlace();
        place.Source = SOURCE_NAME;
        place.MenuAvaiable = this.SupportsMenu();

        // Get the <a> tag containing the name and the link to the place.
        var name_link = takeaway.SelectSingleNode("a[contains(@class,'name')]");
        if (name_link == null || name_link.Attributes["href"] == null)
        {
            logger.Error("Could not find the name link of a takeaway entry, skipping it");
            continue;
        }
        place.Id = name_link.Attributes["href"].Value;

        // The <h2> holds the title, the details <div> holds the rating and cuisine types.
        var title_node = name_link.SelectSingleNode("h2");
        var place_details = name_link.SelectSingleNode("./div[@class='restaurantDetails']");
        var cuisine_node = place_details == null
            ? null
            : place_details.SelectSingleNode("./p[@class='cuisineTypeList']");
        if (title_node == null || cuisine_node == null)
        {
            // One malformed entry must not abort the whole scrape.
            logger.Error(string.Format("Could not parse listing entry for {0}, skipping it", place.Id));
            continue;
        }

        // Some titles carry a <span> containing "sponsored"; strip that word from the name.
        place.Name = RemoveUselessCharacters(title_node.InnerText)
            .Replace("sponsored", string.Empty)
            .Replace("Sponsored", string.Empty);

        // Normalise the comma-separated cuisine list and intersect it with the allowed cuisines.
        string cuisine_string = cuisine_node.InnerText.Trim();
        string[] cuisine_list = cuisine_string
            .Split(',')
            .Select(cuisine => RemoveUselessCharacters(cuisine).ToLower())
            .ToArray();
        if (!ALLOWED_CUISINES.Intersect(cuisine_list).Any())
        {
            continue;
        }

        // The rating is encoded in the class attribute of the details' rating <p>;
        // it is currently not extracted.

        // A tombstone means we already know this place has no chicken.
        if (DB.DoesChickenPlaceNotHaveChicken(place))
        {
            continue;
        }

        var saved_results = DB.GetChickenPlaceById(SOURCE_NAME, new string[] { place.Id });
        if (saved_results.Any())
        {
            // Already known: reuse the stored record instead of scraping the menu again.
            found_places.Add(saved_results[0]);
        }
        else
        {
            possible_places.Add(place);
        }
    }

    Parallel.ForEach(possible_places, place =>
    {
        // Now we have to fetch the actual menu page.
        // NOTE: unlike the listing request, no explicit timeout is configured here.
        var menu_doc = new HtmlDocument();
        using (var menu_client = new TimeoutWebClient())
        {
            try
            {
                using (var menu_stream = menu_client.OpenRead(HOST + place.Id))
                {
                    menu_doc.Load(menu_stream);
                }
            }
            catch (Exception ex)
            {
                logger.Error(string.Format("Could not download JustEat page for place {0}", place.Id), ex);
                return;
            }
        }

        // Check if they actually serve fried chicken.
        // XPath 1.0 has no lower-case function, hence the rather ugly translate() hack.
        var has_chicken = menu_doc.DocumentNode.SelectSingleNode(@".//h2[@class='H2MC' and contains(translate(text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'chicken')]");
        if (has_chicken == null)
        {
            // No chicken here. Create a tombstone so the place is skipped next time.
            place.HasChicken = false;
            DB.AddChickenPlace(place);
            return;
        }

        // Get the address (used for geolocating).
        var address_node = menu_doc.DocumentNode.SelectSingleNode(".//span[@itemtype='http://schema.org/PostalAddress']");
        if (address_node == null)
        {
            logger.Error(string.Format("Could not find address for {0}", place.Id));
            return;
        }

        var street_node = address_node.SelectSingleNode(".//span[@itemprop='streetAddress']");
        var locality_node = address_node.SelectSingleNode(".//span[@itemprop='addressLocality']");
        var postcode_node = address_node.SelectSingleNode(".//span[@itemprop='postalCode']");
        if (street_node == null || locality_node == null || postcode_node == null)
        {
            // An exception thrown here would surface as an AggregateException from
            // Parallel.ForEach and take the whole request down, so skip the place instead.
            logger.Error(string.Format("Could not find address for {0}", place.Id));
            return;
        }

        place.Address = string.Format("{0}, {1}, {2}", street_node.InnerText, locality_node.InnerText, postcode_node.InnerText).Trim();
        place.HasChicken = true;

        // found_places is shared between the worker threads; List<T> is not thread-safe.
        lock (found_places)
        {
            found_places.Add(place);
        }

        DB.AddChickenPlace(place);
    });

    // Geocode the places using YQL.
    YQL.GeoLocatePlaces(ref found_places);
    return found_places;
}
/// <summary>
/// Scrapes the restaurant search results for the first part of the location's postcode
/// and returns the places that are currently open.
/// </summary>
/// <remarks>
/// For each result the link (used as id), name, rating, review count, address and
/// coordinates are extracted. A place is only returned when its link, address and
/// coordinates could be parsed; rating and review count are optional.
/// </remarks>
/// <param name="loc">Location whose <c>FirstPostCode</c> is substituted into <c>FETCH_URL</c>.</param>
/// <returns>
/// The open places found. The list is empty when the page cannot be fetched or does not
/// contain the expected results layout.
/// </returns>
public List<ChickenPlace> GetAvailablePlaces(Location loc)
{
    logger.Debug(string.Format("GetAvailablePlaces called: Lat: {0} Long: {1} PostCode: {2}", loc.Lat, loc.Long, loc.PostCode));
    List<ChickenPlace> returner = new List<ChickenPlace>();

    string page_html;
    // WebClient is IDisposable; release it once the page has been downloaded.
    using (var client = new WebClient())
    {
        client.Headers[HttpRequestHeader.UserAgent] = CHROME_USER_AGENT;
        try
        {
            string furl = string.Format(FETCH_URL, loc.FirstPostCode);
            logger.Debug(string.Format("Fetching URL {0}", furl));
            page_html = client.DownloadString(furl);
        }
        catch (Exception ex)
        {
            logger.Error("Could not fetch URL", ex);
            return returner;
        }
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(page_html);

    var results_node = doc.DocumentNode.SelectSingleNode(".//div[@class='restsSearchResultsLayout']");
    if (results_node == null)
    {
        logger.Error("Could not parse restsSearchResultsLayout from HTML response");
        return returner;
    }

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var place_nodes = results_node.SelectNodes(".//div[@class='restsSearchItemRes']");
    if (place_nodes == null)
    {
        logger.Info("Found no restsSearchItemRes nodes, returning");
        return returner;
    }

    foreach (var place_node in place_nodes)
    {
        ChickenPlace place = new ChickenPlace() { Source = SOURCE_NAME, MenuAvaiable = true };
        try
        {
            // Check if the place is open first; closed places are skipped silently.
            var open_node = place_node.SelectSingleNode(".//a[contains(@class,'restsRestStatus')]");
            if (open_node == null)
            {
                logger.Error("restsRestsStatus is null");
                continue;
            }
            if (!open_node.Attributes["class"].Value.Split(' ').Contains("restsStatusOpen"))
            {
                continue;
            }

            var page_link = place_node.SelectSingleNode(".//a[@class='restPageLink']");
            if (page_link == null)
            {
                logger.Error("restPageLink is null");
                continue;
            }
            place.Id = page_link.Attributes["href"].Value;
            place.Name = page_link.Attributes["title"].Value;

            // Get the rating. The node's text looks like "Average rating: X of Y".
            // ToDo: make this a regex?
            var rating_node = place_node.SelectSingleNode(".//div[@class='restsRating']/div");
            if (rating_node != null)
            {
                Rating place_rating = new Rating();
                string rating_text = rating_node.InnerText;
                try
                {
                    string[] x_of_y = rating_text.Split(':')[1].Trim().Split(' ');
                    int thescore = Convert.ToInt32(x_of_y[0]);
                    // "Y" is not used for scaling; it is parsed only so that text which does
                    // not have the expected "X of Y" shape is rejected before a score is set.
                    Convert.ToInt32(x_of_y[2]);
                    place_rating.Score = NumberScale.ScaleNumber(thescore, 0, 50, 100, 0);
                }
                catch (Exception ex)
                {
                    logger.Error("Could not parse rating_node", ex);
                }

                // Get the total number of reviewers. The count is optional, so unparsable
                // text must not make us lose the rest of the place's details.
                var review_count_node = place_node.SelectSingleNode(".//a[@rel='restsReviewsTab']");
                if (review_count_node != null)
                {
                    int review_count;
                    if (int.TryParse(review_count_node.InnerText.Split(' ')[0], out review_count))
                    {
                        place_rating.RatingsCount = review_count;
                    }
                    else
                    {
                        logger.Error("Could not parse review count");
                    }
                }

                // A score of 0 means the rating could not be parsed; leave Rating unset then.
                if (place_rating.Score != 0)
                {
                    place.Rating = place_rating;
                }
            }

            // Get the location of the place. This is a huge hack - the lat/long is encoded
            // in an <a> tag's style attribute which is used to display a google maps map.
            // First get the restsMap node.
            var map_node = place_node.SelectSingleNode(".//div[@class='restsMap']");
            if (map_node == null)
            {
                logger.Error(string.Format("Could not parse map node for place {0}", place.Name));
                continue;
            }

            try
            {
                place.Address = map_node.SelectSingleNode(".//div").InnerText.Replace("\t", string.Empty).Trim();

                // Cut off the trailing "Distance ..." text that follows the address.
                int distance_index = place.Address.IndexOf("Distance", StringComparison.Ordinal);
                if (distance_index >= 0)
                {
                    place.Address = place.Address.Remove(distance_index);
                }
            }
            catch (Exception ex)
            {
                logger.Error("Could not parse the address", ex);
                continue;
            }

            // Get the restsMapImage and extract the coordinates from the URI in its style.
            try
            {
                var node = map_node.SelectSingleNode(".//a[@class='restsMapImage']");
                string map_style = node.Attributes["style"].Value;
                string uri = UriExtractor.Match(map_style).Groups[0].Value;
                var parsed_qs = HttpUtility.ParseQueryString(uri);

                // On mono the key is "center", but on my development machine the key is amp;center. Weird.
                string key = parsed_qs.AllKeys.Contains("center") ? "center" : "amp;center";
                var location = parsed_qs[key].Split(',');

                // The coordinates come from a URL, so they always use '.' as the decimal
                // separator; parse them culture-independently.
                place.Location = new Location()
                {
                    Lat = float.Parse(location[0], System.Globalization.CultureInfo.InvariantCulture),
                    Long = float.Parse(location[1], System.Globalization.CultureInfo.InvariantCulture)
                };
            }
            catch (Exception ex)
            {
                logger.Error("Could not extract location from URI", ex);

                // Dump the offending markup to a temp file to help debugging.
                string fpath = Path.GetTempFileName();
                File.WriteAllText(fpath, map_node.InnerHtml);
                logger.Error(string.Format("Dumped map_node to file: {0}", fpath));
                continue;
            }
        }
        catch (Exception ex)
        {
            // Do not return a half-populated place.
            logger.Error("Could not iterate over place", ex);
            continue;
        }

        returner.Add(place);
    }

    return returner;
}