/**
 * Extracts restaurant info from the saved html code and assigns a new
 * Restaurant built from it to `restaurant`.
 *
 * Every field is looked up with an XPath query against htmlDoc. A field
 * whose node is not present in the page (or whose InnerHtml is null) falls
 * back to "N/A" instead of throwing. Every value, including the fallback,
 * is passed through removeChars before it is handed to the Restaurant
 * constructor together with overallScore.
 */
public override void extractRestaurant()
{
    // SelectNodes yields null (not an empty collection) when the XPath matches
    // nothing, so the collection and the requested index must be validated
    // before touching InnerHtml; checking InnerHtml alone is too late.
    Func<string, int, string> innerHtmlOrDefault = (xpath, index) =>
    {
        var nodes = htmlDoc.DocumentNode.SelectNodes(xpath);
        if (nodes == null || index >= nodes.Count)
        {
            return "N/A";
        }

        return nodes[index].InnerHtml ?? "N/A";
    };

    //Extract restaurant name (the cleaned value is kept; it used to be discarded)
    string restName = removeChars(innerHtmlOrDefault("//h1[@itemprop='name']", 0));

    //Extract cuisine type (second itemprop='title' span on the page)
    string cuisine = removeChars(innerHtmlOrDefault("//span[@itemprop='title']", 1));

    //Extract restaurant address
    string address = removeChars(innerHtmlOrDefault("//span[@itemprop='streetAddress']", 0));

    //Extract restaurant city
    string city = removeChars(innerHtmlOrDefault("//span[@itemprop='addressLocality']", 0));

    //Extract restaurant state
    string state = removeChars(innerHtmlOrDefault("//span[@itemprop='addressRegion']", 0));

    //Extract restaurant zip code
    string zipCode = removeChars(innerHtmlOrDefault("//span[@itemprop='postalCode']", 0));

    //Extract restaurant phone number
    string phone = removeChars(innerHtmlOrDefault("//span[@id='bizPhone']", 0));

    //Extract restaurant website (first link that opens in a new window)
    string website = removeChars(innerHtmlOrDefault("//a[@target='_blank']", 0));

    restaurant = new Restaurant(restName, overallScore, cuisine, address, city, state, zipCode, phone, website);
}
/**
 * Extracts restaurant info from the saved html code and assigns a new
 * Restaurant built from it to `restaurant`.
 *
 * Name, cuisine, address, city, state, zip code and website are read with
 * XPath queries against htmlDoc and fall back to "N/A" when the node is
 * missing. The phone number is pulled out of the text nodes of the
 * itemprop='address' paragraph with a regular expression and stays an empty
 * string when no number is found. Every value is passed through removeChars
 * before it is handed to the Restaurant constructor together with
 * overallScore.
 */
public override void extractRestaurant()
{
    // SelectNodes yields null (not an empty collection) when the XPath matches
    // nothing, so the collection and the requested index must be validated
    // before touching InnerHtml; checking InnerHtml alone is too late.
    Func<string, int, string> innerHtmlOrDefault = (xpath, index) =>
    {
        HtmlNodeCollection nodes = htmlDoc.DocumentNode.SelectNodes(xpath);
        if (nodes == null || index >= nodes.Count)
        {
            return "N/A";
        }

        return nodes[index].InnerHtml ?? "N/A";
    };

    //Extract restaurant name
    string restName = removeChars(innerHtmlOrDefault("//h1[@itemprop='name']", 0));

    //Extract cuisine type
    // The span text is cut one character before the first '|'. The cut is
    // skipped when there is no '|' (or it is the very first character),
    // because Substring would otherwise receive a negative length and throw.
    string cuisine = innerHtmlOrDefault("//span[@class='date']", 0);
    int pipeIndex = cuisine.IndexOf('|');
    if (pipeIndex > 0)
    {
        cuisine = cuisine.Substring(0, pipeIndex - 1);
    }
    cuisine = removeChars(cuisine);

    //Extract restaurant address
    string address = removeChars(innerHtmlOrDefault("//span[@itemprop='streetAddress']", 0));

    //Extract restaurant city
    string city = removeChars(innerHtmlOrDefault("//span[@itemprop='addressLocality']", 0));

    //Extract restaurant state
    string state = removeChars(innerHtmlOrDefault("//span[@itemprop='addressRegion']", 0));

    //Extract restaurant zip code
    string zipCode = removeChars(innerHtmlOrDefault("//span[@itemprop='postalCode']", 0));

    //Extract restaurant phone number
    // Text node query //p[itemprop='address']/text() \d{3}-\d{3}-\d{4}
    string phone = "";
    HtmlNodeCollection addressTextNodes = htmlDoc.DocumentNode.SelectNodes("//p[@itemprop='address']/text()");
    if (addressTextNodes != null)
    {
        foreach (HtmlNode node in addressTextNodes)
        {
            string text = node.OuterHtml.Trim();
            MatchCollection matches = Regex.Matches(text, @"(\(\d{3}\) ?\d{3}( |-)?\d{4}|\d{3}( |-)?\d{3}( |-)?\d{4})", RegexOptions.Multiline);

            // When several numbers are present the last one found wins.
            foreach (Match match in matches)
            {
                phone = match.Captures[0].Value;
            }
        }
    }
    phone = removeChars(phone);

    //Extract restaurant website (second link that opens in a new window)
    string website = removeChars(innerHtmlOrDefault("//a[@target='_blank']", 1));

    restaurant = new Restaurant(restName, overallScore, cuisine, address, city, state, zipCode, phone, website);
}
/**
 * Extracts restaurant info from the saved html code and assigns a new
 * Restaurant built from it to `restaurant`.
 *
 * Name, cuisine, address, city, state and phone are read from the InnerHtml
 * of the first node matching their XPath query. The zip code is whatever
 * follows the (already cleaned) state text inside the `title` attribute of
 * the first 'ga_event cs_track' link, and the website is the `title`
 * attribute of the first 'explore-resto-website' link. Any value that cannot
 * be located falls back to "N/A" instead of throwing. Every value is passed
 * through removeChars before it is handed to the Restaurant constructor
 * together with overallScore.
 */
public override void extractRestaurant()
{
    // SelectNodes yields null (not an empty collection) when the XPath matches
    // nothing, so the collection must be validated before indexing into it;
    // checking InnerHtml alone is too late.
    System.Func<string, string> firstInnerHtmlOrDefault = xpath =>
    {
        var nodes = htmlDoc.DocumentNode.SelectNodes(xpath);
        if (nodes == null || nodes.Count == 0)
        {
            return "N/A";
        }

        return nodes[0].InnerHtml ?? "N/A";
    };

    // Returns the value of the `title` attribute of the first node matching
    // the query, or null when the node or the attribute does not exist.
    System.Func<string, string> firstTitleOrNull = xpath =>
    {
        var nodes = htmlDoc.DocumentNode.SelectNodes(xpath);
        if (nodes == null || nodes.Count == 0)
        {
            return null;
        }

        HtmlAttributeCollection attributes = nodes[0].Attributes;
        return attributes.Where(att => att.Name == "title")
                         .Select(att => att.Value)
                         .FirstOrDefault();
    };

    //Extract restaurant name (the cleaned value is kept; it used to be discarded)
    string restName = removeChars(firstInnerHtmlOrDefault("//h1[@itemprop='name']"));

    //Extract cuisine type
    string cuisine = removeChars(firstInnerHtmlOrDefault("//a[@data-ga_action='explore-resto-cuisine']"));

    //Extract restaurant address
    string address = removeChars(firstInnerHtmlOrDefault("//span[@class='street-address']"));

    //Extract restaurant city
    string city = removeChars(firstInnerHtmlOrDefault("//span[@class='locality']"));

    //Extract restaurant state
    string state = removeChars(firstInnerHtmlOrDefault("//span[@class='region']"));

    //Extract restaurant zip code
    // The zip is the tail of the link title after the state text. If the
    // title is missing, or the state cannot be found in it, there is no
    // reliable split point, so fall back to "N/A" rather than slicing at a
    // bogus offset.
    string zipCode = "N/A";
    string locationTitle = firstTitleOrNull("//a[@class='ga_event cs_track']");
    if (locationTitle != null && !string.IsNullOrEmpty(state))
    {
        int stateIndex = locationTitle.IndexOf(state);
        if (stateIndex >= 0)
        {
            zipCode = locationTitle.Substring(stateIndex + state.Length);
        }
    }
    zipCode = removeChars(zipCode);

    //Extract restaurant phone number
    string phone = removeChars(firstInnerHtmlOrDefault("//div[@class='phone tel']"));

    //Extract restaurant website
    string website = removeChars(firstTitleOrNull("//a[@data-ga_action='explore-resto-website']") ?? "N/A");

    restaurant = new Restaurant(restName, overallScore, cuisine, address, city, state, zipCode, phone, website);
}