/// <summary>
/// Splits a raw hotel heading into the <c>Name</c> and <c>Location</c> members.
/// </summary>
/// <remarks>
/// Strategies are tried in order:
/// 1. a comma in <paramref name="name"/> separates name from location;
/// 2. the first occurrence of a known capital-city name starts the location;
/// 3. otherwise the whole heading is the name and the location is read from
///    the single "City" breadcrumb link under <paramref name="node"/>.
/// </remarks>
/// <param name="node">Node searched for the city breadcrumb in the fallback case.</param>
/// <param name="name">Raw heading text to split.</param>
private void getHotelLocation(HtmlNode node, string name)
{
    int comma = name.IndexOf(',');
    if (comma != -1)
    {
        // Trim both halves so "Hotel X, Sydney" does not yield " Sydney".
        Name = name.Substring(0, comma).Trim();
        Location = name.Substring(comma + 1).Trim();
        return;
    }

    string[] capitals = { "Adelaide", "Brisbane", "Canberra", "Darwin", "Hobart", "Melbourne", "Sydney", "Perth" };
    foreach (string capital in capitals)
    {
        // Ordinal, case-insensitive search: no per-iteration upper-casing and
        // no dependence on the current culture's casing rules.
        int loc = name.IndexOf(capital, System.StringComparison.OrdinalIgnoreCase);
        if (loc != -1)
        {
            Name = name.Substring(0, loc).Trim();
            Location = name.Substring(loc).Trim();
            return;
        }
    }

    // No location in the heading itself: fall back to the breadcrumb.
    // Single() throws if there is not exactly one matching breadcrumb span.
    Name = name;
    Location = node.CssSelect("a.breadcrumb_link[onclick*='City'] > span").Single().InnerText.Trim();
}
/// <summary>
/// Reads the date of a search result from its first <c>.date</c> element.
/// </summary>
/// <param name="item">Search-result node to inspect.</param>
/// <returns>
/// The parsed date, or null when no <c>.date</c> element exists or its text
/// is not accepted by <c>DateTime.TryParse</c>.
/// </returns>
public override DateTime? GetSearchResultDateTime(HtmlNode item)
{
    var dateNode = item.CssSelect(".date").FirstOrDefault();
    if (dateNode == null)
    {
        return null;
    }

    DateTime parsed;
    return DateTime.TryParse(dateNode.InnerText, out parsed)
        ? parsed
        : (DateTime?)null;
}
/// <summary>
/// Extracts the odds value from a table cell.
/// </summary>
/// <param name="cell">Cell expected to contain exactly one odds span.</param>
/// <returns>The value produced by <c>ParseNumber</c> for the span text.</returns>
/// <exception cref="Exception">The cell does not contain exactly one odds span.</exception>
private double GetOdds(HtmlNode cell)
{
    HtmlNode[] matches = cell.CssSelect(String.Format("{0}.{1}", SPAN, ODDS_CLASS)).ToArray();
    if (matches.Length != 1)
    {
        throw new Exception("the number of odds is not exactly one");
    }

    // Strip surrounding parentheses and spaces before parsing.
    string text = matches[0].InnerText.Trim('(', ')', ' ');
    return ParseNumber(text);
}
/// <summary>
/// Determines the location name of a search result.
/// </summary>
/// <remarks>
/// Uses the text of the <c>small</c> child of the first <c>.pnr</c> element
/// when present. Otherwise it falls back to the first word-character run
/// after the scheme of the result URI, captured as the <c>city</c> group.
/// </remarks>
/// <param name="item">Search-result node to inspect.</param>
/// <returns>
/// The location name, or null when there is no <c>.pnr</c> element or the
/// URI does not match the expected pattern.
/// </returns>
public override string GetSearchResultLocationName(HtmlNode item)
{
    var locationElement = item.CssSelect(".pnr").FirstOrDefault();
    if (locationElement == null)
    {
        return null;
    }

    var small = locationElement.Element("small");
    if (small != null)
    {
        return small.InnerText.Trim();
    }

    var su = GetSearchResultUri(item).ToString();
    var regex = new System.Text.RegularExpressions.Regex("(?:https?://)(?<city>[\\w]+)");
    var cityGroup = regex.Match(su).Groups["city"];

    // The Groups indexer never returns null; Success is the only reliable
    // signal that the group actually captured something.
    return cityGroup.Success ? cityGroup.Value.Trim() : null;
}
/// <summary>
/// Locates the single fixtures table body on a page.
/// </summary>
/// <param name="page">Root node of the page.</param>
/// <returns>The one node matching <c>TABLE_BODY</c> inside the one node matching <c>FIXTURES</c>.</returns>
/// <exception cref="Exception">
/// The page does not contain exactly one fixtures node, or that node does
/// not contain exactly one table body.
/// </exception>
private static HtmlNode ParsePage(HtmlNode page)
{
    // Materialise once so the selector is not re-evaluated for the count
    // and again for the element access.
    HtmlNode[] fixtures = page.CssSelect(FIXTURES).ToArray();
    if (fixtures.Length != 1)
    {
        throw new Exception("the number of fixtures on the Oddschecker site is not exactly one");
    }

    // Search inside the single fixture node rather than the whole sequence.
    HtmlNode fixture = fixtures[0];
    HtmlNode[] tables = fixture.CssSelect(TABLE_BODY).ToArray();
    if (tables.Length != 1)
    {
        throw new Exception("the number of tables in the fixture is not exactly one");
    }

    return tables[0];
}
/// <summary>
/// Extracts the sections of one review node into a string array indexed by
/// the <c>Section</c> constants.
/// </summary>
/// <remarks>
/// When the review text is truncated (a <c>.moreLink</c> is present), the
/// full text is downloaded from the review's own page. That page's URL is
/// built from <c>urlReviewHead</c>, the review id and <c>urlTail</c>.
/// </remarks>
/// <param name="nodeReview">The review node; its parent carries the <c>review_…</c> id.</param>
/// <returns>
/// Array of <c>NUMBER_OF_SECTIONS</c> entries. The text entry stays null when
/// the review is truncated but no usable review id is found on the parent.
/// </returns>
private string[] processReview(HtmlNode nodeReview)
{
    string[] result = new string[NUMBER_OF_SECTIONS];

    // DATE: prefer the title attribute. Only parse the visible text when the
    // attribute is absent, so a short visible text cannot throw needlessly.
    var dateNode = nodeReview.CssSelect(".ratingDate").Single();
    string reviewDate = dateNode.GetAttributeValue("title", (string)null);
    if (reviewDate == null)
    {
        reviewDate = dateNode.InnerText.Trim().Substring("Reviewed ".Length);
    }
    result[Section.Date] = reviewDate;

    // RATING: first character of the rating image's alt text.
    result[Section.Rating] = nodeReview.CssSelect(".rating_s > img").Single().GetAttributeValue("alt", "null").Substring(0, 1);

    // TITLE
    result[Section.Title] = nodeReview.CssSelect(".quote > a > span").Single().InnerText.Trim();

    // TEXT
    var moreButton = nodeReview.CssSelect(".partial_entry .moreLink").FirstOrDefault();
    if (moreButton != null)
    {
        // The id is whatever follows the "review_" prefix on the parent's id.
        // A missing or too-short id is skipped instead of throwing from Substring.
        const string idPrefix = "review_";
        string containerId = nodeReview.ParentNode.GetAttributeValue("id", (string)null);
        if (containerId != null && containerId.Length > idPrefix.Length)
        {
            string reviewID = containerId.Substring(idPrefix.Length);
            string urlReview = urlReviewHead + "-r" + reviewID + "-" + urlTail;

            var innerWeb = new HtmlWeb();
            var innerDoc = innerWeb.Load(urlReview);
            if (innerWeb.StatusCode == System.Net.HttpStatusCode.OK)
            {
                result[Section.Text] = innerDoc.DocumentNode.CssSelect("p[property='reviewBody']").Single().InnerText.Trim();
            }
            else
            {
                throw404(urlReview);
            }
        }
    }
    else
    {
        result[Section.Text] = nodeReview.CssSelect(".partial_entry").Single().InnerText.Trim();
    }

    // ASPECTS: placeholder value, not extracted here.
    result[Section.AspectReviews] = "??";

    return result;
}
/// <summary>
/// Processes every <c>.review</c> node on a page in parallel and appends the
/// results to <c>Reviews</c>.
/// </summary>
/// <remarks>
/// Each task returns its own result instead of adding to a shared list, so
/// no unsynchronised collection is touched from several threads. Results
/// are appended in page order once all tasks have finished.
/// </remarks>
/// <param name="nodePage">Root node of the page holding the reviews.</param>
private void processPage(HtmlNode nodePage)
{
    var allReviewsOnPage = nodePage.CssSelect(".review").ToArray();

    // Size by the real array length; a byte counter would wrap above 255.
    var revTasks = new Task<string[]>[allReviewsOnPage.Length];
    for (int revCount = 0; revCount < revTasks.Length; revCount += 1)
    {
        var currentRev = allReviewsOnPage[revCount];
        revTasks[revCount] = Task.Factory.StartNew<string[]>(() => processReview(currentRev));
    }

    // Throws AggregateException if any review failed; nothing is appended then.
    Task.WaitAll(revTasks);

    var revOut = new List<string[]>(revTasks.Length);
    foreach (var finished in revTasks)
    {
        revOut.Add(finished.Result);
    }

    Reviews.AddRange(revOut);
}
/// <summary>
/// Builds a <c>Review</c> from one review node.
/// </summary>
/// <param name="current">Review node carrying the itemProp-annotated elements.</param>
/// <param name="reviewNumber">Identifier passed through to the <c>Review</c> constructor.</param>
/// <returns>
/// A <c>Review</c> holding the publication date, the rating text without its
/// first and last character, the cleaned review body, and the cleaned
/// response text (the literal "NULL" when there is no response).
/// </returns>
private Review processReview(HtmlNode current, string reviewNumber)
{
    var dateNode = current.CssSelect("meta[itemProp='datePublished']").Single();
    string publishedOn = dateNode.GetAttributeValue("content").Trim();

    // The rating text is wrapped in one leading and one trailing character; drop both.
    string ratingText = current.CssSelect("div[itemProp='reviewRating'] span[itemProp='ratingValue']").Single().InnerText.Trim();
    ratingText = ratingText.Substring(1);
    ratingText = ratingText.Substring(0, ratingText.Length - 1);

    // NOTE(review): the first argument of Replace(" ", " ") renders as a plain
    // space here; confirm whether a non-breaking space or entity was intended.
    string body = current.CssSelect("p[itemProp='reviewBody']").Single().InnerText.Trim()
        .Replace(" ", " ")
        .Replace(Environment.NewLine, "");

    var responseNode = current.CssSelect(".restaurant-content span.responseexpandable").FirstOrDefault();
    string responseText = responseNode == null
        ? "NULL"
        : responseNode.InnerText.Trim()
            .Replace(" ", " ")
            .Replace(Environment.NewLine, "");

    return new Review(reviewNumber, PrimaryKey, publishedOn, ratingText, body, responseText);
}
/// <summary>
/// Builds a <c>Restaurant</c> from one listing node.
/// </summary>
/// <remarks>
/// Missing values are stored as the literal "NULL" (score, best-for, average
/// spend) or "0" (number of reviews). Sentinel texts are compared ordinally
/// and case-insensitively, so the result does not depend on the current culture.
/// </remarks>
/// <param name="current">Listing node for a single restaurant.</param>
/// <returns>
/// The populated <c>Restaurant</c>, or null when the cuisine is "Breakfast"
/// or "Cafe" (any casing).
/// </returns>
private Restaurant processRestaurant(HtmlNode current)
{
    // The key is the link target with the leading "/restaurant/" removed.
    string PrimaryKey = current.CssSelect(".image.cell > a").Single().GetAttributeValue("href").Substring(@"/restaurant/".Length).Trim();
    string Name = current.CssSelect(".restaurant-name").Single().InnerText.Trim();

    string Score;
    string NumberOfReviews;
    HtmlNode scoreNode = current.CssSelect("dl.details.score > dd").Single();
    HtmlNode scoreNodeTotal = scoreNode.CssSelect(".score-total").FirstOrDefault();
    if (scoreNodeTotal != null)
    {
        // Drop the first and last character surrounding the score text.
        Score = scoreNodeTotal.InnerText.Trim();
        Score = Score.Substring(1);
        Score = Score.Substring(0, Score.Length - 1);

        // Drop the trailing " reviews" from the link text.
        NumberOfReviews = scoreNode.CssSelect(".score-description > a").Single().InnerText.Trim();
        NumberOfReviews = NumberOfReviews.Remove(NumberOfReviews.Length - " reviews".Length);
    }
    else
    {
        Score = "NULL";
        NumberOfReviews = "0";
    }

    string Cuisine = current.CssSelect(".details.cuisine > dd").Single().InnerText.Trim();
    if (string.Equals(Cuisine, "BREAKFAST", System.StringComparison.OrdinalIgnoreCase)
        || string.Equals(Cuisine, "CAFE", System.StringComparison.OrdinalIgnoreCase))
    {
        // These cuisines are deliberately excluded.
        return null;
    }

    string BestFor = current.CssSelect(".details.best-for > dd").Single().InnerText.Trim();
    if (string.Equals(BestFor, "NOT AVAILABLE", System.StringComparison.OrdinalIgnoreCase))
    {
        BestFor = "NULL";
    }

    string AvgSpend = current.CssSelect(".details.spend > dd").Single().InnerText.Trim();
    if (!string.Equals(AvgSpend, "N/A", System.StringComparison.OrdinalIgnoreCase))
    {
        // Drop the leading character and the trailing " per person".
        AvgSpend = AvgSpend.Substring(1);
        AvgSpend = AvgSpend.Remove(AvgSpend.Length - " per person".Length).Trim();
    }
    else
    {
        AvgSpend = "NULL";
    }

    return new Restaurant(PrimaryKey, Name, Score, NumberOfReviews, Cuisine, BestFor, AvgSpend);
}
/// <summary>
/// Reads the restaurant count out of the page's <c>h1.autocomplete-text</c> heading.
/// </summary>
/// <remarks>
/// The heading is reduced to its numeric part by cutting off the text of its
/// <c>.book</c> child at the front and " Restaurants in " plus the text of
/// its <c>strong</c> child at the end.
/// </remarks>
/// <param name="docNode">Document node containing the heading.</param>
/// <returns>The parsed count, or -1 when the remaining text is not an integer.</returns>
private int getNumberofRestaurants(HtmlNode docNode)
{
    string headingText = docNode.CssSelect("h1.autocomplete-text").Single().InnerText.Trim();
    int prefixLength = docNode.CssSelect("h1.autocomplete-text > .book").Single().InnerText.Length;
    string suffix = " Restaurants in " + docNode.CssSelect("h1.autocomplete-text > strong").Single().InnerText;

    string countText = headingText.Substring(prefixLength).Trim();
    countText = countText.Remove(countText.Length - suffix.Length);

    int parsed;
    return int.TryParse(countText, out parsed) ? parsed : -1;
}
/// <summary>
/// Loads the players from a result page and from every following page
/// reachable through its "next" link.
/// </summary>
/// <remarks>
/// Pages are walked in a loop rather than by recursion, so the call stack
/// does not grow with the number of result pages.
/// </remarks>
/// <param name="browser">Browser used to navigate to the following pages.</param>
/// <param name="rootNode">Root node of the first result page.</param>
/// <param name="players">Bag passed to <c>LoadPlayers</c> together with each page's rows.</param>
private static void LoadAPageOfPlayers(ScrapingBrowser browser, HtmlNode rootNode, ConcurrentBag<Player> players)
{
    HtmlNode currentRoot = rootNode;
    while (true)
    {
        var tbody = currentRoot.CssSelect("#result > tbody").SingleOrDefault();
        if (tbody == null)
        {
            return;
        }

        // The first child node of the table body is skipped.
        var childRows = tbody.ChildNodes.Skip(1).ToList();
        LoadPlayers(players, childRows);

        var searchResults = tbody.OwnerDocument.DocumentNode.CssSelect("#searchResults").Single();

        // The navigation row is read from index 1, so at least two children are required.
        if (searchResults.ChildNodes.Count < 2)
        {
            return;
        }

        var navigationRow = searchResults.ChildNodes[1];
        var nextButton = navigationRow.ChildNodes.SingleOrDefault(n => n.InnerText.Trim() == "next");
        if (nextButton == null)
        {
            return;
        }

        var value = nextButton.Attributes.Single(a => a.Name == "href").Value;
        var uriString = BaseUrl + HttpUtility.HtmlDecode(value);
        currentRoot = browser.NavigateToPage(new Uri(uriString)).Html;
    }
}
/// <summary>
/// Reads the team name from a table cell.
/// </summary>
/// <param name="cell">Cell expected to contain exactly one team span.</param>
/// <returns>The inner text of the team span, untrimmed.</returns>
/// <exception cref="Exception">The cell does not contain exactly one team span.</exception>
private string GetTeam(HtmlNode cell)
{
    HtmlNode[] matches = cell.CssSelect(String.Format("{0}.{1}", SPAN, TEAM_CLASS)).ToArray();
    if (matches.Length == 1)
    {
        return matches[0].InnerText;
    }

    throw new Exception("the number of teams is not exactly one");
}
/// <summary>
/// Updates <c>currentDate</c> from a date row.
/// </summary>
/// <param name="row">Row expected to contain exactly one span holding the date text.</param>
/// <exception cref="Exception">The row does not contain exactly one span.</exception>
private void ProcessDateRow(HtmlNode row)
{
    HtmlNode[] spans = row.CssSelect(SPAN).ToArray();
    if (spans.Length != 1)
    {
        throw new Exception("the number of span nodes is not exactly one");
    }

    currentDate = ParseDate(spans[0].InnerText);
}