Beispiel #1
0
        /// <summary>
        /// Splits a raw hotel title into the <c>Name</c> and <c>Location</c> members.
        /// </summary>
        /// <remarks>
        /// Strategies are tried in order:
        /// 1. If the title contains a comma, everything before the first comma is the name
        ///    and everything after it is the location.
        /// 2. Otherwise, if the title contains one of the listed capital-city names
        ///    (case-insensitive), the title is split where that city name starts.
        /// 3. Otherwise the whole title is the name and the location is read from the
        ///    city breadcrumb link found under <paramref name="node"/>.
        /// </remarks>
        /// <param name="node">Node searched for the city breadcrumb when the title carries no location.</param>
        /// <param name="name">Raw hotel title to split.</param>
        private void getHotelLocation(HtmlNode node, string name)
        {
            int comma = name.IndexOf(',');
            if (comma != -1)
            {
                Name = name.Substring(0, comma);
                Location = name.Substring(comma + 1);
                return;
            }

            string[] capitals = { "Adelaide", "Brisbane", "Canberra", "Darwin", "Hobart", "Melbourne", "Sydney", "Perth" };
            foreach (string capital in capitals)
            {
                // Search the original string with an ordinal, case-insensitive comparison.
                // Upper-casing both sides first is culture-dependent, allocates two strings
                // per iteration, and yields an index into a transformed copy rather than
                // into the string that is subsequently sliced.
                int loc = name.IndexOf(capital, System.StringComparison.OrdinalIgnoreCase);
                if (loc != -1)
                {
                    Name = name.Substring(0, loc);
                    Location = name.Substring(loc);
                    return;
                }
            }

            // No location in the title itself: fall back to the page breadcrumb.
            Name = name;
            Location = node.CssSelect("a.breadcrumb_link[onclick*='City'] > span").Single().InnerText.Trim();
        }
        /// <summary>
        /// Reads the date of a search result from its first <c>.date</c> element.
        /// </summary>
        /// <param name="item">Search-result node to inspect.</param>
        /// <returns>
        /// The parsed date, or <c>null</c> when there is no <c>.date</c> element or its
        /// text cannot be parsed by <see cref="DateTime.TryParse(string, out DateTime)"/>.
        /// </returns>
        public override DateTime? GetSearchResultDateTime(HtmlNode item)
        {
            var dateNode = item.CssSelect(".date").FirstOrDefault();
            if (dateNode == null)
            {
                return null;
            }

            DateTime parsed;
            return DateTime.TryParse(dateNode.InnerText, out parsed) ? parsed : (DateTime?)null;
        }
Beispiel #3
0
        /// <summary>
        /// Extracts the odds value from a table cell.
        /// </summary>
        /// <param name="cell">Cell expected to contain exactly one odds element.</param>
        /// <returns>The value produced by <c>ParseNumber</c> for the odds text.</returns>
        /// <exception cref="Exception">The cell does not contain exactly one odds element.</exception>
        private double GetOdds(HtmlNode cell)
        {
            HtmlNode[] matches = cell.CssSelect(String.Format("{0}.{1}", SPAN, ODDS_CLASS)).ToArray();
            if (matches.Length == 1)
            {
                // Strip surrounding parentheses and spaces before parsing.
                string text = matches[0].InnerText.Trim('(', ')', ' ');
                return ParseNumber(text);
            }

            throw new Exception("the number of odds is not exactly one");
        }
        /// <summary>
        /// Determines the location name of a search result.
        /// </summary>
        /// <remarks>
        /// The text of the <c>small</c> child of the first <c>.pnr</c> element is preferred.
        /// When <c>.pnr</c> exists but has no <c>small</c> child, the first word-character
        /// run after the URI scheme (<c>http://</c> or <c>https://</c>) of the result URI
        /// is used instead.
        /// </remarks>
        /// <param name="item">Search-result node to inspect.</param>
        /// <returns>
        /// The location name, or <c>null</c> when there is no <c>.pnr</c> element or the
        /// result URI does not match the expected pattern.
        /// </returns>
        public override string GetSearchResultLocationName(HtmlNode item)
        {
            var locationElement = item.CssSelect(".pnr").FirstOrDefault();
            if (locationElement == null)
            {
                return null;
            }

            var small = locationElement.Element("small");
            if (small != null)
            {
                return small.InnerText.Trim();
            }

            var su = GetSearchResultUri(item).ToString();
            var match = System.Text.RegularExpressions.Regex.Match(su, "(?:https?://)(?<city>[\\w]+)");

            // Groups["city"] is never null: for a failed match it is a Group whose
            // Success is false and whose Value is empty. Test Success so that a
            // non-matching URI yields null rather than an empty string.
            var city = match.Groups["city"];
            return city.Success ? city.Value.Trim() : null;
        }
Beispiel #5
0
        /// <summary>
        /// Locates the single fixtures table body on a page.
        /// </summary>
        /// <param name="page">Root node of the page to search.</param>
        /// <returns>The one node matching <c>TABLE_BODY</c> inside the one node matching <c>FIXTURES</c>.</returns>
        /// <exception cref="Exception">
        /// The page does not contain exactly one fixtures node, or that node does not
        /// contain exactly one table body.
        /// </exception>
        private static HtmlNode ParsePage(HtmlNode page)
        {
            // Materialize once so counting and indexing do not re-run the selector.
            List<HtmlNode> fixtures = page.CssSelect(FIXTURES).ToList();
            if (fixtures.Count != 1)
            {
                throw new Exception("the number of fixtures on the Oddschecker site is not exactly one");
            }

            // Search inside the validated fixture node itself; previously the local was
            // assigned but the lookup ran against the whole fixtures sequence.
            HtmlNode fixture = fixtures[0];
            List<HtmlNode> tables = fixture.CssSelect(TABLE_BODY).ToList();
            if (tables.Count != 1)
            {
                throw new Exception("the number of tables in the fixture is not exactly one");
            }

            return tables[0];
        }
Beispiel #6
0
        /// <summary>
        /// Extracts the fields of a single review into an array indexed by <c>Section</c>.
        /// </summary>
        /// <remarks>
        /// When the review text is truncated (a <c>.moreLink</c> is present) the full text
        /// is fetched from the review's own page, built from <c>urlReviewHead</c>, the
        /// review id and <c>urlTail</c>. If the review id cannot be determined, the text
        /// slot is left <c>null</c>.
        /// </remarks>
        /// <param name="nodeReview">Node of one review; its parent is expected to carry an <c>id</c> of the form <c>review_&lt;id&gt;</c>.</param>
        /// <returns>An array of length <c>NUMBER_OF_SECTIONS</c> holding date, rating, title, text and aspect placeholder.</returns>
        private string[] processReview(HtmlNode nodeReview)
        {
            string[] result = new string[NUMBER_OF_SECTIONS];

            // DATE
            var dateNode = nodeReview.CssSelect(".ratingDate").Single();
            var titleAttribute = dateNode.Attributes["title"];
            if (titleAttribute != null)
            {
                result[Section.Date] = titleAttribute.Value;
            }
            else
            {
                // Only derive the date from the visible text when the attribute is absent;
                // computing this fallback unconditionally could throw on short or empty
                // text even though the attribute value would have been used.
                const string datePrefix = "Reviewed ";
                string dateText = dateNode.InnerText.Trim();
                result[Section.Date] = dateText.StartsWith(datePrefix, System.StringComparison.Ordinal)
                    ? dateText.Substring(datePrefix.Length)
                    : dateText;
            }

            // RATING
            result[Section.Rating] = nodeReview.CssSelect(".rating_s > img").Single().GetAttributeValue("alt", "null").Substring(0, 1);

            // TITLE
            result[Section.Title] = nodeReview.CssSelect(".quote > a > span").Single().InnerText.Trim();

            // TEXT
            var moreButton = nodeReview.CssSelect(".partial_entry .moreLink").FirstOrDefault();
            if (moreButton != null)
            {
                // A missing or malformed id means "no review id": skip the fetch instead of
                // slicing past the end of a placeholder string.
                const string idPrefix = "review_";
                string containerId = nodeReview.ParentNode.GetAttributeValue("id", "");
                string reviewID = containerId.StartsWith(idPrefix, System.StringComparison.Ordinal)
                    ? containerId.Substring(idPrefix.Length)
                    : null;

                if (reviewID != null)
                {
                    string urlReview = urlReviewHead + "-r" + reviewID + "-" + urlTail;
                    var innerWeb = new HtmlWeb();
                    var innerDoc = innerWeb.Load(urlReview);
                    if (innerWeb.StatusCode == System.Net.HttpStatusCode.OK)
                    {
                        result[Section.Text] = innerDoc.DocumentNode.CssSelect("p[property='reviewBody']").Single().InnerText.Trim();
                    }
                    else
                    {
                        throw404(urlReview);
                    }
                }
            }
            else
            {
                result[Section.Text] = nodeReview.CssSelect(".partial_entry").Single().InnerText.Trim();
            }

            // ASPECTS
            result[Section.AspectReviews] = "??";

            return result;
        }
Beispiel #7
0
        /// <summary>
        /// Processes every <c>.review</c> node on a page in parallel and appends the
        /// results to <c>Reviews</c>.
        /// </summary>
        /// <remarks>
        /// Blocks until all reviews on the page have been processed. Results are appended
        /// in the order the review nodes appear on the page.
        /// </remarks>
        /// <param name="nodePage">Root node of the page whose reviews are processed.</param>
        private void processPage(HtmlNode nodePage)
        {
            var allReviewsOnPage = nodePage.CssSelect(".review").ToArray();

            // Each task returns its own result instead of adding to a shared List<T>:
            // List<T>.Add is not safe for concurrent writers and could drop or corrupt
            // entries. The array length is used directly (no narrowing to byte), so
            // pages with more than 255 reviews are handled too.
            var revTasks = new Task<string[]>[allReviewsOnPage.Length];
            for (int revCount = 0; revCount < revTasks.Length; revCount += 1)
            {
                // Per-iteration local so each closure captures its own node.
                var currentRev = allReviewsOnPage[revCount];
                revTasks[revCount] = Task.Factory.StartNew<string[]>(() => processReview(currentRev));
            }
            Task.WaitAll(revTasks);

            // All tasks have completed, so reading Result does not block.
            List<string[]> revOut = new List<string[]>(revTasks.Length);
            foreach (var revTask in revTasks)
            {
                revOut.Add(revTask.Result);
            }

            Reviews.AddRange(revOut);
        }
Beispiel #8
0
        /// <summary>
        /// Builds a <c>Review</c> from one review node.
        /// </summary>
        /// <remarks>
        /// The rating text has its first and last characters removed. Review and response
        /// texts have double spaces collapsed to one and <see cref="Environment.NewLine"/>
        /// sequences removed. A missing response is represented by the string "NULL".
        /// </remarks>
        /// <param name="current">Node of the review to read.</param>
        /// <param name="reviewNumber">Identifier passed through to the created <c>Review</c>.</param>
        /// <returns>The review populated from <paramref name="current"/> and the current <c>PrimaryKey</c>.</returns>
        private Review processReview(HtmlNode current, string reviewNumber)
        {
            string publishedOn = current.CssSelect("meta[itemProp='datePublished']").Single().GetAttributeValue("content").Trim();

            // Drop the first and the last character of the rating text.
            string rawRating = current.CssSelect("div[itemProp='reviewRating'] span[itemProp='ratingValue']").Single().InnerText.Trim();
            string ratingValue = rawRating.Substring(1, rawRating.Length - 2);

            string body = current.CssSelect("p[itemProp='reviewBody']").Single().InnerText.Trim()
                .Replace("  ", " ")
                .Replace(Environment.NewLine, "");

            HtmlNode responseNode = current.CssSelect(".restaurant-content span.responseexpandable").FirstOrDefault();
            string reply = responseNode == null
                ? "NULL"
                : responseNode.InnerText.Trim()
                    .Replace("  ", " ")
                    .Replace(Environment.NewLine, "");

            return new Review(reviewNumber, PrimaryKey, publishedOn, ratingValue, body, reply);
        }
Beispiel #9
0
        /// <summary>
        /// Builds a <c>Restaurant</c> from one restaurant listing node.
        /// </summary>
        /// <remarks>
        /// Missing values are represented by the string "NULL" (score, best-for, average
        /// spend) or "0" (number of reviews). Sentinel texts are compared with an ordinal,
        /// case-insensitive comparison so the result does not depend on the current culture.
        /// </remarks>
        /// <param name="current">Node of the restaurant listing to read.</param>
        /// <returns>
        /// The populated restaurant, or <c>null</c> when the cuisine is "Breakfast" or
        /// "Cafe" (any casing).
        /// </returns>
        private Restaurant processRestaurant(HtmlNode current)
        {
            string primaryKey = current.CssSelect(".image.cell > a").Single().GetAttributeValue("href").Substring(@"/restaurant/".Length).Trim();

            string name = current.CssSelect(".restaurant-name").Single().InnerText.Trim();

            string score;
            string numberOfReviews;
            HtmlNode scoreNode = current.CssSelect("dl.details.score > dd").Single();
            HtmlNode scoreNodeTotal = scoreNode.CssSelect(".score-total").FirstOrDefault();
            if (scoreNodeTotal != null)
            {
                // Strip the first and last character surrounding the score text.
                score = scoreNodeTotal.InnerText.Trim();
                score = score.Substring(1);
                score = score.Substring(0, score.Length - 1);

                // Drop the trailing " reviews" suffix, leaving the count.
                numberOfReviews = scoreNode.CssSelect(".score-description > a").Single().InnerText.Trim();
                numberOfReviews = numberOfReviews.Remove(numberOfReviews.Length - " reviews".Length);
            }
            else
            {
                score = "NULL";
                numberOfReviews = "0";
            }

            string cuisine = current.CssSelect(".details.cuisine > dd").Single().InnerText.Trim();
            if (string.Equals(cuisine, "BREAKFAST", System.StringComparison.OrdinalIgnoreCase)
                || string.Equals(cuisine, "CAFE", System.StringComparison.OrdinalIgnoreCase))
            {
                return null;
            }

            string bestFor = current.CssSelect(".details.best-for > dd").Single().InnerText.Trim();
            if (string.Equals(bestFor, "NOT AVAILABLE", System.StringComparison.OrdinalIgnoreCase))
            {
                bestFor = "NULL";
            }

            string avgSpend = current.CssSelect(".details.spend > dd").Single().InnerText.Trim();
            if (!string.Equals(avgSpend, "N/A", System.StringComparison.OrdinalIgnoreCase))
            {
                // Drop the leading character and the trailing "  per person" suffix.
                avgSpend = avgSpend.Substring(1);
                avgSpend = avgSpend.Remove(avgSpend.Length - "  per person".Length).Trim();
            }
            else
            {
                avgSpend = "NULL";
            }

            return new Restaurant(primaryKey, name, score, numberOfReviews, cuisine, bestFor, avgSpend);
        }
Beispiel #10
0
        /// <summary>
        /// Reads the restaurant count from the page heading.
        /// </summary>
        /// <remarks>
        /// The heading text is reduced to the count by cutting off the text of the
        /// <c>.book</c> child at the front and " Restaurants  in " plus the text of the
        /// <c>strong</c> child at the back.
        /// </remarks>
        /// <param name="docNode">Node containing the <c>h1.autocomplete-text</c> heading.</param>
        /// <returns>The parsed count, or -1 when the remaining text is not an integer.</returns>
        private int getNumberofRestaurants(HtmlNode docNode)
        {
            string heading = docNode.CssSelect("h1.autocomplete-text").Single().InnerText.Trim();
            string leadingText = docNode.CssSelect("h1.autocomplete-text > .book").Single().InnerText;
            string cityName = docNode.CssSelect("h1.autocomplete-text > strong").Single().InnerText;
            string trailingText = " Restaurants  in " + cityName;

            string withoutLead = heading.Substring(leadingText.Length).Trim();
            string digits = withoutLead.Remove(withoutLead.Length - trailingText.Length);

            int count;
            return int.TryParse(digits, out count) ? count : -1;
        }
        /// <summary>
        /// Loads the players from a result page and from every following page reachable
        /// through its "next" link.
        /// </summary>
        /// <remarks>
        /// Pages are walked in a loop rather than recursively, so the stack depth does not
        /// grow with the number of result pages. Processing stops at the first page that
        /// has no <c>#result &gt; tbody</c>, no navigation row, or no "next" link.
        /// </remarks>
        /// <param name="browser">Browser used to fetch follow-up pages.</param>
        /// <param name="rootNode">Root node of the first result page.</param>
        /// <param name="players">Collection passed to <c>LoadPlayers</c> for each page.</param>
        private static void LoadAPageOfPlayers(ScrapingBrowser browser, HtmlNode rootNode, ConcurrentBag<Player> players)
        {
            HtmlNode currentRoot = rootNode;
            while (true)
            {
                var tbody = currentRoot.CssSelect("#result > tbody").SingleOrDefault();
                if (tbody == null)
                {
                    return;
                }

                // The first child node is skipped before the rows are handed to LoadPlayers.
                var childRows = tbody.ChildNodes.Skip(1).ToList();
                LoadPlayers(players, childRows);

                var searchResults = tbody.OwnerDocument.DocumentNode.CssSelect("#searchResults").Single();

                // The navigation row is read from index 1, so at least two child nodes
                // are required; checking only for "any" child would throw on index 1.
                if (searchResults.ChildNodes.Count < 2)
                {
                    return;
                }

                var navigationRow = searchResults.ChildNodes[1];
                var nextButton = navigationRow.ChildNodes.SingleOrDefault(n => n.InnerText.Trim() == "next");
                if (nextButton == null)
                {
                    return;
                }

                var value = nextButton.Attributes.Single(a => a.Name == "href").Value;
                var uriString = BaseUrl + HttpUtility.HtmlDecode(value);
                var page = browser.NavigateToPage(new Uri(uriString));

                currentRoot = page.Html;
            }
        }
Beispiel #12
0
        /// <summary>
        /// Extracts the team text from a table cell.
        /// </summary>
        /// <param name="cell">Cell expected to contain exactly one team element.</param>
        /// <returns>The inner text of the single team element, untrimmed.</returns>
        /// <exception cref="Exception">The cell does not contain exactly one team element.</exception>
        private string GetTeam(HtmlNode cell)
        {
            HtmlNode[] matches = cell.CssSelect(String.Format("{0}.{1}", SPAN, TEAM_CLASS)).ToArray();
            if (matches.Length == 1)
            {
                return matches[0].InnerText;
            }

            throw new Exception("the number of teams is not exactly one");
        }
Beispiel #13
0
        /// <summary>
        /// Updates <c>currentDate</c> from a date row.
        /// </summary>
        /// <param name="row">Row expected to contain exactly one element matching <c>SPAN</c>.</param>
        /// <exception cref="Exception">The row does not contain exactly one matching element.</exception>
        private void ProcessDateRow(HtmlNode row)
        {
            HtmlNode[] spans = row.CssSelect(SPAN).ToArray();
            if (spans.Length == 1)
            {
                currentDate = ParseDate(spans[0].InnerText);
                return;
            }

            throw new Exception("the number of span nodes is not exactly one");
        }