コード例 #1
0
        /// <summary>
        /// Scrapes a forecast article and returns the movies listed in it.
        /// Loads the news index (<c>Url + "news/"</c>), follows the
        /// <paramref name="articleNumber"/>-th link whose href contains "/article/",
        /// reads the article date from the byline and parses every
        /// <c>ul/li/span[@class='a-list-item']</c> entry into a <see cref="Movie"/>.
        /// </summary>
        /// <param name="articleNumber">1-based position of the article link on the news index page.</param>
        /// <returns>
        /// The parsed movies; an empty list when the article link, its href or any parsable
        /// entries cannot be found. <c>Error</c> is set to <c>NO_DATA</c> when the article has
        /// no list items, and to <c>FOUR_DAY</c> when an entry was replaced by a larger value.
        /// </returns>
        private List <IMovie> MineForecast(int articleNumber = 1)
        {
            var result = new List <IMovie>();

            // Article numbers are 1-based; a smaller value would index the link list at -1 or below.
            if (articleNumber < 1)
            {
                return result;
            }

            const string articleLinkXPath = "//body//a[contains(@href, '/article/')]";

            var web = new HtmlWeb();
            var doc = web.Load(Url + "news/");          // Load the news index page.

            // REF: https://www.w3schools.com/xml/xpath_syntax.asp
            HtmlNode linkNode = null;

            if (articleNumber == 1)
            {
                linkNode = doc.DocumentNode.SelectSingleNode(articleLinkXPath);
            }
            else
            {
                var linkNodes = doc.DocumentNode.SelectNodes(articleLinkXPath);

                if (linkNodes != null && articleNumber <= linkNodes.Count)
                {
                    linkNode = linkNodes[articleNumber - 1];
                }
            }

            var href = linkNode?.GetAttributeValue("href", null);

            if (href == null)
            {
                return result;
            }

            // Now retrieve the article page.
            UrlSource = $"{Url}/{href}";
            doc       = web.Load(UrlSource);

            DateTime? articleDate = ReadBylineDate(doc.DocumentNode);

            // The movies are just in a <ul> tag (unsorted list).
            var movieNodes = doc.DocumentNode?.SelectNodes("//body//ul/li/span[@class='a-list-item']");

            if (movieNodes == null)
            {
                Error = NO_DATA;
                return result;
            }

            foreach (var movieNode in movieNodes)
            {
                var nodeText = movieNode.InnerText;
                int index    = nodeText.IndexOf(DELIMITER);

                // Entries without a delimiter (or with nothing in front of it) are not forecasts.
                if (index <= 0)
                {
                    continue;
                }

                // Might switch this to RegEx...
                var tail            = nodeText.Substring(index);
                var valueInMillions = tail.Contains("M");

                // Trim out the THEATERS from the name and the multi-day value from the estimate (for now).
                var movieName          = StripParenthetical(nodeText.Substring(0, index));
                var estimatedBoxOffice = StripParenthetical(tail.Replace(DELIMITER, string.Empty).Replace("M", string.Empty));

                decimal estBoxOffice;

                // The site publishes US-formatted numbers, so parse independently of the machine's culture.
                if (string.IsNullOrEmpty(movieName) ||
                    !decimal.TryParse(estimatedBoxOffice,
                                      System.Globalization.NumberStyles.Number,
                                      System.Globalization.CultureInfo.InvariantCulture,
                                      out estBoxOffice))
                {
                    continue;
                }

                var movie = new Movie
                {
                    MovieName = MapName(RemovePunctuation(HttpUtility.HtmlDecode(movieName))),
                    Earnings  = estBoxOffice * (valueInMillions ? 1000000 : 1)
                };

                if (articleDate.HasValue)
                {
                    movie.WeekendEnding = MovieDateUtil.NextSunday(articleDate);
                }

                if (!result.Contains(movie))
                {
                    result.Add(movie);
                }
                else if (GameDays > 3)
                {
                    // It's OK to override the BO value if the game days is MORE than the default.

                    // Need to use "fuzzy" logic here because the names may have dates as suffixes and those should match.
                    var found = result.Find(item => item.Equals(movie));

                    if (found != null && found.EarningsBase < movie.EarningsBase)
                    {
                        // Replace the movie if a larger value was found. (4 day weekend versus 3 day)
                        result.Remove(found);
                        result.Add(movie);

                        Error = FOUR_DAY;
                    }
                }
            }

            return result;
        }

        /// <summary>
        /// Reads the article date from the second child of the
        /// <c>div[@class='mojo-news-byline']</c> element: the text before the first '-'
        /// with any "PDT"/"PST" marker removed.
        /// </summary>
        /// <param name="root">Root node of the loaded article document.</param>
        /// <returns>The date part of the parsed value, or <c>null</c> when the byline is missing or unparsable.</returns>
        private static DateTime? ReadBylineDate(HtmlNode root)
        {
            var byline = root.SelectSingleNode("//body//div[@class='mojo-news-byline']");

            // The first child is skipped; the date text is expected in the second one.
            if (byline == null || byline.ChildNodes.Count <= 1)
            {
                return null;
            }

            string bylineText = HttpUtility.HtmlDecode(byline.ChildNodes[1].InnerText).Trim();
            string dateText   = bylineText.Split(new char[] { '-' })[0]
                                          .Replace("PDT", string.Empty)
                                          .Replace("PST", string.Empty);
            DateTime parsedDateTime;

            if (DateTime.TryParse(dateText,
                                  System.Globalization.CultureInfo.InvariantCulture,
                                  System.Globalization.DateTimeStyles.None,
                                  out parsedDateTime))
            {
                return parsedDateTime.Date;
            }

            return null;
        }

        /// <summary>
        /// Cuts <paramref name="text"/> at its first '(' and trims the remainder.
        /// Text without a '(' (or starting with one) is returned unchanged.
        /// </summary>
        private static string StripParenthetical(string text)
        {
            int parenIndex = text.IndexOf('(');

            // Keep everything before the parenthesis; Trim() removes a separating space if there is one.
            return parenIndex > 0 ? text.Substring(0, parenIndex).Trim() : text;
        }
コード例 #2
0
        /// <summary>
        /// Scrapes <c>{Url}/daily-box-office-chart</c> and returns one <see cref="Movie"/> per
        /// table row that has at least three <c>td</c> cells. Zero-based cell 2 supplies the
        /// name, cell 4 the earnings and cell 7 the theater count.
        /// Also resets <c>ContainsEstimates</c>, sets <c>WeekendEnding</c> and records the
        /// page address in <c>UrlSource</c>.
        /// </summary>
        /// <returns>The parsed movies; empty when the page contains no table rows.</returns>
        public override List <IMovie> Mine()
        {
            const int nameColumn     = 2;
            const int earningsColumn = 4;
            const int theaterColumn  = 7;

            var    result = new List <IMovie>();
            string url    = $"{Url}/daily-box-office-chart";
            var    web    = new HtmlWeb();

            ContainsEstimates = false;
            WeekendEnding     = MovieDateUtil.GameSunday();                     // This page should always have the "current" theater count.

            //  https://www.the-numbers.com/daily-box-office-chart

            var doc = web.Load(url);

            UrlSource = url;

            // Every row of every table in the body; rows without <td> cells (e.g. headers) are skipped below.
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp
            var tableRows = doc.DocumentNode?.SelectNodes("//body//table//tr");

            if (tableRows == null)
            {
                return result;
            }

            foreach (var row in tableRows)
            {
                var cells = row.SelectNodes("td");

                // A row only yields a movie once it reaches the name column.
                if (cells == null || cells.Count <= nameColumn)
                {
                    continue;
                }

                var movie = new Movie
                {
                    Name = RemovePunctuation(MapName(HttpUtility.HtmlDecode(cells[nameColumn].InnerText)))
                };

                if (WeekendEnding.HasValue)
                {
                    movie.WeekendEnding = WeekendEnding.Value;
                }

                if (cells.Count > earningsColumn)
                {
                    movie.Earnings = ParseEarnings(cells[earningsColumn].InnerText);
                }

                if (cells.Count > theaterColumn)
                {
                    decimal theaterCount;

                    // "-" is mapped to "0" before parsing. The page uses US number formatting
                    // ("1,234"), so parse independently of the machine's culture.
                    if (decimal.TryParse(cells[theaterColumn].InnerText?.Replace("-", "0"),
                                         System.Globalization.NumberStyles.Number,
                                         System.Globalization.CultureInfo.InvariantCulture,
                                         out theaterCount))
                    {
                        movie.TheaterCount = (int)theaterCount;
                    }
                }

                result.Add(movie);
            }

            return result;
        }
コード例 #3
0
        /// <summary>
        /// Loads <c>DEFAULT_URL</c>, locates the <c>script</c> element whose <c>src</c> contains
        /// "MonCompare", downloads that script, rewrites its assignments into a JSON object and
        /// converts each deserialized entry into a <see cref="Movie"/>.
        /// </summary>
        /// <returns>
        /// The parsed movies; an empty list when the script element, its <c>src</c>, the
        /// downloaded text or the deserialized movie collection is missing.
        /// </returns>
        public override List <IMovie> Mine()
        {
            var result = new List <IMovie>();
            var web    = new HtmlWeb();
            var doc    = web.Load(DEFAULT_URL);

            // TODO: Somehow parse the page title from "Summer Week 13" into a Sunday date for each movie.

            // Select the <script> node that is a child of <body> and whose "src" contains "MonCompare".
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp
            var node = doc.DocumentNode.SelectSingleNode("//body/script[contains(@src, 'MonCompare')]");
            var src  = node?.GetAttributeValue("src", null);

            if (src == null)
            {
                return result;
            }

            // Now retrieve the JSON (.js) page/file.
            var jsonData = HttpRequestUtil.DownloadString($"{DEFAULT_URL}/{src}");

            // Nothing downloaded means nothing to parse; avoid dereferencing a null string below.
            if (string.IsNullOrEmpty(jsonData))
            {
                return result;
            }

            // The string is not really JSON, but CLOSE
            // Might want to use Regex to change this.
            // NOTE: the order of these replacements matters; later ones rely on earlier output.
            jsonData = jsonData.Replace("year =", "\"year\":");
            jsonData = jsonData.Replace("season =", "\"season\":");
            jsonData = jsonData.Replace("week =", "\"week\":");
            jsonData = jsonData.Replace("movies=", "\"movies\":");
            // Adjust the "JSON" array.
            jsonData = jsonData.Replace("'[' +", "[").Replace("';", string.Empty).Replace(";", ",");
            jsonData = jsonData.Replace("'+", string.Empty).Replace("'{", "{");

            // Wrap the rewritten assignments in braces so they form a single JSON object.
            var movieData = JsonConvert.DeserializeObject <MineNerdData>($"{{{jsonData}}}");

            // The script may not have contained a movie list at all.
            if (movieData?.Movies == null)
            {
                return result;
            }

            int id = 1;

            foreach (var movie in movieData.Movies)
            {
                var name     = RemovePunctuation(HttpUtility.HtmlDecode(movie.Title));
                var newMovie = new Movie
                {
                    Id            = id++,
                    Name          = MapName(ParseName(name)),
                    Day           = ParseDayOfWeek(name),
                    Earnings      = movie.OriginalEstimatedBoxOffice * 1000,
                    Cost          = movie.Bux,
                    WeekendEnding = MovieDateUtil.GameSunday().Date
                };

                result.Add(newMovie);
            }

            return result;
        }