Beispiel #1
0
        /// <summary>
        /// Refreshes the cached weekend ending date. Mining by date (MineDate) is currently
        /// disabled in this method, so the returned list is always empty.
        /// </summary>
        /// <returns>An empty list of movies.</returns>
        public override List <IMovie> Mine()
        {
            var movies = new List <IMovie>();

            if (!_weekendEnding.HasValue)
            {
                // No weekend ending yet: seed it with the game Sunday.
                _weekendEnding = MovieDateUtil.GameSunday(null, ContainsEstimates);

                // movies = MineDate();
                return movies;
            }

            // Reference date: the day before the game Sunday.
            var referenceDate = MovieDateUtil.GameSunday(null, ContainsEstimates).AddDays(-1);

            // Estimates always move the date; otherwise only a stale (earlier) date is replaced.
            var needsRefresh = ContainsEstimates || _weekendEnding.Value < referenceDate;

            if (needsRefresh)
            {
                _weekendEnding = referenceDate;
            }

            // movies = MineDate();
            return movies;
        }
        /// <summary>
        /// Mines by date when a weekend ending is set; otherwise mines the forecast and,
        /// if that yields nothing, retries against older articles.
        /// </summary>
        /// <returns>The movies from MineDate() or from the last MineForecast() attempt.</returns>
        public override List <IMovie> Mine()
        {
            if (WeekendEnding.HasValue)
            {
                var cutoff = MovieDateUtil.LastSunday(MovieDateUtil.GameSunday(null, ContainsEstimates).AddDays(-1));

                // Estimates always move the date; otherwise only an out-of-date weekend ending is replaced.
                if (ContainsEstimates || WeekendEnding.Value < cutoff)
                {
                    WeekendEnding = cutoff;
                }

                return MineDate();
            }

            var movies = MineForecast();

            if (Error != NO_DATA && movies.Any())
            {
                return movies;
            }

            // The first attempt produced no data: try again with pastArticles = 2, then 3.
            var pastArticles = 2;

            while (pastArticles < 4)
            {
                Error  = string.Empty;
                movies = MineForecast(pastArticles);

                var hasUsableData = movies.Count > 0 && (string.IsNullOrEmpty(Error) || Error == FOUR_DAY);

                if (hasUsableData)
                {
                    break;
                }

                pastArticles++;
            }

            return movies;
        }
Beispiel #3
0
        /// <summary>
        /// Loads the "researchvault" box-office page and builds one movie per grouped table row
        /// (id, day, name, earnings, image URL and cost).
        /// </summary>
        /// <remarks>
        /// The first group header decides <c>ContainsEstimates</c> and supplies the weekend ending date.
        /// If the game date range has an end date, that date overrides the weekend ending of every movie.
        /// A missing table or missing rows yields an empty list instead of a NullReferenceException.
        /// </remarks>
        /// <returns>The movies ordered by cost (descending) with sequential control ids starting at 1.</returns>
        public override List <IMovie> Mine()
        {
            var      result        = new List <IMovie>();
            var      web           = new HtmlWeb();
            DateTime?weekendEnding = null;

            UrlSource = $"{Url}/researchvault?section=box-office";

            var doc = web.Load(UrlSource);

            // Get the data in the table.

            var tableNode = doc.DocumentNode.SelectSingleNode("//body//table[@class='tableType-group hasGroups']");

            // Figure out which column to mine from the column title. Grab the first one for now.

            var firstHeader = tableNode?.SelectNodes("thead//th[contains(@class, 'group')]")?.FirstOrDefault();

            if (firstHeader != null)
            {
                var headerText = firstHeader.InnerText.ToLower();

                ContainsEstimates = headerText.IndexOf("estimated") >= 0;

                // The text before the first '-' is parsed as the date.
                var      dateChunks = headerText.Replace("estimated", string.Empty).Split('-');
                DateTime parsedDate;

                // TryParse so an unexpected header format leaves weekendEnding unset instead of throwing.
                if (DateTime.TryParse(dateChunks[0], out parsedDate))
                {
                    weekendEnding = parsedDate;
                    weekendEnding = MovieDateUtil.ThisSunday(weekendEnding);
                }
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var tableRows = tableNode?.SelectNodes("tbody//tr[contains(@class, 'group-')]");

            if (tableRows != null)
            {
                foreach (var tableRow in tableRows)
                {
                    var id        = GetIdFromClass(tableRow.Attributes["class"]?.Value);
                    var nameNode  = tableRow.SelectSingleNode("td[contains(@class, 'movie-title')]//span[contains(@class, 'title')]");
                    var imageNode = tableRow.SelectSingleNode("td//div[contains(@class, 'proxy-img')]");
                    var name      = RemovePunctuation(HttpUtility.HtmlDecode(nameNode?.InnerText));
                    var dayOfWeek = ParseDayOfWeek(name);
                    var movieName = ParseName(MapName(name), dayOfWeek);

                    var movie = new Movie
                    {
                        Id   = id,
                        Day  = dayOfWeek,
                        Name = movieName
                    };

                    // Grab the first one for now.

                    var earningsNode = tableRow.SelectSingleNode("td[@class='movie-earnings numeric stat']");

                    if (earningsNode != null)
                    {
                        movie.Earnings = ParseEarnings(earningsNode.InnerText);
                    }

                    if (imageNode != null)
                    {
                        movie.ImageUrl = imageNode.Attributes["data-img-src"]?.Value;

                        // Not able to download using https.
                        // The attribute may be absent, so the replacement is null-safe.

                        movie.ImageUrlSource = movie.ImageUrl?.Replace("https://", "http://");
                    }

                    // Might as well grab the bux so the pick can be determined stand-alone

                    var buxNode = tableRow.SelectSingleNode("td[contains(@class, 'movie-title')]//span[contains(@class, 'cost')]");

                    if (buxNode != null)
                    {
                        movie.Cost = ParseEarnings(HttpUtility.HtmlDecode(buxNode.InnerText).Replace("FB", string.Empty));
                    }

                    if (weekendEnding.HasValue)
                    {
                        // This weekend ending date is used to populate the "custom" box office weekend ending date.
                        movie.WeekendEnding = ContainsEstimates ? weekendEnding.Value : MovieDateUtil.GameSunday(null, ContainsEstimates);
                    }

                    result.Add(movie);
                }
            }

            result = result.OrderByDescending(movie => movie.Cost).ToList();

            // Assign the control ids for the HTML controls, (if movie controls are in an array).

            var controlIndex = 1;

            result.ForEach(movie => movie.ControlId = controlIndex++);

            var gameDateRange = GetGameDateRange(web);

            if (gameDateRange.End.HasValue)
            {
                result.ForEach(item => item.WeekendEnding = gameDateRange.End.Value);
            }

            return(result);
        }
        /// <summary>
        /// Loads the page at Url + Identifier and reads a single daily value from it.
        /// The value is returned only when its game Sunday equals <c>WeekendEnding</c>.
        /// </summary>
        /// <returns>A list with at most one movie; empty when no row is found or the weekend does not match.</returns>
        public override List <IMovie> Mine()
        {
            var movies = new List <IMovie>();

            // Example: https://www.boxofficemojo.com/daily/chart/?sortdate=2019-04-26&track=marvel2019.htm

            var pageUrl = $"{Url}{Identifier}";
            var browser = new HtmlWeb();

            ContainsEstimates = false;

            var page = browser.Load(pageUrl);

            UrlSource = pageUrl;

            // First <tr> in the document that is not the first row of its parent
            // (presumably the most recent day - verify against the page layout).
            var cells = page.DocumentNode?.SelectSingleNode("//tr[position()>1]")?.SelectNodes("td");

            if (cells == null)
            {
                return movies;
            }

            // Column 0 carries the date.
            IMovie movie = new Movie
            {
                Identifier    = Identifier,
                WeekendEnding = ParseEndDate(HttpUtility.HtmlDecode(cells[0].InnerText))
            };

            // Capture the day of week before the date is moved to its game Sunday.
            movie.Day           = movie.WeekendEnding.DayOfWeek;
            movie.WeekendEnding = MovieDateUtil.GameSunday(movie.WeekendEnding);

            // Column 3 carries the earnings; only its first token is parsed.
            if (cells.Count > 3)
            {
                movie.Earnings = ParseEarnings(FirstToken(cells[3].InnerText));
            }

            // Only want the movie when it belongs to the requested weekend.
            if (movie.WeekendEnding == WeekendEnding)
            {
                movies.Add(movie);
            }

            return movies;
        }
Beispiel #5
0
        /// <summary>
        /// Follows the first "predictions" link on the page at <c>Url</c> and mines the
        /// article's inline table into movies (name and earnings).
        /// </summary>
        /// <remarks>
        /// <c>Error</c> is set to FOUR_DAY whenever neither regular predictions heading is found.
        /// The article date, when it can be parsed from the heading, is used to set each movie's weekend ending.
        /// </remarks>
        /// <returns>Movies with a non-empty MovieName and a positive EarningsBase; empty when no link or table is found.</returns>
        public override List <IMovie> Mine()
        {
            var    movies  = new List <IMovie>();
            string homeUrl = Url;
            var    browser = new HtmlWeb();

            var page = browser.Load(homeUrl);

            // First anchor under <body> whose href contains "predictions".
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp

            var link = page.DocumentNode.SelectSingleNode("//body//a[contains(@href, 'predictions')]");
            var href = link?.GetAttributeValue("href", null);

            if (href == null)
            {
                return movies;
            }

            // Now retrieve the article page.

            UrlSource = href;

            page = browser.Load(UrlSource);

            // Locate the article heading; fall back to the 4-day variant.

            var heading = page.DocumentNode.SelectSingleNode("//body//h2[text()='Weekend Predictions']|//body//h2[text()='Weekend Box Office Predictions']");

            if (heading == null)
            {
                heading = page.DocumentNode.SelectSingleNode("//body//h2[text()='4-Day Weekend Box Office Predictions']");

                Error = FOUR_DAY;
            }

            DateTime?articleDate = null;

            if (heading != null && heading.HasChildNodes)
            {
                // The date is expected after the first '-' in the heading's last child.

                var      headingText = HttpUtility.HtmlDecode(heading.LastChild.InnerText).Trim();
                var      pieces      = headingText.Split('-');
                DateTime parsed;

                if (pieces.Length > 1 && DateTime.TryParse(pieces[1].Trim(), out parsed))
                {
                    articleDate = parsed.AddDays(-1);
                }
            }

            // Get the data in the table (every row after the first).
            // TODO: Parse the header for column titles for mapping.

            var rows = page.DocumentNode?.SelectNodes("//body//table[@class='inlineTable']//tr[position()>1]");

            if (rows == null)
            {
                return movies;
            }

            foreach (var row in rows)
            {
                Movie movie = null;
                var   cells = row.SelectNodes("td");

                if (cells != null)
                {
                    // Column 1: movie title.
                    if (cells.Count > 1)
                    {
                        var title = HttpUtility.HtmlDecode(cells[1].InnerText);

                        if (title != null)
                        {
                            // Drop everything from the first parenthesis on (studio / FML bux).
                            var parenIndex = title.IndexOf("(");

                            if (parenIndex > 0)
                            {
                                title = title.Substring(0, parenIndex).Trim();
                            }

                            movie = new Movie {
                                Name = MapName(RemovePunctuation(title))
                            };

                            if (articleDate.HasValue)
                            {
                                movie.WeekendEnding = MovieDateUtil.GameSunday(articleDate);
                            }
                        }
                    }

                    // Column 2: earnings.
                    if (cells.Count > 2)
                    {
                        movie.Earnings = ParseEarnings(cells[2].InnerText);
                    }
                }

                var keep = movie != null && !string.IsNullOrEmpty(movie.MovieName) && movie.EarningsBase > 0;

                if (keep)
                {
                    movies.Add(movie);
                }
            }

            return movies;
        }
        /// <summary>
        /// Mines the "daily-box-office-chart" page: one movie per table row that has at least
        /// three cells, with name, earnings and theater count read from fixed column positions.
        /// </summary>
        /// <remarks>
        /// Resets <c>ContainsEstimates</c> to false and sets <c>WeekendEnding</c> to the game Sunday before loading.
        /// </remarks>
        /// <returns>The mined movies; empty when the page contains no table rows.</returns>
        public override List <IMovie> Mine()
        {
            var    movies  = new List <IMovie>();
            string pageUrl = $"{Url}/daily-box-office-chart";
            var    browser = new HtmlWeb();

            ContainsEstimates = false;
            WeekendEnding     = MovieDateUtil.GameSunday();                     // This page should always have the "current" theater count.

            //  https://www.the-numbers.com/daily-box-office-chart

            var page = browser.Load(pageUrl);

            UrlSource = pageUrl;

            // Every table row under <body>; rows without <td> cells (e.g. header rows) are skipped below.
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp

            var rows = page.DocumentNode?.SelectNodes("//body//table//tr");

            if (rows == null)
            {
                return movies;
            }

            foreach (var row in rows)
            {
                var cells = row.SelectNodes("td");

                // A movie is only created when the name column (index 2) exists.
                if (cells == null || cells.Count <= 2)
                {
                    continue;
                }

                var movie = new Movie
                {
                    Name = RemovePunctuation(MapName(HttpUtility.HtmlDecode(cells[2].InnerText)))
                };

                if (WeekendEnding.HasValue)
                {
                    movie.WeekendEnding = WeekendEnding.Value;
                }

                // Column 4: earnings.
                if (cells.Count > 4)
                {
                    movie.Earnings = ParseEarnings(cells[4].InnerText);
                }

                // Column 7: theater count; "-" is replaced with "0" before parsing.
                if (cells.Count > 7)
                {
                    decimal theaterCount;

                    if (decimal.TryParse(cells[7].InnerText?.Replace("-", "0"), out theaterCount))
                    {
                        movie.TheaterCount = (int)theaterCount;
                    }
                }

                movies.Add(movie);
            }

            return movies;
        }
        /// <summary>
        /// Finds the "MonCompare" script referenced by the page at DEFAULT_URL, downloads it,
        /// rewrites its JavaScript assignments into JSON and converts each entry into a movie.
        /// </summary>
        /// <remarks>
        /// Returns an empty list when the script tag is missing, the download yields no content,
        /// or the data contains no movies, instead of failing with a NullReferenceException.
        /// </remarks>
        /// <returns>One movie per entry with sequential ids starting at 1, or an empty list.</returns>
        public override List <IMovie> Mine()
        {
            var result = new List <IMovie>();
            var web    = new HtmlWeb();
            var doc    = web.Load(DEFAULT_URL);

            // TODO: Somehow parse the page title from "Summer Week 13" into a Sunday date for each movie.

            // Select the <script> child of <body> whose "src" attribute contains "MonCompare".
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp

            var node = doc.DocumentNode.SelectSingleNode("//body/script[contains(@src, 'MonCompare')]");
            var src  = node?.GetAttributeValue("src", null);

            if (src == null)
            {
                return(result);
            }

            // Now retrieve the JSON (.js) page/file.

            var jsonData = HttpRequestUtil.DownloadString($"{DEFAULT_URL}/{src}");

            if (string.IsNullOrEmpty(jsonData))
            {
                // Nothing was downloaded, so there is nothing to transform.
                return(result);
            }

            // The string is not really JSON, but CLOSE
            // Might want to use Regex to change this.

            jsonData = jsonData.Replace("year =", "\"year\":");
            jsonData = jsonData.Replace("season =", "\"season\":");
            jsonData = jsonData.Replace("week =", "\"week\":");
            jsonData = jsonData.Replace("movies=", "\"movies\":");
            // Adjust the "JSON" array.
            jsonData = jsonData.Replace("'[' +", "[").Replace("';", string.Empty).Replace(";", ",");
            jsonData = jsonData.Replace("'+", string.Empty).Replace("'{", "{");

            var movieData = JsonConvert.DeserializeObject <MineNerdData>($"{{{jsonData}}}");

            if (movieData?.Movies == null)
            {
                // The script did not contain a movies array (or deserialized to nothing).
                return(result);
            }

            int id = 1;

            foreach (var movie in movieData.Movies)
            {
                var name     = RemovePunctuation(HttpUtility.HtmlDecode(movie.Title));
                var newMovie = new Movie
                {
                    Id       = id++,
                    Name     = MapName(ParseName(name)),
                    Day      = ParseDayOfWeek(name),
                    Earnings = movie.OriginalEstimatedBoxOffice * 1000,
                    Cost     = movie.Bux,
                    //WeekendEnding = MovieDateUtil.NextSunday().Date
                    WeekendEnding = MovieDateUtil.GameSunday().Date
                };

                result.Add(newMovie);
            }

            return(result);
        }