/// <summary>
/// Refreshes the cached weekend-ending date. No rows are mined by this override
/// (the MineDate() call is commented out in the original), so the returned list is empty.
/// </summary>
/// <returns>An empty list.</returns>
public override List<IMovie> Mine()
{
    if (!_weekendEnding.HasValue)
    {
        // Nothing cached yet: start from the game Sunday.
        _weekendEnding = MovieDateUtil.GameSunday(null, ContainsEstimates);
    }
    else
    {
        // Reference date: the day before the game Sunday.
        var dayBeforeGameSunday = MovieDateUtil.GameSunday(null, ContainsEstimates).AddDays(-1);

        // Estimates always snap to the reference date; otherwise only an older date is replaced.
        var isOutOfDate = _weekendEnding.Value < dayBeforeGameSunday;
        if (ContainsEstimates || isOutOfDate)
        {
            _weekendEnding = dayBeforeGameSunday;
        }
    }

    return new List<IMovie>();
}
/// <summary>
/// Mines by date when a weekend ending is set; otherwise mines the forecast and,
/// when that yields nothing usable, retries with 2 and then 3 past articles.
/// </summary>
/// <returns>The list produced by MineDate() or by the last MineForecast() attempt.</returns>
public override List<IMovie> Mine()
{
    if (WeekendEnding.HasValue)
    {
        var dayBeforeGameSunday = MovieDateUtil.GameSunday(null, ContainsEstimates).AddDays(-1);
        var referenceSunday = MovieDateUtil.LastSunday(dayBeforeGameSunday);

        // Estimates always snap to the reference Sunday; otherwise only an older date is replaced.
        if (ContainsEstimates || WeekendEnding.Value < referenceSunday)
        {
            WeekendEnding = referenceSunday;
        }

        return MineDate();
    }

    List<IMovie> forecast = MineForecast();
    var keepTrying = Error == NO_DATA || !forecast.Any();

    // Walk back through older articles until one produces data.
    var pastArticles = 2;
    while (keepTrying && pastArticles < 4)
    {
        Error = string.Empty;
        forecast = MineForecast(pastArticles);

        var errorIsAcceptable = string.IsNullOrEmpty(Error) || Error == FOUR_DAY;
        keepTrying = !(forecast.Count > 0 && errorIsAcceptable);
        pastArticles++;
    }

    return forecast;
}
/// <summary>
/// Mines the box-office research vault table. Each "group-" row becomes one movie with its
/// id, name, day, earnings, image URL and cost; the list is ordered by cost, highest first.
/// </summary>
/// <returns>
/// The mined movies, or an empty list when the table or its movie rows cannot be found.
/// </returns>
public override List<IMovie> Mine()
{
    var result = new List<IMovie>();
    var web = new HtmlWeb();
    DateTime? weekendEnding = null;

    UrlSource = $"{Url}/researchvault?section=box-office";
    var doc = web.Load(UrlSource);

    // Get the data in the table.
    var tableNode = doc.DocumentNode.SelectSingleNode("//body//table[@class='tableType-group hasGroups']");

    // Figure out the weekend from the column title. Only the first group header is used for now.
    var headerCells = tableNode?.SelectNodes("thead//th[contains(@class, 'group')]");
    if (headerCells != null && headerCells.Count > 0)
    {
        // Invariant lower-casing keeps the "estimated" match independent of the current culture.
        var headerText = headerCells[0].InnerText.ToLowerInvariant();
        ContainsEstimates = headerText.Contains("estimated");

        // The header holds a date range; the chunk before the first '-' is the start date.
        var dateChunks = headerText.Replace("estimated", string.Empty).Split('-');

        // TryParse: an unexpected header leaves weekendEnding unset instead of throwing.
        DateTime rangeStart;
        if (DateTime.TryParse(dateChunks[0], out rangeStart))
        {
            weekendEnding = rangeStart;
            weekendEnding = MovieDateUtil.ThisSunday(weekendEnding);
        }
    }

    // SelectNodes yields null when the table is missing or has no matching rows.
    var movieRows = tableNode?.SelectNodes("tbody//tr[contains(@class, 'group-')]");
    if (movieRows == null)
    {
        return result;
    }

    foreach (var tableRow in movieRows)
    {
        var id = GetIdFromClass(tableRow.Attributes["class"]?.Value);
        var nameNode = tableRow.SelectSingleNode("td[contains(@class, 'movie-title')]//span[contains(@class, 'title')]");
        var imageNode = tableRow.SelectSingleNode("td//div[contains(@class, 'proxy-img')]");
        var name = RemovePunctuation(HttpUtility.HtmlDecode(nameNode?.InnerText));
        var dayOfWeek = ParseDayOfWeek(name);
        var movieName = ParseName(MapName(name), dayOfWeek);

        var movie = new Movie { Id = id, Day = dayOfWeek, Name = movieName };

        var earningsNode = tableRow.SelectSingleNode("td[@class='movie-earnings numeric stat']");
        if (earningsNode != null)
        {
            movie.Earnings = ParseEarnings(earningsNode.InnerText);
        }

        if (imageNode != null)
        {
            movie.ImageUrl = imageNode.Attributes["data-img-src"]?.Value;

            // Not able to download using https. Skip when the attribute is missing.
            if (movie.ImageUrl != null)
            {
                movie.ImageUrlSource = movie.ImageUrl.Replace("https://", "http://");
            }
        }

        // Might as well grab the bux so the pick can be determined stand-alone.
        var buxNode = tableRow.SelectSingleNode("td[contains(@class, 'movie-title')]//span[contains(@class, 'cost')]");
        if (buxNode != null)
        {
            movie.Cost = ParseEarnings(HttpUtility.HtmlDecode(buxNode.InnerText).Replace("FB", string.Empty));
        }

        if (weekendEnding.HasValue)
        {
            // This weekend ending date is used to populate the "custom" box office weekend ending date.
            movie.WeekendEnding = ContainsEstimates
                ? weekendEnding.Value
                : MovieDateUtil.GameSunday(null, ContainsEstimates);
        }

        result.Add(movie);
    }

    result = result.OrderByDescending(item => item.Cost).ToList();

    // Assign the control ids for the HTML controls (if movie controls are in an array).
    var controlIndex = 1;
    result.ForEach(item => item.ControlId = controlIndex++);

    // When the game date range has an end date, it overrides the per-movie weekend ending.
    var gameDateRange = GetGameDateRange(web);
    if (gameDateRange.End.HasValue)
    {
        result.ForEach(item => item.WeekendEnding = gameDateRange.End.Value);
    }

    return result;
}
/// <summary>
/// Loads the page at Url + Identifier, reads the first row matched by
/// "//tr[position()>1]" and returns it as a movie when its game Sunday equals WeekendEnding.
/// </summary>
/// <returns>A list holding at most one movie.</returns>
public override List<IMovie> Mine()
{
    var mined = new List<IMovie>();
    var browser = new HtmlWeb();

    // e.g. https://www.boxofficemojo.com/daily/chart/?sortdate=2019-04-26&track=marvel2019.htm
    var pageUrl = $"{Url}{Identifier}";

    ContainsEstimates = false;
    var page = browser.Load(pageUrl);
    UrlSource = pageUrl;

    // Only the first matching row is mined (the original notes it as the most recent one).
    var cells = page.DocumentNode?.SelectSingleNode("//tr[position()>1]")?.SelectNodes("td");
    if (cells == null || cells.Count == 0)
    {
        return mined;
    }

    // Column 0 carries the date; Day is taken from it before it is normalised to the game Sunday.
    IMovie dailyMovie = new Movie
    {
        Identifier = Identifier,
        WeekendEnding = ParseEndDate(HttpUtility.HtmlDecode(cells[0].InnerText))
    };
    dailyMovie.Day = dailyMovie.WeekendEnding.DayOfWeek;
    dailyMovie.WeekendEnding = MovieDateUtil.GameSunday(dailyMovie.WeekendEnding);

    // Column 3 carries the earnings.
    if (cells.Count > 3)
    {
        dailyMovie.Earnings = ParseEarnings(FirstToken(cells[3].InnerText));
    }

    // Only keep the row when it belongs to the requested weekend.
    if (dailyMovie.WeekendEnding == WeekendEnding)
    {
        mined.Add(dailyMovie);
    }

    return mined;
}
/// <summary>
/// Follows the first "predictions" link on the page at Url, reads the article date from the
/// predictions heading, and mines one movie per row of the article's 'inlineTable' table.
/// </summary>
/// <returns>Movies that have a name and earnings above zero; empty when no article link is found.</returns>
public override List<IMovie> Mine()
{
    var predictions = new List<IMovie>();
    var browser = new HtmlWeb();
    var page = browser.Load(Url);

    // Locate the link to the predictions article.
    var articleLink = page.DocumentNode.SelectSingleNode("//body//a[contains(@href, 'predictions')]");
    var articleUrl = articleLink?.GetAttributeValue("href", null);
    if (articleUrl == null)
    {
        return predictions;
    }

    // Now retrieve the article page.
    UrlSource = articleUrl;
    page = browser.Load(UrlSource);

    var heading = page.DocumentNode.SelectSingleNode("//body//h2[text()='Weekend Predictions']|//body//h2[text()='Weekend Box Office Predictions']");
    if (heading == null)
    {
        heading = page.DocumentNode.SelectSingleNode("//body//h2[text()='4-Day Weekend Box Office Predictions']");

        // NOTE(review): Error is set to FOUR_DAY even when the 4-day heading is also missing - confirm intent.
        Error = FOUR_DAY;
    }

    // The heading's last child is split on '-'; the second piece is parsed as a date, minus one day.
    DateTime? articleDate = null;
    if (heading != null && heading.HasChildNodes)
    {
        var headingText = HttpUtility.HtmlDecode(heading.LastChild.InnerText).Trim();
        var pieces = headingText.Split('-');

        DateTime parsed;
        if (pieces.Length > 1 && DateTime.TryParse(pieces[1].Trim(), out parsed))
        {
            articleDate = parsed.AddDays(-1);
        }
    }

    // Get the data in the table, skipping the first row.
    // TODO: Parse the header for column titles for mapping.
    var rows = page.DocumentNode?.SelectNodes("//body//table[@class='inlineTable']//tr[position()>1]");
    if (rows == null)
    {
        return predictions;
    }

    foreach (var row in rows)
    {
        var cells = row.SelectNodes("td");
        if (cells == null)
        {
            continue;
        }

        Movie movie = null;

        // Column 1: movie name, with everything from the first parenthesis onwards removed.
        if (cells.Count > 1)
        {
            var title = HttpUtility.HtmlDecode(cells[1].InnerText);
            if (title != null)
            {
                var parenAt = title.IndexOf("(");
                if (parenAt > 0)
                {
                    title = title.Substring(0, parenAt).Trim();
                }

                movie = new Movie { Name = MapName(RemovePunctuation(title)) };

                if (articleDate.HasValue)
                {
                    movie.WeekendEnding = MovieDateUtil.GameSunday(articleDate);
                }
            }
        }

        // Column 2: predicted earnings.
        if (cells.Count > 2)
        {
            movie.Earnings = ParseEarnings(cells[2].InnerText);
        }

        if (movie != null && !string.IsNullOrEmpty(movie.MovieName) && movie.EarningsBase > 0)
        {
            predictions.Add(movie);
        }
    }

    return predictions;
}
/// <summary>
/// Mines the daily box office chart page: one movie per table row that has at least three
/// cells, with name (column 2), earnings (column 4) and theater count (column 7).
/// </summary>
/// <returns>The mined movies, each stamped with the current game Sunday.</returns>
public override List<IMovie> Mine()
{
    var mined = new List<IMovie>();
    var browser = new HtmlWeb();

    // https://www.the-numbers.com/daily-box-office-chart
    var chartUrl = $"{Url}/daily-box-office-chart";

    ContainsEstimates = false;
    WeekendEnding = MovieDateUtil.GameSunday();

    // This page should always have the "current" theater count.
    var page = browser.Load(chartUrl);
    UrlSource = chartUrl;

    var rows = page.DocumentNode?.SelectNodes("//body//table//tr");
    if (rows == null)
    {
        return mined;
    }

    foreach (var row in rows)
    {
        // Rows without enough <td> cells never produce a movie.
        var cells = row.SelectNodes("td");
        if (cells == null || cells.Count <= 2)
        {
            continue;
        }

        var movie = new Movie
        {
            Name = RemovePunctuation(MapName(HttpUtility.HtmlDecode(cells[2].InnerText)))
        };

        if (WeekendEnding.HasValue)
        {
            movie.WeekendEnding = WeekendEnding.Value;
        }

        if (cells.Count > 4)
        {
            movie.Earnings = ParseEarnings(cells[4].InnerText);
        }

        if (cells.Count > 7)
        {
            // Every "-" in the cell text is replaced with "0" before parsing.
            decimal theaters = 0;
            if (decimal.TryParse(cells[7].InnerText?.Replace("-", "0"), out theaters))
            {
                movie.TheaterCount = (int)theaters;
            }
        }

        mined.Add(movie);
    }

    return mined;
}
/// <summary>
/// Finds the "MonCompare" script referenced by the page at DEFAULT_URL, downloads it,
/// rewrites its JavaScript assignments into JSON and maps each entry to a movie.
/// </summary>
/// <returns>
/// The mined movies, or an empty list when the script is not referenced, the download is
/// empty, or the data contains no movies.
/// </returns>
public override List<IMovie> Mine()
{
    var result = new List<IMovie>();
    var web = new HtmlWeb();
    var doc = web.Load(DEFAULT_URL);

    // TODO: Somehow parse the page title from "Summer Week 13" into a Sunday date for each movie.

    // Select the <script> node directly under <body> whose "src" contains 'MonCompare'.
    // REF: https://www.w3schools.com/xml/xpath_syntax.asp
    var node = doc.DocumentNode.SelectSingleNode("//body/script[contains(@src, 'MonCompare')]");
    var src = node?.GetAttributeValue("src", null);
    if (src == null)
    {
        return result;
    }

    // Now retrieve the JSON (.js) page/file.
    var jsonData = HttpRequestUtil.DownloadString($"{DEFAULT_URL}/{src}");
    if (string.IsNullOrEmpty(jsonData))
    {
        // Nothing downloaded: avoid dereferencing a null/empty payload below.
        return result;
    }

    // The string is not really JSON, but CLOSE: turn the assignments into quoted keys.
    // Might want to use Regex to change this.
    jsonData = jsonData.Replace("year =", "\"year\":");
    jsonData = jsonData.Replace("season =", "\"season\":");
    jsonData = jsonData.Replace("week =", "\"week\":");
    jsonData = jsonData.Replace("movies=", "\"movies\":");

    // Adjust the "JSON" array: drop the string-concatenation noise around it.
    jsonData = jsonData.Replace("'[' +", "[").Replace("';", string.Empty).Replace(";", ",");
    jsonData = jsonData.Replace("'+", string.Empty).Replace("'{", "{");

    var movieData = JsonConvert.DeserializeObject<MineNerdData>($"{{{jsonData}}}");
    if (movieData?.Movies == null)
    {
        // No movie array in the payload: return the empty list rather than throwing.
        return result;
    }

    // Same for every movie, so compute it once.
    var weekendEnding = MovieDateUtil.GameSunday().Date;

    int id = 1;
    foreach (var movie in movieData.Movies)
    {
        var name = RemovePunctuation(HttpUtility.HtmlDecode(movie.Title));

        var newMovie = new Movie
        {
            Id = id++,
            Name = MapName(ParseName(name)),
            Day = ParseDayOfWeek(name),
            Earnings = movie.OriginalEstimatedBoxOffice * 1000,
            Cost = movie.Bux,
            WeekendEnding = weekendEnding
        };

        result.Add(newMovie);
    }

    return result;
}