//----==== PRIVATE ====--------------------------------------------------------------------

        /// <summary>
        /// Splits a single estimate line at the first <c>DELIMITER</c> into a movie name (text before)
        /// and an estimated box office (text after), then appends the resulting movie to
        /// <paramref name="result"/>.
        /// </summary>
        /// <param name="nodeText">Text of one estimate line.</param>
        /// <param name="articleDate">Article date, when one was parsed; used to set the movie's weekend ending.</param>
        /// <param name="result">List that receives the parsed movie.</param>
        /// <exception cref="FormatException">The estimate portion is not a valid decimal number.</exception>
        private void AddMovie(string nodeText, DateTime?articleDate, List <IMovie> result)
        {
            int index = nodeText.IndexOf(DELIMITER);

            if (index < 0)
            {
                // Not an estimate line: without this guard Substring(0, -1) throws ArgumentOutOfRangeException.
                return;
            }

            var movieName = nodeText.Substring(0, index);

            // Everything from the delimiter onward holds the estimate; compute it once.
            var remainder = nodeText.Substring(index);

            // Might switch this to RegEx...

            // "milllion" (three l's) is checked as well as the correct spelling.
            var valueInMillions = remainder.Contains("million") || remainder.Contains("milllion");

            var estimatedBoxOffice = remainder.Replace(DELIMITER, string.Empty)
                                              .Replace("million", string.Empty)
                                              .Replace("milllion", string.Empty);

            var parenIndex = estimatedBoxOffice.IndexOf("(");

            if (parenIndex > 0)
            {
                // Trim out the FML bux (the parenthesised part and the character just before it).
                estimatedBoxOffice = estimatedBoxOffice.Substring(0, parenIndex - 1);
            }

            if (!string.IsNullOrEmpty(movieName))
            {
                var name      = RemovePunctuation(HttpUtility.HtmlDecode(movieName));
                var dayOfWeek = ParseDayOfWeek(name);
                var movie     = new Movie
                {
                    MovieName = MapName(ParseName(name, dayOfWeek)),
                    Day       = dayOfWeek,
                    Earnings  = decimal.Parse(estimatedBoxOffice) * (valueInMillions ? 1000000 : 1)
                };

                if (articleDate.HasValue)
                {
                    movie.WeekendEnding = MovieDateUtil.NextSunday(articleDate);
                }

                result.Add(movie);
            }
        }
        /// <summary>
        /// Runs the site search for "weekend box office predictions", follows the first matching
        /// article link, and collects movies from two parts of the article: the headline
        /// "Prediction:" paragraphs and the numbered list that follows a "holdover" heading.
        /// </summary>
        /// <returns>The movies that were parsed; empty when no article link was found.</returns>
        public override List <IMovie> Mine()
        {
            var movies  = new List <IMovie>();
            var browser = new HtmlWeb();
            var page    = browser.Load($"{Url}/?s=weekend+box+office+predictions");

            // First anchor whose href looks like a predictions article.
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp
            var linkNode = page.DocumentNode.SelectSingleNode("//body//a[contains(@href, 'weekend-box-office-predictions-')]");

            Error = string.Empty;

            if (linkNode == null)
            {
                return movies;
            }

            var href = linkNode.GetAttributeValue("href", null);

            if (href == null)
            {
                return movies;
            }

            UrlSource = href;

            // Load the article itself.
            page = browser.Load(UrlSource);

            // Article date: the <time> element's text with the leading "On " removed.
            DateTime? articleDate = null;
            var       dateNode    = page.DocumentNode.SelectSingleNode("//body//time[@class='post-published updated']");

            if (dateNode != null)
            {
                DateTime published;

                if (DateTime.TryParse(dateNode.InnerText.Replace("On ", string.Empty), out published))
                {
                    articleDate = published;
                }
            }

            // Headline predictions: each centered <h2> (title) is paired with a "Prediction:" paragraph.
            // Rank might be important because the movies below the main ones are ranked as well

            var predictionRows = page.DocumentNode.SelectNodes("//body//article//h2[@style='text-align: center;'] | //body//article//p[contains(.,'Prediction:')]");

            if (predictionRows != null)
            {
                string pendingTitle  = null;
                string pendingAmount = null;

                foreach (var row in predictionRows)
                {
                    if (row.Name.ToLower() == "h2")
                    {
                        pendingTitle = HttpUtility.HtmlDecode(row.InnerText);
                    }
                    else if (row.InnerText.StartsWith("Prediction:"))
                    {
                        pendingAmount = row.InnerText.Replace("Prediction:", string.Empty);
                    }

                    // Wait until both halves of a title/amount pair have been seen.
                    if (pendingTitle == null || pendingAmount == null)
                    {
                        continue;
                    }

                    var millionAt = pendingAmount.IndexOf("million");

                    if (millionAt > 0)
                    {
                        pendingAmount = pendingAmount.Substring(0, millionAt).Replace("$", string.Empty).Trim();
                    }

                    decimal earnings;

                    if (decimal.TryParse(pendingAmount, out earnings))
                    {
                        var predicted = new Movie
                        {
                            Name     = MapName(RemovePunctuation(RemoveStudio(pendingTitle))),
                            Earnings = earnings * 1000000
                        };

                        if (articleDate.HasValue)
                        {
                            predicted.WeekendEnding = MovieDateUtil.NextSunday(articleDate.Value);
                        }

                        movies.Add(predicted);
                    }

                    // The pair is consumed whether or not it parsed.
                    pendingTitle  = null;
                    pendingAmount = null;
                }
            }

            // TODO: Exclude movies that are already in the list.  (or seek out movies from a list that was passed in)

            var holdoverRows = page.DocumentNode.SelectNodes("//body//article//h2[@style='text-align: center;'] | //body//article//p[contains(.,'million')]");

            if (holdoverRows == null)
            {
                return movies;
            }

            var inHoldovers = false;

            foreach (var row in holdoverRows)
            {
                if (!inHoldovers)
                {
                    // Skip everything up to and including the "holdover" heading.
                    inHoldovers = row.Name.ToLower() == "h2" && row.InnerText.ToLower().Contains("holdover");
                    continue;
                }

                var text          = row.InnerText;
                var secondMillion = text.IndexOf("million", text.IndexOf("million") + 1);

                // Only want the row (paragraph) with "million" in it multiple times.
                if (secondMillion <= 0)
                {
                    continue;
                }

                // One or more digits, a period and white space, then lazily up to 'cume' (cumulative).
                foreach (Match entry in Regex.Matches(text, @"\d+\.\s.*?cume"))
                {
                    var holdover      = new Movie();
                    var titleMatch    = Regex.Match(entry.Value, @"\s(.*?)\$");
                    var earningsMatch = Regex.Match(entry.Value, @"\$\d+.*?\(");
                    var fourDayMatch  = Regex.Match(entry.Value, @",\s\$\d+\.*\d*.*day");

                    if (!string.IsNullOrEmpty(fourDayMatch.Value))
                    {
                        // A 4-day figure takes precedence and is flagged through Error.
                        Error             = FOUR_DAY;
                        holdover.Earnings = ParseEarnings(fourDayMatch.Value.Replace(",", string.Empty).Replace("(4-day", string.Empty));
                    }
                    else if (!string.IsNullOrEmpty(earningsMatch.Value))
                    {
                        holdover.Earnings = ParseEarnings(earningsMatch.Value.Replace("(", string.Empty));
                    }

                    if (!string.IsNullOrEmpty(titleMatch.Value))
                    {
                        holdover.Name = titleMatch.Value.Trim().Replace(" $", string.Empty);

                        var parenAt = holdover.Name.LastIndexOf("(");

                        if (parenAt > 0)
                        {
                            // Drop the trailing parenthesised part of the title.
                            holdover.Name = RemovePunctuation(HttpUtility.HtmlDecode(holdover.Name.Substring(0, parenAt)));
                        }
                    }

                    if (holdover.Name != null && holdover.Earnings > 0)
                    {
                        if (articleDate.HasValue)
                        {
                            holdover.WeekendEnding = MovieDateUtil.NextSunday(articleDate.Value);
                        }

                        movies.Add(holdover);
                    }
                }

                // Only the first qualifying paragraph after the heading is processed.
                break;
            }

            return movies;
        }
Example #3
0
        /// <summary>
        /// Loads the tracking-forecasts listing, follows the article link at position
        /// <paramref name="articleNumber"/>, and reads movie names and earnings from the first
        /// table in that article.
        /// </summary>
        /// <param name="articleNumber">
        /// 1-based position of the article link on the listing page. A value outside the range of
        /// links found selects no article.
        /// </param>
        /// <returns>
        /// The parsed movies. When the list is empty or its summed <c>EarningsBase</c> is not positive,
        /// <c>Error</c> is set to <c>NO_DATA</c>.
        /// </returns>
        private List <IMovie> MineData(int articleNumber = 1)
        {
            var result = new List <IMovie>();
            var web    = new HtmlWeb();

            var doc = web.Load($"{Url}category/tracking-forecasts");
            // Can't instanciate an Active-X control within a web application.
            //var doc = web.LoadFromBrowser(Url);

            // Select the article link(s) by CSS class.
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp

            //var node = doc.DocumentNode.SelectSingleNode("//body//a[contains(@href, 'estimates-weekend')]");
            //var node = doc.DocumentNode.SelectSingleNode("//body//a[contains(@href, 'weekend-forecast')]");

            HtmlNode     node  = null;
            const string XPATH = "//body//a[contains(@class, 'single-item__link')]";

            if (articleNumber == 1)
            {
                node = doc.DocumentNode.SelectSingleNode(XPATH);
            }
            else
            {
                var nodes = doc.DocumentNode.SelectNodes(XPATH);

                // The lower bound matters: zero or a negative number would index before the start of the list.
                if (nodes != null && articleNumber >= 1 && articleNumber <= nodes.Count)
                {
                    node = nodes[articleNumber - 1];
                }
            }

            if (node != null)
            {
                var href = node.GetAttributeValue("href", null);

                if (href != null)
                {
                    DateTime?articleDate = null;

                    UrlSource = href;

                    // Now retrieve the article page.

                    doc = web.Load(UrlSource);

                    // Get the date of the article

                    node = doc.DocumentNode.SelectSingleNode("//body//div[@class='entry-meta']/span[@class='entry-meta__post-date']");

                    if (node != null)
                    {
                        // Strip the bullet separator that surrounds the date text.
                        string   articleText = node.InnerText.Replace("&nbsp;•&nbsp;", string.Empty);
                        DateTime parsedDateTime;

                        if (DateTime.TryParse(articleText, out parsedDateTime))
                        {
                            articleDate = parsedDateTime;
                        }
                    }

                    // Get the data in the table.

                    //node = doc.DocumentNode.SelectSingleNode("//body//div[@class='post-container']/table");
                    //node = doc.DocumentNode.SelectSingleNode("//body//table[@class='wp-block-table aligncenter']");
                    //node = doc.DocumentNode.SelectSingleNode("//body//table[@width='716']");
                    node = doc.DocumentNode.SelectSingleNode("//body//table");

                    // TODO: Parse the header for column titles for mapping.

                    var tableRows = node?.SelectNodes("tbody/tr[position()>1]");                            // Skips the table header row (row 0)

                    if (tableRows != null)
                    {
                        foreach (var row in tableRows)
                        {
                            Movie movie      = null;
                            var   rowColumns = row.SelectNodes("td");

                            if (rowColumns != null)
                            {
                                int columnCount = 0;

                                foreach (var column in rowColumns)
                                {
                                    if (columnCount == 0)
                                    {
                                        // First cell: the movie name.
                                        movie = new Movie {
                                            Name = MapName(RemovePunctuation(HttpUtility.HtmlDecode(column.InnerText)))
                                        };

                                        if (articleDate.HasValue)
                                        {
                                            movie.WeekendEnding = MovieDateUtil.NextSunday(articleDate);
                                        }
                                    }
                                    else if (columnCount == 2)
                                    {
                                        // Third cell: the earnings figure.
                                        //movie.Earnings = decimal.Parse(column.InnerText?.Replace("$", string.Empty));

                                        var rawText = RemovePunctuation(HttpUtility.HtmlDecode(column.InnerText));

                                        // NOTE(review): "4day" is presumably "4-day" after RemovePunctuation - confirm.
                                        if (rawText.Contains("4day"))
                                        {
                                            var tokens = rawText.Split();

                                            if (tokens.Length > 2)
                                            {
                                                // Use the second-to-last whitespace-separated token as the value.
                                                rawText = tokens[tokens.Length - 2];
                                            }
                                        }

                                        // Keep only the first line of a multi-line cell.
                                        var idx = rawText.IndexOf('\n');

                                        if (idx > 0)
                                        {
                                            rawText = rawText.Substring(0, idx);
                                        }

                                        movie.Earnings = ParseEarnings(rawText);
                                    }

                                    columnCount++;
                                }
                            }

                            if (movie != null)
                            {
                                result.Add(movie);
                            }
                        }
                    }
                }
            }

            if (!result.Any() || result.Sum(movie => movie.EarningsBase) <= 0)
            {
                Error = NO_DATA;
            }

            return(result);
        }
        /// <summary>
        /// Searches the main chatter board for "bonus bar", follows the first topic found, and parses
        /// the bold estimate lines of the form "name DELIMITER estimate DELIMITER ..." into movies.
        /// </summary>
        /// <returns>
        /// The parsed movies; empty when the topic, its link, its body, or any estimate lines could
        /// not be found. Lines that fail to parse are skipped and reported through <c>Error</c> and
        /// <c>ErrorDetail</c>.
        /// </returns>
        public override List <IMovie> Mine()
        {
            var result = new List <IMovie>();
            var web    = new HtmlWeb();

            var doc = web.Load($"{Url}/chatter/searchmessages?boardId=fml-main-chatter&query=bonus%20bar");

            // Lookup XPATH to get the right node that matches.
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp

            var node = doc.DocumentNode.SelectSingleNode("//body//div/h3[@class='topic-item__title']");

            if (node != null)
            {
                // Traverse up to the <div>, which carries the data-href attribute.
                node = node.ParentNode;
            }

            if (node == null)
            {
                return result;
            }

            var href = node.GetAttributeValue("data-href", null);

            if (href == null)
            {
                return result;
            }

            DateTime?articleDate = null;

            // Now retrieve the article page.

            UrlSource = $"{Url}/{href}";

            doc = web.Load(UrlSource);

            // Get the date of the article

            node = doc.DocumentNode.SelectSingleNode("//body//span[@class='topic-item__attribution']/span[@class='time-date']");

            if (node != null && node.HasChildNodes)
            {
                // Only the first child holds the date text.
                var articleText = HttpUtility.HtmlDecode(node.FirstChild.InnerText.Replace(",", string.Empty)).Trim();

                // Remove the text after the year (only the current year is recognised).

                var yearText = DateTime.Now.Year.ToString();
                var yearAt   = articleText.IndexOf(yearText);

                if (yearAt > 0)
                {
                    articleText = articleText.Substring(0, yearAt + yearText.Length);
                }

                DateTime parsedDateTime;

                if (DateTime.TryParse(articleText, out parsedDateTime))
                {
                    articleDate = parsedDateTime;
                }
            }

            // Get the data

            node = doc.DocumentNode.SelectSingleNode("//body//div[@class='topic-item__body']");

            if (node == null)
            {
                return result;
            }

            // Find all of the estimate paragraphs (are in BOLD).
            // NOTE(review): an XPath starting with "//" is evaluated from the document root, not from
            // this node; ".//p/strong" would limit the search to the topic body - confirm intent.
            var movieNodes = node.SelectNodes("//p/strong");

            if (movieNodes == null)
            {
                // SelectNodes yields null (not an empty collection) when nothing matches.
                return result;
            }

            foreach (var movieNode in movieNodes)
            {
                var nodeText = HttpUtility.HtmlDecode(movieNode.InnerHtml);

                var index  = nodeText.IndexOf(DELIMITER);
                var index2 = nodeText.IndexOf(DELIMITER, index + 1);

                // Need a non-empty name and a second delimiter closing the estimate.
                if (index <= 0 || index2 <= 0)
                {
                    continue;
                }

                var movieName = nodeText.Substring(0, index);

                // Might switch this to RegEx...

                var estimatedBoxOffice = nodeText.Substring(index + 1, index2 - (index + 1)).Replace("$", string.Empty);

                var trimIndex = estimatedBoxOffice.IndexOf("(");

                if (trimIndex > 0)
                {
                    // Trim out the drop percentage (and everything after).
                    estimatedBoxOffice = estimatedBoxOffice.Substring(0, trimIndex - 1);
                }

                var   name  = RemovePunctuation(HttpUtility.HtmlDecode(movieName));
                Movie movie = null;

                try
                {
                    movie = new Movie
                    {
                        MovieName = MapName(ParseName(name)),
                        Day       = ParseDayOfWeek(name),
                        Earnings  = ParseEarnings(estimatedBoxOffice)
                    };

                    if (movie.Day.HasValue)
                    {
                        CompoundLoaded = true;
                    }
                }
                catch (Exception exception)
                {
                    // Best effort: record the problem and carry on with the remaining lines.
                    Error       = "Some bad data";
                    ErrorDetail = $"The movie did not parse correctly \"{name}\" - {exception.Message}";
                    movie       = null;
                }

                if (movie == null)
                {
                    continue;
                }

                var alreadyListed = result.Contains(movie);

                if (alreadyListed && !movie.Day.HasValue)
                {
                    // A duplicate without a day adds nothing.
                    continue;
                }

                if (articleDate.HasValue)
                {
                    movie.WeekendEnding = MovieDateUtil.NextSunday(articleDate);
                }

                result.Add(movie);

                if (alreadyListed)
                {
                    // Remove the movie that does NOT have a day.

                    var toRemove = result.FirstOrDefault(item => item.Equals(movie) && !item.Day.HasValue);

                    if (toRemove != null)
                    {
                        result.Remove(toRemove);
                    }
                }
            }

            return result;
        }
        /// <summary>
        /// Finds the estimates article on the news page (by <c>_articleTitle</c>, falling back to
        /// "Box Office Estimates" / "Box Office Predictions" titles that do not mention "Perri"),
        /// loads it, and parses its estimate paragraphs into movies.
        /// </summary>
        /// <returns>
        /// The parsed movies; empty when the article link, its content, or any estimate paragraphs
        /// could not be found. In the one-paragraph-per-movie layout, paragraphs that fail to parse
        /// are skipped and reported through <c>Error</c> and <c>ErrorDetail</c>.
        /// </returns>
        public override List <IMovie> Mine()
        {
            var result = new List <IMovie>();
            var web    = new HtmlWeb();

            var doc = web.Load($"{Url}/news");

            // Lookup XPATH to get the right node that matches.
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp

            // Select the first <a> node that contains the title attribute.

            var node = doc.DocumentNode.SelectSingleNode($"//body//a[contains(@title, '{_articleTitle}')]");

            if (node == null)
            {
                node = doc.DocumentNode.SelectSingleNode("//body//a[contains(@title, 'Box Office Estimates') and not(contains(@title, 'Perri'))]|//body//a[contains(@title, 'Box Office Predictions') and not(contains(@title, 'Perri'))]");
            }

            if (node == null)
            {
                return result;
            }

            var href = node.GetAttributeValue("href", null);

            if (href == null)
            {
                return result;
            }

            DateTime?articleDate = null;

            // Now retrieve the article page.

            UrlSource = $"{Url}/{href}";

            doc = web.Load(UrlSource);

            // Get the date of the article

            //node = doc.DocumentNode.SelectSingleNode("//body//div[@class='credits']/span[@class='date']");
            node = doc.DocumentNode.SelectSingleNode("//body//div[@class='post__credits']/div[@class='post__date-time']/div");

            if (node != null && node.HasChildNodes)
            {
                // Only the first child holds the date text.
                string   articleText = HttpUtility.HtmlDecode(node.FirstChild.InnerText).Trim();
                DateTime parsedDateTime;

                if (DateTime.TryParse(articleText, out parsedDateTime))
                {
                    articleDate = parsedDateTime;
                }
            }

            // Get the data

            node = doc.DocumentNode.SelectSingleNode("//body//div[@class='post__content']");

            if (node == null)
            {
                return result;
            }

            // Find all of the estimate paragraphs
            var movieNodes = node.SelectNodes($"//p[contains(., '{DELIMITER}')]|//p[contains(., '{DELIMITER2}')]");

            if (movieNodes == null)
            {
                // SelectNodes yields null (not an empty collection) when nothing matches.
                return result;
            }

            // As of 11/2/2017 Todd is separating things with <br /> now.
            // As of 04/17/2019 Todd is putting the estimates all inside a single paragraph

            if (movieNodes.Count <= 5)
            {
                // Few paragraphs: treat the last one as holding every estimate, one per line.
                var innerHtml  = HttpUtility.HtmlDecode(movieNodes.Last().InnerHtml);
                var delimiters = new string[] { "\n", "<br>", "<br>\n", "<br><br>" };
                var tokens     = innerHtml.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

                foreach (var token in tokens)
                {
                    // Only lines that open with a double quote are passed on as estimates.
                    if (token != null && token.StartsWith("\""))                                // && token.EndsWith("million"))
                    {
                        AddMovie(token.Replace("<br>", string.Empty), articleDate, result);
                    }
                }

                return result;
            }

            // One paragraph per movie.
            foreach (var movieNode in movieNodes)
            {
                var nodeText = movieNode.InnerText;
                int index    = nodeText.IndexOf(DELIMITER);

                if (index < 0)
                {
                    index = nodeText.IndexOf(DELIMITER2);
                }

                // index > 0 also guarantees a non-empty movie name.
                if (index <= 0)
                {
                    continue;
                }

                var movieName = nodeText.Substring(0, index);
                var remainder = nodeText.Substring(index);

                // Might switch this to RegEx...

                var multiplier         = Multiplier(remainder);
                var estimatedBoxOffice = remainder.Replace(DELIMITER, string.Empty).Replace(DELIMITER2, string.Empty).Replace("million", string.Empty).Replace("k", string.Empty);

                var parenIndex = estimatedBoxOffice.IndexOf("(");

                if (parenIndex > 0)
                {
                    // Trim out the FML bux.
                    estimatedBoxOffice = estimatedBoxOffice.Substring(0, parenIndex - 1);
                }

                var   name  = RemovePunctuation(HttpUtility.HtmlDecode(movieName));
                Movie movie = null;

                try
                {
                    var dayOfWeek = ParseDayOfWeek(name);

                    movie = new Movie
                    {
                        MovieName = MapName(ParseName(name, dayOfWeek)),
                        Day       = dayOfWeek,
                        Earnings  = decimal.Parse(estimatedBoxOffice) * multiplier
                    };

                    if (movie.Day.HasValue)
                    {
                        CompoundLoaded = true;
                    }
                }
                catch (Exception exception)
                {
                    // Best effort: record the problem and carry on with the remaining paragraphs.
                    Error       = "Some bad data";
                    ErrorDetail = $"The movie did not parse correctly \"{name}\" - {exception.Message}";
                    movie       = null;
                }

                if (movie != null)
                {
                    if (articleDate.HasValue)
                    {
                        movie.WeekendEnding = MovieDateUtil.NextSunday(articleDate);
                    }

                    result.Add(movie);
                }
            }

            return result;
        }
Example #6
0
        /// <summary>
        /// Loads the page at <c>Url</c>, follows the first Box Office Prophets column link whose
        /// text contains "Weekend Forecast", and builds one movie per forecast table row.
        /// </summary>
        /// <remarks>
        /// The article date (when it parses) is passed to <c>MovieDateUtil.NextSunday</c> to fill
        /// in <c>WeekendEnding</c>. If the first article page has no forecast rows, page 2 of the
        /// column is tried. <c>UrlSource</c> is set to the article page that was last loaded.
        /// A row whose estimate cell is not a number is skipped instead of aborting the scrape.
        /// </remarks>
        /// <returns>
        /// The movies that were found; an empty list when the column link or the forecast rows
        /// cannot be located.
        /// </returns>
        public override List <IMovie> Mine()
        {
            const string tableXPath = "//body//div[@id='EchoTopic']//table[@width='100%']";

            // NOTE(review): the leading "//" makes this search the whole document rather than
            // just the selected table - confirm whether ".//tr[...]" was intended.
            const string rowXPath = "//tr[@bgcolor='eeeeee']";

            var result = new List <IMovie>();
            var web    = new HtmlWeb();

            var doc = web.Load(Url);

            // Select all of the column links, then pick the first one that is a weekend forecast.
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp

            HtmlNode node  = null;
            var      nodes = doc.DocumentNode.SelectNodes("//body//a[contains(@href, 'http://www.boxofficeprophets.com/column')]");

            if (nodes != null)
            {
                foreach (var aNode in nodes)
                {
                    if (aNode.InnerText.Contains("Weekend Forecast"))
                    {
                        node = aNode;
                        break;
                    }
                }
            }

            var href = node?.GetAttributeValue("href", null);

            if (href == null)
            {
                return(result);
            }

            DateTime?articleDate = null;

            UrlSource = href;

            // Now retrieve the article page.

            doc = web.Load(UrlSource);

            // Get the date of the article

            node = doc.DocumentNode.SelectSingleNode("//body//div[@id='EchoTopic']//h4");

            if (node != null)
            {
                DateTime parsedDateTime;

                if (DateTime.TryParse(node.InnerText, out parsedDateTime))
                {
                    articleDate = parsedDateTime;
                }
            }

            // Get the data in the table.

            node = doc.DocumentNode.SelectSingleNode(tableXPath);

            // TODO: Parse the header for column titles for mapping.

            var tableRows = node?.SelectNodes(rowXPath);

            if (tableRows == null)
            {
                // Try page 2

                UrlSource += "&columnpage=2";

                doc = web.Load(UrlSource);

                node      = doc.DocumentNode.SelectSingleNode(tableXPath);
                tableRows = node?.SelectNodes(rowXPath);
            }

            if (tableRows == null)
            {
                return(result);
            }

            foreach (var row in tableRows)
            {
                var rowColumns = row.SelectNodes("td");

                // The movie name lives in the second cell; rows without it carry no movie.

                if (rowColumns == null || rowColumns.Count < 2)
                {
                    continue;
                }

                var movieName = MapName(RemovePunctuation(HttpUtility.HtmlDecode(rowColumns[1].InnerText)));

                // Don't create the movie if the name is hosed up.

                if (string.IsNullOrEmpty(movieName))
                {
                    continue;
                }

                var movie = new Movie {
                    Name = movieName
                };

                if (articleDate.HasValue)
                {
                    movie.WeekendEnding = MovieDateUtil.NextSunday(articleDate);
                }

                // The estimate (in millions) lives in the fifth cell when the row has one.

                if (rowColumns.Count > 4)
                {
                    decimal millions;
                    var     estimateText = HttpUtility.HtmlDecode(rowColumns[4].InnerText).Trim();

                    // Parse with the invariant culture so "12.5" means the same thing on every
                    // machine, and skip the row (rather than throw) when the cell is not numeric.

                    if (!decimal.TryParse(estimateText,
                                          System.Globalization.NumberStyles.Number,
                                          System.Globalization.CultureInfo.InvariantCulture,
                                          out millions))
                    {
                        continue;
                    }

                    movie.Earnings = millions * 1000000;
                }

                result.Add(movie);
            }

            return(result);
        }
Example #7
0
        /// <summary>
        /// Searches the chatter board for "coupe", opens the first topic in the search results and
        /// turns each estimate line of the topic body into a movie.
        /// </summary>
        /// <remarks>
        /// <para>
        /// When exactly one element matches the estimate XPath, its HTML is split on line breaks
        /// and every line that starts with a double quote and contains <c>DELIMITER</c> is handed
        /// to <c>AddMovie</c>. Otherwise each matching element is parsed as "name, delimiter,
        /// estimate" right here.
        /// </para>
        /// <para>
        /// In the multi-element path a line that fails to parse sets <c>Error</c> and
        /// <c>ErrorDetail</c> and is skipped. <c>CompoundLoaded</c> is set as soon as any movie
        /// carries a day of the week. <c>UrlSource</c> is set to the topic page.
        /// </para>
        /// </remarks>
        /// <returns>
        /// The movies that were found; an empty list when the topic, its body or any estimate
        /// lines cannot be located.
        /// </returns>
        public override List <IMovie> Mine()
        {
            var result = new List <IMovie>();
            var web    = new HtmlWeb();

            var doc = web.Load($"{Url}/chatter/searchmessages?boardId=fml-main-chatter&query=coupe");

            // Lookup XPATH to get the right node that matches.
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp

            var node = doc.DocumentNode.SelectSingleNode("//body//div/h3[@class='topic-item__title']");

            // Traverse up to the <div>; that is the element the "data-href" link is read from.

            node = node?.ParentNode;

            var href = node?.GetAttributeValue("data-href", null);

            if (href == null)
            {
                return(result);
            }

            DateTime?articleDate = null;

            // Now retrieve the article page.

            UrlSource = $"{Url}/{href}";

            doc = web.Load(UrlSource);

            // Get the date of the article

            node = doc.DocumentNode.SelectSingleNode("//body//span[@class='topic-item__attribution']/span[@class='time-date']");

            if (node != null && node.HasChildNodes)
            {
                var articleText = HttpUtility.HtmlDecode(node.FirstChild.InnerText.Replace(",", string.Empty)).Trim();

                // Remove the text after the year.
                // NOTE(review): only the current year is looked for, so a topic posted in a
                // previous year keeps its trailing text - confirm that is acceptable.

                var year  = DateTime.Now.Year.ToString();
                var index = articleText.IndexOf(year);

                if (index > 0)
                {
                    articleText = articleText.Substring(0, index + year.Length);
                }

                DateTime parsedDateTime;

                if (DateTime.TryParse(articleText, out parsedDateTime))
                {
                    articleDate = parsedDateTime;
                }
            }

            // Get the data

            node = doc.DocumentNode.SelectSingleNode("//body//div[@class='topic-item__body']");

            if (node == null)
            {
                return(result);
            }

            // Find all of the estimate paragraphs / list items.
            // NOTE(review): the leading "//" searches the whole document, not just the topic
            // body - confirm whether ".//p[...]" was intended.

            var movieNodes = node.SelectNodes($"//p[contains(., '{DELIMITER}')]|//p[contains(., '{DELIMITER2}')]|//li[contains(., '{DELIMITER}')]");

            // SelectNodes yields null (not an empty collection) when nothing matches.

            if (movieNodes == null)
            {
                return(result);
            }

            // As of 11/2/2017 Todd is separating things with <br /> now.

            if (movieNodes.Count == 1)
            {
                var innerHtml  = HttpUtility.HtmlDecode(movieNodes.First().InnerHtml);
                var delimiters = new string[] { "\n", "<br>\n", "<br><br>" };
                var tokens     = innerHtml.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);

                foreach (var token in tokens)
                {
                    if (!token.StartsWith("\""))
                    {
                        continue;
                    }

                    var line = token.Replace("<br>", string.Empty);

                    // AddMovie slices the line at DELIMITER and throws when it is absent, so only
                    // hand over lines that really contain it.

                    if (line.IndexOf(DELIMITER) >= 0)
                    {
                        AddMovie(line, articleDate, result);
                    }
                }

                return(result);
            }

            foreach (var movieNode in movieNodes)
            {
                var nodeText = HttpUtility.HtmlDecode(movieNode.InnerHtml);

                int index = nodeText.IndexOf(DELIMITER);

                if (index < 0)
                {
                    index = nodeText.IndexOf(DELIMITER2);
                }

                // No delimiter, or nothing in front of it to use as a name.

                if (index <= 0)
                {
                    continue;
                }

                var movieName = nodeText.Substring(0, index).Replace("<br>", string.Empty);

                // Might switch this to RegEx...

                var estimatedBoxOffice = nodeText.Substring(index)
                                                 .Replace(DELIMITER, string.Empty)
                                                 .Replace(DELIMITER2, string.Empty)
                                                 .Replace("million", string.Empty);

                var trimIndex = estimatedBoxOffice.IndexOf("(");

                if (trimIndex > 0)
                {
                    // Trim out the drop percentage (and everything after). Cut exactly at the
                    // parenthesis so a number written without a space before it keeps its last digit.
                    estimatedBoxOffice = estimatedBoxOffice.Substring(0, trimIndex).TrimEnd();
                }

                trimIndex = estimatedBoxOffice.IndexOf("<br>");

                if (trimIndex > 0)
                {
                    // Trim out the HTML break (and everything after).
                    estimatedBoxOffice = estimatedBoxOffice.Substring(0, trimIndex);
                }

                trimIndex = estimatedBoxOffice.IndexOf("|");

                if (trimIndex > 0)
                {
                    // Trim out the COUPE label (and everything after).
                    estimatedBoxOffice = estimatedBoxOffice.Substring(0, trimIndex).TrimEnd();
                }

                if (string.IsNullOrEmpty(movieName))
                {
                    continue;
                }

                var   name  = RemovePunctuation(HttpUtility.HtmlDecode(movieName));
                Movie movie = null;

                try
                {
                    movie = new Movie
                    {
                        MovieName = MapName(ParseName(name)),
                        Day       = ParseDayOfWeek(name),
                        Earnings  = ParseEarnings(estimatedBoxOffice)
                    };

                    if (movie.Day.HasValue)
                    {
                        CompoundLoaded = true;
                    }
                }
                catch (Exception exception)
                {
                    // Record the problem and carry on with the remaining lines.
                    Error       = "Some bad data";
                    ErrorDetail = $"The movie did not parse correctly \"{name}\" - {exception.Message}";
                    movie       = null;
                }

                if (movie == null)
                {
                    continue;
                }

                if (!result.Contains(movie))
                {
                    if (articleDate.HasValue)
                    {
                        movie.WeekendEnding = MovieDateUtil.NextSunday(articleDate);
                    }

                    result.Add(movie);
                }
                else if (movie.Day.HasValue)
                {
                    // An equal movie is already listed, but this one carries a specific day:
                    // keep it and drop the listed entry that has no day.

                    if (articleDate.HasValue)
                    {
                        movie.WeekendEnding = MovieDateUtil.NextSunday(articleDate);
                    }

                    result.Add(movie);

                    // Remove the movie that does NOT have a day.

                    var toRemove = result.FirstOrDefault(item => item.Equals(movie) && !item.Day.HasValue);

                    if (toRemove != null)
                    {
                        result.Remove(toRemove);
                    }
                }
            }

            return(result);
        }
        /// <summary>
        /// Loads the news index under <c>Url</c>, opens the requested article and converts each
        /// "name, delimiter, estimate" list item of that article into a movie.
        /// </summary>
        /// <param name="articleNumber">
        /// 1-based position of the article link on the news index. Values below 1 or beyond the
        /// number of links select nothing and produce an empty result.
        /// </param>
        /// <remarks>
        /// <c>UrlSource</c> is set to the article page. <c>Error</c> is set to <c>NO_DATA</c> when
        /// the article has no list items, and to <c>FOUR_DAY</c> when a duplicate movie with a
        /// larger estimate replaces an earlier entry (only done when <c>GameDays</c> is above 3).
        /// </remarks>
        /// <returns>The movies that were found; possibly empty.</returns>
        private List <IMovie> MineForecast(int articleNumber = 1)
        {
            var    result = new List <IMovie>();
            string url    = Url + "news/";
            var    web    = new HtmlWeb();

            var doc = web.Load(url);                    // Load main page.

            // Lookup XPATH to get the article links.
            // REF: https://www.w3schools.com/xml/xpath_syntax.asp

            HtmlNode node = null;

            if (articleNumber == 1)
            {
                node = doc.DocumentNode.SelectSingleNode("//body//a[contains(@href, '/article/')]");
            }
            else
            {
                var nodes = doc.DocumentNode.SelectNodes("//body//a[contains(@href, '/article/')]");

                // The lower bound keeps a zero or negative article number from indexing before
                // the start of the collection.

                if (nodes != null && articleNumber >= 1 && articleNumber <= nodes.Count)
                {
                    node = nodes[articleNumber - 1];
                }
            }

            var href = node?.GetAttributeValue("href", null);

            if (href == null)
            {
                return(result);
            }

            DateTime?articleDate = null;

            // Now retrieve the article page.
            // NOTE(review): Url is concatenated directly with "news/" above, so it presumably ends
            // with "/" already and this produces a doubled slash - confirm the site tolerates it.

            UrlSource = $"{Url}/{href}";

            doc = web.Load(UrlSource);

            // Get the date of the article from the byline.

            node = doc.DocumentNode.SelectSingleNode("//body//div[@class='mojo-news-byline']");

            if (node != null && node.ChildNodes.Count > 1)
            {
                // The second child's text is split on '-' and the leading part parsed as the
                // date, once the PDT/PST markers have been removed.

                string   articleText = HttpUtility.HtmlDecode(node.ChildNodes[1].InnerText).Trim();
                var      tokens      = articleText.Split(new char[] { '-' });
                DateTime parsedDateTime;

                if (tokens.Length > 0 && DateTime.TryParse(tokens[0].Replace("PDT", string.Empty).Replace("PST", string.Empty), out parsedDateTime))
                {
                    articleDate = parsedDateTime.Date;
                }
            }

            // The movies are just in a <ul> tag (unsorted list)

            var movieNodes = doc.DocumentNode?.SelectNodes("//body//ul/li/span[@class='a-list-item']");

            if (movieNodes == null)
            {
                Error = NO_DATA;

                return(result);
            }

            foreach (var movieNode in movieNodes)
            {
                var nodeText = movieNode.InnerText;
                int index    = nodeText.IndexOf(DELIMITER);

                // No delimiter, or nothing in front of it to use as a name.

                if (index <= 0)
                {
                    continue;
                }

                var movieName = nodeText.Substring(0, index);
                var remainder = nodeText.Substring(index);

                // Might switch this to RegEx...

                bool valueInMillions    = remainder.Contains("M");
                var  estimatedBoxOffice = remainder.Replace(DELIMITER, string.Empty).Replace("M", string.Empty);

                var parenIndex = movieName.IndexOf("(");

                if (parenIndex > 0)
                {
                    // Trim out the THEATERS (for now). Cut exactly at the parenthesis so a name
                    // written without a space before it keeps its last character.
                    movieName = movieName.Substring(0, parenIndex).Trim();
                }

                parenIndex = estimatedBoxOffice.IndexOf("(");

                if (parenIndex > 0)
                {
                    // Trim out the multi-day value.
                    estimatedBoxOffice = estimatedBoxOffice.Substring(0, parenIndex).Trim();
                }

                decimal estBoxOffice;

                // Invariant culture: the estimate must parse the same way regardless of the
                // regional settings of the machine running the miner.

                if (string.IsNullOrEmpty(movieName) ||
                    !decimal.TryParse(estimatedBoxOffice,
                                      System.Globalization.NumberStyles.Number,
                                      System.Globalization.CultureInfo.InvariantCulture,
                                      out estBoxOffice))
                {
                    continue;
                }

                var movie = new Movie
                {
                    MovieName = MapName(RemovePunctuation(HttpUtility.HtmlDecode(movieName))),
                    Earnings  = estBoxOffice * (valueInMillions ? 1000000 : 1)
                };

                if (articleDate.HasValue)
                {
                    movie.WeekendEnding = MovieDateUtil.NextSunday(articleDate);
                }

                if (!result.Contains(movie))
                {
                    result.Add(movie);
                }
                else if (GameDays > 3)
                {
                    // It's OK to override the BO value if the game days is MORE than the default.

                    // Need to use "fuzzy" logic here because the names may have dates as suffixes and those should match.
                    var found = result.Find(item => item.Equals(movie));

                    if (found != null && found.EarningsBase < movie.EarningsBase)
                    {
                        // Replace the movie if a larger value was found. (4 day weekend versus 3 day)

                        result.Remove(found);
                        result.Add(movie);

                        Error = FOUR_DAY;
                    }
                }
            }

            return(result);
        }