Example #1
0
        /// <summary>
        /// Loads the previous episodes, stores them in the cache folder and then checks the
        /// current episode for duplicated links (within itself and against the previous
        /// episodes) and for stop words. All findings are written to the console.
        /// </summary>
        public void FindDuplicates()
        {
            ConsoleHelper.Info("Getting previous episodes starting from {0}.", _firstEpisodeNumber);
            List <Episode> allEpisodes = GetAllEpisodes(_episodesCacheFolder, _blogPostUrlFormat, _episodeFileNameFormat, _firstEpisodeNumber);

            ConsoleHelper.Info("{0} episodes loaded.", allEpisodes.Count);

            ConsoleHelper.Info("Saving loaded episodes to cache.");
            // Save loaded episodes to improve processing speed.
            EpisodeHelper.SaveEpisodes(_episodesCacheFolder, _episodeFileNameFormat, allEpisodes, false);

            ConsoleHelper.Info("Loading current episode json file.");
            Episode lastEpisode = EpisodeHelper.GetEpisodeFromFile(_episodeJsonFilePath);

            if (lastEpisode == null)
            {
                // Without the current episode there is nothing to compare; say so instead of exiting silently.
                ConsoleHelper.Warning("Unable to load the current episode from {0}. Nothing to check.", _episodeJsonFilePath);
                return;
            }

            ConsoleHelper.Info("Looking for duplicates in the last episode.");
            DisplayDuplicates(lastEpisode);

            ConsoleHelper.Info("Looking for stop words in the last episode.");
            DisplayStopWords(lastEpisode, _stopWords);

            // allEpisodes was already dereferenced above, so no null check is needed here.
            ConsoleHelper.Info("Looking for duplicates between the last and previous episodes.");
            DisplayDuplicates(lastEpisode, allEpisodes);
        }
Example #2
0
        /// <summary>
        /// Searches the title and text of every content item of the episode for the given stop words.
        /// </summary>
        /// <param name="episode">The episode whose content items are checked.</param>
        /// <param name="stopWords">
        /// The stop words. Each entry is inserted into the regular expression as-is (it is not escaped).
        /// Null or empty entries are ignored; when no usable entry remains nothing is searched.
        /// </param>
        private void DisplayStopWords(Episode episode, string[] stopWords)
        {
            if (stopWords == null)
            {
                return;
            }

            // An empty alternative (or an empty group) would match the empty string at every
            // word boundary and flag the whole text, so blank entries are dropped first.
            string[] usableStopWords = Array.FindAll(stopWords, word => !string.IsNullOrEmpty(word));

            if (usableStopWords.Length == 0)
            {
                return;
            }

            string alternation     = string.Join("|", usableStopWords);
            string regexExpression = $@"\b({alternation})\b";

            List <Item> episodeItems = EpisodeHelper.GetEpisodeContentItems(episode);

            foreach (ContentItem contentItem in episodeItems)
            {
                if (contentItem.Title != null)
                {
                    FindStopWords("Title", contentItem.Title, regexExpression);
                }

                if (contentItem.Text != null)
                {
                    FindStopWords("Text", contentItem.Text, regexExpression);
                }
            }
        }
Example #3
0
        /// <summary>
        /// Reports every link of the given episode whose cleaned URL also appears in one of the
        /// past episodes. URLs are compared case-insensitively after trimming trailing slashes.
        /// </summary>
        /// <param name="episode">The episode being checked.</param>
        /// <param name="episodesList">The past episodes to compare against.</param>
        private void DisplayDuplicates(Episode episode, List <Episode> episodesList)
        {
            bool duplicatesFound = false;

            // The checked episode does not change during the scan, so its URLs are cleaned once
            // up front instead of once per past item. Items without a URL cannot be duplicates.
            List <KeyValuePair <string, ContentItem> > currentLinks = new List <KeyValuePair <string, ContentItem> >();

            foreach (ContentItem episodeItem in EpisodeHelper.GetEpisodeContentItems(episode))
            {
                if (!string.IsNullOrEmpty(episodeItem.Url))
                {
                    string episodeUrl = GetCleanUrl(episodeItem.Url.TrimEnd('/'));
                    currentLinks.Add(new KeyValuePair <string, ContentItem>(episodeUrl, episodeItem));
                }
            }

            foreach (Episode pastEpisode in episodesList)
            {
                foreach (ContentItem pastEpisodeItem in EpisodeHelper.GetEpisodeContentItems(pastEpisode))
                {
                    // Past items may lack a URL just like current ones; skip them instead of failing.
                    if (string.IsNullOrEmpty(pastEpisodeItem.Url))
                    {
                        continue;
                    }

                    string pastEpisodeUrl = GetCleanUrl(pastEpisodeItem.Url.TrimEnd('/'));

                    foreach (KeyValuePair <string, ContentItem> currentLink in currentLinks)
                    {
                        if (pastEpisodeUrl.Equals(currentLink.Key, StringComparison.OrdinalIgnoreCase))
                        {
                            ConsoleHelper.Warning("Ep. {0}, Url {1}. Current episode title: {2}",
                                                  pastEpisode.Number, pastEpisodeItem.Url, currentLink.Value.Title);
                            duplicatesFound = true;
                        }
                    }
                }
            }

            if (!duplicatesFound)
            {
                ConsoleHelper.Success("Yay! No duplicates found.");
            }
        }
Example #4
0
        /// <summary>
        /// Reports links inside the episode that point to the same cleaned URL. URLs are compared
        /// case-insensitively after trimming trailing slashes; each duplicated pair is reported once.
        /// </summary>
        /// <param name="episode">The episode.</param>
        private void DisplayDuplicates(Episode episode)
        {
            bool duplicatesFound = false;

            // Collect the items that have a URL together with their cleaned URL (parallel lists),
            // so every URL is cleaned only once.
            List <ContentItem> linkedItems = new List <ContentItem>();
            List <string>      cleanUrls   = new List <string>();

            foreach (ContentItem contentItem in EpisodeHelper.GetEpisodeContentItems(episode))
            {
                if (!string.IsNullOrEmpty(contentItem.Url))
                {
                    linkedItems.Add(contentItem);
                    cleanUrls.Add(GetCleanUrl(contentItem.Url.TrimEnd('/')));
                }
            }

            for (int i = 0; i < linkedItems.Count; i++)
            {
                // Start after i so a pair is compared once and never reported in both orders.
                for (int j = i + 1; j < linkedItems.Count; j++)
                {
                    if (cleanUrls[i].Equals(cleanUrls[j], StringComparison.OrdinalIgnoreCase))
                    {
                        ConsoleHelper.Warning("Current episode titles: \"{0}\" and \"{1}\"", linkedItems[i].Title, linkedItems[j].Title);
                        duplicatesFound = true;
                    }
                }
            }

            if (!duplicatesFound)
            {
                ConsoleHelper.Success("Yay! No duplicates found.");
            }
        }
Example #5
0
        /// <summary>
        /// Gets all episodes either from cache in the folder or from the blog, starting at the
        /// first episode number and stopping at the first episode that cannot be loaded.
        /// </summary>
        /// <param name="episodesCacheFolder">The episodes cache folder.</param>
        /// <param name="blogPostUrlFormat">The blog post URL format; {0} is replaced by the episode number.</param>
        /// <param name="episodeFileNameFormat">The episode file name format; {0} is replaced by the episode number.</param>
        /// <param name="firstEpisodeNumber">The first episode number.</param>
        /// <returns>The loaded episodes in ascending number order; never null, possibly empty.</returns>
        private List <Episode> GetAllEpisodes(string episodesCacheFolder, string blogPostUrlFormat, string episodeFileNameFormat, int firstEpisodeNumber)
        {
            List <Episode> allEpisodes = new List <Episode>();

            // Try to load episodes until the next one is not found
            for (int episodeNumber = firstEpisodeNumber; ; episodeNumber++)
            {
                string episodeFileName = string.Format(episodeFileNameFormat, episodeNumber);
                string episodeFilePath = Path.Combine(episodesCacheFolder, episodeFileName);

                Episode episode         = null;
                bool    loadedFromCache = false;

                // If available, load episode from file
                if (File.Exists(episodeFilePath))
                {
                    episode         = EpisodeHelper.GetEpisodeFromFile(episodeFilePath);
                    loadedFromCache = episode != null;
                }

                // Otherwise (no cache file, or the cached file yielded no episode) load from blog
                if (episode == null)
                {
                    string episodeBlogUrl = string.Format(blogPostUrlFormat, episodeNumber);

                    TryGetEpisodeFromBlog(episodeBlogUrl, out episode);
                }

                // If episode hasn't been loaded, stop loading episodes
                if (episode == null)
                {
                    break;
                }

                episode.Number = episodeNumber;
                allEpisodes.Add(episode);

                ConsoleHelper.Info(loadedFromCache ? "Loaded episode {0} from cache." : "Loaded episode {0} from blog.", episode.Number);
            }

            return(allEpisodes);
        }