Example #1
0
        /// <summary>
        /// Loads the previous episodes, saves them to the episode cache folder, and then reports
        /// duplicates and stop words found in the current episode.
        /// </summary>
        /// <remarks>
        /// The current episode is read from the configured episode JSON file. When that read yields
        /// no episode, the method returns after saving the cache without reporting anything.
        /// </remarks>
        public void FindDuplicates()
        {
            ConsoleHelper.Info("Getting previous episodes starting from {0}.", _firstEpisodeNumber);
            List<Episode> allEpisodes = GetAllEpisodes(_episodesCacheFolder, _blogPostUrlFormat, _episodeFileNameFormat, _firstEpisodeNumber);

            ConsoleHelper.Info("{0} episodes loaded.", allEpisodes.Count);

            // Save loaded episodes so that later runs can read them from the cache folder.
            ConsoleHelper.Info("Saving loaded episodes to cache.");
            EpisodeHelper.SaveEpisodes(_episodesCacheFolder, _episodeFileNameFormat, allEpisodes, false);

            ConsoleHelper.Info("Loading current episode json file.");
            Episode lastEpisode = EpisodeHelper.GetEpisodeFromFile(_episodeJsonFilePath);

            // Nothing to analyse when the current episode could not be read.
            if (lastEpisode == null)
            {
                return;
            }

            ConsoleHelper.Info("Looking for duplicates in the last episode.");
            DisplayDuplicates(lastEpisode);

            ConsoleHelper.Info("Looking for stop words in the last episode.");
            DisplayStopWords(lastEpisode, _stopWords);

            // allEpisodes was already dereferenced above (Count), so it is known to be non-null
            // here; no additional null check is needed before comparing against it.
            ConsoleHelper.Info("Looking for duplicates between the last and previous episodes.");
            DisplayDuplicates(lastEpisode, allEpisodes);
        }
Example #2
0
        /// <summary>
        /// Gets all episodes, numbered consecutively from <paramref name="firstEpisodeNumber"/>,
        /// reading each one from the cache folder when a cached file exists and from the blog otherwise.
        /// </summary>
        /// <param name="episodesCacheFolder">The episodes cache folder.</param>
        /// <param name="blogPostUrlFormat">The blog post URL format; formatted with the episode number.</param>
        /// <param name="episodeFileNameFormat">The episode file name format; formatted with the episode number.</param>
        /// <param name="firstEpisodeNumber">The first episode number.</param>
        /// <returns>
        /// The loaded episodes in ascending episode-number order. Loading stops at the first episode
        /// number that can be obtained neither from the cache nor from the blog, so the list may be
        /// empty but is never <c>null</c>.
        /// </returns>
        private List<Episode> GetAllEpisodes(string episodesCacheFolder, string blogPostUrlFormat, string episodeFileNameFormat, int firstEpisodeNumber)
        {
            List<Episode> allEpisodes = new List<Episode>();

            // Try to load episodes until the next one is not found.
            for (int episodeNumber = firstEpisodeNumber; ; episodeNumber++)
            {
                string episodeFileName = string.Format(episodeFileNameFormat, episodeNumber);
                string episodeFilePath = Path.Combine(episodesCacheFolder, episodeFileName);

                Episode episode = null;

                // If available, load the episode from the cached file.
                if (File.Exists(episodeFilePath))
                {
                    episode = EpisodeHelper.GetEpisodeFromFile(episodeFilePath);
                }

                bool loadedFromCache = episode != null;

                // Otherwise load from the blog. This also covers a cached file that yielded no
                // episode, which previously caused a NullReferenceException on the cache path.
                if (!loadedFromCache)
                {
                    string episodeBlogUrl = string.Format(blogPostUrlFormat, episodeNumber);
                    TryGetEpisodeFromBlog(episodeBlogUrl, out episode);
                }

                // If the episode hasn't been loaded from either source, stop loading episodes.
                if (episode == null)
                {
                    break;
                }

                episode.Number = episodeNumber;
                allEpisodes.Add(episode);

                ConsoleHelper.Info(
                    loadedFromCache ? "Loaded episode {0} from cache." : "Loaded episode {0} from blog.",
                    episode.Number);
            }

            return allEpisodes;
        }