/// <summary>
/// Loads the previous episodes and the current episode, then reports duplicates
/// and stop words found in the current episode.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Load every previous episode via <c>GetAllEpisodes</c>, starting at <c>_firstEpisodeNumber</c>.
/// 2. Write the loaded episodes back to the cache folder via <c>EpisodeHelper.SaveEpisodes</c>.
/// 3. Load the current episode from <c>_episodeJsonFilePath</c>.
/// 4. If the current episode was loaded, display duplicates inside it, stop words inside it,
///    and duplicates between it and the previous episodes.
/// </remarks>
public void FindDuplicates()
{
    ConsoleHelper.Info("Getting previous episodes starting from {0}.", _firstEpisodeNumber);
    List<Episode> allEpisodes = GetAllEpisodes(
        _episodesCacheFolder,
        _blogPostUrlFormat,
        _episodeFileNameFormat,
        _firstEpisodeNumber);
    ConsoleHelper.Info("{0} episodes loaded.", allEpisodes.Count);

    // Save loaded episodes to improve processing speed on subsequent runs.
    // NOTE(review): the meaning of the trailing `false` argument is defined by
    // EpisodeHelper.SaveEpisodes, which is not visible here - confirm before changing it.
    ConsoleHelper.Info("Saving loaded episodes to cache.");
    EpisodeHelper.SaveEpisodes(_episodesCacheFolder, _episodeFileNameFormat, allEpisodes, false);

    ConsoleHelper.Info("Loading current episode json file.");
    Episode lastEpisode = EpisodeHelper.GetEpisodeFromFile(_episodeJsonFilePath);
    if (lastEpisode == null)
    {
        // Previously this case ended silently; tell the user why no checks were run.
        ConsoleHelper.Info("Current episode could not be loaded from {0}; nothing to check.", _episodeJsonFilePath);
        return;
    }

    ConsoleHelper.Info("Looking for duplicates in the last episode.");
    DisplayDuplicates(lastEpisode);

    ConsoleHelper.Info("Looking for stop words in the last episode.");
    DisplayStopWords(lastEpisode, _stopWords);

    // No null check on allEpisodes: it was already dereferenced above (allEpisodes.Count),
    // so a null list could never have reached this point.
    ConsoleHelper.Info("Looking for duplicates between the last and previous episodes.");
    DisplayDuplicates(lastEpisode, allEpisodes);
}
/// <summary>
/// Gets all episodes, numbered consecutively from <paramref name="firstEpisodeNumber"/>,
/// taking each one from the cache folder when possible and from the blog otherwise.
/// </summary>
/// <param name="episodesCacheFolder">The episodes cache folder.</param>
/// <param name="blogPostUrlFormat">The blog post URL format; formatted with the episode number.</param>
/// <param name="episodeFileNameFormat">The episode file name format; formatted with the episode number.</param>
/// <param name="firstEpisodeNumber">The first episode number.</param>
/// <returns>
/// The episodes loaded, in ascending episode-number order. Never <c>null</c>; empty when
/// the first episode could not be loaded from either source.
/// </returns>
private List<Episode> GetAllEpisodes(string episodesCacheFolder, string blogPostUrlFormat, string episodeFileNameFormat, int firstEpisodeNumber)
{
    List<Episode> allEpisodes = new List<Episode>();

    // Keep loading consecutive episodes until one is found in neither the cache nor the blog.
    for (int episodeNumber = firstEpisodeNumber; ; episodeNumber++)
    {
        string episodeFileName = string.Format(episodeFileNameFormat, episodeNumber);
        string episodeFilePath = Path.Combine(episodesCacheFolder, episodeFileName);

        // If available, load the episode from its cache file.
        Episode episode = null;
        if (File.Exists(episodeFilePath))
        {
            episode = EpisodeHelper.GetEpisodeFromFile(episodeFilePath);
        }
        bool loadedFromCache = episode != null;

        // Otherwise load it from the blog. This also covers a cache file that exists but
        // yields no episode, which previously caused a NullReferenceException.
        if (!loadedFromCache)
        {
            string episodeBlogUrl = string.Format(blogPostUrlFormat, episodeNumber);
            TryGetEpisodeFromBlog(episodeBlogUrl, out episode);
        }

        // If the episode hasn't been loaded from either source, stop loading episodes.
        if (episode == null)
        {
            break;
        }

        episode.Number = episodeNumber;
        allEpisodes.Add(episode);
        ConsoleHelper.Info(
            loadedFromCache ? "Loaded episode {0} from cache." : "Loaded episode {0} from blog.",
            episode.Number);
    }

    return allEpisodes;
}