/// <summary>
/// Loads the previous episodes, stores them in the cache folder and then reports
/// duplicate links and stop words for the current episode.
/// </summary>
public void FindDuplicates()
{
    ConsoleHelper.Info("Getting previous episodes starting from {0}.", _firstEpisodeNumber);
    List<Episode> previousEpisodes = GetAllEpisodes(
        _episodesCacheFolder,
        _blogPostUrlFormat,
        _episodeFileNameFormat,
        _firstEpisodeNumber);
    ConsoleHelper.Info("{0} episodes loaded.", previousEpisodes.Count);

    // Persist what was loaded so that the next run can read it from the cache folder.
    ConsoleHelper.Info("Saving loaded episodes to cache.");
    EpisodeHelper.SaveEpisodes(_episodesCacheFolder, _episodeFileNameFormat, previousEpisodes, false);

    ConsoleHelper.Info("Loading current episode json file.");
    Episode currentEpisode = EpisodeHelper.GetEpisodeFromFile(_episodeJsonFilePath);
    if (currentEpisode == null)
    {
        // Nothing to analyse without the current episode.
        return;
    }

    ConsoleHelper.Info("Looking for duplicates in the last episode.");
    DisplayDuplicates(currentEpisode);

    ConsoleHelper.Info("Looking for stop words in the last episode.");
    DisplayStopWords(currentEpisode, _stopWords);

    if (previousEpisodes == null)
    {
        return;
    }

    ConsoleHelper.Info("Looking for duplicates between the last and previous episodes.");
    DisplayDuplicates(currentEpisode, previousEpisodes);
}
/// <summary>
/// Displays possible stop words found in the titles and texts of the episode's content items.
/// </summary>
/// <param name="episode">The episode whose content items are scanned.</param>
/// <param name="stopWords">
/// The stop words. They are joined with <c>|</c> into a single whole-word pattern.
/// When null or empty, nothing is scanned.
/// </param>
private void DisplayStopWords(Episode episode, string[] stopWords)
{
    // With no stop words the pattern would degenerate to \b()\b, which matches the
    // empty string at every word boundary, so there is nothing meaningful to look for.
    if (stopWords == null || stopWords.Length == 0)
    {
        return;
    }

    List<Item> episodeItems = EpisodeHelper.GetEpisodeContentItems(episode);

    // NOTE(review): the words are inserted into the pattern as-is (not Regex.Escape'd),
    // so regex metacharacters in a stop word are interpreted as pattern syntax.
    // Confirm whether that is intended before escaping them.
    string alternation = string.Join("|", stopWords);
    string regexExpression = $@"\b({alternation})\b";

    foreach (ContentItem contentItem in episodeItems)
    {
        if (contentItem.Title != null)
        {
            FindStopWords("Title", contentItem.Title, regexExpression);
        }

        if (contentItem.Text != null)
        {
            FindStopWords("Text", contentItem.Text, regexExpression);
        }
    }
}
/// <summary>
/// Displays links of the given episode that also appear in any of the listed past episodes.
/// URLs are compared case-insensitively after trimming trailing slashes and cleaning
/// them with <c>GetCleanUrl</c>. Items without a URL are ignored on both sides.
/// </summary>
/// <param name="episode">The episode whose links are checked.</param>
/// <param name="episodesList">The past episodes to compare against.</param>
private void DisplayDuplicates(Episode episode, List<Episode> episodesList)
{
    bool duplicatesFound = false;

    // The current episode's items do not depend on the past episode being inspected,
    // so they are fetched once instead of once per past episode.
    List<Item> episodeItems = EpisodeHelper.GetEpisodeContentItems(episode);

    foreach (Episode pastEpisode in episodesList)
    {
        List<Item> pastEpisodeItems = EpisodeHelper.GetEpisodeContentItems(pastEpisode);

        foreach (ContentItem pastEpisodeItem in pastEpisodeItems)
        {
            // A past item without a URL cannot be a duplicate link; skipping it also
            // avoids calling TrimEnd on a null string.
            if (string.IsNullOrEmpty(pastEpisodeItem.Url))
            {
                continue;
            }

            string pastEpisodeUrl = GetCleanUrl(pastEpisodeItem.Url.TrimEnd('/'));

            foreach (ContentItem episodeItem in episodeItems)
            {
                if (string.IsNullOrEmpty(episodeItem.Url))
                {
                    continue;
                }

                string episodeUrl = GetCleanUrl(episodeItem.Url.TrimEnd('/'));
                if (pastEpisodeUrl.Equals(episodeUrl, StringComparison.OrdinalIgnoreCase))
                {
                    ConsoleHelper.Warning(
                        "Ep. {0}, Url {1}. Current episode title: {2}",
                        pastEpisode.Number,
                        pastEpisodeItem.Url,
                        episodeItem.Title);
                    duplicatesFound = true;
                }
            }
        }
    }

    if (!duplicatesFound)
    {
        ConsoleHelper.Success("Yay! No duplicates found.");
    }
}
/// <summary>
/// Displays the duplicates between links inside the episode, if any are found.
/// URLs are compared case-insensitively after trimming trailing slashes and cleaning
/// them with <c>GetCleanUrl</c>. Each pair of duplicated items is reported once.
/// </summary>
/// <param name="episode">The episode.</param>
private void DisplayDuplicates(Episode episode)
{
    bool duplicatesFound = false;
    List<Item> episodeItems = EpisodeHelper.GetEpisodeContentItems(episode);

    for (int i = 0; i < episodeItems.Count; i++)
    {
        ContentItem first = (ContentItem)episodeItems[i];
        if (string.IsNullOrEmpty(first.Url))
        {
            continue;
        }

        string firstUrl = GetCleanUrl(first.Url.TrimEnd('/'));

        // Only look at items after position i: comparing every item with every other
        // item would report the same pair twice, as (A, B) and again as (B, A).
        for (int j = i + 1; j < episodeItems.Count; j++)
        {
            ContentItem second = (ContentItem)episodeItems[j];

            // Skip items without a URL, and the same instance if it is listed twice.
            if (first == second || string.IsNullOrEmpty(second.Url))
            {
                continue;
            }

            string secondUrl = GetCleanUrl(second.Url.TrimEnd('/'));
            if (firstUrl.Equals(secondUrl, StringComparison.OrdinalIgnoreCase))
            {
                ConsoleHelper.Warning("Current episode titles: \"{0}\" and \"{1}\"", first.Title, second.Title);
                duplicatesFound = true;
            }
        }
    }

    if (!duplicatesFound)
    {
        ConsoleHelper.Success("Yay! No duplicates found.");
    }
}
/// <summary>
/// Gets all episodes, each one either from the cache folder or from the blog.
/// Episode numbers are tried consecutively starting at <paramref name="firstEpisodeNumber"/>
/// and loading stops at the first number that cannot be loaded from either source.
/// </summary>
/// <param name="episodesCacheFolder">The episodes cache folder.</param>
/// <param name="blogPostUrlFormat">The blog post URL format; <c>{0}</c> receives the episode number.</param>
/// <param name="episodeFileNameFormat">The episode file name format; <c>{0}</c> receives the episode number.</param>
/// <param name="firstEpisodeNumber">The first episode number.</param>
/// <returns>The loaded episodes in ascending number order; empty when none could be loaded.</returns>
private List<Episode> GetAllEpisodes(string episodesCacheFolder, string blogPostUrlFormat, string episodeFileNameFormat, int firstEpisodeNumber)
{
    List<Episode> allEpisodes = new List<Episode>();

    // Try to load episodes until the next one is not found.
    for (int episodeNumber = firstEpisodeNumber; ; episodeNumber++)
    {
        string episodeFileName = string.Format(episodeFileNameFormat, episodeNumber);
        string episodeFilePath = Path.Combine(episodesCacheFolder, episodeFileName);

        Episode episode = null;
        bool loadedFromCache = false;

        // If available, load the episode from its cache file.
        if (File.Exists(episodeFilePath))
        {
            // NOTE(review): other callers null-check this helper's result, so it is
            // treated here as possibly returning null for an unreadable file.
            episode = EpisodeHelper.GetEpisodeFromFile(episodeFilePath);
            loadedFromCache = episode != null;
        }

        // Otherwise (no cache file, or the cache file produced no episode) load from the blog.
        if (episode == null)
        {
            string episodeBlogUrl = string.Format(blogPostUrlFormat, episodeNumber);
            TryGetEpisodeFromBlog(episodeBlogUrl, out episode);
        }

        // If the episode hasn't been loaded from either source, stop loading episodes.
        if (episode == null)
        {
            break;
        }

        episode.Number = episodeNumber;
        allEpisodes.Add(episode);
        ConsoleHelper.Info(
            loadedFromCache ? "Loaded episode {0} from cache." : "Loaded episode {0} from blog.",
            episode.Number);
    }

    return allEpisodes;
}