/// <summary>
/// Filters the links files found under <paramref name="pathDataDir"/> in place.
/// </summary>
/// <remarks>
/// For every sub-directory matching the "?" search pattern the method:
///   1. loads the links file (path obtained from FileManager.GetLinksFilePath) into memory,
///   2. keeps the leading ID of each row and replaces every link accepted by
///      IsValidLink with the value returned by IDHelper.GetIDFromLink,
///   3. overwrites the same file with the rows that still contain at least one link.
/// This method works only for raw web links. If it is used twice in a row,
/// the results will be empty.
/// </remarks>
/// <param name="pathDataDir">Directory whose matching sub-directories are processed.</param>
public static void FilterLinks(string pathDataDir)
{
    Console.WriteLine("Starting links filtering . . .");

    IEnumerable<string> letterDirectories =
        Directory.EnumerateDirectories(pathDataDir, "?", SearchOption.TopDirectoryOnly);

    foreach (string pathLetterDir in letterDirectories)
    {
        Console.WriteLine("Filtering links in " + pathLetterDir);

        // Each row is: [0] = entry ID, [1..] = filtered links converted to IDs.
        List<List<string>> characterLinks = new List<List<string>>();
        string pathChLinks = FileManager.GetLinksFilePath(pathLetterDir);

        // The directory name is the same for every link in this file,
        // so compute it once instead of once per link.
        string letterDirName = Path.GetFileName(pathLetterDir);

        // The whole file is read (and the reader closed) before writing,
        // because the output goes back to the very same path.
        using (StreamReader reader = new StreamReader(pathChLinks))
        {
            string input;
            while ((input = reader.ReadLine()) != null)
            {
                if (input.Length == 0)
                {
                    continue; // skip blank lines
                }

                string[] linksInfo = input.Split(',');
                List<string> newLinksInfo = new List<string>();

                // The first field is the entry ID and is always kept.
                newLinksInfo.Add(linksInfo[0]);

                // Remaining fields are links; keep only the valid ones, converted to IDs.
                for (int i = 1; i < linksInfo.Length; ++i)
                {
                    string linkToTest = linksInfo[i];
                    if (IsValidLink(linkToTest))
                    {
                        newLinksInfo.Add(IDHelper.GetIDFromLink(linkToTest, letterDirName));
                    }
                }

                characterLinks.Add(newLinksInfo);
            }
        }

        // Overwrite the links file, dropping rows that have an ID but no links left.
        using (StreamWriter writer = new StreamWriter(pathChLinks))
        {
            foreach (List<string> listLinks in characterLinks)
            {
                if (listLinks.Count > 1)
                {
                    writer.WriteLine(string.Join(",", listLinks));
                }
            }
        }
    }

    Console.WriteLine("Finished links filtering.");
}