Пример #1
0
        /// <summary>
        /// Filters the links file of every letter directory under <paramref name="pathDataDir"/>
        /// and overwrites each file with the filtered result.
        /// For each directory the method:
        /// 1. reads the links file (path obtained from FileManager.GetLinksFilePath) into memory;
        /// 2. keeps the first comma-separated field of every non-empty row and, of the remaining
        ///    fields, only those accepted by IsValidLink, each converted with IDHelper.GetIDFromLink;
        /// 3. writes the surviving rows back to the same file.
        /// </summary>
        /// <param name="pathDataDir">
        /// Directory whose top-level subdirectories matching the wildcard pattern "?" are processed.
        /// </param>
        /// <remarks>
        /// This method works only for raw web links. Per the original author's note, running it twice
        /// in a row leaves the files empty (presumably because the IDs written by the first run are
        /// no longer accepted by IsValidLink - confirm against that helper).
        /// </remarks>
        public static void FilterLinks(string pathDataDir)
        {
            Console.WriteLine("Starting links filtering . . .");

            foreach (string letterDir in Directory.EnumerateDirectories(pathDataDir, "?", SearchOption.TopDirectoryOnly))
            {
                Console.WriteLine("Filtering links in " + letterDir);

                // Output lines for this directory, already joined and ready to be written back.
                List<string> outputRows = new List<string>();
                string linksFile = FileManager.GetLinksFilePath(letterDir);

                // Read phase: the reader is fully disposed before the same path is reopened for writing.
                using (StreamReader source = new StreamReader(linksFile))
                {
                    string row;
                    while ((row = source.ReadLine()) != null)
                    {
                        if (row.Length == 0)
                        {
                            continue;
                        }

                        string[] fields = row.Split(',');

                        // The first field is the row's ID and is always carried over unchanged.
                        List<string> kept = new List<string>();
                        kept.Add(fields[0]);

                        for (int column = 1; column < fields.Length; column++)
                        {
                            string candidate = fields[column];
                            if (!IsValidLink(candidate))
                            {
                                continue;
                            }

                            kept.Add(IDHelper.GetIDFromLink(candidate, Path.GetFileName(letterDir)));
                        }

                        // A row that is left with nothing but its ID is dropped from the output.
                        if (kept.Count > 1)
                        {
                            outputRows.Add(string.Join(",", kept.ToArray()));
                        }
                    }
                }

                // Write phase: overwrite the links file with the surviving rows (possibly none).
                using (StreamWriter target = new StreamWriter(linksFile))
                {
                    foreach (string outputRow in outputRows)
                    {
                        target.WriteLine(outputRow);
                    }
                }
            }

            Console.WriteLine("Finished links filtering.");
        }