/// <summary>
/// Scans the given HTML pages for picture labels and collects the picture numbers
/// that are used by more than one label.
/// </summary>
/// <param name="htmlFiles">Paths of the HTML files to scan.</param>
/// <param name="consoleWarnings">
/// When <c>true</c>, every duplicated number is logged through <c>Program.Log</c>,
/// followed by the path of each file in which a label with that number was found.
/// </param>
/// <returns>Duplicated picture numbers, sorted in ascending order.</returns>
public static List<int> GetDuplicates(List<string> htmlFiles, bool consoleWarnings = false)
{
    // For every picture number: the files where a label with that number was seen, in the
    // order they were encountered. A path is stored once per label, so a number that is
    // repeated inside a single file lists that file more than once.
    Dictionary<int, List<string>> pathsByNumber = new Dictionary<int, List<string>>();
    HashSet<int> duplicates = new HashSet<int>();

    // Paragraph classes used for picture labels
    const string labelXPath = ".//p[@class='Рисунокподпись' or @class='РисунокподписьЗнак']";

    foreach (string filePath in htmlFiles)
    {
        HtmlDocument htmlDoc = new HtmlDocument();
        htmlDoc.Load(filePath, true);

        // SelectNodes may return null when the page has no label paragraphs
        HtmlNodeCollection picLabelNodes = htmlDoc.DocumentNode.SelectNodes(labelXPath);
        if (picLabelNodes == null)
        {
            continue;
        }

        foreach (HtmlNode labelNode in picLabelNodes)
        {
            // A single match both filters out paragraphs without a picture number
            // and extracts the digits that follow "ис", an optional dot and an optional space
            Match numberMatch = Regex.Match(labelNode.InnerText, @"(?<=(ис\.?\s?))\d+");
            if (!numberMatch.Success)
            {
                continue;
            }

            int picNumber = Convert.ToInt32(numberMatch.Value);

            List<string> paths;
            if (pathsByNumber.TryGetValue(picNumber, out paths))
            {
                // Seen before, so this number is a duplicate
                duplicates.Add(picNumber);
            }
            else
            {
                paths = new List<string>();
                pathsByNumber.Add(picNumber, paths);
            }

            paths.Add(filePath);
        }
    }

    List<int> duplicateNumbers = new List<int>(duplicates);
    duplicateNumbers.Sort();

    if (consoleWarnings)
    {
        foreach (int num in duplicateNumbers)
        {
            Program.Log($"Картинки с именем 'Рис.{num}' найдены в следующих файлах:");
            foreach (string path in pathsByNumber[num])
            {
                Program.Log(path);
            }

            Program.Log();
        }
    }

    return duplicateNumbers;
}
/// <summary>
/// Program entry point. Parses the command line and then either reports picture-related
/// problems in the pages of the help project (check mode, the default) or renumbers the
/// pictures so that they are numbered consecutively (fix mode).
/// </summary>
/// <param name="args">
/// Command-line arguments. The following are recognised:
/// a path to an existing <c>.hhc</c> file (the first one found is used);
/// <c>-o=PATH</c> or <c>--out=PATH</c> to write the log into a file;
/// <c>-c</c>/<c>--check</c> for check mode; <c>-f</c>/<c>--fix</c> for fix mode;
/// <c>-h</c>/<c>--help</c> to print the help text; <c>-w</c>/<c>--wait</c> to wait for
/// console input before exiting.
/// </param>
private static void Main(string[] args)
{
    bool checkMode = true;
    bool showHelp = false, waitWhenDone = false;

    // Choose program mode and set paths to needed files
    #region Initialization
    string hhcFilePath = "";

    // If there were no arguments given, close the program
    if (args.Length == 0)
    {
        Console.WriteLine("Отсутствуют параметры запуска. Введите параметр '-h' или '--help', чтобы открыть помощь.");
        return;
    }

    // Check all given arguments
    foreach (string arg in args)
    {
        // Find and set path for main XML .hhc file (only the first one is kept)
        if (File.Exists(arg) && Path.GetExtension(arg) == ".hhc" && hhcFilePath == "")
        {
            hhcFilePath = arg;
        }

        // If the parameter for an output file is specified, place its path into a variable
        Match outputFileMatch = Regex.Match(arg, @"(?<=((-o=)|(--out=))).*$");
        if (outputFileMatch.Success && !string.IsNullOrWhiteSpace(outputFileMatch.Value))
        {
            // GetDirectoryName returns an empty string for a bare file name such as "log.txt";
            // that means "current directory" and must not be rejected. A null result means
            // the value has no file part at all (for example a root directory).
            string outputDirectory = Path.GetDirectoryName(outputFileMatch.Value);
            bool directoryUsable = outputDirectory != null
                && (outputDirectory.Length == 0 || Directory.Exists(outputDirectory));

            if (directoryUsable)
            {
                logToFile = true;
                logFilePath = outputFileMatch.Value;

                // Start every run with a fresh log file
                if (File.Exists(logFilePath))
                {
                    File.Delete(logFilePath);
                }
            }
        }

        if (arg == "-c" || arg == "--check")
        {
            checkMode = true;
        }
        else if (arg == "-f" || arg == "--fix")
        {
            checkMode = false;
        }

        if (arg == "-h" || arg == "--help")
        {
            showHelp = true;
        }

        if (arg == "-w" || arg == "--wait")
        {
            waitWhenDone = true;
        }
    }

    // Show help and close the program if the help parameter was specified
    if (showHelp)
    {
        Log(HelpText);
        if (waitWhenDone)
        {
            Console.Read();
        }

        return;
    }

    // If main XML file wasn't found, tell about it and close the program
    // since there's nothing to work with
    if (hhcFilePath == "")
    {
        Log("\nОшибка: .hhc файл не был найден.");
        if (waitWhenDone)
        {
            Console.Read();
        }

        return;
    }

    List<string> htmlFiles = GetFilesListFromXML(hhcFilePath);
    #endregion

    // If check mode was chosen, check all types of issues, tell about them and close the program
    if (checkMode)
    {
        Checker.CheckMissingFiles(ref htmlFiles, true);
        Checker.CheckEmptyPictures(htmlFiles);
        Checker.CheckPicLabels(htmlFiles);
        Checker.GetDuplicates(htmlFiles, true);
        if (waitWhenDone)
        {
            Console.Read();
        }

        return;
    }

    // If fix mode was chosen, check missing files only, without any warnings
    Checker.CheckMissingFiles(ref htmlFiles);
    Log("Были изменены следующие номера:");
    Log();

    List<Picture> pictures = new List<Picture>();
    List<int> duplicateNumbers = Checker.GetDuplicates(htmlFiles);
    int currentPictureIndex = 0;

    // Paragraph classes used to mark picture labels
    const string labelXPath = ".//p[@class='Рисунокподпись' or @class='РисунокподписьЗнак']";

    // Get information about pictures and temporarily put their unique keys (which are correct
    // picture numbers wrapped in hash signs) in labels to distinguish corrected ones
    foreach (string filePath in htmlFiles)
    {
        HtmlDocument htmlDoc = new HtmlDocument();
        htmlDoc.Load(filePath, true);

        HtmlNodeCollection labelClassNodes = htmlDoc.DocumentNode.SelectNodes(labelXPath);
        if (labelClassNodes == null)
        {
            continue;
        }

        // Read the raw text only for files that actually contain label paragraphs
        string fileText = File.ReadAllText(filePath);

        foreach (HtmlNode labelNode in labelClassNodes)
        {
            // Get the number after "Рис."; paragraphs without such a number are not pictures
            Match numberMatch = Regex.Match(labelNode.InnerText, @"(?<=(ис\.?\s?))\d+", RegexOptions.Singleline);
            if (!numberMatch.Success)
            {
                continue;
            }

            currentPictureIndex++;
            int picOldNumber = Convert.ToInt32(numberMatch.Value);

            // Skip if no numeration issues
            if (picOldNumber == currentPictureIndex)
            {
                continue;
            }

            // Create a new Picture object with all information about current picture
            Picture pic = new Picture(filePath, picOldNumber, currentPictureIndex);
            pictures.Add(pic);

            // Put the key instead of old number. The trailing (?!\d) keeps the replacement from
            // touching longer numbers that merely start with the old one (1 vs. 12, 10, 100...).
            fileText = Regex.Replace(fileText, $@"(?<=(ис\.?\s?)){pic.OldNumber}(?!\d)", pic.Key, RegexOptions.Singleline);

            Log($"'Рис.{pic.OldNumber}' -> 'Рис.{pic.NewNumber}' в файле {filePath}");
            Log();
        }

        File.WriteAllText(filePath, fileText);
    }

    // Deal with duplicates first to prevent their references from getting messed
    // The idea is to replace references to each duplicate-numbered picture inside file where it is located
    foreach (int duplicate in duplicateNumbers)
    {
        foreach (Picture pic in pictures)
        {
            if (pic.OldNumber != duplicate)
            {
                continue;
            }

            ReplacePicReferences(pic, pic.File);
        }
    }

    // Replace the remaining references with unique keys
    foreach (Picture pic in pictures)
    {
        foreach (string filePath in htmlFiles)
        {
            ReplacePicReferences(pic, filePath);
        }
    }

    // Finally replace keys with correct numbers. Every file is read and written once; the keys
    // are applied in the order of the pictures list. Nothing is touched when no picture changed.
    if (pictures.Count > 0)
    {
        foreach (string filePath in htmlFiles)
        {
            string fileText = File.ReadAllText(filePath);
            foreach (Picture pic in pictures)
            {
                fileText = Regex.Replace(fileText, pic.Key, $"{pic.NewNumber}", RegexOptions.Singleline);
            }

            File.WriteAllText(filePath, fileText);
        }
    }

    if (waitWhenDone)
    {
        Console.Read();
    }
}