Exemplo n.º 1
0
        /// <summary>
        /// Scans the given HTML pages for picture labels and collects every picture number
        /// that is used by more than one label.
        /// </summary>
        /// <param name="htmlFiles">Paths of the HTML files to scan.</param>
        /// <param name="consoleWarnings">
        /// When <c>true</c>, each duplicated number is reported through <c>Program.Log</c> together with
        /// the path of every file it was found in (a path is listed once per occurrence).
        /// </param>
        /// <returns>The duplicated picture numbers in ascending order, each listed once.</returns>
        public static List <int> GetDuplicates(List <string> htmlFiles, bool consoleWarnings = false)
        {
            // Parallel lists: pictureNumbers[i] was found in the file picsPaths[i].
            // They are only read back when warnings are printed.
            List <int>    pictureNumbers = new List <int>();
            List <string> picsPaths      = new List <string>();

            // Hash sets keep the membership tests constant-time instead of rescanning lists
            HashSet <int> seenNumbers  = new HashSet <int>();
            HashSet <int> duplicateSet = new HashSet <int>();

            foreach (string filePath in htmlFiles)
            {
                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.Load(filePath, true);

                // Select picture labels
                string             xPath         = ".//p[@class='Рисунокподпись' or @class='РисунокподписьЗнак']";
                HtmlNodeCollection picLabelNodes = htmlDoc.DocumentNode.SelectNodes(xPath);

                // SelectNodes yields null (not an empty collection) when no node matches
                if (picLabelNodes == null)
                {
                    continue;
                }

                foreach (HtmlNode labelNode in picLabelNodes)
                {
                    // One match both filters out labels without a number and extracts the digits
                    // that follow "ис", an optional dot and an optional whitespace character
                    Match numberMatch = Regex.Match(labelNode.InnerText, @"(?<=(ис\.?\s?))\d+");
                    if (!numberMatch.Success)
                    {
                        continue;
                    }

                    int picNumber = Convert.ToInt32(numberMatch.Value);

                    // Add() returns false when the number has been seen before, i.e. it is a duplicate
                    if (!seenNumbers.Add(picNumber))
                    {
                        duplicateSet.Add(picNumber);
                    }

                    pictureNumbers.Add(picNumber);
                    picsPaths.Add(filePath);
                }
            }

            List <int> duplicateNumbers = new List <int>(duplicateSet);
            duplicateNumbers.Sort();

            if (consoleWarnings)
            {
                foreach (int num in duplicateNumbers)
                {
                    Program.Log($"Картинки с именем 'Рис.{num}' найдены в следующих файлах:");
                    for (int i = 0; i < pictureNumbers.Count; i++)
                    {
                        if (pictureNumbers[i] == num)
                        {
                            Program.Log(picsPaths[i]);
                        }
                    }
                    Program.Log();
                }
            }

            return duplicateNumbers;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Program entry point. Parses the command line, then either reports problems found in the
        /// HTML pages listed by the .hhc file (check mode, the default) or renumbers the pictures in
        /// those pages so that they are numbered consecutively (fix mode).
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. Recognised values: a path to an existing <c>.hhc</c> file (the first
        /// one wins), <c>-o=PATH</c> / <c>--out=PATH</c> (log file), <c>-c</c> / <c>--check</c>,
        /// <c>-f</c> / <c>--fix</c>, <c>-h</c> / <c>--help</c> and <c>-w</c> / <c>--wait</c>
        /// (block on console input before exiting).
        /// </param>
        private static void Main(string[] args)
        {
            // If there were no arguments given, point the user at the help and close the program
            if (args.Length == 0)
            {
                Console.WriteLine("Отсутствуют параметры запуска. Введите параметр '-h' или '--help', чтобы открыть помощь.");
                return;
            }

            bool checkMode = true;
            bool showHelp = false, waitWhenDone = false;

            // Choose program mode and set paths to needed files
            #region Initialization
            string hhcFilePath = "";

            // Check all given arguments
            foreach (string arg in args)
            {
                // Find and set path for main XML .hhc file (first match wins; the extension
                // comparison ignores case so that e.g. "HELP.HHC" is accepted as well)
                if (hhcFilePath == "" &&
                    File.Exists(arg) &&
                    string.Equals(Path.GetExtension(arg), ".hhc", StringComparison.OrdinalIgnoreCase))
                {
                    hhcFilePath = arg;
                }

                // If the parameter for an output file is specified, place its path into a variable
                Match outputFileMatch = Regex.Match(arg, @"(?<=((-o=)|(--out=))).*$");
                if (outputFileMatch.Success && outputFileMatch.Value.Length > 0)
                {
                    // A bare file name has an empty directory part and refers to the current directory;
                    // otherwise the target directory must already exist
                    string outputDir = Path.GetDirectoryName(outputFileMatch.Value);
                    if (outputDir == "" || Directory.Exists(outputDir))
                    {
                        logToFile   = true;
                        logFilePath = outputFileMatch.Value;

                        // Remove a log file left at the same path
                        if (File.Exists(logFilePath))
                        {
                            File.Delete(logFilePath);
                        }
                    }
                }

                // When both mode switches are given, the last one wins
                if (arg == "-c" || arg == "--check")
                {
                    checkMode = true;
                }
                else if (arg == "-f" || arg == "--fix")
                {
                    checkMode = false;
                }

                if (arg == "-h" || arg == "--help")
                {
                    showHelp = true;
                }

                if (arg == "-w" || arg == "--wait")
                {
                    waitWhenDone = true;
                }
            }

            // Show help and close the program if the help parameter was specified
            if (showHelp)
            {
                Log(HelpText);
                if (waitWhenDone)
                {
                    Console.Read();
                }
                return;
            }

            // If main XML file wasn't found, tell about it and close the program since there's nothing to work with
            if (hhcFilePath == "")
            {
                Log("\nОшибка: .hhc файл не был найден.");
                if (waitWhenDone)
                {
                    Console.Read();
                }
                return;
            }

            List <string> htmlFiles = GetFilesListFromXML(hhcFilePath);
            #endregion

            // If check mode was chosen, check all types of issues, tell about them and close the program
            if (checkMode)
            {
                Checker.CheckMissingFiles(ref htmlFiles, true);
                Checker.CheckEmptyPictures(htmlFiles);
                Checker.CheckPicLabels(htmlFiles);
                Checker.GetDuplicates(htmlFiles, true);

                if (waitWhenDone)
                {
                    Console.Read();
                }
                return;
            }

            // If fix mode was chosen, check missing files only, without any warnings
            Checker.CheckMissingFiles(ref htmlFiles);

            Log("Были изменены следующие номера:");
            Log();

            List <Picture> pictures            = new List <Picture>();
            List <int>     duplicateNumbers    = Checker.GetDuplicates(htmlFiles);
            int            currentPictureIndex = 0;

            // Get information about pictures and temporarily put their unique keys (which are correct picture numbers
            // wrapped in hash signs) in labels to distinguish corrected ones
            // NOTE(review): File.ReadAllText / File.WriteAllText default to UTF-8, whereas htmlDoc.Load(path, true)
            // detects the encoding itself - confirm that the pages are never stored in another encoding.
            foreach (string filePath in htmlFiles)
            {
                string fileText = File.ReadAllText(filePath);

                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.Load(filePath, true);

                // Get nodes with classes used to mark pictures labels
                string             xPath           = ".//p[@class='Рисунокподпись' or @class='РисунокподписьЗнак']";
                HtmlNodeCollection labelClassNodes = htmlDoc.DocumentNode.SelectNodes(xPath);
                if (labelClassNodes == null)
                {
                    continue;
                }

                foreach (HtmlNode labelNode in labelClassNodes)
                {
                    // Get the number after "Рис."; labels without such a number are not pictures and are skipped
                    Match numberMatch = Regex.Match(labelNode.InnerText, @"(?<=(ис\.?\s?))\d+", RegexOptions.Singleline);
                    if (!numberMatch.Success)
                    {
                        continue;
                    }

                    currentPictureIndex++;

                    int picOldNumber = Convert.ToInt32(numberMatch.Value);
                    if (picOldNumber == currentPictureIndex)
                    {
                        continue; // Skip if no numeration issues
                    }

                    // Create a new Picture object with all information about current picture
                    Picture pic = new Picture(filePath, picOldNumber, currentPictureIndex);
                    pictures.Add(pic);

                    // Put the key instead of old number. The (?!\d) lookahead makes sure that only the whole
                    // number is replaced: without it, old number 1 would also eat the first digit of "Рис. 12".
                    fileText = Regex.Replace(fileText, $@"(?<=(ис\.?\s?)){pic.OldNumber}(?!\d)", pic.Key, RegexOptions.Singleline);

                    Log($"'Рис.{pic.OldNumber}' -> 'Рис.{pic.NewNumber}' в файле {filePath}");
                    Log();
                }

                File.WriteAllText(filePath, fileText);
            }

            // Deal with duplicates first to prevent their references from getting messed
            // The idea is to replace references to each duplicate-numbered picture inside file where it is located
            foreach (int duplicate in duplicateNumbers)
            {
                foreach (Picture pic in pictures)
                {
                    if (pic.OldNumber != duplicate)
                    {
                        continue;
                    }
                    ReplacePicReferences(pic, pic.File);
                }
            }

            // Replace the remaining references with unique keys
            foreach (Picture pic in pictures)
            {
                foreach (string filePath in htmlFiles)
                {
                    ReplacePicReferences(pic, filePath);
                }
            }

            // Finally replace keys with correct numbers. Every file is read and written once and the keys are
            // substituted in picture order, instead of re-reading and re-writing each file for every picture.
            if (pictures.Count > 0)
            {
                foreach (string filePath in htmlFiles)
                {
                    string fileText = File.ReadAllText(filePath);
                    foreach (Picture pic in pictures)
                    {
                        fileText = Regex.Replace(fileText, pic.Key, $"{pic.NewNumber}", RegexOptions.Singleline);
                    }
                    File.WriteAllText(filePath, fileText);
                }
            }

            if (waitWhenDone)
            {
                Console.Read();
            }
        }