/// <summary>
        /// Removes overlapping app elements from each app document matched by
        /// the input settings: every overlapping pair is merged into one of its
        /// two elements, and the resulting document is saved under the output
        /// directory with the same file name.
        /// </summary>
        /// <remarks>
        /// When the div list option is set, the IDs of the div1 elements where
        /// a removed app was flagged by <c>LemHasLostAttributes</c> are written
        /// to <c>overlap-err-divs.txt</c> in the output directory.
        /// </remarks>
        /// <returns>An already completed task: all the work is synchronous.</returns>
        public Task Run()
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("REMOVE OVERLAPS\n");
            Console.ResetColor();
            Console.WriteLine(
                $"Input:  {_appFileMask}\n" +
                $"Output: {_outputDir}\n" +
                $"Div list: {(_writeDivList ? "yes" : "no")}\n");

            int inputFileCount = 0;
            int removedCount = 0;
            HashSet<string> errDivIds = new HashSet<string>();

            // the factory is disposable: release it (and the provider added
            // to it) once all the documents have been processed
            using (ILoggerFactory loggerFactory = new LoggerFactory())
            {
                loggerFactory.AddSerilog(Log.Logger);
                Log.Logger.Information("REMOVE OVERLAPS");

                // CreateDirectory does nothing when the directory already exists
                Directory.CreateDirectory(_outputDir);

                WordIdList widList = new WordIdList
                {
                    Logger = loggerFactory.CreateLogger("report-overlaps")
                };

                // for each app document
                foreach (string filePath in FileEnumerator.Enumerate(
                    _appFileDir, _appFileMask, _regexMask, _recursive))
                {
                    Console.WriteLine();
                    Log.Logger.Information("Parsing {FilePath}", filePath);
                    Console.WriteLine(filePath);
                    inputFileCount++;

                    removedCount += RemoveDocumentOverlaps(
                        filePath, widList, errDivIds);
                }
            }

            if (_writeDivList)
            {
                // disposing the writer flushes it, no explicit Flush is needed
                using (StreamWriter listWriter = new StreamWriter(
                    Path.Combine(_outputDir, "overlap-err-divs.txt"),
                    false, Encoding.UTF8))
                {
                    foreach (string id in errDivIds)
                    {
                        listWriter.WriteLine(id);
                    }
                }
            }

            Console.WriteLine($"\nInput documents: {inputFileCount}");
            Console.WriteLine($"Removed overlaps: {removedCount}");
            return Task.CompletedTask;
        }

        // Loads the app document at filePath, merges its overlapping app
        // elements, saves the result into the output directory, and returns
        // the count of app elements removed from the document.
        private int RemoveDocumentOverlaps(string filePath, WordIdList widList,
            HashSet<string> errDivIds)
        {
            // load app document
            XDocument doc = XDocument.Load(filePath,
                LoadOptions.PreserveWhitespace | LoadOptions.SetLineInfo);

            // collect word IDs from the companion text document, whose name
            // is the app document's name without "-app"; the replacement is
            // applied to the file name only, so that a directory whose name
            // happens to contain "-app." is left untouched
            string textFilePath = Path.Combine(
                Path.GetDirectoryName(filePath) ?? "",
                Path.GetFileName(filePath).Replace("-app.", "."));
            widList.Parse(XDocument.Load(textFilePath));

            // collect app's locations
            List<AppElemLocations> appElemLocs =
                AppElemLocationCollector.Collect(doc, widList,
                    AppElemLocationCollector.IsOverlappable);

            // detect and process overlaps
            int removed = 0;
            for (int i = 0; i < appElemLocs.Count - 1; i++)
            {
                for (int j = i + 1; j < appElemLocs.Count; j++)
                {
                    if (!appElemLocs[i].Overlaps(appElemLocs[j]))
                    {
                        continue;
                    }

                    MergeOverlap(appElemLocs, i, j, errDivIds);
                    removed++;

                    // one of the two entries was removed from the list: as
                    // j > i, the first of the two is always at i, so step back
                    // to let the outer increment look again from index i
                    i--;
                    break;
                }

                // progress
                if (i % 10 == 0)
                {
                    Console.Write('.');
                }
            }

            // save
            doc.Save(Path.Combine(_outputDir, Path.GetFileName(filePath)),
                SaveOptions.OmitDuplicateNamespaces);
            return removed;
        }

        // Merges the overlapping app's at indexes i and j (i < j): the children
        // of the one picked as source, except lem, are appended to the target;
        // then the source is removed from both the XML document and the list.
        private void MergeOverlap(List<AppElemLocations> appElemLocs,
            int i, int j, HashSet<string> errDivIds)
        {
            // pick the target between the two overlapping app's
            bool firstIsTarget = IsFirstTarget(appElemLocs[i], appElemLocs[j]);
            int targetIndex = firstIsTarget ? i : j;
            int sourceIndex = firstIsTarget ? j : i;
            AppElemLocations target = appElemLocs[targetIndex];
            AppElemLocations source = appElemLocs[sourceIndex];

            // dumps are passed as template arguments rather than being
            // concatenated into the template, so that their text is never
            // parsed as a template; :l renders strings without quotes
            Log.Logger.Information(
                "Merging overlapping app {Source:l} into {Target:l}",
                GetAttributesDump(source.Element),
                GetAttributesDump(target.Element));

            // log error if the source had @wit/@source
            if (LemHasLostAttributes(
                    source.Element.Element(XmlHelper.TEI + "lem"),
                    target.Element.Element(XmlHelper.TEI + "lem")))
            {
                // the ID is used for reporting only, so a missing div1
                // ancestor or a missing @xml:id must not stop the processing
                string divId = source.Element
                    .Ancestors(XmlHelper.TEI + "div1")
                    .FirstOrDefault()
                    ?.Attribute(XmlHelper.XML + "id")
                    ?.Value;

                if (divId != null)
                {
                    errDivIds.Add(divId);
                }
                Log.Logger.Error(
                    "Removed overlapping app lost sources at div {DivId:l}: {Source:l}",
                    divId ?? "(unknown)",
                    GetAttributesDump(source.Element));
            }

            // append content of source into target in XML, excluding the lem
            // child, and adding @n to each child; @n is built from the source's
            // @from and @to without their first character
            // (presumably a leading '#' - TODO confirm)
            string nValue =
                source.Element.Attribute("from").Value.Substring(1)
                + " "
                + source.Element.Attribute("to").Value.Substring(1);
            foreach (XElement child in source.Element.Elements()
                .Where(e => e.Name.LocalName != "lem"))
            {
                child.SetAttributeValue("n", nValue);
                // child still has a parent here, so LINQ to XML adds a copy of
                // it and the collection being enumerated is not modified
                target.Element.Add(child);
            }

            // remove source from XML and locs
            source.Element.Remove();
            appElemLocs.RemoveAt(sourceIndex);
        }
// Example #2
// 0
        /// <summary>
        /// Writes a Markdown report of the overlapping app elements found in
        /// each app document matched by the input settings. For every app
        /// element, only its first overlap with a following element is
        /// reported.
        /// </summary>
        /// <returns>An already completed task: all the work is synchronous.</returns>
        public Task Run()
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("REPORT OVERLAPS\n");
            Console.ResetColor();
            Console.WriteLine(
                $"Input:  {_appFileMask}\n" +
                $"Output: {_outputPath}\n");

            int inputFileCount = 0;
            int overlapCount = 0;

            // the factory is disposable: release it (and the provider added
            // to it) once all the documents have been processed
            using (ILoggerFactory loggerFactory = new LoggerFactory())
            {
                loggerFactory.AddSerilog(Log.Logger);
                Log.Logger.Information("REPORT OVERLAPS");

                // disposing the writer flushes it, no explicit Flush is needed
                using (StreamWriter writer = new StreamWriter(_outputPath, false,
                    Encoding.UTF8))
                {
                    writer.WriteLine("# Overlaps Report");
                    writer.WriteLine();

                    writer.WriteLine($"Input: `{_appFileDir}{Path.DirectorySeparatorChar}{_appFileMask}`");
                    writer.WriteLine();

                    WordIdList widList = new WordIdList
                    {
                        Logger = loggerFactory.CreateLogger("report-overlaps")
                    };

                    // for each app document
                    foreach (string filePath in FileEnumerator.Enumerate(
                        _appFileDir, _appFileMask, _regexMask, _recursive))
                    {
                        Console.WriteLine();
                        Log.Logger.Information("Parsing {FilePath}", filePath);
                        Console.WriteLine(filePath);
                        inputFileCount++;

                        overlapCount = ReportDocumentOverlaps(
                            filePath, widList, writer, overlapCount);
                        Console.WriteLine();
                    }
                }
            }

            Console.WriteLine($"\nInput documents: {inputFileCount}");
            // the count was tracked but never shown: add it to the summary
            Console.WriteLine($"Overlaps: {overlapCount}");
            return Task.CompletedTask;
        }

        // Loads the app document at filePath and writes a report section for
        // each overlap detected in it; overlapCount is the number of overlaps
        // reported so far, and the updated number is returned.
        private int ReportDocumentOverlaps(string filePath, WordIdList widList,
            StreamWriter writer, int overlapCount)
        {
            // load app document
            XDocument doc = XDocument.Load(filePath,
                LoadOptions.PreserveWhitespace | LoadOptions.SetLineInfo);

            // collect word IDs from the companion text document, whose name
            // is the app document's name without "-app"; the replacement is
            // applied to the file name only, so that a directory whose name
            // happens to contain "-app." is left untouched
            string fileName = Path.GetFileName(filePath);
            string textFilePath = Path.Combine(
                Path.GetDirectoryName(filePath) ?? "",
                fileName.Replace("-app.", "."));
            widList.Parse(XDocument.Load(textFilePath));

            // collect app's locations
            List<AppElemLocations> appElemLocs =
                AppElemLocationCollector.Collect(doc, widList,
                    AppElemLocationCollector.IsOverlappable);

            // detect and report overlaps
            for (int i = 0; i < appElemLocs.Count - 1; i++)
            {
                for (int j = i + 1; j < appElemLocs.Count; j++)
                {
                    if (!appElemLocs[i].Overlaps(appElemLocs[j]))
                    {
                        continue;
                    }

                    WriteOverlap(writer, fileName, ++overlapCount,
                        appElemLocs[i], appElemLocs[j]);

                    // only the first overlap of the app at i is reported
                    break;
                }

                // progress
                if (i % 10 == 0)
                {
                    Console.Write('.');
                }
            }

            return overlapCount;
        }

        // Writes the report section for a single overlap: heading, file name
        // and line number of the first app, the first app's locations, and
        // the XML of both app's.
        private void WriteOverlap(StreamWriter writer, string fileName,
            int number, AppElemLocations first, AppElemLocations second)
        {
            writer.WriteLine($"## Overlap {number}");
            writer.WriteLine();
            writer.WriteLine(fileName + $" at {first.LineNumber}");

            // text: space-separated pairs built from each location's items
            int n = 0;
            foreach (var iw in first.Locations)
            {
                if (++n > 1)
                {
                    writer.Write(' ');
                }
                writer.Write($"`{iw.Item1}`=`{iw.Item2}`");
            }
            writer.WriteLine();
            writer.WriteLine();

            // app
            WriteAppXml(first, writer);
            WriteAppXml(second, writer);
        }