Ejemplo n.º 1
0
        /// <summary>
        /// Appends a credit <c>respStmt</c> element (a <c>resp</c> with
        /// <c>key="MQDQ"</c> followed by a <c>persName</c>) to the
        /// <c>seriesStmt</c> found at <c>TEI/teiHeader/fileDesc</c> in each
        /// enumerated XML file, saving the file in place unless this is a dry run.
        /// </summary>
        /// <remarks>
        /// Files where no <c>seriesStmt</c> can be found are logged as errors and
        /// skipped. In a dry run the document is still modified in memory, but
        /// nothing is written back to disk.
        /// </remarks>
        /// <returns>An already completed task.</returns>
        public Task Run()
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine("ADD CREDIT TO TEXT FILES\n");
            Console.ResetColor();
            Console.WriteLine(
                $"Text dir:  {_txtDir}\n" +
                $"Resp value: {_respValue}\n" +
                $"PersName value: {_persValue}\n" +
                $"Dry run: {_dry}\n");

            Log.Logger.Information("ADD CREDIT TO TEXT FILES");

            // NOTE(review): the two trailing flags are presumably "mask is a
            // regex" and "recurse into subdirectories" -- confirm against
            // FileEnumerator.Enumerate.
            foreach (string filePath in FileEnumerator.Enumerate(
                         _txtDir, @"^[^-]+-[^-]+\.xml", true, true))
            {
                Console.WriteLine(filePath);

                // The path is passed as a property value, never as the message
                // template itself: a path containing braces would otherwise be
                // parsed as template holes. ":l" renders the string unquoted.
                Log.Logger.Information("{FilePath:l}", filePath);

                XDocument doc = XDocument.Load(filePath,
                                               LoadOptions.PreserveWhitespace);

                // TEI/teiHeader/fileDesc/seriesStmt/
                XElement series = doc.Root
                                  ?.Element(XmlHelper.TEI + "teiHeader")
                                  ?.Element(XmlHelper.TEI + "fileDesc")
                                  ?.Element(XmlHelper.TEI + "seriesStmt");
                if (series == null)
                {
                    Log.Logger.Error(
                        "Unable to find seriesStmt in header for {FileName:l}",
                        Path.GetFileName(filePath));
                    continue;
                }

                // <respStmt>
                //   <resp key="MQDQ">RESPVALUE</resp>
                //   <persName>PERSVALUE</persName>
                // </respStmt>
                series.Add(new XElement(XmlHelper.TEI + "respStmt",
                                        new XElement(XmlHelper.TEI + "resp",
                                                     new XAttribute("key", "MQDQ"),
                                                     _respValue),
                                        new XElement(XmlHelper.TEI + "persName", _persValue)));

                if (!_dry)
                {
                    doc.Save(filePath);
                }
            }

            return Task.CompletedTask;
        }
        /// <summary>
        /// Calls <c>ProcessSourceFile</c> on every <c>.cs</c> file enumerated
        /// from the project directory and then on every <c>.tan</c> file
        /// enumerated from the workspace asset files, echoing each path to the
        /// console before it is processed.
        /// </summary>
        private static void Run()
        {
            // The enumerator is built before the first directory change.
            var projectSources = new FileEnumerator(The.Workspace.ProjectDirectory);

            // Pass 1: source files, with the project directory as scope.
            using (new DirectoryChanger(The.Workspace.ProjectDirectory))
            {
                foreach (var entry in projectSources.Enumerate(".cs"))
                {
                    var path = entry.Path;
                    Console.WriteLine("* " + path);
                    ProcessSourceFile(path);
                }
            }

            // Pass 2: asset files, with the assets directory as scope.
            using (new DirectoryChanger(The.Workspace.AssetsDirectory))
            {
                foreach (var entry in The.Workspace.AssetFiles.Enumerate(".tan"))
                {
                    var path = entry.Path;
                    Console.WriteLine("* " + path);
                    ProcessSourceFile(path);
                }
            }
        }
        /// <summary>
        /// Removes overlapping <c>app</c> elements from each enumerated apparatus
        /// document by merging one element of every overlapping pair into the
        /// other, then saves the result under the output directory with the same
        /// file name as the input.
        /// </summary>
        /// <remarks>
        /// <para>For each pair, <c>IsFirstTarget</c> picks the element that
        /// survives (target); the children of the other one (source), except its
        /// <c>lem</c>, are added to the target with an <c>n</c> attribute built
        /// from the source's <c>from</c> and <c>to</c> attributes, and the source
        /// is then removed.</para>
        /// <para>When <c>LemHasLostAttributes</c> reports a loss, the ID of the
        /// enclosing <c>div1</c> is collected and an error is logged; the
        /// collected IDs are written to <c>overlap-err-divs.txt</c> in the output
        /// directory when the div list option is on.</para>
        /// </remarks>
        /// <returns>An already completed task.</returns>
        public Task Run()
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("REMOVE OVERLAPS\n");
            Console.ResetColor();
            Console.WriteLine(
                $"Input:  {_appFileMask}\n" +
                $"Output: {_outputDir}\n" +
                $"Div list: {(_writeDivList ? "yes" : "no")}\n");

            int inputFileCount = 0;
            int removedCount   = 0;

            ILoggerFactory loggerFactory = new LoggerFactory();

            loggerFactory.AddSerilog(Log.Logger);
            Log.Logger.Information("REMOVE OVERLAPS");

            if (!Directory.Exists(_outputDir))
            {
                Directory.CreateDirectory(_outputDir);
            }

            // IDs of the div1 elements where a merge lost lem attributes
            HashSet <string> errDivIds = new HashSet <string>();

            // word IDs list, reused (re-parsed) for each app document.
            // NOTE(review): the logger category is "report-overlaps" although
            // this command is REMOVE OVERLAPS -- looks copied from the report
            // command; confirm whether that is intended.
            WordIdList widList = new WordIdList
            {
                Logger = loggerFactory.CreateLogger("report-overlaps")
            };

            foreach (string filePath in FileEnumerator.Enumerate(
                         _appFileDir, _appFileMask, _regexMask, _recursive))
            {
                Console.WriteLine();
                Log.Logger.Information("Parsing {FilePath}", filePath);

                // load app document
                // NOTE(review): inputFileName is not read anywhere in this method.
                string inputFileName = Path.GetFileNameWithoutExtension(filePath);
                Console.WriteLine(filePath);
                inputFileCount++;
                XDocument doc = XDocument.Load(filePath,
                                               LoadOptions.PreserveWhitespace | LoadOptions.SetLineInfo);

                // collect word IDs from text document, whose path is the app
                // document's path with "-app." replaced by "."
                widList.Parse(XDocument.Load(filePath.Replace("-app.", ".")));

                // collect app's locations
                List <AppElemLocations> appElemLocs =
                    AppElemLocationCollector.Collect(doc, widList,
                                                     AppElemLocationCollector.IsOverlappable);

                // detect and process overlaps: compare each app (i) with all
                // the apps following it (j)
                for (int i = 0; i < appElemLocs.Count - 1; i++)
                {
                    for (int j = i + 1; j < appElemLocs.Count; j++)
                    {
                        if (appElemLocs[i].Overlaps(appElemLocs[j]))
                        {
                            // pick the target between the two overlapping app's:
                            // the target survives, the source is merged into it
                            AppElemLocations target, source;
                            int targetIndex, sourceIndex;

                            if (IsFirstTarget(appElemLocs[i], appElemLocs[j]))
                            {
                                target = appElemLocs[targetIndex = i];
                                source = appElemLocs[sourceIndex = j];
                            }
                            else
                            {
                                source = appElemLocs[sourceIndex = i];
                                target = appElemLocs[targetIndex = j];
                            }

                            Log.Logger.Information("Merging overlapping app " +
                                                   $"{GetAttributesDump(source.Element)} into " +
                                                   GetAttributesDump(target.Element));

                            // log error if the source had @wit/@source
                            if (LemHasLostAttributes(
                                    source.Element.Element(XmlHelper.TEI + "lem"),
                                    target.Element.Element(XmlHelper.TEI + "lem")))
                            {
                                // NOTE(review): First() throws when the app has no
                                // div1 ancestor, and .Value throws when that div1
                                // has no xml:id -- presumably both always exist.
                                string divId = source.Element.Ancestors(
                                    XmlHelper.TEI + "div1")
                                               .First()
                                               .Attribute(XmlHelper.XML + "id").Value;

                                errDivIds.Add(divId);
                                Log.Logger.Error("Removed overlapping app lost sources at div "
                                                 + divId
                                                 + ": "
                                                 + GetAttributesDump(source.Element));
                            }

                            // append content of source into target in XML,
                            // excluding the lem child, and adding @n to each child.
                            // @n is the source's @from and @to, each without its
                            // first character (presumably a leading '#' -- confirm),
                            // separated by a space.
                            string nValue =
                                source.Element.Attribute("from").Value.Substring(1)
                                + " "
                                + source.Element.Attribute("to").Value.Substring(1);
                            foreach (XElement child in source.Element.Elements()
                                     .Where(e => e.Name.LocalName != "lem"))
                            {
                                // NOTE(review): child still belongs to the source
                                // here, so Add should insert a copy rather than move
                                // it (per LINQ to XML's handling of parented
                                // nodes) -- confirm; @n is set before adding.
                                child.SetAttributeValue("n", nValue);
                                target.Element.Add(child);
                            }

                            // remove source from XML and locs
                            source.Element.Remove();
                            appElemLocs.RemoveAt(sourceIndex);
                            removedCount++;

                            // continue looking from overlaps from the first
                            // of the two app's involved: i is set one below that
                            // index because the outer loop's i++ still runs
                            // after the jump
                            i = Math.Min(sourceIndex, targetIndex) - 1;
                            goto nextOuter;
                        }
                    } // j
nextOuter:
                    // progress indicator
                    if (i % 10 == 0)
                    {
                        Console.Write('.');
                    }
                } // i

                // save under the output directory, keeping the input file name
                string path = Path.Combine(_outputDir, Path.GetFileName(filePath));
                doc.Save(path, SaveOptions.OmitDuplicateNamespaces);
            }

            // optionally dump the IDs of the divs where attributes were lost
            if (_writeDivList)
            {
                using (StreamWriter listWriter = new StreamWriter(
                           Path.Combine(_outputDir, "overlap-err-divs.txt"),
                           false, Encoding.UTF8))
                {
                    foreach (string id in errDivIds)
                    {
                        listWriter.WriteLine(id);
                    }
                    listWriter.Flush();
                }
            }

            Console.WriteLine($"\nInput documents: {inputFileCount}");
            Console.WriteLine($"Removed overlaps: {removedCount}");
            return(Task.CompletedTask);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Parses every enumerated input XML document into items and dumps them
        /// as indented, camel-cased JSON into one or more files named
        /// <c>{input file name}_{00001...}.json</c> in the output directory.
        /// </summary>
        /// <remarks>
        /// <para>When a div IDs list was specified, bit 1 of the flags is set on
        /// each item whose title contains one of the loaded IDs. When no list
        /// was specified, no item is flagged.</para>
        /// <para>When the maximum items per file is greater than 0, a new output
        /// file is started each time the current one has received that many
        /// items; otherwise each input document produces a single file.</para>
        /// </remarks>
        /// <returns>An already completed task.</returns>
        public Task Run()
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("PARSE TEXT\n");
            Console.ResetColor();
            Console.WriteLine(
                $"Input dir:  {_inputFileDir}\n" +
                $"Input mask: {_inputFileMask}\n" +
                $"Output dir: {_outputDir}\n" +
                $"Div IDs list: {_flagDivIdList ?? "(none)"}\n" +
                $"Max items per file: {_maxItemPerFile}\n");

            ILoggerFactory loggerFactory = new LoggerFactory();

            loggerFactory.AddSerilog(Log.Logger);
            Log.Logger.Information("PARSE TEXT");

            XmlTextParser parser = new XmlTextParser
            {
                Logger = loggerFactory.CreateLogger("parse-text")
            };

            int inputFileCount = 0;
            int totalItemCount = 0;

            if (!Directory.Exists(_outputDir))
            {
                Directory.CreateDirectory(_outputDir);
            }

            // load div IDs list if requested, prefixing and suffixing them
            // so that we are ready to find them in the item's title;
            // this stays null when no list was requested
            HashSet<string> flagDivIds = _flagDivIdList != null
                ? LoadDivIds(_flagDivIdList, "xml:id=", XmlHelper.CIT_SEPARATOR)
                : null;

            // the serializer settings do not depend on the document
            JsonSerializerSettings jsonSettings = new JsonSerializerSettings
            {
                ContractResolver = new DefaultContractResolver
                {
                    NamingStrategy = new CamelCaseNamingStrategy()
                },
                Formatting = Formatting.Indented
            };

            // for each input document
            foreach (string filePath in FileEnumerator.Enumerate(
                         _inputFileDir, _inputFileMask, _regexMask))
            {
                // load document
                string inputFileName = Path.GetFileNameWithoutExtension(filePath);
                Console.WriteLine("\n" + filePath);
                inputFileCount++;
                XDocument doc = XDocument.Load(filePath,
                                               LoadOptions.PreserveWhitespace);

                // itemCount: items parsed from this document;
                // fileItemCount: items written into the current output file
                int itemCount = 0, fileItemCount = 0, outputFileCount = 0;
                StreamWriter writer = null;

                try
                {
                    // parse items
                    foreach (IItem item in parser.Parse(doc, inputFileName))
                    {
                        if (++itemCount % 10 == 0)
                        {
                            Console.Write('.');
                        }

                        // set flag if required (there is no list to look into
                        // when no div IDs list was specified)
                        if (flagDivIds != null &&
                            flagDivIds.Any(s =>
                                item.Title.IndexOf(s, StringComparison.Ordinal) > -1))
                        {
                            item.Flags |= 1;
                        }

                        // create new output file if required. The limit is
                        // checked against the items in the current file: the
                        // per-document count only grows, so testing it would
                        // open a new file for every item past the limit.
                        if (writer == null ||
                            (_maxItemPerFile > 0 && fileItemCount >= _maxItemPerFile))
                        {
                            if (writer != null)
                            {
                                CloseOutputFile(writer);
                                writer = null;
                            }
                            string path = Path.Combine(_outputDir,
                                $"{inputFileName}_{++outputFileCount:00000}.json");

                            writer = new StreamWriter(
                                new FileStream(path, FileMode.Create,
                                               FileAccess.Write, FileShare.Read),
                                Encoding.UTF8);
                            writer.WriteLine("[");
                            fileItemCount = 0;
                        }

                        // dump item into it.
                        // this will output a , also for the last JSON array item,
                        // but we don't care about it -- that's just a dump, and
                        // it's easy to ignore/remove it if needed.
                        string json = JsonConvert.SerializeObject(item, jsonSettings);
                        writer.WriteLine(json + ",");
                        fileItemCount++;
                    }

                    // NOTE(review): CloseOutputFile is defined elsewhere;
                    // presumably it terminates the array and closes the writer.
                    if (writer != null)
                    {
                        CloseOutputFile(writer);
                        writer = null;
                    }
                }
                finally
                {
                    // the writer is still set here only when something threw
                    // above: release the file handle anyway
                    writer?.Dispose();
                }

                totalItemCount += itemCount;
            }

            Console.WriteLine($"\nInput documents: {inputFileCount}");
            Console.WriteLine($"Output items: {totalItemCount}");

            return Task.CompletedTask;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Scans every enumerated apparatus document for overlapping
        /// <c>app</c> elements and writes a Markdown report of the overlaps
        /// found to the output path.
        /// </summary>
        /// <remarks>
        /// For each <c>app</c> only its first overlap with a following
        /// <c>app</c> is reported: the report entry lists the file name and line
        /// number, the locations of the first element, and the XML of both
        /// elements as written by <c>WriteAppXml</c>.
        /// </remarks>
        /// <returns>An already completed task.</returns>
        public Task Run()
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("REPORT OVERLAPS\n");
            Console.ResetColor();
            Console.WriteLine(
                $"Input:  {_appFileMask}\n" +
                $"Output: {_outputPath}\n");

            ILoggerFactory factory = new LoggerFactory();
            factory.AddSerilog(Log.Logger);
            Log.Logger.Information("REPORT OVERLAPS");

            int documentCount = 0;
            int reportedCount = 0;

            using (StreamWriter report = new StreamWriter(_outputPath, false,
                                                          Encoding.UTF8))
            {
                // report header
                report.WriteLine("# Overlaps Report");
                report.WriteLine();
                report.WriteLine($"Input: `{_appFileDir}{Path.DirectorySeparatorChar}{_appFileMask}`");
                report.WriteLine();

                // the word IDs list is re-parsed for each document
                WordIdList wordIds = new WordIdList
                {
                    Logger = factory.CreateLogger("report-overlaps")
                };

                foreach (string appPath in FileEnumerator.Enumerate(
                             _appFileDir, _appFileMask, _regexMask, _recursive))
                {
                    Console.WriteLine();
                    Log.Logger.Information("Parsing {FilePath}", appPath);
                    Console.WriteLine(appPath);
                    documentCount++;

                    // the app document, with line numbers for the report
                    XDocument appDoc = XDocument.Load(appPath,
                        LoadOptions.PreserveWhitespace | LoadOptions.SetLineInfo);

                    // the text document shares the app document's path,
                    // except for the "-app" before the extension
                    string textPath = appPath.Replace("-app.", ".");
                    wordIds.Parse(XDocument.Load(textPath));

                    List<AppElemLocations> locs = AppElemLocationCollector.Collect(
                        appDoc, wordIds, AppElemLocationCollector.IsOverlappable);

                    // compare each app with all the apps following it
                    for (int i = 0; i < locs.Count - 1; i++)
                    {
                        AppElemLocations current = locs[i];

                        for (int j = i + 1; j < locs.Count; j++)
                        {
                            AppElemLocations other = locs[j];
                            if (!current.Overlaps(other))
                            {
                                continue;
                            }

                            report.WriteLine($"## Overlap {++reportedCount}");
                            report.WriteLine();
                            report.WriteLine(
                                $"{Path.GetFileName(appPath)} at {current.LineNumber}");

                            // text: space-separated pairs of location values
                            string separator = "";
                            foreach (var location in current.Locations)
                            {
                                report.Write(separator);
                                report.Write($"`{location.Item1}`=`{location.Item2}`");
                                separator = " ";
                            }
                            report.WriteLine();
                            report.WriteLine();

                            // app
                            WriteAppXml(current, report);
                            WriteAppXml(other, report);

                            // only the first overlap of each app is reported
                            break;
                        }

                        // progress indicator
                        if (i % 10 == 0)
                        {
                            Console.Write('.');
                        }
                    }
                    Console.WriteLine();
                }
                report.Flush();
            }

            Console.WriteLine($"\nInput documents: {documentCount}");
            return Task.CompletedTask;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Runs this command: passes each enumerated input XML document to an
        /// <c>XmlPartitioner</c> configured with the minimum and maximum
        /// thresholds, and writes the result into the output directory under
        /// the same file name.
        /// </summary>
        /// <remarks>
        /// Documents changed by the partitioner are saved from memory; unchanged
        /// documents are copied verbatim. In both cases an existing file with
        /// the same name in the output directory is overwritten, so that the
        /// command can be run again on the same output directory.
        /// </remarks>
        /// <returns>An already completed task.</returns>
        public Task Run()
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("PARTITION\n");
            Console.ResetColor();
            Console.WriteLine(
                $"Input dir:  {_inputDir}\n" +
                $"Input mask: {_fileMask}\n" +
                $"Output dir: {_outputDir}\n" +
                $"Min: {_minTreshold}\n" +
                $"Max: {_maxTreshold}\n" +
                $"Recursive: {_recursive}\n");
            Log.Logger.Information("PARTITION");

            XmlPartitioner partitioner = new XmlPartitioner
            {
                MinTreshold = _minTreshold,
                MaxTreshold = _maxTreshold
            };

            int partitioned = 0, total = 0;

            // ensure once that the output directory exists
            if (!Directory.Exists(_outputDir))
            {
                Directory.CreateDirectory(_outputDir);
            }

            foreach (string filePath in FileEnumerator.Enumerate(
                         _inputDir, _fileMask, _regexMask, _recursive))
            {
                total++;
                Console.Write(filePath);

                XDocument doc = XDocument.Load(filePath,
                                               LoadOptions.PreserveWhitespace);

                bool touched = partitioner.Partition(doc,
                    Path.GetFileNameWithoutExtension(filePath));

                string outputPath =
                    Path.Combine(_outputDir, Path.GetFileName(filePath));

                if (touched)
                {
                    partitioned++;
                    Console.WriteLine($" => {outputPath}");
                    doc.Save(outputPath, SaveOptions.OmitDuplicateNamespaces);
                }
                else
                {
                    // overwrite like doc.Save does above: without it, a second
                    // run on the same output directory throws an IOException
                    // at the first unchanged file
                    File.Copy(filePath, outputPath, true);
                    Console.WriteLine();
                }
            }

            Console.WriteLine($"Total files: {total}");
            Console.WriteLine($"Partitioned files: {partitioned}");

            return Task.CompletedTask;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Imports the enumerated JSON text files, and then the apparatus files
        /// related to them, through a <c>JsonImporter</c> working on the
        /// repository created for the target database.
        /// </summary>
        /// <remarks>
        /// <para>When this is not a dry run, the data profile is loaded and the
        /// database is created from it if it does not exist yet. In a dry run
        /// the command only checks that the profile file exists, and returns
        /// after reporting the error when it does not.</para>
        /// <para>Apparatus files are looked up in the apparatus directory as
        /// <c>{name}-app_*.json</c>, where <c>{name}</c> is each distinct text
        /// file name after <c>StripFileNameNr</c> was applied to it.</para>
        /// </remarks>
        /// <returns>An already completed task.</returns>
        public Task Run()
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine("IMPORT JSON TEXT AND APPARATUS\n");
            Console.ResetColor();
            Console.WriteLine(
                $"Text dir:  {_txtFileDir}\n" +
                $"Text mask: {_txtFileMask}\n" +
                $"Apparatus dir: {_appFileDir}\n" +
                $"Profile file: {_profilePath}\n" +
                $"Database: {_database}\n" +
                $"Dry run: {_dry}\n");

            ILoggerFactory loggerFactory = new LoggerFactory();

            loggerFactory.AddSerilog(Log.Logger);
            Log.Logger.Information("IMPORT JSON TEXT AND APPARATUS");

            // NOTE(review): the profile file is checked for existence only in
            // a dry run, where it is not loaded; the real run loads it without
            // that check. Left as is because LoadProfile is defined elsewhere
            // -- confirm whether the check should apply to both modes.
            if (!_dry)
            {
                // create database if not exists
                string connection = string.Format(CultureInfo.InvariantCulture,
                                                  _config.GetConnectionString("Mongo"),
                                                  _database);

                IDatabaseManager manager = new MongoDatabaseManager();

                string profileContent = LoadProfile(_profilePath);
                IDataProfileSerializer serializer = new JsonDataProfileSerializer();
                DataProfile profile = serializer.Read(profileContent);

                if (!manager.DatabaseExists(connection))
                {
                    Console.WriteLine("Creating database...");

                    // values go into the log event as properties rather than
                    // into the template text, where braces would be parsed as
                    // template holes; ":l" renders the string unquoted
                    Log.Information("Creating database {Database:l}...", _database);

                    manager.CreateDatabase(connection, profile);

                    Console.WriteLine("Database created.");
                    Log.Information("Database created.");
                }
            }
            else if (!File.Exists(_profilePath))
            {
                Console.WriteLine("Profile path not found: " + _profilePath);
                Log.Error("Profile path not found: {ProfilePath:l}", _profilePath);
                return Task.CompletedTask;
            }

            ICadmusRepository repository =
                _repositoryService.CreateRepository(_database);

            JsonImporter importer = new JsonImporter(repository)
            {
                Logger = loggerFactory.CreateLogger("json-importer"),
                IsDry = _dry
            };

            // 1) import text, collecting the distinct names used at step 2
            string[] files = FileEnumerator.Enumerate(
                _txtFileDir, _txtFileMask, _regexMask).ToArray();
            HashSet<string> fileNames = new HashSet<string>();

            Console.WriteLine($"Importing text from {files.Length} file(s)...");

            foreach (string txtFilePath in files)
            {
                fileNames.Add(
                    StripFileNameNr(
                        Path.GetFileNameWithoutExtension(txtFilePath)));
                Console.WriteLine(txtFilePath);

                using (Stream stream = new FileStream(txtFilePath, FileMode.Open,
                                                      FileAccess.Read, FileShare.Read))
                {
                    importer.ImportText(stream);
                }
            }

            // 2) import apparatus
            Console.WriteLine("Importing apparatus...");

            foreach (string fileName in fileNames)
            {
                Console.WriteLine(fileName);

                foreach (string appFilePath in Directory.EnumerateFiles(
                             _appFileDir, fileName + "-app_*.json"))
                {
                    Console.WriteLine("  " + appFilePath);
                    using (Stream stream = new FileStream(appFilePath, FileMode.Open,
                                                          FileAccess.Read, FileShare.Read))
                    {
                        importer.ImportApparatus(stream);
                    }
                }
            }

            return Task.CompletedTask;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Parses every enumerated apparatus XML document into parts and dumps
        /// them as indented, camel-cased JSON into one or more files named
        /// <c>{input file name}_{00001...}.json</c> in the output directory.
        /// </summary>
        /// <remarks>
        /// <para>Before parsing a document, the text index is loaded for the
        /// corresponding text file name, which is the input file name without
        /// <c>-app</c>.</para>
        /// <para>When the maximum items per file is greater than 0, a new output
        /// file is started each time the current one has received that many
        /// parts; otherwise each input document produces a single file.</para>
        /// </remarks>
        /// <returns>An already completed task.</returns>
        public Task Run()
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("PARSE APPARATUS\n");
            Console.ResetColor();
            Console.WriteLine(
                $"Input:  {_inputFileMask}\n" +
                $"Output: {_outputDir}\n" +
                $"Max items per file: {_maxItemPerFile}\n");

            ILoggerFactory loggerFactory = new LoggerFactory();

            loggerFactory.AddSerilog(Log.Logger);
            Log.Logger.Information("PARSE APPARATUS");

            XmlApparatusParser parser = new XmlApparatusParser
            {
                Logger = loggerFactory.CreateLogger("parse-app")
            };

            int inputFileCount = 0;
            int totalPartCount = 0;

            if (!Directory.Exists(_outputDir))
            {
                Directory.CreateDirectory(_outputDir);
            }

            // the serializer settings do not depend on the document
            JsonSerializerSettings jsonSettings = new JsonSerializerSettings
            {
                ContractResolver = new DefaultContractResolver
                {
                    NamingStrategy = new CamelCaseNamingStrategy()
                },
                Formatting = Formatting.Indented
            };

            // for each input document
            foreach (string filePath in FileEnumerator.Enumerate(
                         _inputFileDir, _inputFileMask, _regexMask, _recursive))
            {
                Console.WriteLine();
                Log.Logger.Information("Parsing {FilePath}", filePath);

                // load document
                string inputFileName = Path.GetFileNameWithoutExtension(filePath);
                Console.WriteLine(filePath);
                inputFileCount++;
                XDocument doc = XDocument.Load(filePath,
                    LoadOptions.PreserveWhitespace | LoadOptions.SetLineInfo);

                // load index
                string textFileName = inputFileName.Replace("-app", "");
                LoadTextIndex(textFileName);

                // partCount: parts parsed from this document;
                // filePartCount: parts written into the current output file
                int partCount = 0, filePartCount = 0, outputFileCount = 0;
                StreamWriter writer = null;

                try
                {
                    // parse
                    foreach (var part in parser.Parse(doc, textFileName, _textIndex))
                    {
                        if (++partCount % 10 == 0)
                        {
                            Console.Write('.');
                        }

                        // create new output file if required. The limit is
                        // checked against the parts in the current file: the
                        // per-document count only grows, so testing it would
                        // open a new file for every part past the limit.
                        if (writer == null ||
                            (_maxItemPerFile > 0 && filePartCount >= _maxItemPerFile))
                        {
                            if (writer != null)
                            {
                                CloseOutputFile(writer);
                                writer = null;
                            }
                            string path = Path.Combine(_outputDir,
                                $"{inputFileName}_{++outputFileCount:00000}.json");

                            writer = new StreamWriter(
                                new FileStream(path, FileMode.Create,
                                               FileAccess.Write, FileShare.Read),
                                Encoding.UTF8);
                            writer.WriteLine("[");
                            filePartCount = 0;
                        }

                        // dump part into it; the trailing comma is written
                        // after every part, the last one included
                        string json = JsonConvert.SerializeObject(part, jsonSettings);
                        writer.WriteLine(json + ",");
                        filePartCount++;
                    }

                    // NOTE(review): CloseOutputFile is defined elsewhere;
                    // presumably it terminates the array and closes the writer.
                    if (writer != null)
                    {
                        CloseOutputFile(writer);
                        writer = null;
                    }
                }
                finally
                {
                    // the writer is still set here only when something threw
                    // above: release the file handle anyway
                    writer?.Dispose();
                }

                totalPartCount += partCount;
            }

            Console.WriteLine($"\nInput documents: {inputFileCount}");
            Console.WriteLine($"Output parts: {totalPartCount}");

            return Task.CompletedTask;
        }