예제 #1
0
        /// <summary>
        /// Executes the dictionary generation command: extracts the entities from the
        /// configured input file, converts them to CSV, removes duplicates and writes the result.
        /// </summary>
        /// <remarks>
        /// The input file, language, separator and verbosity flag are all read from the
        /// <c>options</c> member; this method takes no arguments. When verbose mode is on,
        /// a short notice is written to the standard error stream before any work is done.
        /// </remarks>
        public override void Run()
        {
            if (options.Verbose)
            {
                Console.Error.WriteLine("Dictionary generation command");
            }

            // Step 1: run entity recognition on the input file.
            string entitiesXml = NER.GenerateEntitiesToString(options.InputFile, options.Language);

            // Step 2: convert the recognizer output to CSV, then drop duplicates.
            var rawCsv = CSVUtils.EntitiesToCsv(entitiesXml, options.Separator);
            string dedupedCsv = CSVUtils.RemoveDuplicates(rawCsv);

            // Step 3: hand the final CSV to the command's result writer.
            WriteResult(dedupedCsv);
        }
예제 #2
0
        /// <summary>
        /// For every file returned by <c>FilesUtils.GetFiles</c> for <paramref name="inputPath"/>,
        /// extracts its entities, matches them against the dictionary and writes the matched
        /// entries as CSV to <paramref name="output"/>.
        /// </summary>
        /// <param name="inputPath">Path passed to <c>FilesUtils.GetFiles</c> to obtain the files to process.</param>
        /// <param name="dicPath">Path of the dictionary file, read as CSV.</param>
        /// <param name="output">Writer that receives the match results.</param>
        /// <param name="sep">Separator character used when producing and parsing the CSV data.</param>
        /// <param name="language">Language passed to the entity recognizer.</param>
        public static void MatchEntitiesInFiles(string inputPath, string dicPath, TextWriter output, char sep, string language)
        {
            string[] files = FilesUtils.GetFiles(inputPath);
            foreach (string file in files)
            {
                string xml = NER.GenerateEntitiesToString(file, language);
                string csv = CSVUtils.RemoveDuplicates(CSVUtils.EntitiesToCsv(xml, sep));

                // Dispose the dictionary reader deterministically; otherwise one file
                // handle per processed input file stays open until finalization.
                // NOTE(review): the dictionary is re-read for every file. It could be loaded
                // once before the loop if the helpers below do not mutate the tables — confirm.
                List<string[]> dicTable;
                using (var dicReader = new StreamReader(dicPath))
                {
                    dicTable = CSVUtils.TabulateCSV(dicReader, sep);
                }

                List<string[]> fileTable;
                using (var csvReader = new StringReader(csv))
                {
                    fileTable = CSVUtils.TabulateCSV(csvReader, sep);
                }

                List<string> entitiesTable = GetEntitiesFromDic(dicTable);

                var matches = MatchEntities(fileTable, entitiesTable);

                GenerateMatchedEntriesCSV(file, dicPath, matches, output, sep);
            }
        }