Exemple #1
0
        /// <summary>
        /// Runs entity classification over every file found under <paramref name="inputPath"/>
        /// and writes the per-document XML to <paramref name="output"/>, wrapped in a single
        /// <c>&lt;wis&gt;</c> root element.
        /// </summary>
        /// <param name="inputPath">Path handed to <c>FilesUtils.GetFiles</c> to enumerate the documents.</param>
        /// <param name="output">Writer that receives the XML; it is not closed by this method.</param>
        /// <param name="language">Language key used to select the classifier.</param>
        static void GenerateEntities(string inputPath, TextWriter output, string language)
        {
            output.WriteLine("<wis>");

            string[] fileEntries = FilesUtils.GetFiles(inputPath);

            foreach (var document in fileEntries)
            {
                string text = FilesUtils.FileToText(document);

                // A null text marks a file that could not be converted: report it and move on.
                if (text == null)
                {
                    // Console.Error is the shared stderr writer; no need to open (and close)
                    // a new stream over the standard error handle for every skipped file.
                    Console.Error.WriteLine($"The file '{document}' is not supported");
                    continue;
                }

                // NOTE(review): looked up once per document; presumably CRFClassifiers caches
                // the model per language -- confirm, otherwise hoist this out of the loop.
                var classifier = CRFClassifiers.GetClassifierByLang(language);

                output.WriteLine(classifier.classifyToString(text, "xml", true));
            }

            output.WriteLine("</wis>");
        }
        /// <summary>
        /// Matches the entities of the input files against every dictionary file and writes
        /// the results to the configured output file, or to standard output when no output
        /// path is set. Does nothing (besides reporting on stderr) when no dictionary is given.
        /// </summary>
        public override void Run()
        {
            if (options.Verbose)
            {
                Console.Error.WriteLine("Option 3.");
            }

            if (options.Dictionary == null)
            {
                Console.Error.WriteLine("Dictionary required. Exiting...");
                return;
            }

            // The using statement guarantees the writer is flushed and released even when
            // dictionary enumeration or matching throws part-way through.
            using (TextWriter output = string.IsNullOrEmpty(options.Output)
                ? new StreamWriter(Console.OpenStandardOutput())
                : new StreamWriter(options.Output))
            {
                foreach (string dic in FilesUtils.GetFiles(options.Dictionary))
                {
                    DictionaryMatcher.MatchEntitiesInFiles(options.InputFile, dic, output, options.Separator, options.Language);
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Writes to the output stream a CSV with the match results of every input file
        /// against the dictionary.
        /// </summary>
        /// <param name="inputPath">Files path.</param>
        /// <param name="dicPath">Dictionary path.</param>
        /// <param name="output">Output stream; it is not closed by this method.</param>
        /// <param name="sep">Separator character used for the dictionary and the generated CSV.</param>
        /// <param name="language">Language passed to the entity generator.</param>
        public static void MatchEntitiesInFiles(string inputPath, string dicPath, TextWriter output, char sep, string language)
        {
            string[] files = FilesUtils.GetFiles(inputPath);

            // Nothing to match: do not open the dictionary at all.
            if (files.Length == 0)
            {
                return;
            }

            // The dictionary depends only on dicPath and sep, so read it once instead of once
            // per input file, and dispose the reader so the file handle is released promptly.
            List<string[]> dicTable;
            using (var dicReader = new StreamReader(dicPath))
            {
                dicTable = CSVUtils.TabulateCSV(dicReader, sep);
            }

            foreach (string file in files)
            {
                string xml = NER.GenerateEntitiesToString(file, language);
                string csv = CSVUtils.RemoveDuplicates(CSVUtils.EntitiesToCsv(xml, sep));

                List<string[]> fileTable = CSVUtils.TabulateCSV(new StringReader(csv), sep);

                // Rebuilt per file (cheap, in-memory) so every match starts from a fresh list.
                List<string> entitiesTable = GetEntitiesFromDic(dicTable);

                var matchs = MatchEntities(fileTable, entitiesTable);

                GenerateMatchedEntriesCSV(file, dicPath, matchs, output, sep);
            }
        }