/// <summary>
/// Global method for entities generation. Writes a <c>&lt;wis&gt;</c> root element to
/// <paramref name="output"/> and, inside it, the classifier's XML output for every
/// file under <paramref name="inputPath"/> whose text could be extracted.
/// </summary>
/// <param name="inputPath">The input path</param>
/// <param name="output">Output stream</param>
/// <param name="language">Language used to select the classifier.</param>
static void GenerateEntities(string inputPath, TextWriter output, string language)
{
    output.WriteLine("<wis>");

    foreach (var document in FilesUtils.GetFiles(inputPath))
    {
        string text = FilesUtils.FileToText(document);

        // XXX: Better a NullObject, but string can't be inherited I think.
        if (text == null)
        {
            // Use the shared Console.Error writer instead of wrapping (and closing)
            // a fresh standard-error stream for every unsupported file.
            Console.Error.WriteLine($"The file '{document}' is not supported");
            continue;
        }

        // NOTE(review): the classifier is looked up once per document; presumably
        // CRFClassifiers caches per language - confirm before hoisting.
        var classifier = CRFClassifiers.GetClassifierByLang(language);
        output.WriteLine(classifier.classifyToString(text, "xml", true));
    }

    output.WriteLine("</wis>");
}
/// <summary>
/// Generates the match between text entities and dictionary entities.
/// </summary>
/// <remarks>
/// Reads its settings from the <c>options</c> member: <c>Dictionary</c> is required
/// (the method reports an error and returns when it is null); results are written to
/// <c>Output</c> when it is set, otherwise to standard output.
/// </remarks>
public override void Run()
{
    if (options.Verbose)
    {
        Console.Error.WriteLine("Option 3.");
    }

    if (options.Dictionary == null)
    {
        Console.Error.WriteLine("Dictionary required. Exiting...");
        return;
    }

    // The using block guarantees the writer is flushed and closed even when
    // enumerating the dictionaries or matching throws.
    using (TextWriter output = string.IsNullOrEmpty(options.Output)
        ? new StreamWriter(Console.OpenStandardOutput())
        : new StreamWriter(options.Output))
    {
        foreach (string dic in FilesUtils.GetFiles(options.Dictionary))
        {
            DictionaryMatcher.MatchEntitiesInFiles(options.InputFile, dic, output, options.Separator, options.Language);
        }
    }
}
/// <summary>
/// Writes to the output stream a csv with the match results against the dictionary
/// </summary>
/// <param name="inputPath">Files path.</param>
/// <param name="dicPath">Dictionary path.</param>
/// <param name="output">Output stream.</param>
/// <param name="sep">Separator character used for the dictionary, the intermediate CSV and the generated output.</param>
/// <param name="language">Language passed to the entity recognizer.</param>
public static void MatchEntitiesInFiles(string inputPath, string dicPath, TextWriter output, char sep, string language)
{
    foreach (string file in FilesUtils.GetFiles(inputPath))
    {
        string xml = NER.GenerateEntitiesToString(file, language);
        string csv = CSVUtils.RemoveDuplicates(CSVUtils.EntitiesToCsv(xml, sep));

        // NOTE(review): the dictionary is re-read for every input file. It could be
        // hoisted out of the loop if MatchEntities does not mutate the entity list - confirm.
        List<string[]> dicTable;
        using (var dicReader = new StreamReader(dicPath))
        {
            // Dispose the reader so the dictionary file handle is released each iteration.
            dicTable = CSVUtils.TabulateCSV(dicReader, sep);
        }

        List<string[]> fileTable;
        using (var csvReader = new StringReader(csv))
        {
            fileTable = CSVUtils.TabulateCSV(csvReader, sep);
        }

        List<string> entitiesTable = GetEntitiesFromDic(dicTable);
        var matches = MatchEntities(fileTable, entitiesTable);
        GenerateMatchedEntriesCSV(file, dicPath, matches, output, sep);
    }
}