/// <summary>
/// Creates a BioNLP/NLPBA-2004 name sample stream from the command line arguments.
/// The "types" parameter selects which entity kinds the stream generates.
/// </summary>
/// <param name="args"> factory arguments; parsed into <c>Parameters</c> </param>
/// <returns> a name sample stream over the opened data file </returns>
public override ObjectStream <T> create <T>(string[] args)
{
    Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

    int typesToGenerate = 0;

    // BUG FIX: the GENERATE_* constants are independent bit flags and must each
    // be tested on its own. The previous else-if chain meant a request such as
    // "DNA,protein" only ever enabled the first matching type.
    if (@params.Types.Contains("DNA"))
    {
        typesToGenerate = typesToGenerate | BioNLP2004NameSampleStream.GENERATE_DNA_ENTITIES;
    }
    if (@params.Types.Contains("protein"))
    {
        typesToGenerate = typesToGenerate | BioNLP2004NameSampleStream.GENERATE_PROTEIN_ENTITIES;
    }
    if (@params.Types.Contains("cell_type"))
    {
        typesToGenerate = typesToGenerate | BioNLP2004NameSampleStream.GENERATE_CELLTYPE_ENTITIES;
    }
    if (@params.Types.Contains("cell_line"))
    {
        typesToGenerate = typesToGenerate | BioNLP2004NameSampleStream.GENERATE_CELLLINE_ENTITIES;
    }
    if (@params.Types.Contains("RNA"))
    {
        typesToGenerate = typesToGenerate | BioNLP2004NameSampleStream.GENERATE_RNA_ENTITIES;
    }

    return new BioNLP2004NameSampleStream(CmdLineUtil.openInFile(@params.Data), typesToGenerate);
}
/// <summary>
/// Reads raw text from standard input, tokenizes it with the configured
/// tokenizer, and echoes each tokenized line to standard output while
/// tracking throughput.
/// </summary>
internal void process()
{
    ObjectStream <string> rawLines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));
    ObjectStream <string> tokenLines = new WhitespaceTokenStream(new TokenizerStream(tokenizer, rawLines));

    PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
    monitor.start();

    try
    {
        for (string line = tokenLines.read(); line != null; line = tokenLines.read())
        {
            Console.WriteLine(line);
            monitor.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    monitor.stopAndPrintFinalResult();
}
/// <summary>
/// Loads the feature generator descriptor file fully into memory.
/// </summary>
/// <param name="featureGenDescriptorFile"> descriptor file, may be null </param>
/// <returns> the descriptor bytes, or null if no file was given </returns>
internal static sbyte[] openFeatureGeneratorBytes(Jfile featureGenDescriptorFile)
{
    sbyte[] featureGeneratorBytes = null;

    // load descriptor file into memory
    if (featureGenDescriptorFile != null)
    {
        InputStream bytesIn = CmdLineUtil.openInFile(featureGenDescriptorFile);

        try
        {
            featureGeneratorBytes = ModelUtil.read(bytesIn);
        }
        catch (IOException e)
        {
            // BUG FIX: the old message ("training data or indexing data") was
            // copy-pasted from the trainer tools; this method reads the
            // feature generator descriptor, not training data.
            throw new TerminateToolException(-1, "IO error while reading the feature generator descriptor file: " + e.Message, e);
        }
        finally
        {
            try
            {
                bytesIn.close();
            }
            catch (IOException)
            {
                // sorry that this can fail
            }
        }
    }

    return featureGeneratorBytes;
}
/// <summary>
/// Runs k-fold cross validation for the chunker and prints either the overall
/// F-measure or a detailed per-tag report, depending on the detailedF flag.
/// </summary>
/// <param name="format"> name of the registered sample stream format </param>
/// <param name="args"> remaining command line arguments </param>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No params file supplied: fall back to -iterations/-cutoff defaults.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    IList <EvaluationMonitor <ChunkSample> > listeners = new LinkedList <EvaluationMonitor <ChunkSample> >();
    ChunkerDetailedFMeasureListener detailedFMeasureListener = null;
    if (@params.Misclassified.Value)
    {
        // Echo misclassified samples during evaluation.
        listeners.Add(new ChunkEvaluationErrorListener());
    }
    if (@params.DetailedF.Value)
    {
        // Collect per-tag statistics; also decides which report is printed below.
        detailedFMeasureListener = new ChunkerDetailedFMeasureListener();
        listeners.Add(detailedFMeasureListener);
    }

    ChunkerCrossValidator validator;
    try
    {
        ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
        validator = new ChunkerCrossValidator(@params.Lang, mlParams, chunkerFactory, listeners.ToArray());
        validator.evaluate(sampleStream, @params.Folds.Value);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    if (detailedFMeasureListener == null)
    {
        FMeasure result = validator.FMeasure;
        Console.WriteLine(result.ToString());
    }
    else
    {
        // The listener accumulated results while the validator ran.
        Console.WriteLine(detailedFMeasureListener.ToString());
    }
}
/// <summary>
/// Trains a sentence detector model from the sample stream and writes it to
/// the model file given on the command line.
/// </summary>
/// <param name="format"> name of the registered sample stream format </param>
/// <param name="args"> remaining command line arguments </param>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams != null)
    {
        // CONSISTENCY FIX: validate the loaded parameters before checking the
        // training type, exactly like the tokenizer trainer tool does. The
        // isValid check was previously missing here.
        if (!TrainUtil.isValid(mlParams.Settings))
        {
            throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
        }
        if (TrainUtil.isSequenceTraining(mlParams.Settings))
        {
            throw new TerminateToolException(1, "Sequence training is not supported!");
        }
    }

    if (mlParams == null)
    {
        // No params file supplied: fall back to -iterations/-cutoff defaults.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    Jfile modelOutFile = @params.Model;
    CmdLineUtil.checkOutputFile("sentence detector model", modelOutFile);

    // Optional custom end-of-sentence characters.
    char[] eos = null;
    if (@params.EosChars != null)
    {
        eos = @params.EosChars.ToCharArray();
    }

    SentenceModel model;
    try
    {
        Dictionary dict = loadDict(@params.AbbDict);
        SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create(@params.Factory, @params.Lang, true, dict, eos);
        model = SentenceDetectorME.train(@params.Lang, sampleStream, sdFactory, mlParams);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("sentence detector", modelOutFile, model);
}
/// <summary>
/// Trains a tokenizer model from the sample stream and writes it to the model
/// file given on the command line.
/// </summary>
/// <param name="format"> name of the registered sample stream format </param>
/// <param name="args"> remaining command line arguments </param>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams != null)
    {
        // Reject invalid or unsupported parameter files early.
        if (!TrainUtil.isValid(mlParams.Settings))
        {
            throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
        }
        if (TrainUtil.isSequenceTraining(mlParams.Settings))
        {
            throw new TerminateToolException(1, "Sequence training is not supported!");
        }
    }

    if (mlParams == null)
    {
        // No params file supplied: fall back to -iterations/-cutoff defaults.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    File modelOutFile = @params.Model;
    CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);

    TokenizerModel model;
    try
    {
        Dictionary dict = loadDict(@params.AbbDict);
        TokenizerFactory tokFactory = TokenizerFactory.create(@params.Factory, @params.Lang, dict, @params.AlphaNumOpt.Value, null);
        model = opennlp.tools.tokenize.TokenizerME.train(sampleStream, tokFactory, mlParams);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("tokenizer", modelOutFile, model);
}
/// <summary>
/// Loads an abbreviation dictionary from the given file.
/// </summary>
/// <param name="f"> dictionary file, may be null </param>
/// <returns> the loaded dictionary, or null if no file was given </returns>
internal static Dictionary loadDict(Jfile f)
{
    if (f == null)
    {
        return null;
    }

    CmdLineUtil.checkInputFile("abb dict", f);
    return new Dictionary(new FileInputStream(f));
}
/// <summary>
/// Creates a coreference sample stream over paragraph-separated training data.
/// </summary>
/// <param name="args"> factory arguments; parsed into <c>Parameters</c> </param>
/// <returns> a coref sample stream over the opened data file </returns>
public override ObjectStream <CorefSample> create(string[] args)
{
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));
    CmdLineUtil.checkInputFile("Data", parameters.Data);

    FileInputStream dataIn = CmdLineUtil.openInFile(parameters.Data);
    ObjectStream <string> paragraphs = new ParagraphStream(new PlainTextByLineStream(dataIn.Channel, parameters.Encoding));
    return new CorefSampleDataStream(paragraphs);
}
/// <summary>
/// Runs k-fold cross validation for the sentence detector and prints the
/// resulting F-measure.
/// </summary>
/// <param name="format"> name of the registered sample stream format </param>
/// <param name="args"> remaining command line arguments </param>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No params file supplied: fall back to -iterations/-cutoff defaults.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    SDCrossValidator validator;

    SentenceDetectorEvaluationMonitor errorListener = null;
    if (@params.Misclassified.Value)
    {
        // Echo misclassified samples during evaluation.
        errorListener = new SentenceEvaluationErrorListener();
    }

    // Optional custom end-of-sentence characters.
    char[] eos = null;
    if (@params.EosChars != null)
    {
        eos = @params.EosChars.ToCharArray();
    }

    try
    {
        Dictionary abbreviations = SentenceDetectorTrainerTool.loadDict(@params.AbbDict);
        SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create(@params.Factory, @params.Lang, true, abbreviations, eos);
        validator = new SDCrossValidator(@params.Lang, mlParams, sdFactory, errorListener);
        validator.evaluate(sampleStream, @params.Folds.Value);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    FMeasure result = validator.FMeasure;
    Console.WriteLine(result.ToString());
}
/// <summary>
/// Trains a name finder model from the sample stream and writes it to the
/// model file given on the command line.
/// </summary>
/// <param name="format"> name of the registered sample stream format </param>
/// <param name="args"> remaining command line arguments </param>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No params file supplied: fall back to -iterations/-cutoff defaults.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    File modelOutFile = @params.Model;

    // Raw feature generator descriptor, loaded into memory (may be null).
    sbyte[] featureGeneratorBytes = openFeatureGeneratorBytes(@params.Featuregen);

    // TODO: Support Custom resources:
    //       Must be loaded into memory, or written to tmp file until descriptor
    //       is loaded which defines parses when model is loaded

    IDictionary <string, object> resources = loadResources(@params.Resources);

    CmdLineUtil.checkOutputFile("name finder model", modelOutFile);

    if (@params.NameTypes != null)
    {
        // Restrict training to the requested (comma-separated) name types.
        string[] nameTypes = @params.NameTypes.Split(",", true);
        sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
    }

    TokenNameFinderModel model;
    try
    {
        model = opennlp.tools.namefind.NameFinderME.train(@params.Lang, @params.Type, sampleStream, mlParams, featureGeneratorBytes, resources);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("name finder", modelOutFile, model);
}
/// <summary>
/// Creates an Arvores Deitadas (AD) corpus name sample stream from the
/// command line arguments.
/// </summary>
/// <param name="args"> factory arguments; parsed into <c>Parameters</c> </param>
/// <returns> a name sample stream over the opened data file </returns>
public override ObjectStream <NameSample> create(string[] args)
{
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));
    language = parameters.Lang;

    FileInputStream dataIn = CmdLineUtil.openInFile(parameters.Data);
    ObjectStream <string> lines = new PlainTextByLineStream(dataIn.Channel, parameters.Encoding);
    return new ADNameSampleStream(lines, parameters.SplitHyphenatedTokens.Value);
}
/// <summary>
/// Creates an Arvores Deitadas (AD) corpus POS sample stream from the
/// command line arguments.
/// </summary>
/// <param name="args"> factory arguments; parsed into <c>Parameters</c> </param>
/// <returns> a POS sample stream over the opened data file </returns>
public override ObjectStream <POSSample> create(string[] args)
{
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));
    language = parameters.Lang;

    FileInputStream dataIn = CmdLineUtil.openInFile(parameters.Data);
    ObjectStream <string> lines = new PlainTextByLineStream(dataIn.Channel, parameters.Encoding);
    return new ADPOSSampleStream(lines, parameters.ExpandME.Value, parameters.IncludeFeatures.Value);
}
/// <summary>
/// Chunks POS-tagged sentences read from stdin (one POSSample per line) and
/// prints each chunked sentence; lines that fail to parse are reported to
/// stderr and skipped.
/// </summary>
/// <param name="args"> exactly one argument: the chunker model file </param>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
    }
    else
    {
        ChunkerModel model = (new ChunkerModelLoader()).load(new File(args[0]));

        ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);

        ObjectStream <string> lineStream = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

        PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
        perfMon.start();

        try
        {
            string line;
            while ((line = lineStream.read()) != null)
            {
                POSSample posSample;
                try
                {
                    posSample = POSSample.parse(line);
                }
                catch (InvalidFormatException)
                {
                    // Report the malformed input line and continue with the next one.
                    Console.Error.WriteLine("Invalid format:");
                    Console.Error.WriteLine(line);
                    continue;
                }

                string[] chunks = chunker.chunk(posSample.Sentence, posSample.Tags);

                Console.WriteLine((new ChunkSample(posSample.Sentence, posSample.Tags, chunks)).nicePrint());

                perfMon.incrementCounter();
            }
        }
        catch (IOException e)
        {
            CmdLineUtil.handleStdinIoError(e);
        }

        perfMon.stopAndPrintFinalResult();
    }
}
/// <summary>
/// Creates a Leipzig corpus document sample stream; the number of sentences
/// per document is fixed at 20.
/// </summary>
/// <param name="args"> factory arguments; parsed into <c>Parameters</c> </param>
/// <returns> a document sample stream over the opened data file </returns>
public override ObjectStream<DocumentSample> create(string[] args)
{
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));
    language = parameters.Lang;

    try
    {
        var sampleData = CmdLineUtil.openInFile(parameters.Data);
        return new LeipzigDoccatSampleStream(parameters.Lang, 20, sampleData);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while opening sample data: " + e.Message, e);
    }
}
/// <summary>
/// Runs k-fold cross validation for the tokenizer and prints the resulting
/// F-measure.
/// </summary>
/// <param name="format"> name of the registered sample stream format </param>
/// <param name="args"> remaining command line arguments </param>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No params file supplied: fall back to -iterations/-cutoff defaults.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    TokenizerCrossValidator validator;

    TokenizerEvaluationMonitor listener = null;
    if (@params.Misclassified.Value)
    {
        // Echo misclassified samples during evaluation.
        listener = new TokenEvaluationErrorListener();
    }

    try
    {
        Dictionary dict = TokenizerTrainerTool.loadDict(@params.AbbDict);
        TokenizerFactory tokFactory = TokenizerFactory.create(@params.Factory, @params.Lang, dict, @params.AlphaNumOpt.Value, null);
        validator = new TokenizerCrossValidator(mlParams, tokFactory, listener);
        validator.evaluate(sampleStream, @params.Folds.Value);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    FMeasure result = validator.FMeasure;
    Console.WriteLine(result.ToString());
}
/// <summary>
/// Runs a shell command in the given working directory and waits for it to
/// finish.
/// </summary>
/// <param name="workingDir"> directory the process is started in </param>
/// <param name="cmdLineString"> command line; split into command and arguments </param>
/// <returns> the process exit code </returns>
/// <exception cref="InvalidOperationException"> if the process could not be started </exception>
public static int RunShellCommand(DirectoryInfo workingDir, string cmdLineString)
{
    var cmdLine = CmdLineUtil.SplitCmdAndArgs(cmdLineString);
    var cmd = cmdLine[0];
    var args = cmdLine.Length > 1 ? cmdLine[1] : null;

    var psi = new System.Diagnostics.ProcessStartInfo(cmd, args);
    psi.WorkingDirectory = workingDir.FullName;
    psi.UseShellExecute = false;

    // BUG FIX: Process.Start can return null when no process is started, and
    // the Process handle was previously never disposed.
    using (var proc = System.Diagnostics.Process.Start(psi))
    {
        if (proc == null)
        {
            throw new InvalidOperationException("Failed to start process: " + cmd);
        }

        proc.WaitForExit();
        return proc.ExitCode;
    }
}
/// <summary>
/// Creates a CoNLL-03 name sample stream; only English and German data is
/// supported. The "types" parameter selects which entity kinds the stream
/// generates (the flags are shared with the CoNLL-02 stream).
/// </summary>
/// <param name="args"> factory arguments; parsed into <c>Parameters</c> </param>
/// <returns> a name sample stream over the opened data file </returns>
public override ObjectStream <NameSample> create(string[] args)
{
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));

    // TODO: support the other languages with this CoNLL.
    LANGUAGE lang;
    if ("en".Equals(parameters.Lang))
    {
        lang = LANGUAGE.EN;
    }
    else if ("de".Equals(parameters.Lang))
    {
        lang = LANGUAGE.DE;
    }
    else
    {
        throw new TerminateToolException(1, "Unsupported language: " + parameters.Lang);
    }
    language = parameters.Lang;

    int typesToGenerate = 0;
    if (parameters.Types.Contains("per"))
    {
        typesToGenerate |= Conll02NameSampleStream.GENERATE_PERSON_ENTITIES;
    }
    if (parameters.Types.Contains("org"))
    {
        typesToGenerate |= Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES;
    }
    if (parameters.Types.Contains("loc"))
    {
        typesToGenerate |= Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES;
    }
    if (parameters.Types.Contains("misc"))
    {
        typesToGenerate |= Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
    }

    return new Conll03NameSampleStream(lang, CmdLineUtil.openInFile(parameters.Data), typesToGenerate);
}
/// <summary>
/// Replaces the POS tagger model inside an existing parser model and writes
/// the updated parser model back to the same file.
/// </summary>
/// <param name="args"> two arguments: parser model file and POS model file </param>
public override void run(string[] args)
{
    if (args.Length != 2)
    {
        Console.WriteLine(Help);
        return;
    }

    File parserModelInFile = new File(args[0]);
    ParserModel parserModel = (new ParserModelLoader()).load(parserModelInFile);

    File taggerModelInFile = new File(args[1]);
    POSModel taggerModel = (new POSModelLoader()).load(taggerModelInFile);

    // The parser model file is overwritten in place with the updated model.
    CmdLineUtil.writeModel("parser", parserModelInFile, parserModel.updateTaggerModel(taggerModel));
}
/// <summary>
/// Creates a CoNLL-X POS sample stream reading the data file as UTF-8.
/// </summary>
/// <param name="args"> factory arguments; parsed into <c>Parameters</c> </param>
/// <returns> a POS sample stream over the opened data file </returns>
public override ObjectStream <POSSample> create(string[] args)
{
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));

    try
    {
        var reader = new InputStreamReader(CmdLineUtil.openInFile(parameters.Data), "UTF-8");
        ObjectStream <string> lines = new PlainTextByLineStream(reader);
        return new ConllXPOSSampleStream(lines);
    }
    catch (UnsupportedEncodingException e)
    {
        // this shouldn't happen
        throw new TerminateToolException(-1, "UTF-8 encoding is not supported: " + e.Message, e);
    }
}
/// <summary>
/// Performs sentence detection on text read from standard input.
///
/// A newline is treated as a paragraph boundary.
/// </summary>
/// <param name="args"> exactly one argument: the sentence model file </param>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
        return;
    }

    SentenceModel model = (new SentenceModelLoader()).load(new File(args[0]));
    SentenceDetectorME detector = new SentenceDetectorME(model);

    ObjectStream <string> paragraphs = new ParagraphStream(new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput)));

    PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
    monitor.start();

    try
    {
        string paragraph;
        while ((paragraph = paragraphs.read()) != null)
        {
            string[] sentences = detector.sentDetect(paragraph);
            foreach (string sentence in sentences)
            {
                Console.WriteLine(sentence);
            }
            monitor.incrementCounter(sentences.Length);

            // Blank line marks the end of the paragraph in the output.
            Console.WriteLine();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    monitor.stopAndPrintFinalResult();
}
/// <summary>
/// Converts a one-entry-per-line dictionary text file into the serialized
/// dictionary format.
/// </summary>
/// <param name="args"> command line arguments; parsed into <c>Params</c> </param>
public override void run(string[] args)
{
    Params @params = validateAndParseParams(args, typeof(Params));

    File dictInFile = @params.InputFile;
    File dictOutFile = @params.OutputFile;
    Charset encoding = @params.Encoding;

    CmdLineUtil.checkInputFile("dictionary input file", dictInFile);
    CmdLineUtil.checkOutputFile("dictionary output file", dictOutFile);

    InputStreamReader @in = null;
    OutputStream @out = null;
    try
    {
        @in = new InputStreamReader(new FileInputStream(dictInFile), encoding);
        @out = new FileOutputStream(dictOutFile);

        Dictionary dict = Dictionary.parseOneEntryPerLine(@in);
        dict.serialize(@out);
    }
    catch (IOException e)
    {
        // BUG FIX: the old message ("training data or indexing data") was
        // copy-pasted from the trainer tools; this tool converts a dictionary.
        throw new TerminateToolException(-1, "IO error while converting the dictionary: " + e.Message, e);
    }
    finally
    {
        // BUG FIX: @in/@out may still be null if opening either file threw,
        // which previously caused a NullReferenceException inside finally.
        if (@in != null)
        {
            try
            {
                @in.close();
            }
            catch (IOException)
            {
                // sorry that this can fail
            }
        }
        if (@out != null)
        {
            try
            {
                @out.close();
            }
            catch (IOException)
            {
                // sorry that this can fail
            }
        }
    }
}
/// <summary>
/// Categorizes documents read from standard input (paragraph-separated) and
/// prints each document with its best category.
/// </summary>
/// <param name="args"> at least one argument: the doccat model file </param>
public override void run(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine(Help);
        return;
    }

    DoccatModel model = (new DoccatModelLoader()).load(new File(args[0]));
    DocumentCategorizerME categorizer = new DocumentCategorizerME(model);

    ObjectStream <string> documents = new ParagraphStream(new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput)));

    PerformanceMonitor monitor = new PerformanceMonitor(System.err, "doc");
    monitor.start();

    try
    {
        string doc;
        while ((doc = documents.read()) != null)
        {
            double[] outcomes = categorizer.categorize(WhitespaceTokenizer.INSTANCE.tokenize(doc));
            string best = categorizer.getBestCategory(outcomes);

            DocumentSample labeled = new DocumentSample(best, doc);
            Console.WriteLine(labeled.ToString());

            monitor.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    monitor.stopAndPrintFinalResult();
}
/// <summary>
/// Trains a chunker model from the sample stream and writes it to the model
/// file given on the command line.
/// </summary>
/// <param name="format"> name of the registered sample stream format </param>
/// <param name="args"> remaining command line arguments </param>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No params file supplied: fall back to -iterations/-cutoff defaults.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    File modelOutFile = @params.Model;
    // BUG FIX: the output-file description said "sentence detector model",
    // copy-pasted from the sentence detector trainer; this is the chunker.
    CmdLineUtil.checkOutputFile("chunker model", modelOutFile);

    ChunkerModel model;
    try
    {
        ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
        model = ChunkerME.train(@params.Lang, sampleStream, mlParams, chunkerFactory);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("chunker", modelOutFile, model);
}
/// <summary>
/// POS-tags whitespace-tokenized sentences read from standard input, one
/// sentence per line, and prints each tagged sentence.
/// </summary>
/// <param name="args"> exactly one argument: the POS model file </param>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
        return;
    }

    POSModel model = (new POSModelLoader()).load(new File(args[0]));
    POSTaggerME tagger = new POSTaggerME(model);

    ObjectStream <string> lines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

    PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
    monitor.start();

    try
    {
        string line;
        while ((line = lines.read()) != null)
        {
            string[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
            string[] tags = tagger.tag(tokens);

            POSSample tagged = new POSSample(tokens, tags);
            Console.WriteLine(tagged.ToString());

            monitor.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    monitor.stopAndPrintFinalResult();
}
/// <summary>
/// Loads an existing parser model, retrains part of it on the given parse
/// samples via <c>trainAndUpdate</c>, and writes the updated model back to
/// the same file.
/// </summary>
/// <param name="format"> name of the registered sample stream format </param>
/// <param name="args"> remaining command line arguments </param>
public sealed override void run(string format, string[] args)
{
    ModelUpdaterParams @params = validateAndParseParams <ModelUpdaterParams>(ArgumentParser.filter(args, typeof(ModelUpdaterParams)), typeof(ModelUpdaterParams));

    // Load model to be updated
    Jfile modelFile = @params.Model;
    ParserModel originalParserModel = (new ParserModelLoader()).load(modelFile);

    // Build the sample stream from the arguments not consumed above.
    ObjectStreamFactory factory = getStreamFactory(format);
    string[] fargs = ArgumentParser.filter(args, factory.Parameters);
    validateFactoryArgs(factory, fargs);
    ObjectStream <Parse> sampleStream = factory.create <Parse>(fargs);

    ParserModel updatedParserModel;
    try
    {
        updatedParserModel = trainAndUpdate(originalParserModel, sampleStream, @params);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    // Overwrites the input model file.
    CmdLineUtil.writeModel("parser", modelFile, updatedParserModel);
}
/// <summary>
/// Detokenizes whitespace-tokenized lines read from standard input using a
/// detokenization dictionary and prints the reassembled text.
/// </summary>
/// <param name="args"> exactly one argument: the detokenizer dictionary file </param>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
        return;
    }

    Detokenizer detokenizer = new DictionaryDetokenizer((new DetokenizationDictionaryLoader()).load(new File(args[0])));

    ObjectStream <string> lines = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

    PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
    monitor.start();

    try
    {
        string line;
        while ((line = lines.read()) != null)
        {
            // white space tokenize line
            string[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);

            Console.WriteLine(detokenizer.detokenize(tokens, null));

            monitor.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    monitor.stopAndPrintFinalResult();
}
/// <summary>
/// Creates an Arvores Deitadas (AD) corpus chunk sample stream, optionally
/// restricted to a start/end sentence range.
/// </summary>
/// <param name="args"> factory arguments; parsed into <c>Parameters</c> </param>
/// <returns> a chunk sample stream over the opened data file </returns>
public override ObjectStream <ChunkSample> create(string[] args)
{
    Parameters parameters = ArgumentParser.parse(args, typeof(Parameters));
    language = parameters.Lang;

    FileInputStream dataIn = CmdLineUtil.openInFile(parameters.Data);
    ObjectStream <string> lines = new PlainTextByLineStream(dataIn.Channel, parameters.Encoding);

    ADChunkSampleStream stream = new ADChunkSampleStream(lines);

    // Values of -1 (or absent) mean "no restriction".
    if (parameters.Start != null && parameters.Start > -1)
    {
        stream.Start = parameters.Start.Value;
    }
    if (parameters.End != null && parameters.End > -1)
    {
        stream.End = parameters.End.Value;
    }

    return stream;
}
/// <summary>
/// Runs k-fold cross validation for the POS tagger, optionally writing a
/// fine-grained report file, and prints the word accuracy.
/// </summary>
/// <param name="format"> name of the registered sample stream format </param>
/// <param name="args"> remaining command line arguments </param>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No params file supplied: fall back to -iterations/-cutoff defaults.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    POSTaggerEvaluationMonitor missclassifiedListener = null;
    if (@params.Misclassified.Value)
    {
        // Echo misclassified samples during evaluation.
        missclassifiedListener = new POSEvaluationErrorListener();
    }

    POSTaggerFineGrainedReportListener reportListener = null;
    Jfile reportFile = @params.ReportOutputFile;
    OutputStream reportOutputStream = null;
    if (reportFile != null)
    {
        CmdLineUtil.checkOutputFile("Report Output File", reportFile);
        try
        {
            reportOutputStream = new FileOutputStream(reportFile);
            reportListener = new POSTaggerFineGrainedReportListener(reportOutputStream);
        }
        catch (FileNotFoundException e)
        {
            // NOTE(review): unlike the IOException handler below, the caught
            // exception is not passed as the inner exception here — confirm
            // whether that is intentional.
            throw new TerminateToolException(-1, "IO error while creating POS Tagger fine-grained report file: " + e.Message);
        }
    }

    POSTaggerCrossValidator validator;
    try
    {
        validator = new POSTaggerCrossValidator(@params.Lang, mlParams, @params.Dict, @params.Ngram, @params.TagDictCutoff, @params.Factory, missclassifiedListener, reportListener);

        validator.evaluate(sampleStream, @params.Folds.Value);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    Console.WriteLine("done");

    if (reportListener != null)
    {
        Console.WriteLine("Writing fine-grained report to " + @params.ReportOutputFile.AbsolutePath);
        reportListener.writeReport();

        try
        {
            // TODO: is it a problem to close the stream now?
            reportOutputStream.close();
        }
        catch (IOException)
        {
            // nothing to do
        }
    }

    Console.WriteLine();

    Console.WriteLine("Accuracy: " + validator.WordAccuracy);
}
/// <summary>
/// Evaluates a POS tagger model against the sample stream, optionally writing
/// a fine-grained report file, and prints the word accuracy.
/// </summary>
/// <param name="format"> name of the registered sample stream format </param>
/// <param name="args"> remaining command line arguments </param>
public override void run(string format, string[] args)
{
    base.run(format, args);

    POSModel model = (new POSModelLoader()).load(@params.Model);

    POSTaggerEvaluationMonitor missclassifiedListener = null;
    if (@params.Misclassified.Value)
    {
        // Echo misclassified samples during evaluation.
        missclassifiedListener = new POSEvaluationErrorListener();
    }

    POSTaggerFineGrainedReportListener reportListener = null;
    File reportFile = @params.ReportOutputFile;
    OutputStream reportOutputStream = null;
    if (reportFile != null)
    {
        CmdLineUtil.checkOutputFile("Report Output File", reportFile);
        try
        {
            reportOutputStream = new FileOutputStream(reportFile);
            reportListener = new POSTaggerFineGrainedReportListener(reportOutputStream);
        }
        catch (FileNotFoundException e)
        {
            // NOTE(review): unlike the IOException handler below, the caught
            // exception is not passed as the inner exception here — confirm
            // whether that is intentional.
            throw new TerminateToolException(-1, "IO error while creating POS Tagger fine-grained report file: " + e.Message);
        }
    }

    POSEvaluator evaluator = new POSEvaluator(new opennlp.tools.postag.POSTaggerME(model), missclassifiedListener, reportListener);

    Console.Write("Evaluating ... ");
    try
    {
        evaluator.evaluate(sampleStream);
    }
    catch (IOException e)
    {
        Console.Error.WriteLine("failed");
        throw new TerminateToolException(-1, "IO error while reading test data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }

    Console.WriteLine("done");

    if (reportListener != null)
    {
        Console.WriteLine("Writing fine-grained report to " + @params.ReportOutputFile.AbsolutePath);
        reportListener.writeReport();

        try
        {
            // TODO: is it a problem to close the stream now?
            reportOutputStream.close();
        }
        catch (IOException)
        {
            // nothing to do
        }
    }

    Console.WriteLine();

    Console.WriteLine("Accuracy: " + evaluator.WordAccuracy);
}
/// <summary>
/// Builds a name dictionary from US Census 1990 name data and serializes it
/// to the output file.
/// </summary>
/// <param name="args"> command line arguments; parsed into <c>Parameters</c> </param>
public override void run(string[] args)
{
    Parameters @params = validateAndParseParams(args, typeof(Parameters));

    File testData = new File(@params.CensusData);
    File dictOutFile = new File(@params.Dict);

    CmdLineUtil.checkInputFile("Name data", testData);
    CmdLineUtil.checkOutputFile("Dictionary file", dictOutFile);

    FileInputStream sampleDataIn = CmdLineUtil.openInFile(testData);
    ObjectStream <StringList> sampleStream = new NameFinderCensus90NameStream(sampleDataIn, Charset.forName(@params.Encoding));

    Dictionary mDictionary;
    try
    {
        Console.WriteLine("Creating Dictionary...");
        mDictionary = createDictionary(sampleStream);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry this can fail..
        }
    }

    Console.WriteLine("Saving Dictionary...");

    OutputStream @out = null;
    try
    {
        @out = new FileOutputStream(dictOutFile);
        mDictionary.serialize(@out);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while writing dictionary file: " + e.Message, e);
    }
    finally
    {
        // @out may be null if opening the output file failed.
        if (@out != null)
        {
            try
            {
                @out.close();
            }
            catch (IOException e)
            {
                // file might be damaged
                throw new TerminateToolException(-1, "Attention: Failed to correctly write dictionary:" + e.Message, e);
            }
        }
    }
}