/// <summary>
/// Cross-validates a chunker on <c>sampleStream</c> and prints either the overall
/// F-measure or, when the <c>DetailedF</c> option is set, the detailed F-measure
/// listener's report to the console.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// An <c>IOException</c> was raised while creating the factory or evaluating.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No parameters were returned: fall back to the iterations/cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    // List<T> is used here because System.Collections.Generic.LinkedList<T> does not
    // implement IList<T>, so the former "IList<T> = new LinkedList<T>()" assignment
    // cannot compile against the BCL. List<T> also provides ToArray() directly.
    List<EvaluationMonitor<ChunkSample>> listeners = new List<EvaluationMonitor<ChunkSample>>();
    ChunkerDetailedFMeasureListener detailedFMeasureListener = null;

    if (@params.Misclassified.Value)
    {
        listeners.Add(new ChunkEvaluationErrorListener());
    }

    if (@params.DetailedF.Value)
    {
        detailedFMeasureListener = new ChunkerDetailedFMeasureListener();
        listeners.Add(detailedFMeasureListener);
    }

    ChunkerCrossValidator validator;
    try
    {
        ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
        validator = new ChunkerCrossValidator(@params.Lang, mlParams, chunkerFactory, listeners.ToArray());
        validator.evaluate(sampleStream, @params.Folds.Value);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    if (detailedFMeasureListener == null)
    {
        FMeasure result = validator.FMeasure;
        Console.WriteLine(result.ToString());
    }
    else
    {
        Console.WriteLine(detailedFMeasureListener.ToString());
    }
}
/// <summary>
/// Trains a sentence detector model from <c>sampleStream</c> and writes it to the
/// file given by the <c>Model</c> option.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// The loaded training parameters request sequence training, or an
/// <c>IOException</c> was raised while loading the dictionary or training.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No parameters were returned: fall back to the iterations/cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }
    else if (TrainUtil.isSequenceTraining(mlParams.Settings))
    {
        throw new TerminateToolException(1, "Sequence training is not supported!");
    }

    Jfile outputFile = @params.Model;
    CmdLineUtil.checkOutputFile("sentence detector model", outputFile);

    // Optional end-of-sentence characters; null when the option is absent.
    char[] eosChars = @params.EosChars != null ? @params.EosChars.ToCharArray() : null;

    SentenceModel trainedModel;
    try
    {
        Dictionary abbreviations = loadDict(@params.AbbDict);
        SentenceDetectorFactory factory = SentenceDetectorFactory.create(
            @params.Factory, @params.Lang, true, abbreviations, eosChars);
        trainedModel = SentenceDetectorME.train(@params.Lang, sampleStream, factory, mlParams);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("sentence detector", outputFile, trainedModel);
}
/// <summary>
/// Trains a tokenizer model from <c>sampleStream</c> and writes it to the file
/// given by the <c>Model</c> option.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// The loaded training parameters are invalid or request sequence training, or an
/// <c>IOException</c> was raised while loading the dictionary or training.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No parameters were returned: fall back to the iterations/cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }
    else
    {
        // Validity is checked first, then the unsupported sequence-training mode.
        if (!TrainUtil.isValid(mlParams.Settings))
        {
            throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
        }

        if (TrainUtil.isSequenceTraining(mlParams.Settings))
        {
            throw new TerminateToolException(1, "Sequence training is not supported!");
        }
    }

    File outputFile = @params.Model;
    CmdLineUtil.checkOutputFile("tokenizer model", outputFile);

    TokenizerModel trainedModel;
    try
    {
        Dictionary abbreviations = loadDict(@params.AbbDict);
        TokenizerFactory factory = TokenizerFactory.create(
            @params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);
        trainedModel = opennlp.tools.tokenize.TokenizerME.train(sampleStream, factory, mlParams);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("tokenizer", outputFile, trainedModel);
}
/// <summary>
/// Cross-validates a sentence detector on <c>sampleStream</c> and prints the
/// resulting F-measure to the console.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// An <c>IOException</c> was raised while loading the dictionary or evaluating.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No parameters were returned: fall back to the iterations/cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    // An error listener is only attached when the Misclassified option is set.
    SentenceDetectorEvaluationMonitor monitor = null;
    if (@params.Misclassified.Value)
    {
        monitor = new SentenceEvaluationErrorListener();
    }

    // Optional end-of-sentence characters; null when the option is absent.
    char[] eosChars = @params.EosChars != null ? @params.EosChars.ToCharArray() : null;

    SDCrossValidator crossValidator;
    try
    {
        Dictionary abbDict = SentenceDetectorTrainerTool.loadDict(@params.AbbDict);
        SentenceDetectorFactory factory = SentenceDetectorFactory.create(
            @params.Factory, @params.Lang, true, abbDict, eosChars);

        crossValidator = new SDCrossValidator(@params.Lang, mlParams, factory, monitor);
        crossValidator.evaluate(sampleStream, @params.Folds.Value);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    FMeasure fMeasure = crossValidator.FMeasure;
    Console.WriteLine(fMeasure.ToString());
}
/// <summary>
/// Trains a name finder model from <c>sampleStream</c> and writes it to the file
/// given by the <c>Model</c> option. When the <c>NameTypes</c> option is present,
/// the sample stream is first wrapped in a <c>NameSampleTypeFilter</c>.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// An <c>IOException</c> was raised while training.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No parameters were returned: fall back to the iterations/cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    File outputFile = @params.Model;

    sbyte[] featureGenBytes = openFeatureGeneratorBytes(@params.Featuregen);

    // TODO: Support custom resources.
    // They must be loaded into memory, or written to a tmp file, until the
    // descriptor is loaded which defines parses when the model is loaded.
    IDictionary<string, object> resourceMap = loadResources(@params.Resources);

    CmdLineUtil.checkOutputFile("name finder model", outputFile);

    if (@params.NameTypes != null)
    {
        // Restrict the samples to the comma-separated name types.
        string[] selectedTypes = @params.NameTypes.Split(",", true);
        sampleStream = new NameSampleTypeFilter(selectedTypes, sampleStream);
    }

    TokenNameFinderModel trainedModel;
    try
    {
        trainedModel = opennlp.tools.namefind.NameFinderME.train(
            @params.Lang, @params.Type, sampleStream, mlParams, featureGenBytes, resourceMap);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("name finder", outputFile, trainedModel);
}
/// <summary>
/// Cross-validates a tokenizer on <c>sampleStream</c> and prints the resulting
/// F-measure to the console.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// An <c>IOException</c> was raised while loading the dictionary or evaluating.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No parameters were returned: fall back to the iterations/cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    // An error listener is only attached when the Misclassified option is set.
    TokenizerEvaluationMonitor monitor = null;
    if (@params.Misclassified.Value)
    {
        monitor = new TokenEvaluationErrorListener();
    }

    TokenizerCrossValidator crossValidator;
    try
    {
        Dictionary abbreviations = TokenizerTrainerTool.loadDict(@params.AbbDict);
        TokenizerFactory factory = TokenizerFactory.create(
            @params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);

        crossValidator = new TokenizerCrossValidator(mlParams, factory, monitor);
        crossValidator.evaluate(sampleStream, @params.Folds.Value);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    FMeasure fMeasure = crossValidator.FMeasure;
    Console.WriteLine(fMeasure.ToString());
}
/// <summary>
/// Trains a chunker model from <c>sampleStream</c> and writes it to the file given
/// by the <c>Model</c> option.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// An <c>IOException</c> was raised while creating the factory or training.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No parameters were returned: fall back to the iterations/cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    File modelOutFile = @params.Model;

    // The label previously read "sentence detector model", which did not match the
    // chunker model this method trains and writes below.
    CmdLineUtil.checkOutputFile("chunker model", modelOutFile);

    ChunkerModel model;
    try
    {
        ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
        model = ChunkerME.train(@params.Lang, sampleStream, mlParams, chunkerFactory);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("chunker", modelOutFile, model);
}
/// <summary>
/// Cross-validates a POS tagger on <c>sampleStream</c>, optionally writes a
/// fine-grained report to the file given by the <c>ReportOutputFile</c> option,
/// and prints the word accuracy to the console.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// The report file could not be opened, or an <c>IOException</c> was raised while
/// evaluating.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No parameters were returned: fall back to the iterations/cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    POSTaggerEvaluationMonitor missclassifiedListener = null;
    if (@params.Misclassified.Value)
    {
        missclassifiedListener = new POSEvaluationErrorListener();
    }

    POSTaggerFineGrainedReportListener reportListener = null;
    Jfile reportFile = @params.ReportOutputFile;
    OutputStream reportOutputStream = null;

    if (reportFile != null)
    {
        CmdLineUtil.checkOutputFile("Report Output File", reportFile);
        try
        {
            reportOutputStream = new FileOutputStream(reportFile);
            reportListener = new POSTaggerFineGrainedReportListener(reportOutputStream);
        }
        catch (FileNotFoundException e)
        {
            // Pass the cause along, as every other handler in this method does.
            throw new TerminateToolException(-1, "IO error while creating POS Tagger fine-grained report file: " + e.Message, e);
        }
    }

    try
    {
        POSTaggerCrossValidator validator;
        try
        {
            validator = new POSTaggerCrossValidator(@params.Lang, mlParams, @params.Dict, @params.Ngram,
                @params.TagDictCutoff, @params.Factory, missclassifiedListener, reportListener);
            validator.evaluate(sampleStream, @params.Folds.Value);
        }
        catch (IOException e)
        {
            throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
        }
        finally
        {
            try
            {
                sampleStream.close();
            }
            catch (IOException)
            {
                // Closing is best-effort; a failure here is deliberately ignored.
            }
        }

        Console.WriteLine("done");

        if (reportListener != null)
        {
            Console.WriteLine("Writing fine-grained report to " + @params.ReportOutputFile.AbsolutePath);
            reportListener.writeReport();
        }

        Console.WriteLine();
        Console.WriteLine("Accuracy: " + validator.WordAccuracy);
    }
    finally
    {
        // Close the report stream on every path, so it is also released when
        // evaluation or report writing fails.
        if (reportOutputStream != null)
        {
            try
            {
                reportOutputStream.close();
            }
            catch (IOException)
            {
                // Nothing useful can be done if closing the report fails.
            }
        }
    }
}
// TODO: Add param to train tree insert parser
/// <summary>
/// Trains a parser model (chunking or tree-insert, depending on the
/// <c>ParserType</c> option) from <c>sampleStream</c> and writes it to the file
/// given by the <c>Model</c> option.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// One of the build/check/attach/tagger/chunker parameter sections is invalid, or
/// an <c>IOException</c> was raised while reading the head rules or training.
/// </exception>
/// <exception cref="IllegalStateException">
/// The parsed parser type is neither <c>CHUNKING</c> nor <c>TREEINSERT</c>.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, true);
    if (mlParams == null)
    {
        // No parameters were returned: fall back to the iterations/cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }
    else
    {
        // Each named section is validated in this order; the first invalid one aborts.
        string[] sectionKeys = { "build", "check", "attach", "tagger", "chunker" };
        string[] sectionLabels = { "Build", "Check", "Attach", "Tagger", "Chunker" };

        for (int i = 0; i < sectionKeys.Length; i++)
        {
            if (!TrainUtil.isValid(mlParams.getSettings(sectionKeys[i])))
            {
                throw new TerminateToolException(1, sectionLabels[i] + " training parameters are invalid!");
            }
        }
    }

    Jfile outputFile = @params.Model;
    CmdLineUtil.checkOutputFile("parser model", outputFile);

    ParserModel trainedModel;
    try
    {
        // TODO hard-coded language reference
        // NOTE(review): the reader opened here is never closed in this method —
        // confirm whether HeadRules takes ownership of it.
        HeadRules headRules = new opennlp.tools.parser.lang.en.HeadRules(
            new InputStreamReader(new FileInputStream(@params.HeadRules), @params.Encoding));

        var parserType = parseParserType(@params.ParserType);

        if (@params.Fun.Value)
        {
            Parse.useFunctionTags(true);
        }

        if (ParserType.CHUNKING == parserType)
        {
            trainedModel = Parser.train(@params.Lang, sampleStream, headRules, mlParams);
        }
        else if (ParserType.TREEINSERT == parserType)
        {
            trainedModel = opennlp.tools.parser.treeinsert.Parser.train(@params.Lang, sampleStream, headRules, mlParams);
        }
        else
        {
            throw new IllegalStateException();
        }
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("parser", outputFile, trainedModel);
}
/// <summary>
/// Trains a POS tagger model from <c>sampleStream</c> and writes it to the file
/// given by the <c>Model</c> option. Depending on the options, an n-gram
/// dictionary is built and a tag dictionary is loaded and/or populated from the
/// samples before training; the stream is reset after each such pass.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// The loaded training parameters are invalid, the factory could not be created,
/// or an <c>IOException</c> was raised in any of the dictionary or training steps.
/// </exception>
/// <exception cref="System.ArgumentException">
/// A tag dictionary cutoff was given but the tag dictionary is not a
/// <c>MutableTagDictionary</c>.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, true);
    if (mlParams == null)
    {
        // No parameters were returned: build them from the options and record the
        // algorithm derived from the Type option.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
        mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(@params.Type).ToString());
    }
    else if (!TrainUtil.isValid(mlParams.Settings))
    {
        throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
    }

    File outputFile = @params.Model;
    CmdLineUtil.checkOutputFile("pos tagger model", outputFile);

    // Step 1: optional n-gram dictionary, built from a first pass over the samples.
    Dictionary ngramDictionary = null;
    int? cutoff = @params.Ngram;
    if (cutoff.HasValue)
    {
        Console.Error.Write("Building ngram dictionary ... ");
        try
        {
            ngramDictionary = POSTaggerME.buildNGramDictionary(sampleStream, cutoff.Value);
            sampleStream.reset();
        }
        catch (IOException e)
        {
            throw new TerminateToolException(-1, "IO error while building NGram Dictionary: " + e.Message, e);
        }
        Console.Error.WriteLine("done");
    }

    // Step 2: the tagger factory.
    POSTaggerFactory factory = null;
    try
    {
        factory = POSTaggerFactory.create(@params.Factory, ngramDictionary, null);
    }
    catch (InvalidFormatException e)
    {
        throw new TerminateToolException(-1, e.Message, e);
    }

    // Step 3: optional tag dictionary loaded from the Dict option.
    if (@params.Dict != null)
    {
        try
        {
            factory.TagDictionary = factory.createTagDictionary(@params.Dict);
        }
        catch (IOException e)
        {
            throw new TerminateToolException(-1, "IO error while loading POS Dictionary: " + e.Message, e);
        }
    }

    // Step 4: optionally populate the tag dictionary from the samples.
    if (@params.TagDictCutoff != null)
    {
        try
        {
            TagDictionary tagDictionary = factory.TagDictionary;
            if (tagDictionary == null)
            {
                tagDictionary = factory.createEmptyTagDictionary();
                factory.TagDictionary = tagDictionary;
            }

            MutableTagDictionary mutableDictionary = tagDictionary as MutableTagDictionary;
            if (mutableDictionary == null)
            {
                throw new System.ArgumentException("Can't extend a POSDictionary that does not implement MutableTagDictionary.");
            }

            POSTaggerME.populatePOSDictionary(sampleStream, mutableDictionary, @params.TagDictCutoff.Value);
            sampleStream.reset();
        }
        catch (IOException e)
        {
            throw new TerminateToolException(-1, "IO error while creating/extending POS Dictionary: " + e.Message, e);
        }
    }

    // Step 5: train, always closing the sample stream afterwards.
    POSModel trainedModel;
    try
    {
        trainedModel = POSTaggerME.train(@params.Lang, sampleStream, mlParams, factory);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("pos tagger", outputFile, trainedModel);
}
/// <summary>
/// Cross-validates a name finder on <c>sampleStream</c> and prints either the
/// overall F-measure or, when the <c>DetailedF</c> option is set, the detailed
/// F-measure listener's report to the console.
/// </summary>
/// <param name="format">Passed through to <c>base.run</c>.</param>
/// <param name="args">Passed through to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// An <c>IOException</c> was raised while evaluating.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // No parameters were returned: fall back to the iterations/cutoff options.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    sbyte[] featureGeneratorBytes = TokenNameFinderTrainerTool.openFeatureGeneratorBytes(@params.Featuregen);
    IDictionary<string, object> resources = TokenNameFinderTrainerTool.loadResources(@params.Resources);

    // List<T> is used here because System.Collections.Generic.LinkedList<T> does not
    // implement IList<T>, so the former "IList<T> = new LinkedList<T>()" assignment
    // cannot compile against the BCL. List<T> also provides ToArray() directly.
    List<EvaluationMonitor<NameSample>> listeners = new List<EvaluationMonitor<NameSample>>();

    if (@params.Misclassified.Value)
    {
        listeners.Add(new NameEvaluationErrorListener());
    }

    TokenNameFinderDetailedFMeasureListener detailedFListener = null;
    if (@params.DetailedF.Value)
    {
        detailedFListener = new TokenNameFinderDetailedFMeasureListener();
        listeners.Add(detailedFListener);
    }

    TokenNameFinderCrossValidator validator;
    try
    {
        validator = new TokenNameFinderCrossValidator(@params.Lang, @params.Type, mlParams,
            featureGeneratorBytes, resources, listeners.ToArray());
        validator.evaluate(sampleStream, @params.Folds.Value);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    Console.WriteLine("done");
    Console.WriteLine();

    if (detailedFListener == null)
    {
        Console.WriteLine(validator.FMeasure);
    }
    else
    {
        Console.WriteLine(detailedFListener.ToString());
    }
}