/// <summary>
/// Trains a sentence detector model from the sample stream prepared by <c>base.run</c>
/// and writes it to the model file given on the command line.
/// </summary>
/// <param name="format">Name of the training data format.</param>
/// <param name="args">Remaining command line arguments.</param>
public override void run(string format, string[] args) {
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams != null) {
        // Fix: reject an invalid parameters file up front, consistent with the
        // tokenizer trainer tool; previously invalid files were silently accepted.
        if (!TrainUtil.isValid(mlParams.Settings)) {
            throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
        }
        if (TrainUtil.isSequenceTraining(mlParams.Settings)) {
            throw new TerminateToolException(1, "Sequence training is not supported!");
        }
    }

    // No parameters file given: fall back to iterations/cutoff from the command line.
    if (mlParams == null) {
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    Jfile modelOutFile = @params.Model;
    CmdLineUtil.checkOutputFile("sentence detector model", modelOutFile);

    // Optional custom end-of-sentence characters.
    char[] eos = null;
    if (@params.EosChars != null) {
        eos = @params.EosChars.ToCharArray();
    }

    SentenceModel model;
    try {
        Dictionary dict = loadDict(@params.AbbDict);
        SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create(@params.Factory, @params.Lang, true, dict, eos);
        model = SentenceDetectorME.train(@params.Lang, sampleStream, sdFactory, mlParams);
    } catch (IOException e) {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    } finally {
        try {
            sampleStream.close();
        } catch (IOException) {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("sentence detector", modelOutFile, model);
}
/// <summary>
/// Trains a tokenizer model from the prepared sample stream and writes it to the
/// model file given on the command line.
/// </summary>
/// <param name="format">Name of the training data format.</param>
/// <param name="args">Remaining command line arguments.</param>
public override void run(string format, string[] args) {
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null) {
        // No parameters file: build defaults from the iterations/cutoff arguments.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    } else {
        // A parameters file was loaded; validate it before training.
        if (!TrainUtil.isValid(mlParams.Settings)) {
            throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
        }
        if (TrainUtil.isSequenceTraining(mlParams.Settings)) {
            throw new TerminateToolException(1, "Sequence training is not supported!");
        }
    }

    File outputFile = @params.Model;
    CmdLineUtil.checkOutputFile("tokenizer model", outputFile);

    TokenizerModel trainedModel;
    try {
        Dictionary abbreviations = loadDict(@params.AbbDict);
        TokenizerFactory factory = TokenizerFactory.create(@params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);
        trainedModel = opennlp.tools.tokenize.TokenizerME.train(sampleStream, factory, mlParams);
    } catch (IOException e) {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    } finally {
        try {
            sampleStream.close();
        } catch (IOException) {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("tokenizer", outputFile, trainedModel);
}
/// <summary>
/// Trains a name finder model from the prepared sample stream and writes it to the
/// model file given on the command line.
/// </summary>
/// <param name="format">Name of the training data format.</param>
/// <param name="args">Remaining command line arguments.</param>
public override void run(string format, string[] args) {
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null) {
        // No parameters file: build defaults from the iterations/cutoff arguments.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    File outputFile = @params.Model;

    sbyte[] featureGenBytes = openFeatureGeneratorBytes(@params.Featuregen);

    // TODO: Support Custom resources:
    // Must be loaded into memory, or written to tmp file until descriptor
    // is loaded which defines parses when model is loaded
    IDictionary<string, object> resourceMap = loadResources(@params.Resources);

    CmdLineUtil.checkOutputFile("name finder model", outputFile);

    // Optionally restrict training to a comma-separated list of name types.
    if (@params.NameTypes != null) {
        string[] requestedTypes = @params.NameTypes.Split(",", true);
        sampleStream = new NameSampleTypeFilter(requestedTypes, sampleStream);
    }

    TokenNameFinderModel trainedModel;
    try {
        trainedModel = opennlp.tools.namefind.NameFinderME.train(@params.Lang, @params.Type, sampleStream, mlParams, featureGenBytes, resourceMap);
    } catch (IOException e) {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    } finally {
        try {
            sampleStream.close();
        } catch (IOException) {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("name finder", outputFile, trainedModel);
}
/// <summary>
/// Converts a one-entry-per-line dictionary text file into a serialized
/// <c>Dictionary</c> file.
/// </summary>
/// <param name="args">Command line arguments (input file, output file, encoding).</param>
public override void run(string[] args) {
    Params @params = validateAndParseParams(args, typeof(Params));

    File dictInFile = @params.InputFile;
    File dictOutFile = @params.OutputFile;
    Charset encoding = @params.Encoding;

    CmdLineUtil.checkInputFile("dictionary input file", dictInFile);
    CmdLineUtil.checkOutputFile("dictionary output file", dictOutFile);

    InputStreamReader @in = null;
    OutputStream @out = null;
    try {
        @in = new InputStreamReader(new FileInputStream(dictInFile), encoding);
        @out = new FileOutputStream(dictOutFile);
        Dictionary dict = Dictionary.parseOneEntryPerLine(@in);
        dict.serialize(@out);
    } catch (IOException e) {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    } finally {
        // Fix: close each stream independently and only when it was opened.
        // The previous code closed both in a single try block, which threw a
        // NullReferenceException when the reader constructor had failed and
        // skipped @out.close() whenever @in.close() threw.
        if (@in != null) {
            try {
                @in.close();
            } catch (IOException) {
                // sorry that this can fail
            }
        }
        if (@out != null) {
            try {
                @out.close();
            } catch (IOException) {
                // sorry that this can fail
            }
        }
    }
}
/// <summary>
/// Trains a chunker model from the prepared sample stream and writes it to the
/// model file given on the command line.
/// </summary>
/// <param name="format">Name of the training data format.</param>
/// <param name="args">Remaining command line arguments.</param>
public override void run(string format, string[] args) {
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null) {
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    File modelOutFile = @params.Model;
    // Fix: this is the chunker tool; the message previously said
    // "sentence detector model" (copy-paste from the sentence detector trainer).
    CmdLineUtil.checkOutputFile("chunker model", modelOutFile);

    ChunkerModel model;
    try {
        ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
        model = ChunkerME.train(@params.Lang, sampleStream, mlParams, chunkerFactory);
    } catch (IOException e) {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    } finally {
        try {
            sampleStream.close();
        } catch (IOException) {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("chunker", modelOutFile, model);
}
/// <summary>
/// Cross validates a POS tagger over the prepared sample stream and prints the
/// word accuracy; optionally writes a fine-grained evaluation report.
/// </summary>
/// <param name="format">Name of the training data format.</param>
/// <param name="args">Remaining command line arguments.</param>
public override void run(string format, string[] args) {
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null) {
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    POSTaggerEvaluationMonitor missclassifiedListener = null;
    if (@params.Misclassified.Value) {
        missclassifiedListener = new POSEvaluationErrorListener();
    }

    POSTaggerFineGrainedReportListener reportListener = null;
    Jfile reportFile = @params.ReportOutputFile;
    OutputStream reportOutputStream = null;
    if (reportFile != null) {
        CmdLineUtil.checkOutputFile("Report Output File", reportFile);
        try {
            reportOutputStream = new FileOutputStream(reportFile);
            reportListener = new POSTaggerFineGrainedReportListener(reportOutputStream);
        } catch (FileNotFoundException e) {
            // Fix: pass the original exception as the cause, consistent with the
            // other catch blocks in this tool (previously it was dropped).
            throw new TerminateToolException(-1, "IO error while creating POS Tagger fine-grained report file: " + e.Message, e);
        }
    }

    POSTaggerCrossValidator validator;
    try {
        validator = new POSTaggerCrossValidator(@params.Lang, mlParams, @params.Dict, @params.Ngram, @params.TagDictCutoff, @params.Factory, missclassifiedListener, reportListener);
        validator.evaluate(sampleStream, @params.Folds.Value);
    } catch (IOException e) {
        // NOTE(review): if evaluation throws, reportOutputStream is never closed
        // (it is only closed after a successful report write below) — confirm
        // whether that leak matters for this short-lived CLI tool.
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    } finally {
        try {
            sampleStream.close();
        } catch (IOException) {
            // sorry that this can fail
        }
    }

    Console.WriteLine("done");

    if (reportListener != null) {
        Console.WriteLine("Writing fine-grained report to " + @params.ReportOutputFile.AbsolutePath);
        reportListener.writeReport();
        try {
            // TODO: is it a problem to close the stream now?
            reportOutputStream.close();
        } catch (IOException) {
            // nothing to do
        }
    }

    Console.WriteLine();
    Console.WriteLine("Accuracy: " + validator.WordAccuracy);
}
/// <summary>
/// Evaluates a trained POS tagger model against the prepared test sample stream
/// and prints the word accuracy; optionally writes a fine-grained report.
/// </summary>
/// <param name="format">Name of the test data format.</param>
/// <param name="args">Remaining command line arguments.</param>
public override void run(string format, string[] args) {
    base.run(format, args);

    POSModel model = (new POSModelLoader()).load(@params.Model);

    POSTaggerEvaluationMonitor missclassifiedListener = null;
    if (@params.Misclassified.Value) {
        missclassifiedListener = new POSEvaluationErrorListener();
    }

    POSTaggerFineGrainedReportListener reportListener = null;
    File reportFile = @params.ReportOutputFile;
    OutputStream reportOutputStream = null;
    if (reportFile != null) {
        CmdLineUtil.checkOutputFile("Report Output File", reportFile);
        try {
            reportOutputStream = new FileOutputStream(reportFile);
            reportListener = new POSTaggerFineGrainedReportListener(reportOutputStream);
        } catch (FileNotFoundException e) {
            // Fix: pass the original exception as the cause, consistent with the
            // other catch blocks in this tool (previously it was dropped).
            throw new TerminateToolException(-1, "IO error while creating POS Tagger fine-grained report file: " + e.Message, e);
        }
    }

    POSEvaluator evaluator = new POSEvaluator(new opennlp.tools.postag.POSTaggerME(model), missclassifiedListener, reportListener);

    Console.Write("Evaluating ... ");
    try {
        evaluator.evaluate(sampleStream);
    } catch (IOException e) {
        Console.Error.WriteLine("failed");
        throw new TerminateToolException(-1, "IO error while reading test data: " + e.Message, e);
    } finally {
        try {
            sampleStream.close();
        } catch (IOException) {
            // sorry that this can fail
        }
    }

    Console.WriteLine("done");

    if (reportListener != null) {
        Console.WriteLine("Writing fine-grained report to " + @params.ReportOutputFile.AbsolutePath);
        reportListener.writeReport();
        try {
            // TODO: is it a problem to close the stream now?
            reportOutputStream.close();
        } catch (IOException) {
            // nothing to do
        }
    }

    Console.WriteLine();
    Console.WriteLine("Accuracy: " + evaluator.WordAccuracy);
}
/// <summary>
/// Builds a name dictionary from US Census 1990 name data and serializes it to
/// the output file given on the command line.
/// </summary>
/// <param name="args">Command line arguments (census data file, dictionary file, encoding).</param>
public override void run(string[] args) {
    Parameters @params = validateAndParseParams(args, typeof(Parameters));

    File censusFile = new File(@params.CensusData);
    File dictionaryFile = new File(@params.Dict);

    CmdLineUtil.checkInputFile("Name data", censusFile);
    CmdLineUtil.checkOutputFile("Dictionary file", dictionaryFile);

    FileInputStream censusIn = CmdLineUtil.openInFile(censusFile);
    ObjectStream<StringList> sampleStream = new NameFinderCensus90NameStream(censusIn, Charset.forName(@params.Encoding));

    Dictionary censusDictionary;
    try {
        Console.WriteLine("Creating Dictionary...");
        censusDictionary = createDictionary(sampleStream);
    } catch (IOException e) {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    } finally {
        try {
            sampleStream.close();
        } catch (IOException) {
            // sorry this can fail..
        }
    }

    Console.WriteLine("Saving Dictionary...");

    OutputStream @out = null;
    try {
        @out = new FileOutputStream(dictionaryFile);
        censusDictionary.serialize(@out);
    } catch (IOException e) {
        throw new TerminateToolException(-1, "IO error while writing dictionary file: " + e.Message, e);
    } finally {
        if (@out != null) {
            try {
                @out.close();
            } catch (IOException e) {
                // file might be damaged
                throw new TerminateToolException(-1, "Attention: Failed to correctly write dictionary:" + e.Message, e);
            }
        }
    }
}
// TODO: Add param to train tree insert parser
/// <summary>
/// Trains a parser model (chunking or tree-insert variant) from the prepared
/// sample stream and writes it to the model file given on the command line.
/// </summary>
/// <param name="format">Name of the training data format.</param>
/// <param name="args">Remaining command line arguments.</param>
public override void run(string format, string[] args) {
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, true);
    if (mlParams != null) {
        // Validate each per-model section of the parameters file, failing on
        // the first invalid one.
        string[] sections = { "build", "check", "attach", "tagger", "chunker" };
        string[] labels = { "Build", "Check", "Attach", "Tagger", "Chunker" };
        for (int i = 0; i < sections.Length; i++) {
            if (!TrainUtil.isValid(mlParams.getSettings(sections[i]))) {
                throw new TerminateToolException(1, labels[i] + " training parameters are invalid!");
            }
        }
    } else {
        // No parameters file: build defaults from the iterations/cutoff arguments.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    Jfile modelOutFile = @params.Model;
    CmdLineUtil.checkOutputFile("parser model", modelOutFile);

    ParserModel model;
    try {
        // TODO hard-coded language reference
        HeadRules rules = new opennlp.tools.parser.lang.en.HeadRules(new InputStreamReader(new FileInputStream(@params.HeadRules), @params.Encoding));

        var type = parseParserType(@params.ParserType);

        if (@params.Fun.Value) {
            Parse.useFunctionTags(true);
        }

        if (type == ParserType.CHUNKING) {
            model = Parser.train(@params.Lang, sampleStream, rules, mlParams);
        } else if (type == ParserType.TREEINSERT) {
            model = opennlp.tools.parser.treeinsert.Parser.train(@params.Lang, sampleStream, rules, mlParams);
        } else {
            // Unknown parser type: parseParserType should never return anything else.
            throw new IllegalStateException();
        }
    } catch (IOException e) {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    } finally {
        try {
            sampleStream.close();
        } catch (IOException) {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("parser", modelOutFile, model);
}
/// <summary>
/// Trains a POS tagger model from the prepared sample stream and writes it to
/// the model file given on the command line. Optionally builds an n-gram
/// dictionary and a tag dictionary from the same stream before training.
/// </summary>
/// <param name="format">Name of the training data format.</param>
/// <param name="args">Remaining command line arguments.</param>
public override void run(string format, string[] args) {
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, true);
    if (mlParams != null && !TrainUtil.isValid(mlParams.Settings)) {
        throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
    }

    // No parameters file: build defaults and record the requested algorithm.
    if (mlParams == null) {
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
        mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(@params.Type).ToString());
    }

    File modelOutFile = @params.Model;
    CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile);

    // Optionally build an n-gram dictionary from the training data; the stream
    // is reset afterwards so training below sees it from the beginning.
    Dictionary ngramDict = null;
    int? ngramCutoff = @params.Ngram;
    if (ngramCutoff != null) {
        Console.Error.Write("Building ngram dictionary ... ");
        try {
            ngramDict = POSTaggerME.buildNGramDictionary(sampleStream, ngramCutoff.Value);
            sampleStream.reset();
        } catch (IOException e) {
            throw new TerminateToolException(-1, "IO error while building NGram Dictionary: " + e.Message, e);
        }
        Console.Error.WriteLine("done");
    }

    POSTaggerFactory postaggerFactory = null;
    try {
        postaggerFactory = POSTaggerFactory.create(@params.Factory, ngramDict, null);
    } catch (InvalidFormatException e) {
        throw new TerminateToolException(-1, e.Message, e);
    }

    // Optional user-supplied tag dictionary.
    if (@params.Dict != null) {
        try {
            postaggerFactory.TagDictionary = postaggerFactory.createTagDictionary(@params.Dict);
        } catch (IOException e) {
            throw new TerminateToolException(-1, "IO error while loading POS Dictionary: " + e.Message, e);
        }
    }

    // Optionally populate (or create and populate) a tag dictionary from the
    // training data itself; requires a MutableTagDictionary implementation.
    // The stream is reset again afterwards so training sees the full data.
    if (@params.TagDictCutoff != null) {
        try {
            TagDictionary dict = postaggerFactory.TagDictionary;
            if (dict == null) {
                dict = postaggerFactory.createEmptyTagDictionary();
                postaggerFactory.TagDictionary = dict;
            }
            if (dict is MutableTagDictionary) {
                POSTaggerME.populatePOSDictionary(sampleStream, (MutableTagDictionary)dict, @params.TagDictCutoff.Value);
            } else {
                throw new System.ArgumentException("Can't extend a POSDictionary that does not implement MutableTagDictionary.");
            }
            sampleStream.reset();
        } catch (IOException e) {
            throw new TerminateToolException(-1, "IO error while creating/extending POS Dictionary: " + e.Message, e);
        }
    }

    POSModel model;
    try {
        model = POSTaggerME.train(@params.Lang, sampleStream, mlParams, postaggerFactory);
    } catch (IOException e) {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    } finally {
        try {
            sampleStream.close();
        } catch (IOException) {
            // sorry that this can fail
        }
    }

    CmdLineUtil.writeModel("pos tagger", modelOutFile, model);
}