/// <summary>
/// Trains a sentence detector model from the sample stream and writes it to the
/// model file named in the tool parameters.
/// </summary>
/// <param name="format">Forwarded unchanged to the base implementation.</param>
/// <param name="args">Forwarded unchanged to the base implementation.</param>
/// <exception cref="TerminateToolException">
/// Raised with code 1 when the loaded training parameters request sequence training,
/// and with code -1 when an <c>IOException</c> occurs while loading the dictionary or training.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // Nothing was loaded: build parameters from the iterations/cutoff options instead.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }
    else if (TrainUtil.isSequenceTraining(mlParams.Settings))
    {
        throw new TerminateToolException(1, "Sequence training is not supported!");
    }

    Jfile outputFile = @params.Model;
    CmdLineUtil.checkOutputFile("sentence detector model", outputFile);

    // The end-of-sentence characters are optional; null is handed to the factory when absent.
    char[] eosChars = @params.EosChars != null ? @params.EosChars.ToCharArray() : null;

    SentenceModel trainedModel;
    try
    {
        Dictionary abbreviations = loadDict(@params.AbbDict);
        SentenceDetectorFactory detectorFactory =
            SentenceDetectorFactory.create(@params.Factory, @params.Lang, true, abbreviations, eosChars);
        trainedModel = SentenceDetectorME.train(@params.Lang, sampleStream, detectorFactory, mlParams);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("sentence detector", outputFile, trainedModel);
}
/// <summary>
/// Trains a tokenizer model from the sample stream and writes it to the model file
/// named in the tool parameters.
/// </summary>
/// <param name="format">Forwarded unchanged to the base implementation.</param>
/// <param name="args">Forwarded unchanged to the base implementation.</param>
/// <exception cref="TerminateToolException">
/// Raised with code 1 when the loaded training parameters are invalid or request sequence
/// training, and with code -1 when an <c>IOException</c> occurs while loading the dictionary
/// or training.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // Nothing was loaded: build parameters from the iterations/cutoff options instead.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }
    else
    {
        if (!TrainUtil.isValid(mlParams.Settings))
        {
            throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
        }

        if (TrainUtil.isSequenceTraining(mlParams.Settings))
        {
            throw new TerminateToolException(1, "Sequence training is not supported!");
        }
    }

    File outputFile = @params.Model;
    CmdLineUtil.checkOutputFile("tokenizer model", outputFile);

    TokenizerModel trainedModel;
    try
    {
        Dictionary abbreviations = loadDict(@params.AbbDict);
        TokenizerFactory tokenizerFactory =
            TokenizerFactory.create(@params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);
        trainedModel = opennlp.tools.tokenize.TokenizerME.train(sampleStream, tokenizerFactory, mlParams);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("tokenizer", outputFile, trainedModel);
}
/// <summary>
/// Trains a name finder model from the sample stream and writes it to the model file
/// named in the tool parameters.
/// </summary>
/// <param name="format">Forwarded unchanged to the base implementation.</param>
/// <param name="args">Forwarded unchanged to the base implementation.</param>
/// <exception cref="TerminateToolException">
/// Raised with code -1 when an <c>IOException</c> occurs while training.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // Nothing was loaded: build parameters from the iterations/cutoff options instead.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    File outputFile = @params.Model;

    sbyte[] featureGeneratorBytes = openFeatureGeneratorBytes(@params.Featuregen);

    // TODO: Support Custom resources:
    //       Must be loaded into memory, or written to tmp file until descriptor
    //       is loaded which defines parses when model is loaded
    IDictionary<string, object> resources = loadResources(@params.Resources);

    CmdLineUtil.checkOutputFile("name finder model", outputFile);

    if (@params.NameTypes != null)
    {
        // Wrap the sample stream in a filter built from the comma-separated name types.
        string[] requestedTypes = @params.NameTypes.Split(",", true);
        sampleStream = new NameSampleTypeFilter(requestedTypes, sampleStream);
    }

    TokenNameFinderModel trainedModel;
    try
    {
        trainedModel = opennlp.tools.namefind.NameFinderME.train(
            @params.Lang, @params.Type, sampleStream, mlParams, featureGeneratorBytes, resources);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("name finder", outputFile, trainedModel);
}
/// <summary>
/// Replaces the tagger model inside a parser model and writes the updated parser model
/// back to the file the parser model was loaded from.
/// </summary>
/// <param name="args">
/// Exactly two entries are expected: the parser model file followed by the tagger model file.
/// With any other count the help text is printed and nothing else happens.
/// </param>
public override void run(string[] args)
{
    if (args.Length != 2)
    {
        Console.WriteLine(Help);
        return;
    }

    File parserModelFile = new File(args[0]);
    ParserModel currentParserModel = new ParserModelLoader().load(parserModelFile);

    File taggerModelFile = new File(args[1]);
    POSModel replacementTagger = new POSModelLoader().load(taggerModelFile);

    ParserModel mergedModel = currentParserModel.updateTaggerModel(replacementTagger);

    // The result is written to the same file the parser model was read from.
    CmdLineUtil.writeModel("parser", parserModelFile, mergedModel);
}
/// <summary>
/// Trains a chunker model from the sample stream and writes it to the model file
/// named in the tool parameters.
/// </summary>
/// <param name="format">Forwarded unchanged to the base implementation.</param>
/// <param name="args">Forwarded unchanged to the base implementation.</param>
/// <exception cref="TerminateToolException">
/// Raised with code -1 when an <c>IOException</c> occurs while training.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        // Nothing was loaded: build parameters from the iterations/cutoff options instead.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    File modelOutFile = @params.Model;

    // The label passed here describes the artifact being checked; this tool produces
    // a chunker model, so it must not carry the sentence detector's label.
    CmdLineUtil.checkOutputFile("chunker model", modelOutFile);

    ChunkerModel model;
    try
    {
        ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
        model = ChunkerME.train(@params.Lang, sampleStream, mlParams, chunkerFactory);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("chunker", modelOutFile, model);
}
/// <summary>
/// Loads an existing parser model, updates it via <c>trainAndUpdate</c> using samples read
/// in the given format, and writes the result back to the same model file.
/// </summary>
/// <param name="format">Used to look up the stream factory that creates the sample stream.</param>
/// <param name="args">
/// Command-line arguments; they are filtered once for <c>ModelUpdaterParams</c> and once for
/// the stream factory's own parameters.
/// </param>
/// <exception cref="TerminateToolException">
/// Raised with code -1 when an <c>IOException</c> occurs during <c>trainAndUpdate</c>.
/// </exception>
public sealed override void run(string format, string[] args)
{
    ModelUpdaterParams updaterParams = validateAndParseParams<ModelUpdaterParams>(
        ArgumentParser.filter(args, typeof(ModelUpdaterParams)),
        typeof(ModelUpdaterParams));

    // Load model to be updated
    Jfile parserModelFile = updaterParams.Model;
    ParserModel existingModel = new ParserModelLoader().load(parserModelFile);

    ObjectStreamFactory streamFactory = getStreamFactory(format);
    string[] factoryArgs = ArgumentParser.filter(args, streamFactory.Parameters);
    validateFactoryArgs(streamFactory, factoryArgs);
    ObjectStream<Parse> parseSamples = streamFactory.create<Parse>(factoryArgs);

    ParserModel refreshedModel;
    try
    {
        refreshedModel = trainAndUpdate(existingModel, parseSamples, updaterParams);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            parseSamples.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("parser", parserModelFile, refreshedModel);
}
// TODO: Add param to train tree insert parser
/// <summary>
/// Trains a parser model (chunking or tree-insert, depending on the parser type parameter)
/// from the sample stream and writes it to the model file named in the tool parameters.
/// </summary>
/// <param name="format">Forwarded unchanged to the base implementation.</param>
/// <param name="args">Forwarded unchanged to the base implementation.</param>
/// <exception cref="TerminateToolException">
/// Raised with code 1 when any of the "build", "check", "attach", "tagger" or "chunker"
/// settings of the loaded training parameters is invalid, and with code -1 when an
/// <c>IOException</c> occurs while reading the head rules or training.
/// </exception>
/// <exception cref="IllegalStateException">
/// Raised when the parsed parser type is neither CHUNKING nor TREEINSERT.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, true);
    if (mlParams == null)
    {
        // Nothing was loaded: build parameters from the iterations/cutoff options instead.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }
    else
    {
        // Each named settings group is validated separately, in this fixed order.
        if (!TrainUtil.isValid(mlParams.getSettings("build")))
        {
            throw new TerminateToolException(1, "Build training parameters are invalid!");
        }

        if (!TrainUtil.isValid(mlParams.getSettings("check")))
        {
            throw new TerminateToolException(1, "Check training parameters are invalid!");
        }

        if (!TrainUtil.isValid(mlParams.getSettings("attach")))
        {
            throw new TerminateToolException(1, "Attach training parameters are invalid!");
        }

        if (!TrainUtil.isValid(mlParams.getSettings("tagger")))
        {
            throw new TerminateToolException(1, "Tagger training parameters are invalid!");
        }

        if (!TrainUtil.isValid(mlParams.getSettings("chunker")))
        {
            throw new TerminateToolException(1, "Chunker training parameters are invalid!");
        }
    }

    Jfile outputFile = @params.Model;
    CmdLineUtil.checkOutputFile("parser model", outputFile);

    ParserModel trainedModel;
    try
    {
        // TODO hard-coded language reference
        FileInputStream headRulesInput = new FileInputStream(@params.HeadRules);
        InputStreamReader headRulesReader = new InputStreamReader(headRulesInput, @params.Encoding);
        HeadRules headRules = new opennlp.tools.parser.lang.en.HeadRules(headRulesReader);

        var parserType = parseParserType(@params.ParserType);

        if (@params.Fun.Value)
        {
            Parse.useFunctionTags(true);
        }

        if (ParserType.CHUNKING == parserType)
        {
            trainedModel = Parser.train(@params.Lang, sampleStream, headRules, mlParams);
        }
        else if (ParserType.TREEINSERT == parserType)
        {
            trainedModel = opennlp.tools.parser.treeinsert.Parser.train(@params.Lang, sampleStream, headRules, mlParams);
        }
        else
        {
            throw new IllegalStateException();
        }
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("parser", outputFile, trainedModel);
}
/// <summary>
/// Trains a POS tagger model from the sample stream and writes it to the model file named
/// in the tool parameters. Optionally builds an n-gram dictionary and loads and/or extends
/// a tag dictionary before training.
/// </summary>
/// <param name="format">Forwarded unchanged to the base implementation.</param>
/// <param name="args">Forwarded unchanged to the base implementation.</param>
/// <exception cref="TerminateToolException">
/// Raised with code 1 when the loaded training parameters are invalid, and with code -1 when
/// the tagger factory cannot be created or an <c>IOException</c> occurs while building the
/// n-gram dictionary, loading or extending the tag dictionary, or training.
/// </exception>
/// <exception cref="System.ArgumentException">
/// Raised when a tag dictionary cutoff is given but the factory's tag dictionary is not a
/// <c>MutableTagDictionary</c>.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, true);
    if (mlParams == null)
    {
        // Nothing was loaded: build parameters from the options and record the model type
        // as the algorithm.
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
        mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(@params.Type).ToString());
    }
    else if (!TrainUtil.isValid(mlParams.Settings))
    {
        throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
    }

    File outputFile = @params.Model;
    CmdLineUtil.checkOutputFile("pos tagger model", outputFile);

    Dictionary ngramDictionary = null;
    int? ngramCutoff = @params.Ngram;
    if (ngramCutoff.HasValue)
    {
        Console.Error.Write("Building ngram dictionary ... ");
        try
        {
            ngramDictionary = POSTaggerME.buildNGramDictionary(sampleStream, ngramCutoff.Value);
            // The stream is reset after this pass; presumably so later consumers can
            // read the samples again.
            sampleStream.reset();
        }
        catch (IOException e)
        {
            throw new TerminateToolException(-1, "IO error while building NGram Dictionary: " + e.Message, e);
        }
        Console.Error.WriteLine("done");
    }

    POSTaggerFactory taggerFactory = null;
    try
    {
        taggerFactory = POSTaggerFactory.create(@params.Factory, ngramDictionary, null);
    }
    catch (InvalidFormatException e)
    {
        throw new TerminateToolException(-1, e.Message, e);
    }

    if (@params.Dict != null)
    {
        try
        {
            taggerFactory.TagDictionary = taggerFactory.createTagDictionary(@params.Dict);
        }
        catch (IOException e)
        {
            throw new TerminateToolException(-1, "IO error while loading POS Dictionary: " + e.Message, e);
        }
    }

    if (@params.TagDictCutoff != null)
    {
        try
        {
            TagDictionary tagDictionary = taggerFactory.TagDictionary;
            if (tagDictionary == null)
            {
                // No dictionary was set on the factory: start from an empty one and register it.
                tagDictionary = taggerFactory.createEmptyTagDictionary();
                taggerFactory.TagDictionary = tagDictionary;
            }

            if (!(tagDictionary is MutableTagDictionary))
            {
                throw new System.ArgumentException("Can't extend a POSDictionary that does not implement MutableTagDictionary.");
            }

            POSTaggerME.populatePOSDictionary(sampleStream, (MutableTagDictionary)tagDictionary, @params.TagDictCutoff.Value);
            sampleStream.reset();
        }
        catch (IOException e)
        {
            throw new TerminateToolException(-1, "IO error while creating/extending POS Dictionary: " + e.Message, e);
        }
    }

    POSModel trainedModel;
    try
    {
        trainedModel = POSTaggerME.train(@params.Lang, sampleStream, mlParams, taggerFactory);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Closing is best-effort; a failure here is deliberately ignored.
        }
    }

    CmdLineUtil.writeModel("pos tagger", outputFile, trainedModel);
}