/// <summary>
/// Loads the "en-chunker.bin" model, chunks a 28-token sample sentence with
/// its part-of-speech tags, and asserts that the chunker yields 28 chunk tags
/// and 28 probabilities. Both result arrays are also echoed to the console.
/// </summary>
public void Chunker()
{
    // The sample sentence below has exactly this many tokens.
    const int expectedCount = 28;

    using (var modelStream = new java.io.FileInputStream(GetModel("en-chunker.bin")))
    {
        var chunkerModel = new opennlp.tools.chunker.ChunkerModel(modelStream);
        var chunkerMe = new opennlp.tools.chunker.ChunkerME(chunkerModel);

        string[] tokens =
        {
            "Rockwell", "International", "Corp.", "'s", "Tulsa", "unit", "said",
            "it", "signed", "a", "tentative", "agreement", "extending", "its",
            "contract", "with", "Boeing", "Co.", "to", "provide", "structural",
            "parts", "for", "Boeing", "'s", "747", "jetliners", "."
        };

        string[] posTags =
        {
            "NNP", "NNP", "NNP", "POS", "NNP", "NN", "VBD",
            "PRP", "VBD", "DT", "JJ", "NN", "VBG", "PRP$",
            "NN", "IN", "NNP", "NNP", "TO", "VB", "JJ",
            "NNS", "IN", "NNP", "POS", "CD", "NNS", "."
        };

        // Chunk first: probs() is queried only after chunk() has been called.
        var chunkTags = chunker_Chunk(chunkerMe, tokens, posTags);
        System.Console.WriteLine(string.Join(";", chunkTags));
        Assert.AreEqual(expectedCount, chunkTags.Length);

        var chunkProbs = chunkerMe.probs();
        System.Console.WriteLine(string.Join(";", chunkProbs));
        Assert.AreEqual(expectedCount, chunkProbs.Length);
    }

    // Thin local wrapper so the chunking call reads as a single named step.
    string[] chunker_Chunk(opennlp.tools.chunker.ChunkerME me, string[] words, string[] tags)
    {
        return me.chunk(words, tags);
    }
}
/// <summary>
/// Trains a chunker model from the sample stream and writes it to the
/// configured model file.
/// </summary>
/// <param name="format">Passed through unchanged to <c>base.run</c>.</param>
/// <param name="args">Passed through unchanged to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// Thrown with exit code -1 when an <c>IOException</c> occurs during training.
/// </exception>
public override void run(string format, string[] args)
{
    base.run(format, args);

    // Prefer parameters loaded from the params file; fall back to defaults
    // built from the iteration and cutoff settings when none were loaded.
    mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
    if (mlParams == null)
    {
        mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
    }

    File modelOutFile = @params.Model;

    // Label corrected from "sentence detector model": this tool produces a
    // chunker model, matching the "chunker" label used by writeModel below.
    CmdLineUtil.checkOutputFile("chunker model", modelOutFile);

    ChunkerModel model;
    try
    {
        ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
        model = ChunkerME.train(@params.Lang, sampleStream, mlParams, chunkerFactory);
    }
    catch (IOException e)
    {
        throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
    }
    finally
    {
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // Deliberate best-effort close: a failure here must not mask the
            // training outcome, so it is intentionally ignored.
        }
    }

    CmdLineUtil.writeModel("chunker", modelOutFile, model);
}