Esempio n. 1
0
        /// <summary>
        /// Loads the "en-chunker.bin" model, chunks one POS-tagged sample sentence and
        /// verifies that one chunk tag and one probability come back for each of the
        /// 28 input tokens.
        /// </summary>
        public void Chunker()
        {
            using var modelStream = new java.io.FileInputStream(GetModel("en-chunker.bin"));

            var chunkerModel = new opennlp.tools.chunker.ChunkerModel(modelStream);
            var chunkerMe    = new opennlp.tools.chunker.ChunkerME(chunkerModel);

            // Sample sentence, already tokenized (28 tokens).
            string[] tokens =
            {
                "Rockwell", "International", "Corp.", "'s", "Tulsa", "unit", "said", "it", "signed",
                "a", "tentative", "agreement", "extending", "its", "contract", "with", "Boeing", "Co.",
                "to", "provide", "structural", "parts", "for", "Boeing", "'s", "747", "jetliners", "."
            };

            // Part-of-speech tag for each token above, in the same order.
            string[] posTags =
            {
                "NNP", "NNP", "NNP", "POS", "NNP", "NN", "VBD", "PRP", "VBD", "DT", "JJ", "NN", "VBG", "PRP$",
                "NN", "IN", "NNP", "NNP", "TO", "VB", "JJ", "NNS", "IN", "NNP", "POS", "CD", "NNS", "."
            };

            // One chunk tag is expected per input token.
            var chunkTags = chunkerMe.chunk(tokens, posTags);
            System.Console.WriteLine(string.Join(";", chunkTags));
            Assert.AreEqual(28, chunkTags.Length);

            // The probabilities of the chunking just performed, again one per token.
            var chunkProbs = chunkerMe.probs();
            System.Console.WriteLine(string.Join(";", chunkProbs));
            Assert.AreEqual(28, chunkProbs.Length);
        }
Esempio n. 2
0
        /// <summary>
        /// Trains a chunker model from <c>sampleStream</c> and writes it to the model
        /// file named by the tool parameters.
        /// </summary>
        /// <param name="format">Forwarded unchanged to the base implementation.</param>
        /// <param name="args">Forwarded unchanged to the base implementation.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code -1 when training fails with an <c>IOException</c>.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            // Prefer training parameters loaded from the params file; fall back to
            // parameters built from the iterations/cutoff options when none are loaded.
            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            File modelOutFile = @params.Model;

            // This tool produces a chunker model; the label previously read
            // "sentence detector model", a copy-paste leftover from another tool.
            CmdLineUtil.checkOutputFile("chunker model", modelOutFile);

            ChunkerModel model;

            try
            {
                ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
                model = ChunkerME.train(@params.Lang, sampleStream, mlParams, chunkerFactory);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // Deliberately ignored: closing is best effort, and a failure here
                    // must not mask the training result or the training error.
                }
            }

            CmdLineUtil.writeModel("chunker", modelOutFile, model);
        }