Esempio n. 1
0
        /// <summary>
        /// Trains a sentence detector model from <c>sampleStream</c> and writes it to the
        /// model file named by the parsed parameters.
        /// </summary>
        /// <param name="format">Passed through to the base implementation.</param>
        /// <param name="args">Passed through to the base implementation.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown when the loaded training parameters request sequence training, or when an
        /// IO error occurs while training.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);

            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from iterations and cutoff.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }
            else if (TrainUtil.isSequenceTraining(mlParams.Settings))
            {
                throw new TerminateToolException(1, "Sequence training is not supported!");
            }

            Jfile outputFile = @params.Model;
            CmdLineUtil.checkOutputFile("sentence detector model", outputFile);

            // Custom end-of-sentence characters are optional; null is handed on when absent.
            char[] eosChars = @params.EosChars != null ? @params.EosChars.ToCharArray() : null;

            SentenceModel trainedModel;
            try
            {
                Dictionary abbreviations = loadDict(@params.AbbDict);
                SentenceDetectorFactory detectorFactory = SentenceDetectorFactory.create(@params.Factory, @params.Lang, true, abbreviations, eosChars);
                trainedModel = SentenceDetectorME.train(@params.Lang, sampleStream, detectorFactory, mlParams);
            }
            catch (IOException ioError)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + ioError.Message, ioError);
            }
            finally
            {
                // The sample stream is closed whether or not training succeeded.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // A failing close is deliberately ignored.
                }
            }

            CmdLineUtil.writeModel("sentence detector", outputFile, trainedModel);
        }
        /// <summary>
        /// Trains a tokenizer model from <c>sampleStream</c> and writes it to the model file
        /// named by the parsed parameters.
        /// </summary>
        /// <param name="format">Passed through to the base implementation.</param>
        /// <param name="args">Passed through to the base implementation.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown when the loaded training parameters are invalid or request sequence
        /// training, or when an IO error occurs while training.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);

            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from iterations and cutoff.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }
            else
            {
                // Loaded parameters are checked for validity first, then for sequence training.
                if (!TrainUtil.isValid(mlParams.Settings))
                {
                    throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
                }

                if (TrainUtil.isSequenceTraining(mlParams.Settings))
                {
                    throw new TerminateToolException(1, "Sequence training is not supported!");
                }
            }

            File outputFile = @params.Model;
            CmdLineUtil.checkOutputFile("tokenizer model", outputFile);

            TokenizerModel trainedModel;
            try
            {
                Dictionary abbreviations = loadDict(@params.AbbDict);
                TokenizerFactory tokenizerFactory = TokenizerFactory.create(@params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);

                trainedModel = opennlp.tools.tokenize.TokenizerME.train(sampleStream, tokenizerFactory, mlParams);
            }
            catch (IOException ioError)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + ioError.Message, ioError);
            }
            finally
            {
                // The sample stream is closed whether or not training succeeded.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // A failing close is deliberately ignored.
                }
            }

            CmdLineUtil.writeModel("tokenizer", outputFile, trainedModel);
        }
Esempio n. 3
0
        /// <summary>
        /// Trains a name finder model from <c>sampleStream</c> and writes it to the model
        /// file named by the parsed parameters.
        /// </summary>
        /// <param name="format">Passed through to the base implementation.</param>
        /// <param name="args">Passed through to the base implementation.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown when an IO error occurs while training.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from iterations and cutoff.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            File targetFile = @params.Model;

            sbyte[] generatorDescriptor = openFeatureGeneratorBytes(@params.Featuregen);

            // TODO: Support Custom resources:
            //       Must be loaded into memory, or written to tmp file until descriptor
            //       is loaded which defines parses when model is loaded

            IDictionary <string, object> extraResources = loadResources(@params.Resources);

            CmdLineUtil.checkOutputFile("name finder model", targetFile);

            // When name types are given, the sample stream is wrapped in a type filter.
            if (@params.NameTypes != null)
            {
                string[] acceptedTypes = @params.NameTypes.Split(",", true);
                sampleStream = new NameSampleTypeFilter(acceptedTypes, sampleStream);
            }

            TokenNameFinderModel trainedModel;
            try
            {
                trainedModel = opennlp.tools.namefind.NameFinderME.train(@params.Lang, @params.Type, sampleStream, mlParams, generatorDescriptor, extraResources);
            }
            catch (IOException ioError)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + ioError.Message, ioError);
            }
            finally
            {
                // The sample stream is closed whether or not training succeeded.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // A failing close is deliberately ignored.
                }
            }

            CmdLineUtil.writeModel("name finder", targetFile, trainedModel);
        }
        /// <summary>
        /// Loads a parser model and a POS tagger model, replaces the parser model's tagger
        /// model, and writes the updated parser model back to the parser model file.
        /// </summary>
        /// <param name="args">
        /// Exactly two entries are expected: the parser model file path, then the tagger
        /// model file path. With any other count, <c>Help</c> is printed and nothing is loaded.
        /// </param>
        public override void run(string[] args)
        {
            if (args.Length != 2)
            {
                Console.WriteLine(Help);
                return;
            }

            File parserModelFile = new File(args[0]);
            ParserModel currentParserModel = new ParserModelLoader().load(parserModelFile);

            File taggerModelFile = new File(args[1]);
            POSModel replacementTagger = new POSModelLoader().load(taggerModelFile);

            ParserModel mergedModel = currentParserModel.updateTaggerModel(replacementTagger);

            // The result is written to the file the parser model was read from.
            CmdLineUtil.writeModel("parser", parserModelFile, mergedModel);
        }
Esempio n. 5
0
        /// <summary>
        /// Trains a chunker model from <c>sampleStream</c> and writes it to the model file
        /// named by the parsed parameters.
        /// </summary>
        /// <param name="format">Passed through to the base implementation.</param>
        /// <param name="args">Passed through to the base implementation.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown when an IO error occurs while training.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from iterations and cutoff.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            File modelOutFile = @params.Model;

            // The label names the model being written; it previously read
            // "sentence detector model", copied from another trainer.
            CmdLineUtil.checkOutputFile("chunker model", modelOutFile);

            ChunkerModel model;

            try
            {
                ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
                model = ChunkerME.train(@params.Lang, sampleStream, mlParams, chunkerFactory);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // The sample stream is closed whether or not training succeeded.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // A failing close is deliberately ignored.
                }
            }

            CmdLineUtil.writeModel("chunker", modelOutFile, model);
        }
Esempio n. 6
0
        /// <summary>
        /// Loads an existing parser model, retrains it via <c>trainAndUpdate</c> on a sample
        /// stream built for the given format, and writes the result back to the same model file.
        /// </summary>
        /// <param name="format">Name used to look up the stream factory.</param>
        /// <param name="args">
        /// Command line arguments; filtered once for the updater parameters and once for the
        /// stream factory parameters.
        /// </param>
        /// <exception cref="TerminateToolException">
        /// Thrown when an IO error occurs while updating the model.
        /// </exception>
        public sealed override void run(string format, string[] args)
        {
            var updaterArgs = ArgumentParser.filter(args, typeof(ModelUpdaterParams));
            ModelUpdaterParams updaterParams = validateAndParseParams <ModelUpdaterParams>(updaterArgs, typeof(ModelUpdaterParams));

            // Load model to be updated
            Jfile       parserModelFile = updaterParams.Model;
            ParserModel existingModel   = new ParserModelLoader().load(parserModelFile);

            ObjectStreamFactory streamFactory = getStreamFactory(format);

            string[] factoryArgs = ArgumentParser.filter(args, streamFactory.Parameters);
            validateFactoryArgs(streamFactory, factoryArgs);
            ObjectStream <Parse> samples = streamFactory.create <Parse>(factoryArgs);

            ParserModel retrainedModel;
            try
            {
                retrainedModel = trainAndUpdate(existingModel, samples, updaterParams);
            }
            catch (IOException ioError)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + ioError.Message, ioError);
            }
            finally
            {
                // The sample stream is closed whether or not the update succeeded.
                try
                {
                    samples.close();
                }
                catch (IOException)
                {
                    // A failing close is deliberately ignored.
                }
            }

            CmdLineUtil.writeModel("parser", parserModelFile, retrainedModel);
        }
Esempio n. 7
0
        // TODO: Add param to train tree insert parser
        /// <summary>
        /// Trains a parser model (chunking or tree-insert, selected by the parser type
        /// parameter) from <c>sampleStream</c> and writes it to the model file named by the
        /// parsed parameters.
        /// </summary>
        /// <param name="format">Passed through to the base implementation.</param>
        /// <param name="args">Passed through to the base implementation.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown when any of the "build", "check", "attach", "tagger" or "chunker" settings of
        /// the loaded training parameters is invalid, or when an IO error occurs while reading
        /// the head rules or training.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, true);

            if (mlParams != null)
            {
                if (!TrainUtil.isValid(mlParams.getSettings("build")))
                {
                    throw new TerminateToolException(1, "Build training parameters are invalid!");
                }

                if (!TrainUtil.isValid(mlParams.getSettings("check")))
                {
                    throw new TerminateToolException(1, "Check training parameters are invalid!");
                }

                if (!TrainUtil.isValid(mlParams.getSettings("attach")))
                {
                    throw new TerminateToolException(1, "Attach training parameters are invalid!");
                }

                if (!TrainUtil.isValid(mlParams.getSettings("tagger")))
                {
                    throw new TerminateToolException(1, "Tagger training parameters are invalid!");
                }

                if (!TrainUtil.isValid(mlParams.getSettings("chunker")))
                {
                    throw new TerminateToolException(1, "Chunker training parameters are invalid!");
                }
            }

            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from iterations and cutoff.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            Jfile modelOutFile = @params.Model;

            CmdLineUtil.checkOutputFile("parser model", modelOutFile);

            ParserModel model;

            try
            {
                // TODO hard-coded language reference
                HeadRules rules;

                // The head rules file stream used to be left open. It is opened inside the
                // outer try so IO errors are still wrapped, and closed as soon as the rules
                // are built.
                // NOTE(review): assumes the HeadRules constructor reads the reader to the end
                // before returning - confirm against the HeadRules implementation.
                var headRulesIn = new FileInputStream(@params.HeadRules);
                try
                {
                    rules = new opennlp.tools.parser.lang.en.HeadRules(new InputStreamReader(headRulesIn, @params.Encoding));
                }
                finally
                {
                    try
                    {
                        headRulesIn.close();
                    }
                    catch (IOException)
                    {
                        // Best effort, same policy as for the sample stream below.
                    }
                }

                var type = parseParserType(@params.ParserType);
                if (@params.Fun.Value)
                {
                    Parse.useFunctionTags(true);
                }

                if (ParserType.CHUNKING == type)
                {
                    model = Parser.train(@params.Lang, sampleStream, rules, mlParams);
                }
                else if (ParserType.TREEINSERT == type)
                {
                    model = opennlp.tools.parser.treeinsert.Parser.train(@params.Lang, sampleStream, rules, mlParams);
                }
                else
                {
                    throw new IllegalStateException();
                }
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // The sample stream is closed whether or not training succeeded.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // sorry that this can fail
                }
            }

            CmdLineUtil.writeModel("parser", modelOutFile, model);
        }
        /// <summary>
        /// Trains a POS tagger model from <c>sampleStream</c> and writes it to the model file
        /// named by the parsed parameters. Optionally builds an ngram dictionary and loads
        /// and/or populates a tag dictionary before training.
        /// </summary>
        /// <param name="format">Passed through to the base implementation.</param>
        /// <param name="args">Passed through to the base implementation.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown when the loaded training parameters are invalid, when the tagger factory
        /// cannot be created, or when an IO error occurs in any of the steps.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// Thrown when a tag dictionary cutoff is given but the factory's tag dictionary is
        /// not a <c>MutableTagDictionary</c>.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, true);
            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from iterations and cutoff, and
                // set the algorithm from the model type parameter.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
                mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(@params.Type).ToString());
            }
            else if (!TrainUtil.isValid(mlParams.Settings))
            {
                throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
            }

            File outputFile = @params.Model;
            CmdLineUtil.checkOutputFile("pos tagger model", outputFile);

            // Step 1 (optional): build an ngram dictionary, then rewind the sample stream.
            Dictionary ngramDictionary = null;
            int? cutoff = @params.Ngram;
            if (cutoff.HasValue)
            {
                Console.Error.Write("Building ngram dictionary ... ");
                try
                {
                    ngramDictionary = POSTaggerME.buildNGramDictionary(sampleStream, cutoff.Value);
                    sampleStream.reset();
                }
                catch (IOException ngramError)
                {
                    throw new TerminateToolException(-1, "IO error while building NGram Dictionary: " + ngramError.Message, ngramError);
                }
                Console.Error.WriteLine("done");
            }

            // Step 2: create the tagger factory.
            POSTaggerFactory taggerFactory;
            try
            {
                taggerFactory = POSTaggerFactory.create(@params.Factory, ngramDictionary, null);
            }
            catch (InvalidFormatException formatError)
            {
                throw new TerminateToolException(-1, formatError.Message, formatError);
            }

            // Step 3 (optional): load a tag dictionary from the given file.
            if (@params.Dict != null)
            {
                try
                {
                    taggerFactory.TagDictionary = taggerFactory.createTagDictionary(@params.Dict);
                }
                catch (IOException dictError)
                {
                    throw new TerminateToolException(-1, "IO error while loading POS Dictionary: " + dictError.Message, dictError);
                }
            }

            // Step 4 (optional): populate the tag dictionary from the samples, creating an
            // empty one first when the factory has none, then rewind the sample stream.
            if (@params.TagDictCutoff != null)
            {
                try
                {
                    TagDictionary tagDictionary = taggerFactory.TagDictionary;
                    if (tagDictionary == null)
                    {
                        tagDictionary = taggerFactory.createEmptyTagDictionary();
                        taggerFactory.TagDictionary = tagDictionary;
                    }

                    if (!(tagDictionary is MutableTagDictionary))
                    {
                        throw new System.ArgumentException("Can't extend a POSDictionary that does not implement MutableTagDictionary.");
                    }

                    POSTaggerME.populatePOSDictionary(sampleStream, (MutableTagDictionary)tagDictionary, @params.TagDictCutoff.Value);
                    sampleStream.reset();
                }
                catch (IOException populateError)
                {
                    throw new TerminateToolException(-1, "IO error while creating/extending POS Dictionary: " + populateError.Message, populateError);
                }
            }

            // Step 5: train, always closing the sample stream afterwards.
            POSModel trainedModel;
            try
            {
                trainedModel = POSTaggerME.train(@params.Lang, sampleStream, mlParams, taggerFactory);
            }
            catch (IOException trainError)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + trainError.Message, trainError);
            }
            finally
            {
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // A failing close is deliberately ignored.
                }
            }

            CmdLineUtil.writeModel("pos tagger", outputFile, trainedModel);
        }