/// <summary>
/// Cross-validates a chunker over the sample stream and prints either the overall
/// F-measure or, when the detailed option is set, the detailed F-measure listener output.
/// </summary>
/// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
/// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
/// <exception cref="TerminateToolException">
/// Thrown with code -1 when an <c>IOException</c> occurs while evaluating.
/// </exception>
public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the iterations/cutoff options.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            // List<T> is used because LinkedList<T> does not implement IList<T>;
            // List<T> also provides ToArray() without needing LINQ.
            List<EvaluationMonitor<ChunkSample>> listeners = new List<EvaluationMonitor<ChunkSample>>();
            ChunkerDetailedFMeasureListener detailedFMeasureListener = null;

            if (@params.Misclassified.Value)
            {
                listeners.Add(new ChunkEvaluationErrorListener());
            }
            if (@params.DetailedF.Value)
            {
                // Keep a reference so the detailed report can be printed after evaluation.
                detailedFMeasureListener = new ChunkerDetailedFMeasureListener();
                listeners.Add(detailedFMeasureListener);
            }

            ChunkerCrossValidator validator;

            try
            {
                ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);

                validator = new ChunkerCrossValidator(@params.Lang, mlParams, chunkerFactory, listeners.ToArray());
                validator.evaluate(sampleStream, @params.Folds.Value);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // Best-effort close: a failure here is deliberately ignored.
                }
            }

            if (detailedFMeasureListener == null)
            {
                FMeasure result = validator.FMeasure;
                Console.WriteLine(result.ToString());
            }
            else
            {
                Console.WriteLine(detailedFMeasureListener.ToString());
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Trains a sentence detector model from the sample stream and writes it to the
        /// configured model file.
        /// </summary>
        /// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code 1 when the loaded parameters request sequence training, and with
        /// code -1 when an <c>IOException</c> occurs during training.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);

            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the iterations/cutoff options.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }
            else if (TrainUtil.isSequenceTraining(mlParams.Settings))
            {
                throw new TerminateToolException(1, "Sequence training is not supported!");
            }

            Jfile targetFile = @params.Model;
            CmdLineUtil.checkOutputFile("sentence detector model", targetFile);

            // Custom end-of-sentence characters; stays null when the option is not set.
            char[] eosCharacters = @params.EosChars != null ? @params.EosChars.ToCharArray() : null;

            SentenceModel trainedModel;

            try
            {
                Dictionary abbreviations = loadDict(@params.AbbDict);
                SentenceDetectorFactory factory = SentenceDetectorFactory.create(@params.Factory, @params.Lang, true, abbreviations, eosCharacters);
                trainedModel = SentenceDetectorME.train(@params.Lang, sampleStream, factory, mlParams);
            }
            catch (IOException ioFailure)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + ioFailure.Message, ioFailure);
            }
            finally
            {
                // Best-effort close: a failure here is deliberately ignored.
                try { sampleStream.close(); }
                catch (IOException) { }
            }

            CmdLineUtil.writeModel("sentence detector", targetFile, trainedModel);
        }
        /// <summary>
        /// Trains a tokenizer model from the sample stream and writes it to the configured
        /// model file.
        /// </summary>
        /// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code 1 when the loaded parameters are invalid or request sequence
        /// training, and with code -1 when an <c>IOException</c> occurs during training.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);

            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the iterations/cutoff options.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }
            else
            {
                // Loaded parameters must be valid and must not ask for sequence training.
                if (!TrainUtil.isValid(mlParams.Settings))
                {
                    throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
                }

                if (TrainUtil.isSequenceTraining(mlParams.Settings))
                {
                    throw new TerminateToolException(1, "Sequence training is not supported!");
                }
            }

            File targetFile = @params.Model;
            CmdLineUtil.checkOutputFile("tokenizer model", targetFile);

            TokenizerModel trainedModel;

            try
            {
                Dictionary abbreviations = loadDict(@params.AbbDict);
                TokenizerFactory factory = TokenizerFactory.create(@params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);

                trainedModel = opennlp.tools.tokenize.TokenizerME.train(sampleStream, factory, mlParams);
            }
            catch (IOException ioFailure)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + ioFailure.Message, ioFailure);
            }
            finally
            {
                // Best-effort close: a failure here is deliberately ignored.
                try { sampleStream.close(); }
                catch (IOException) { }
            }

            CmdLineUtil.writeModel("tokenizer", targetFile, trainedModel);
        }
        /// <summary>
        /// Cross-validates a sentence detector over the sample stream and prints the
        /// resulting F-measure.
        /// </summary>
        /// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code -1 when an <c>IOException</c> occurs while evaluating.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the iterations/cutoff options.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            // Only attach an error listener when misclassified output was requested.
            SentenceDetectorEvaluationMonitor monitor =
                @params.Misclassified.Value ? new SentenceEvaluationErrorListener() : null;

            // Custom end-of-sentence characters; stays null when the option is not set.
            char[] eosCharacters = @params.EosChars != null ? @params.EosChars.ToCharArray() : null;

            SDCrossValidator crossValidator;

            try
            {
                Dictionary abbDict = SentenceDetectorTrainerTool.loadDict(@params.AbbDict);
                SentenceDetectorFactory factory = SentenceDetectorFactory.create(@params.Factory, @params.Lang, true, abbDict, eosCharacters);

                crossValidator = new SDCrossValidator(@params.Lang, mlParams, factory, monitor);
                crossValidator.evaluate(sampleStream, @params.Folds.Value);
            }
            catch (IOException ioFailure)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + ioFailure.Message, ioFailure);
            }
            finally
            {
                // Best-effort close: a failure here is deliberately ignored.
                try { sampleStream.close(); }
                catch (IOException) { }
            }

            Console.WriteLine(crossValidator.FMeasure.ToString());
        }
Esempio n. 5
0
        /// <summary>
        /// Trains a name finder model from the sample stream (optionally filtered to the
        /// requested name types) and writes it to the configured model file.
        /// </summary>
        /// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code -1 when an <c>IOException</c> occurs during training.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the iterations/cutoff options.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            File targetFile = @params.Model;

            sbyte[] featureGenBytes = openFeatureGeneratorBytes(@params.Featuregen);

            // TODO: Support Custom resources:
            //       Must be loaded into memory, or written to tmp file until descriptor
            //       is loaded which defines parses when model is loaded
            IDictionary<string, object> extraResources = loadResources(@params.Resources);

            CmdLineUtil.checkOutputFile("name finder model", targetFile);

            if (@params.NameTypes != null)
            {
                // Wrap the stream in a filter built from the comma-separated type list.
                sampleStream = new NameSampleTypeFilter(@params.NameTypes.Split(",", true), sampleStream);
            }

            TokenNameFinderModel trainedModel;

            try
            {
                trainedModel = opennlp.tools.namefind.NameFinderME.train(@params.Lang, @params.Type, sampleStream, mlParams, featureGenBytes, extraResources);
            }
            catch (IOException ioFailure)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + ioFailure.Message, ioFailure);
            }
            finally
            {
                // Best-effort close: a failure here is deliberately ignored.
                try { sampleStream.close(); }
                catch (IOException) { }
            }

            CmdLineUtil.writeModel("name finder", targetFile, trainedModel);
        }
        /// <summary>
        /// Cross-validates a tokenizer over the sample stream and prints the resulting
        /// F-measure.
        /// </summary>
        /// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code -1 when an <c>IOException</c> occurs while evaluating.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the iterations/cutoff options.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            // Only attach an error listener when misclassified output was requested.
            TokenizerEvaluationMonitor monitor =
                @params.Misclassified.Value ? new TokenEvaluationErrorListener() : null;

            TokenizerCrossValidator crossValidator;

            try
            {
                Dictionary abbreviations = TokenizerTrainerTool.loadDict(@params.AbbDict);
                TokenizerFactory factory = TokenizerFactory.create(@params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);

                crossValidator = new TokenizerCrossValidator(mlParams, factory, monitor);
                crossValidator.evaluate(sampleStream, @params.Folds.Value);
            }
            catch (IOException ioFailure)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + ioFailure.Message, ioFailure);
            }
            finally
            {
                // Best-effort close: a failure here is deliberately ignored.
                try { sampleStream.close(); }
                catch (IOException) { }
            }

            Console.WriteLine(crossValidator.FMeasure.ToString());
        }
Esempio n. 7
0
        /// <summary>
        /// Trains a chunker model from the sample stream and writes it to the configured
        /// model file.
        /// </summary>
        /// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code -1 when an <c>IOException</c> occurs during training.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the iterations/cutoff options.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            File modelOutFile = @params.Model;

            // The label names the chunker model so it matches the "chunker" label used
            // when the model is written below.
            CmdLineUtil.checkOutputFile("chunker model", modelOutFile);

            ChunkerModel model;

            try
            {
                ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
                model = ChunkerME.train(@params.Lang, sampleStream, mlParams, chunkerFactory);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // Best-effort close: a failure here is deliberately ignored.
                }
            }

            CmdLineUtil.writeModel("chunker", modelOutFile, model);
        }
Esempio n. 8
0
        /// <summary>
        /// Cross-validates a POS tagger over the sample stream, optionally writes a
        /// fine-grained report to the configured report file, and prints the word accuracy.
        /// </summary>
        /// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code -1 when the report file cannot be opened or when an
        /// <c>IOException</c> occurs while evaluating.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the iterations/cutoff options.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            POSTaggerEvaluationMonitor misclassifiedListener = null;

            if (@params.Misclassified.Value)
            {
                misclassifiedListener = new POSEvaluationErrorListener();
            }

            POSTaggerFineGrainedReportListener reportListener = null;
            Jfile reportFile = @params.ReportOutputFile;
            OutputStream reportOutputStream = null;

            if (reportFile != null)
            {
                CmdLineUtil.checkOutputFile("Report Output File", reportFile);
                try
                {
                    reportOutputStream = new FileOutputStream(reportFile);
                    reportListener = new POSTaggerFineGrainedReportListener(reportOutputStream);
                }
                catch (FileNotFoundException e)
                {
                    // Keep the original exception as the cause, like the other handlers here.
                    throw new TerminateToolException(-1, "IO error while creating POS Tagger fine-grained report file: " + e.Message, e);
                }
            }

            POSTaggerCrossValidator validator;

            try
            {
                try
                {
                    validator = new POSTaggerCrossValidator(@params.Lang, mlParams, @params.Dict, @params.Ngram, @params.TagDictCutoff, @params.Factory, misclassifiedListener, reportListener);

                    validator.evaluate(sampleStream, @params.Folds.Value);
                }
                catch (IOException e)
                {
                    throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
                }
                finally
                {
                    try
                    {
                        sampleStream.close();
                    }
                    catch (IOException)
                    {
                        // Best-effort close: a failure here is deliberately ignored.
                    }
                }

                Console.WriteLine("done");

                if (reportListener != null)
                {
                    Console.WriteLine("Writing fine-grained report to " + @params.ReportOutputFile.AbsolutePath);
                    reportListener.writeReport();
                }
            }
            finally
            {
                // Close the report stream on every path, including failed evaluation or
                // failed report writing, so the file handle is not leaked.
                if (reportOutputStream != null)
                {
                    try
                    {
                        reportOutputStream.close();
                    }
                    catch (IOException)
                    {
                        // nothing to do
                    }
                }
            }

            Console.WriteLine();

            Console.WriteLine("Accuracy: " + validator.WordAccuracy);
        }
Esempio n. 9
0
        // TODO: Add param to train tree insert parser
        /// <summary>
        /// Trains a parser model (chunking or tree-insert, depending on the parser type
        /// option) from the sample stream and writes it to the configured model file.
        /// </summary>
        /// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code 1 when any of the build/check/attach/tagger/chunker parameter
        /// groups is invalid, and with code -1 when an <c>IOException</c> occurs while
        /// reading the head rules or training.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, true);

            if (mlParams != null)
            {
                // Each named settings group is validated separately.
                if (!TrainUtil.isValid(mlParams.getSettings("build")))
                {
                    throw new TerminateToolException(1, "Build training parameters are invalid!");
                }

                if (!TrainUtil.isValid(mlParams.getSettings("check")))
                {
                    throw new TerminateToolException(1, "Check training parameters are invalid!");
                }

                if (!TrainUtil.isValid(mlParams.getSettings("attach")))
                {
                    throw new TerminateToolException(1, "Attach training parameters are invalid!");
                }

                if (!TrainUtil.isValid(mlParams.getSettings("tagger")))
                {
                    throw new TerminateToolException(1, "Tagger training parameters are invalid!");
                }

                if (!TrainUtil.isValid(mlParams.getSettings("chunker")))
                {
                    throw new TerminateToolException(1, "Chunker training parameters are invalid!");
                }
            }

            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the iterations/cutoff options.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            Jfile modelOutFile = @params.Model;

            CmdLineUtil.checkOutputFile("parser model", modelOutFile);

            ParserModel model;

            // Held outside the try so the head rules file can be released in finally.
            FileInputStream headRulesStream = null;

            try
            {
                // TODO hard-coded language reference
                headRulesStream = new FileInputStream(@params.HeadRules);
                HeadRules rules = new opennlp.tools.parser.lang.en.HeadRules(new InputStreamReader(headRulesStream, @params.Encoding));

                var type = parseParserType(@params.ParserType);
                if (@params.Fun.Value)
                {
                    Parse.useFunctionTags(true);
                }

                if (ParserType.CHUNKING == type)
                {
                    model = Parser.train(@params.Lang, sampleStream, rules, mlParams);
                }
                else if (ParserType.TREEINSERT == type)
                {
                    model = opennlp.tools.parser.treeinsert.Parser.train(@params.Lang, sampleStream, rules, mlParams);
                }
                else
                {
                    throw new IllegalStateException();
                }
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // Best-effort close: a failure here is deliberately ignored.
                }

                // Closed only after training has finished, so the rules are never read
                // from an already-closed stream.
                if (headRulesStream != null)
                {
                    try
                    {
                        headRulesStream.close();
                    }
                    catch (IOException)
                    {
                        // Best-effort close, same policy as the sample stream.
                    }
                }
            }

            CmdLineUtil.writeModel("parser", modelOutFile, model);
        }
Esempio n. 10
0
        /// <summary>
        /// Trains a POS tagger model from the sample stream and writes it to the configured
        /// model file. Depending on the options, an n-gram dictionary is built and a tag
        /// dictionary is loaded and/or populated from the samples before training.
        /// </summary>
        /// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code 1 when the loaded parameters are invalid, and with code -1 when
        /// the factory cannot be created or an <c>IOException</c> occurs in any stage.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// Thrown when a tag dictionary cutoff is given but the factory's tag dictionary is
        /// not a <c>MutableTagDictionary</c>.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, true);
            if (mlParams != null && !TrainUtil.isValid(mlParams.Settings))
            {
                throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
            }

            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the command-line options,
                // including the algorithm derived from the model type option.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
                mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(@params.Type).ToString());
            }

            File modelOutFile = @params.Model;

            CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile);

            POSModel model;

            // One outer try/finally covers every stage that reads the sample stream, so the
            // stream is closed even when a stage before the final training call fails.
            try
            {
                Dictionary ngramDict = null;

                int? ngramCutoff = @params.Ngram;

                if (ngramCutoff != null)
                {
                    Console.Error.Write("Building ngram dictionary ... ");
                    try
                    {
                        ngramDict = POSTaggerME.buildNGramDictionary(sampleStream, ngramCutoff.Value);
                        // The stream is consumed again below (presumably reset() rewinds it).
                        sampleStream.reset();
                    }
                    catch (IOException e)
                    {
                        throw new TerminateToolException(-1, "IO error while building NGram Dictionary: " + e.Message, e);
                    }
                    Console.Error.WriteLine("done");
                }

                POSTaggerFactory postaggerFactory = null;

                try
                {
                    postaggerFactory = POSTaggerFactory.create(@params.Factory, ngramDict, null);
                }
                catch (InvalidFormatException e)
                {
                    throw new TerminateToolException(-1, e.Message, e);
                }

                if (@params.Dict != null)
                {
                    try
                    {
                        postaggerFactory.TagDictionary = postaggerFactory.createTagDictionary(@params.Dict);
                    }
                    catch (IOException e)
                    {
                        throw new TerminateToolException(-1, "IO error while loading POS Dictionary: " + e.Message, e);
                    }
                }

                if (@params.TagDictCutoff != null)
                {
                    try
                    {
                        TagDictionary dict = postaggerFactory.TagDictionary;
                        if (dict == null)
                        {
                            // No dictionary set yet: start from an empty one.
                            dict = postaggerFactory.createEmptyTagDictionary();
                            postaggerFactory.TagDictionary = dict;
                        }

                        MutableTagDictionary mutableDict = dict as MutableTagDictionary;
                        if (mutableDict == null)
                        {
                            throw new System.ArgumentException("Can't extend a POSDictionary that does not implement MutableTagDictionary.");
                        }

                        POSTaggerME.populatePOSDictionary(sampleStream, mutableDict, @params.TagDictCutoff.Value);
                        // The stream is consumed again by training (presumably reset() rewinds it).
                        sampleStream.reset();
                    }
                    catch (IOException e)
                    {
                        throw new TerminateToolException(-1, "IO error while creating/extending POS Dictionary: " + e.Message, e);
                    }
                }

                try
                {
                    model = POSTaggerME.train(@params.Lang, sampleStream, mlParams, postaggerFactory);
                }
                catch (IOException e)
                {
                    throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
                }
            }
            finally
            {
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // Best-effort close: a failure here is deliberately ignored.
                }
            }

            CmdLineUtil.writeModel("pos tagger", modelOutFile, model);
        }
Esempio n. 11
0
        /// <summary>
        /// Cross-validates a name finder over the sample stream and prints either the
        /// overall F-measure or, when the detailed option is set, the detailed F-measure
        /// listener output.
        /// </summary>
        /// <param name="format">Forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code -1 when an <c>IOException</c> occurs while evaluating.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                // Nothing was loaded: build the parameters from the iterations/cutoff options.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            sbyte[] featureGeneratorBytes = TokenNameFinderTrainerTool.openFeatureGeneratorBytes(@params.Featuregen);

            IDictionary<string, object> resources = TokenNameFinderTrainerTool.loadResources(@params.Resources);

            // List<T> is used because LinkedList<T> does not implement IList<T>;
            // List<T> also provides ToArray() without needing LINQ.
            List<EvaluationMonitor<NameSample>> listeners = new List<EvaluationMonitor<NameSample>>();

            if (@params.Misclassified.Value)
            {
                listeners.Add(new NameEvaluationErrorListener());
            }
            TokenNameFinderDetailedFMeasureListener detailedFListener = null;

            if (@params.DetailedF.Value)
            {
                // Keep a reference so the detailed report can be printed after evaluation.
                detailedFListener = new TokenNameFinderDetailedFMeasureListener();
                listeners.Add(detailedFListener);
            }

            TokenNameFinderCrossValidator validator;

            try
            {
                validator = new TokenNameFinderCrossValidator(@params.Lang, @params.Type, mlParams, featureGeneratorBytes, resources, listeners.ToArray());
                validator.evaluate(sampleStream, @params.Folds.Value);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // Best-effort close: a failure here is deliberately ignored.
                }
            }

            Console.WriteLine("done");

            Console.WriteLine();

            if (detailedFListener == null)
            {
                Console.WriteLine(validator.FMeasure);
            }
            else
            {
                Console.WriteLine(detailedFListener.ToString());
            }
        }