/// <summary>
        /// Creates a <c>BioNLP2004NameSampleStream</c> over the data file named in the arguments.
        /// Every entity type found in the <c>Types</c> parameter contributes its own flag to the
        /// bit mask handed to the stream.
        /// </summary>
        /// <param name="args">Command line arguments, parsed into <c>Parameters</c>.</param>
        /// <returns>The sample stream reading from the <c>Data</c> file.</returns>
        public override ObjectStream <T> create <T>(string[] args)
        {
            Parameters @params = ArgumentParser.parse(args, typeof(Parameters));

            int typesToGenerate = 0;

            // Each type is tested independently: the flags are OR-ed together, so an
            // "else if" chain would silently drop every requested type after the first match.
            if (@params.Types.Contains("DNA"))
            {
                typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_DNA_ENTITIES;
            }
            if (@params.Types.Contains("protein"))
            {
                typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_PROTEIN_ENTITIES;
            }
            if (@params.Types.Contains("cell_type"))
            {
                typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_CELLTYPE_ENTITIES;
            }
            if (@params.Types.Contains("cell_line"))
            {
                typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_CELLLINE_ENTITIES;
            }
            if (@params.Types.Contains("RNA"))
            {
                typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_RNA_ENTITIES;
            }

            return(new BioNLP2004NameSampleStream(CmdLineUtil.openInFile(@params.Data), typesToGenerate));
        }
        /// <summary>
        /// Reads lines from standard input, passes them through a <c>TokenizerStream</c> built on the
        /// <c>tokenizer</c> member, and writes every resulting line to standard output.
        /// A performance monitor counts the lines and prints its final result when the input ends.
        /// </summary>
        internal void process()
        {
            ObjectStream <string> rawLines  = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));
            ObjectStream <string> tokenized = new WhitespaceTokenStream(new TokenizerStream(tokenizer, rawLines));

            PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returns null once the stream is exhausted.
                for (string line = tokenized.read(); line != null; line = tokenized.read())
                {
                    Console.WriteLine(line);
                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
示例#3
0
        /// <summary>
        /// Loads the content of a feature generator descriptor file into memory.
        /// </summary>
        /// <param name="featureGenDescriptorFile">The descriptor file, or <c>null</c> when none was given.</param>
        /// <returns>The bytes read via <c>ModelUtil.read</c>, or <c>null</c> when no file was given.</returns>
        /// <exception cref="TerminateToolException">Thrown with code -1 when reading fails with an <c>IOException</c>.</exception>
        internal static sbyte[] openFeatureGeneratorBytes(Jfile featureGenDescriptorFile)
        {
            // Without a descriptor file there is nothing to load.
            if (featureGenDescriptorFile == null)
            {
                return null;
            }

            InputStream descriptorIn = CmdLineUtil.openInFile(featureGenDescriptorFile);

            try
            {
                return ModelUtil.read(descriptorIn);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // Closing is best effort; a failure here is deliberately ignored.
                try
                {
                    descriptorIn.close();
                }
                catch (IOException)
                {
                }
            }
        }
        /// <summary>
        /// Cross validates the chunker on <c>sampleStream</c> and prints the outcome to standard output:
        /// the detailed F-measure report when it was requested, otherwise the validator's F-measure.
        /// </summary>
        /// <param name="format">Format name, forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Command line arguments, forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">Thrown with code -1 when evaluation fails with an <c>IOException</c>.</exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            // Fall back to parameters built from the iteration and cutoff arguments
            // when no training parameters could be loaded.
            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            // List<T> rather than LinkedList<T>: LinkedList<T> does not implement IList<T>,
            // and List<T> offers Add and ToArray directly.
            List <EvaluationMonitor <ChunkSample> > listeners = new List <EvaluationMonitor <ChunkSample> >();
            ChunkerDetailedFMeasureListener          detailedFMeasureListener = null;

            if (@params.Misclassified.Value)
            {
                listeners.Add(new ChunkEvaluationErrorListener());
            }
            if (@params.DetailedF.Value)
            {
                detailedFMeasureListener = new ChunkerDetailedFMeasureListener();
                listeners.Add(detailedFMeasureListener);
            }

            ChunkerCrossValidator validator;

            try
            {
                ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);

                validator = new ChunkerCrossValidator(@params.Lang, mlParams, chunkerFactory, listeners.ToArray());
                validator.evaluate(sampleStream, @params.Folds.Value);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // The sample stream is always closed; a failure while closing is ignored.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // sorry that this can fail
                }
            }

            if (detailedFMeasureListener == null)
            {
                FMeasure result = validator.FMeasure;
                Console.WriteLine(result.ToString());
            }
            else
            {
                Console.WriteLine(detailedFMeasureListener.ToString());
            }
        }
示例#5
0
        /// <summary>
        /// Trains a sentence detector model from <c>sampleStream</c> and writes it to the
        /// file given by the <c>Model</c> parameter.
        /// </summary>
        /// <param name="format">Format name, forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Command line arguments, forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code 1 when the loaded parameters request sequence training, and with
        /// code -1 when training fails with an <c>IOException</c>.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);

            if (mlParams == null)
            {
                // Nothing was loaded: build parameters from the iteration and cutoff arguments.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }
            else if (TrainUtil.isSequenceTraining(mlParams.Settings))
            {
                throw new TerminateToolException(1, "Sequence training is not supported!");
            }

            Jfile targetFile = @params.Model;
            CmdLineUtil.checkOutputFile("sentence detector model", targetFile);

            // The end-of-sentence characters are optional.
            char[] eosChars = @params.EosChars != null ? @params.EosChars.ToCharArray() : null;

            SentenceModel trained;

            try
            {
                Dictionary abbreviations        = loadDict(@params.AbbDict);
                SentenceDetectorFactory factory = SentenceDetectorFactory.create(@params.Factory, @params.Lang, true, abbreviations, eosChars);

                trained = SentenceDetectorME.train(@params.Lang, sampleStream, factory, mlParams);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // Closing is best effort; a failure here is deliberately ignored.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                }
            }

            CmdLineUtil.writeModel("sentence detector", targetFile, trained);
        }
        /// <summary>
        /// Trains a tokenizer model from <c>sampleStream</c> and writes it to the file given
        /// by the <c>Model</c> parameter.
        /// </summary>
        /// <param name="format">Format name, forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Command line arguments, forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown with code 1 when the loaded parameters are invalid or request sequence training,
        /// and with code -1 when training fails with an <c>IOException</c>.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);

            if (mlParams == null)
            {
                // Nothing was loaded: build parameters from the iteration and cutoff arguments.
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }
            else
            {
                if (!TrainUtil.isValid(mlParams.Settings))
                {
                    throw new TerminateToolException(1, "Training parameters file '" + @params.Params + "' is invalid!");
                }

                if (TrainUtil.isSequenceTraining(mlParams.Settings))
                {
                    throw new TerminateToolException(1, "Sequence training is not supported!");
                }
            }

            File targetFile = @params.Model;
            CmdLineUtil.checkOutputFile("tokenizer model", targetFile);

            TokenizerModel trained;

            try
            {
                Dictionary abbreviations = loadDict(@params.AbbDict);
                TokenizerFactory factory = TokenizerFactory.create(@params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);

                trained = opennlp.tools.tokenize.TokenizerME.train(sampleStream, factory, mlParams);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // Closing is best effort; a failure here is deliberately ignored.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                }
            }

            CmdLineUtil.writeModel("tokenizer", targetFile, trained);
        }
示例#7
0
        /// <summary>
        /// Loads a dictionary from the given file.
        /// </summary>
        /// <param name="f">The dictionary file, or <c>null</c> when no dictionary was configured.</param>
        /// <returns>The loaded <c>Dictionary</c>, or <c>null</c> when <paramref name="f"/> is <c>null</c>.</returns>
        internal static Dictionary loadDict(Jfile f)
        {
            if (f == null)
            {
                return null;
            }

            CmdLineUtil.checkInputFile("abb dict", f);

            // The stream is opened here, so it is closed here as well instead of being leaked.
            // NOTE(review): assumes the Dictionary constructor reads the stream completely
            // before returning - confirm against the Dictionary implementation.
            FileInputStream dictIn = new FileInputStream(f);

            try
            {
                return new Dictionary(dictIn);
            }
            finally
            {
                try
                {
                    dictIn.close();
                }
                catch (IOException)
                {
                    // sorry that this can fail
                }
            }
        }
示例#8
0
        /// <summary>
        /// Checks and opens the <c>Data</c> file and exposes it as a <c>CorefSampleDataStream</c>
        /// that is fed from a <c>ParagraphStream</c> over the file's lines.
        /// </summary>
        /// <param name="args">Command line arguments, parsed into <c>Parameters</c>.</param>
        /// <returns>The coreference sample stream.</returns>
        public override ObjectStream <CorefSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));

            CmdLineUtil.checkInputFile("Data", parsed.Data);
            FileInputStream dataIn = CmdLineUtil.openInFile(parsed.Data);

            // Lines are decoded with the configured encoding and grouped by the paragraph stream.
            PlainTextByLineStream lines      = new PlainTextByLineStream(dataIn.Channel, parsed.Encoding);
            ObjectStream <string> paragraphs = new ParagraphStream(lines);

            return new CorefSampleDataStream(paragraphs);
        }
        /// <summary>
        /// Cross validates the sentence detector on <c>sampleStream</c> and prints the
        /// resulting F-measure to standard output.
        /// </summary>
        /// <param name="format">Format name, forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Command line arguments, forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">Thrown with code -1 when evaluation fails with an <c>IOException</c>.</exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            // Use the loaded parameters, or build them from the iteration and cutoff arguments.
            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false)
                       ?? ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);

            SDCrossValidator crossValidator;

            // An error listener is attached only when misclassified output was requested.
            SentenceDetectorEvaluationMonitor monitor = null;
            if (@params.Misclassified.Value)
            {
                monitor = new SentenceEvaluationErrorListener();
            }

            // The end-of-sentence characters are optional.
            char[] eosChars = @params.EosChars != null ? @params.EosChars.ToCharArray() : null;

            try
            {
                Dictionary abbDict              = SentenceDetectorTrainerTool.loadDict(@params.AbbDict);
                SentenceDetectorFactory factory = SentenceDetectorFactory.create(@params.Factory, @params.Lang, true, abbDict, eosChars);

                crossValidator = new SDCrossValidator(@params.Lang, mlParams, factory, monitor);
                crossValidator.evaluate(sampleStream, @params.Folds.Value);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // Closing is best effort; a failure here is deliberately ignored.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                }
            }

            Console.WriteLine(crossValidator.FMeasure.ToString());
        }
示例#10
0
        /// <summary>
        /// Trains a name finder model from <c>sampleStream</c> and writes it to the file given
        /// by the <c>Model</c> parameter. When name types are configured, the sample stream is
        /// wrapped in a <c>NameSampleTypeFilter</c> first.
        /// </summary>
        /// <param name="format">Format name, forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Command line arguments, forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">Thrown with code -1 when training fails with an <c>IOException</c>.</exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            // Use the loaded parameters, or build them from the iteration and cutoff arguments.
            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false)
                       ?? ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);

            File targetFile = @params.Model;

            sbyte[] generatorBytes = openFeatureGeneratorBytes(@params.Featuregen);

            // TODO: Support Custom resources:
            //       Must be loaded into memory, or written to tmp file until descriptor
            //       is loaded which defines parses when model is loaded
            IDictionary <string, object> resourceMap = loadResources(@params.Resources);

            CmdLineUtil.checkOutputFile("name finder model", targetFile);

            if (@params.NameTypes != null)
            {
                // The configured name types are given as a comma separated list.
                sampleStream = new NameSampleTypeFilter(@params.NameTypes.Split(",", true), sampleStream);
            }

            TokenNameFinderModel trained;

            try
            {
                trained = opennlp.tools.namefind.NameFinderME.train(@params.Lang, @params.Type, sampleStream, mlParams, generatorBytes, resourceMap);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // Closing is best effort; a failure here is deliberately ignored.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                }
            }

            CmdLineUtil.writeModel("name finder", targetFile, trained);
        }
        /// <summary>
        /// Records the configured language and opens the <c>Data</c> file as an
        /// <c>ADNameSampleStream</c>.
        /// </summary>
        /// <param name="args">Command line arguments, parsed into <c>Parameters</c>.</param>
        /// <returns>The name sample stream reading the file line by line.</returns>
        public override ObjectStream <NameSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));
            language = parsed.Lang;

            // Lines are decoded with the configured encoding.
            FileInputStream dataIn      = CmdLineUtil.openInFile(parsed.Data);
            ObjectStream <string> lines = new PlainTextByLineStream(dataIn.Channel, parsed.Encoding);

            return new ADNameSampleStream(lines, parsed.SplitHyphenatedTokens.Value);
        }
        /// <summary>
        /// Records the configured language and opens the <c>Data</c> file as an
        /// <c>ADPOSSampleStream</c>, passing on the <c>ExpandME</c> and <c>IncludeFeatures</c> options.
        /// </summary>
        /// <param name="args">Command line arguments, parsed into <c>Parameters</c>.</param>
        /// <returns>The POS sample stream reading the file line by line.</returns>
        public override ObjectStream <POSSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));
            language = parsed.Lang;

            // Lines are decoded with the configured encoding.
            FileInputStream dataIn      = CmdLineUtil.openInFile(parsed.Data);
            ObjectStream <string> lines = new PlainTextByLineStream(dataIn.Channel, parsed.Encoding);

            return new ADPOSSampleStream(lines, parsed.ExpandME.Value, parsed.IncludeFeatures.Value);
        }
示例#13
0
        /// <summary>
        /// Chunks POS tagged sentences read from standard input with the model named by the single
        /// argument and prints each chunked sample. Prints the help text when the argument count is not one.
        /// </summary>
        /// <param name="args">Command line arguments; exactly one model file path is expected.</param>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            ChunkerModel chunkerModel = new ChunkerModelLoader().load(new File(args[0]));
            ChunkerME chunkerMe       = new ChunkerME(chunkerModel, ChunkerME.DEFAULT_BEAM_SIZE);

            ObjectStream <string> input = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returns null once standard input is exhausted.
                for (string taggedLine = input.read(); taggedLine != null; taggedLine = input.read())
                {
                    POSSample tagged;
                    try
                    {
                        tagged = POSSample.parse(taggedLine);
                    }
                    catch (InvalidFormatException)
                    {
                        // Report the offending line and carry on with the next one.
                        Console.Error.WriteLine("Invalid format:");
                        Console.Error.WriteLine(taggedLine);
                        continue;
                    }

                    string[] chunkTags = chunkerMe.chunk(tagged.Sentence, tagged.Tags);
                    ChunkSample output = new ChunkSample(tagged.Sentence, tagged.Tags, chunkTags);

                    Console.WriteLine(output.nicePrint());

                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
示例#14
0
	  /// <summary>
	  /// Records the configured language and opens the <c>Data</c> file as a
	  /// <c>LeipzigDoccatSampleStream</c>.
	  /// </summary>
	  /// <param name="args">Command line arguments, parsed into <c>Parameters</c>.</param>
	  /// <returns>The document sample stream.</returns>
	  /// <exception cref="TerminateToolException">Thrown with code -1 when creating the stream fails with an <c>IOException</c>.</exception>
	  public override ObjectStream<DocumentSample> create(string[] args)
	  {
		Parameters options = ArgumentParser.parse(args, typeof(Parameters));
		language = options.Lang;

		try
		{
		  // NOTE(review): the meaning of the literal 20 is defined by LeipzigDoccatSampleStream - confirm there.
		  return new LeipzigDoccatSampleStream(options.Lang, 20, CmdLineUtil.openInFile(options.Data));
		}
		catch (IOException ioError)
		{
		  throw new TerminateToolException(-1, "IO error while opening sample data: " + ioError.Message, ioError);
		}
	  }
        /// <summary>
        /// Cross validates the tokenizer on <c>sampleStream</c> and prints the resulting
        /// F-measure to standard output.
        /// </summary>
        /// <param name="format">Format name, forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Command line arguments, forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">Thrown with code -1 when evaluation fails with an <c>IOException</c>.</exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            // Use the loaded parameters, or build them from the iteration and cutoff arguments.
            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false)
                       ?? ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);

            TokenizerCrossValidator crossValidator;

            // An error listener is attached only when misclassified output was requested.
            TokenizerEvaluationMonitor monitor = null;
            if (@params.Misclassified.Value)
            {
                monitor = new TokenEvaluationErrorListener();
            }

            try
            {
                Dictionary abbreviations = TokenizerTrainerTool.loadDict(@params.AbbDict);
                TokenizerFactory factory = TokenizerFactory.create(@params.Factory, @params.Lang, abbreviations, @params.AlphaNumOpt.Value, null);

                crossValidator = new TokenizerCrossValidator(mlParams, factory, monitor);
                crossValidator.evaluate(sampleStream, @params.Folds.Value);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // Closing is best effort; a failure here is deliberately ignored.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                }
            }

            Console.WriteLine(crossValidator.FMeasure.ToString());
        }
示例#16
0
        /// <summary>
        /// Runs a command line in the given working directory, waits for it to finish and
        /// returns its exit code.
        /// </summary>
        /// <param name="workingDir">Directory used as the working directory of the started process.</param>
        /// <param name="cmdLineString">Command line, split into command and arguments by <c>CmdLineUtil.SplitCmdAndArgs</c>.</param>
        /// <returns>The exit code reported by the process.</returns>
        public static int RunShellCommand(DirectoryInfo workingDir, string cmdLineString)
        {
            var cmdLine = CmdLineUtil.SplitCmdAndArgs(cmdLineString);
            var cmd     = cmdLine[0];
            var args    = cmdLine.Length > 1 ? cmdLine[1] : null;

            var psi = new System.Diagnostics.ProcessStartInfo(cmd, args);

            psi.WorkingDirectory = workingDir.FullName;
            psi.UseShellExecute  = false;

            // Process is disposable; dispose it once the exit code has been read
            // so the handle is not left to the finalizer.
            using (var proc = System.Diagnostics.Process.Start(psi))
            {
                proc.WaitForExit();

                return(proc.ExitCode);
            }
        }
示例#17
0
        /// <summary>
        /// Creates a <c>Conll03NameSampleStream</c> for the configured language ("en" or "de") over
        /// the <c>Data</c> file. Every entity type found in the <c>Types</c> parameter contributes
        /// its flag to the bit mask handed to the stream.
        /// </summary>
        /// <param name="args">Command line arguments, parsed into <c>Parameters</c>.</param>
        /// <returns>The name sample stream.</returns>
        /// <exception cref="TerminateToolException">Thrown with code 1 when the language is neither "en" nor "de".</exception>
        public override ObjectStream <NameSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));

            // TODO: support the other languages with this CoNLL.
            LANGUAGE corpusLanguage;

            if ("en".Equals(parsed.Lang))
            {
                corpusLanguage = LANGUAGE.EN;
            }
            else if ("de".Equals(parsed.Lang))
            {
                corpusLanguage = LANGUAGE.DE;
            }
            else
            {
                throw new TerminateToolException(1, "Unsupported language: " + parsed.Lang);
            }

            // Only reached for a supported language.
            language = parsed.Lang;

            // The flag constants are taken from Conll02NameSampleStream.
            int entityMask = 0;

            if (parsed.Types.Contains("per"))
            {
                entityMask |= Conll02NameSampleStream.GENERATE_PERSON_ENTITIES;
            }
            if (parsed.Types.Contains("org"))
            {
                entityMask |= Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES;
            }
            if (parsed.Types.Contains("loc"))
            {
                entityMask |= Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES;
            }
            if (parsed.Types.Contains("misc"))
            {
                entityMask |= Conll02NameSampleStream.GENERATE_MISC_ENTITIES;
            }

            return new Conll03NameSampleStream(corpusLanguage, CmdLineUtil.openInFile(parsed.Data), entityMask);
        }
        /// <summary>
        /// Loads a parser model and a POS tagger model, replaces the tagger model inside the parser
        /// model and writes the result back to the parser model file.
        /// Prints the help text when the argument count is not two.
        /// </summary>
        /// <param name="args">Command line arguments: the parser model file followed by the tagger model file.</param>
        public override void run(string[] args)
        {
            if (args.Length != 2)
            {
                Console.WriteLine(Help);
                return;
            }

            File parserFile      = new File(args[0]);
            ParserModel original = new ParserModelLoader().load(parserFile);

            File taggerFile      = new File(args[1]);
            POSModel replacement = new POSModelLoader().load(taggerFile);

            ParserModel merged = original.updateTaggerModel(replacement);

            // The updated model overwrites the file the parser model was loaded from.
            CmdLineUtil.writeModel("parser", parserFile, merged);
        }
示例#19
0
        /// <summary>
        /// Opens the <c>Data</c> file as UTF-8 text and exposes it as a <c>ConllXPOSSampleStream</c>.
        /// </summary>
        /// <param name="args">Command line arguments, parsed into <c>Parameters</c>.</param>
        /// <returns>The POS sample stream reading the file line by line.</returns>
        /// <exception cref="TerminateToolException">Thrown with code -1 when the UTF-8 encoding is reported as unsupported.</exception>
        public override ObjectStream <POSSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));

            try
            {
                ObjectStream <string> lines = new PlainTextByLineStream(new InputStreamReader(CmdLineUtil.openInFile(parsed.Data), "UTF-8"));

                return new ConllXPOSSampleStream(lines);
            }
            catch (UnsupportedEncodingException unsupported)
            {
                // this shouldn't happen
                throw new TerminateToolException(-1, "UTF-8 encoding is not supported: " + unsupported.Message, unsupported);
            }
        }
        /// <summary>
        /// Performs sentence detection on the input stream.
        ///
        /// Standard input is read through a <c>ParagraphStream</c>; the sentences of each
        /// paragraph are printed one per line, followed by an empty line.
        /// Prints the help text when the argument count is not one.
        /// </summary>
        /// <param name="args">Command line arguments; exactly one model file path is expected.</param>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            SentenceModel sentenceModel = new SentenceModelLoader().load(new File(args[0]));
            SentenceDetectorME detector = new SentenceDetectorME(sentenceModel);

            ObjectStream <string> paragraphs = new ParagraphStream(new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput)));

            PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returns null once standard input is exhausted.
                for (string paragraph = paragraphs.read(); paragraph != null; paragraph = paragraphs.read())
                {
                    string[] sentences = detector.sentDetect(paragraph);

                    for (int i = 0; i < sentences.Length; i++)
                    {
                        Console.WriteLine(sentences[i]);
                    }

                    // The counter advances by the number of sentences found in this paragraph.
                    monitor.incrementCounter(sentences.Length);

                    Console.WriteLine();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
        /// <summary>
        /// Parses a one-entry-per-line dictionary from the input file and serializes it to
        /// the output file.
        /// </summary>
        /// <param name="args">Command line arguments, validated and parsed into <c>Params</c>.</param>
        /// <exception cref="TerminateToolException">Thrown with code -1 when reading or writing fails with an <c>IOException</c>.</exception>
        public override void run(string[] args)
        {
            Params @params = validateAndParseParams(args, typeof(Params));

            File    dictInFile  = @params.InputFile;
            File    dictOutFile = @params.OutputFile;
            Charset encoding    = @params.Encoding;

            CmdLineUtil.checkInputFile("dictionary input file", dictInFile);
            CmdLineUtil.checkOutputFile("dictionary output file", dictOutFile);

            InputStreamReader @in  = null;
            OutputStream      @out = null;

            try
            {
                @in  = new InputStreamReader(new FileInputStream(dictInFile), encoding);
                @out = new FileOutputStream(dictOutFile);

                Dictionary dict = Dictionary.parseOneEntryPerLine(@in);
                dict.serialize(@out);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // Either stream may still be null when opening failed; dereferencing it here
                // would replace the TerminateToolException with a null reference error.
                // Each stream is closed in its own try so that a failure to close the reader
                // does not leave the writer open.
                if (@in != null)
                {
                    try
                    {
                        @in.close();
                    }
                    catch (IOException)
                    {
                        // sorry that this can fail
                    }
                }

                if (@out != null)
                {
                    try
                    {
                        @out.close();
                    }
                    catch (IOException)
                    {
                        // sorry that this can fail
                    }
                }
            }
        }
示例#22
0
        /// <summary>
        /// Categorizes documents read from standard input with the model named by the first argument
        /// and prints each document together with its best category.
        /// Prints the help text when no arguments are given.
        /// </summary>
        /// <param name="args">Command line arguments; the first one is the model file path.</param>
        public override void run(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine(Help);
                return;
            }

            DoccatModel doccatModel           = new DoccatModelLoader().load(new File(args[0]));
            DocumentCategorizerME categorizer = new DocumentCategorizerME(doccatModel);

            ObjectStream <string> documents = new ParagraphStream(new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput)));

            PerformanceMonitor monitor = new PerformanceMonitor(System.err, "doc");
            monitor.start();

            try
            {
                // read() returns null once standard input is exhausted.
                for (string text = documents.read(); text != null; text = documents.read())
                {
                    // The document is whitespace tokenized before categorization.
                    double[] outcomes = categorizer.categorize(WhitespaceTokenizer.INSTANCE.tokenize(text));
                    string   best     = categorizer.getBestCategory(outcomes);

                    DocumentSample labelled = new DocumentSample(best, text);
                    Console.WriteLine(labelled.ToString());

                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
示例#23
0
        /// <summary>
        /// Trains a chunker model from <c>sampleStream</c> and writes it to the file given by
        /// the <c>Model</c> parameter.
        /// </summary>
        /// <param name="format">Format name, forwarded unchanged to <c>base.run</c>.</param>
        /// <param name="args">Command line arguments, forwarded unchanged to <c>base.run</c>.</param>
        /// <exception cref="TerminateToolException">Thrown with code -1 when training fails with an <c>IOException</c>.</exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            // Fall back to parameters built from the iteration and cutoff arguments
            // when no training parameters could be loaded.
            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            File modelOutFile = @params.Model;

            // The label names the file being checked; this tool writes a chunker model,
            // so it must not carry the sentence detector's label.
            CmdLineUtil.checkOutputFile("chunker model", modelOutFile);

            ChunkerModel model;

            try
            {
                ChunkerFactory chunkerFactory = ChunkerFactory.create(@params.Factory);
                model = ChunkerME.train(@params.Lang, sampleStream, mlParams, chunkerFactory);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // The sample stream is always closed; a failure while closing is ignored.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // sorry that this can fail
                }
            }

            CmdLineUtil.writeModel("chunker", modelOutFile, model);
        }
示例#24
0
        /// <summary>
        /// Tags whitespace tokenized lines read from standard input with the model named by the
        /// single argument and prints each tagged sample.
        /// Prints the help text when the argument count is not one.
        /// </summary>
        /// <param name="args">Command line arguments; exactly one model file path is expected.</param>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            POSModel posModel     = new POSModelLoader().load(new File(args[0]));
            POSTaggerME posTagger = new POSTaggerME(posModel);

            ObjectStream <string> input = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returns null once standard input is exhausted.
                for (string sentence = input.read(); sentence != null; sentence = input.read())
                {
                    string[] tokens  = WhitespaceTokenizer.INSTANCE.tokenize(sentence);
                    string[] posTags = posTagger.tag(tokens);

                    POSSample tagged = new POSSample(tokens, posTags);
                    Console.WriteLine(tagged.ToString());

                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
示例#25
0
        /// <summary>
        /// Loads the parser model named by the <c>Model</c> parameter, updates it through
        /// <c>trainAndUpdate</c> using samples in the given format, and writes the result back
        /// to the same model file.
        /// </summary>
        /// <param name="format">Name of the sample format used to look up the stream factory.</param>
        /// <param name="args">Command line arguments for both the tool and the stream factory.</param>
        /// <exception cref="TerminateToolException">Thrown with code -1 when the update fails with an <c>IOException</c>.</exception>
        public sealed override void run(string format, string[] args)
        {
            ModelUpdaterParams updaterParams = validateAndParseParams <ModelUpdaterParams>(ArgumentParser.filter(args, typeof(ModelUpdaterParams)), typeof(ModelUpdaterParams));

            // Load model to be updated
            Jfile       parserFile = updaterParams.Model;
            ParserModel current    = new ParserModelLoader().load(parserFile);

            // The factory only receives the arguments that belong to its own parameters.
            ObjectStreamFactory streamFactory = getStreamFactory(format);
            string[] factoryArgs = ArgumentParser.filter(args, streamFactory.Parameters);
            validateFactoryArgs(streamFactory, factoryArgs);

            ObjectStream <Parse> parses = streamFactory.create <Parse>(factoryArgs);

            ParserModel updated;

            try
            {
                updated = trainAndUpdate(current, parses, updaterParams);
            }
            catch (IOException e)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
            }
            finally
            {
                // Closing is best effort; a failure here is deliberately ignored.
                try
                {
                    parses.close();
                }
                catch (IOException)
                {
                }
            }

            CmdLineUtil.writeModel("parser", parserFile, updated);
        }
示例#26
0
        /// <summary>
        /// Detokenizes whitespace tokenized lines read from standard input with the detokenization
        /// dictionary named by the single argument and prints each result.
        /// Prints the help text when the argument count is not one.
        /// </summary>
        /// <param name="args">Command line arguments; exactly one dictionary file path is expected.</param>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            Detokenizer dictDetokenizer = new DictionaryDetokenizer(new DetokenizationDictionaryLoader().load(new File(args[0])));

            ObjectStream <string> input = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                // read() returns null once standard input is exhausted.
                for (string line = input.read(); line != null; line = input.read())
                {
                    // Split the line on whitespace before handing the tokens to the detokenizer.
                    string[] parts = WhitespaceTokenizer.INSTANCE.tokenize(line);

                    Console.WriteLine(dictDetokenizer.detokenize(parts, null));

                    monitor.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            monitor.stopAndPrintFinalResult();
        }
        /// <summary>
        /// Records the configured language and opens the <c>Data</c> file as an
        /// <c>ADChunkSampleStream</c>. The optional <c>Start</c> and <c>End</c> parameters are
        /// applied to the stream only when they are set and greater than -1.
        /// </summary>
        /// <param name="args">Command line arguments, parsed into <c>Parameters</c>.</param>
        /// <returns>The chunk sample stream reading the file line by line.</returns>
        public override ObjectStream <ChunkSample> create(string[] args)
        {
            Parameters parsed = ArgumentParser.parse(args, typeof(Parameters));
            language = parsed.Lang;

            // Lines are decoded with the configured encoding.
            FileInputStream dataIn      = CmdLineUtil.openInFile(parsed.Data);
            ObjectStream <string> lines = new PlainTextByLineStream(dataIn.Channel, parsed.Encoding);

            ADChunkSampleStream chunkSamples = new ADChunkSampleStream(lines);

            if (parsed.Start != null)
            {
                if (parsed.Start > -1)
                {
                    chunkSamples.Start = parsed.Start.Value;
                }
            }

            if (parsed.End != null)
            {
                if (parsed.End > -1)
                {
                    chunkSamples.End = parsed.End.Value;
                }
            }

            return chunkSamples;
        }
示例#28
0
        /// <summary>
        /// Cross-validates the POS tagger on the sample stream and prints the
        /// resulting word accuracy. Optionally reports misclassified samples and
        /// writes a fine-grained report to the configured report output file.
        /// </summary>
        /// <param name="format">Forwarded to the base implementation.</param>
        /// <param name="args">Forwarded to the base implementation.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown when the report file cannot be created or when an IO error occurs
        /// during cross validation. The original exception is attached as the cause.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            // Use the training parameters file when it yields parameters; otherwise
            // build parameters from the iterations and cutoff arguments.
            mlParams = CmdLineUtil.loadTrainingParameters(@params.Params, false);
            if (mlParams == null)
            {
                mlParams = ModelUtil.createTrainingParameters(@params.Iterations.Value, @params.Cutoff.Value);
            }

            POSTaggerEvaluationMonitor missclassifiedListener = null;

            if (@params.Misclassified.Value)
            {
                missclassifiedListener = new POSEvaluationErrorListener();
            }

            POSTaggerFineGrainedReportListener reportListener = null;
            Jfile        reportFile         = @params.ReportOutputFile;
            OutputStream reportOutputStream = null;

            try
            {
                if (reportFile != null)
                {
                    CmdLineUtil.checkOutputFile("Report Output File", reportFile);
                    try
                    {
                        reportOutputStream = new FileOutputStream(reportFile);
                        reportListener     = new POSTaggerFineGrainedReportListener(reportOutputStream);
                    }
                    catch (FileNotFoundException e)
                    {
                        // Keep the cause so the failure can be traced, as the IO error handler below does.
                        throw new TerminateToolException(-1, "IO error while creating POS Tagger fine-grained report file: " + e.Message, e);
                    }
                }

                POSTaggerCrossValidator validator;

                try
                {
                    validator = new POSTaggerCrossValidator(@params.Lang, mlParams, @params.Dict, @params.Ngram, @params.TagDictCutoff, @params.Factory, missclassifiedListener, reportListener);

                    validator.evaluate(sampleStream, @params.Folds.Value);
                }
                catch (IOException e)
                {
                    throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.Message, e);
                }
                finally
                {
                    try
                    {
                        sampleStream.close();
                    }
                    catch (IOException)
                    {
                        // Best effort: a failing close must not hide the evaluation outcome.
                    }
                }

                Console.WriteLine("done");

                if (reportListener != null)
                {
                    Console.WriteLine("Writing fine-grained report to " + @params.ReportOutputFile.AbsolutePath);
                    reportListener.writeReport();
                }

                Console.WriteLine();

                Console.WriteLine("Accuracy: " + validator.WordAccuracy);
            }
            finally
            {
                // Close the report stream on every path, including failures during
                // validation or report writing, so the file handle is never leaked.
                if (reportOutputStream != null)
                {
                    try
                    {
                        reportOutputStream.close();
                    }
                    catch (IOException)
                    {
                        // Best effort: nothing sensible can be done if closing fails.
                    }
                }
            }
        }
        /// <summary>
        /// Evaluates a loaded POS model against the sample stream and prints the
        /// resulting word accuracy. Optionally reports misclassified samples and
        /// writes a fine-grained report to the configured report output file.
        /// </summary>
        /// <param name="format">Forwarded to the base implementation.</param>
        /// <param name="args">Forwarded to the base implementation.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown when the report file cannot be created or when an IO error occurs
        /// while reading the test data. The original exception is attached as the cause.
        /// </exception>
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            POSModel model = (new POSModelLoader()).load(@params.Model);

            POSTaggerEvaluationMonitor missclassifiedListener = null;

            if (@params.Misclassified.Value)
            {
                missclassifiedListener = new POSEvaluationErrorListener();
            }

            POSTaggerFineGrainedReportListener reportListener = null;
            File         reportFile         = @params.ReportOutputFile;
            OutputStream reportOutputStream = null;

            try
            {
                if (reportFile != null)
                {
                    CmdLineUtil.checkOutputFile("Report Output File", reportFile);
                    try
                    {
                        reportOutputStream = new FileOutputStream(reportFile);
                        reportListener     = new POSTaggerFineGrainedReportListener(reportOutputStream);
                    }
                    catch (FileNotFoundException e)
                    {
                        // Keep the cause so the failure can be traced, as the IO error handler below does.
                        throw new TerminateToolException(-1, "IO error while creating POS Tagger fine-grained report file: " + e.Message, e);
                    }
                }

                POSEvaluator evaluator = new POSEvaluator(new opennlp.tools.postag.POSTaggerME(model), missclassifiedListener, reportListener);

                Console.Write("Evaluating ... ");
                try
                {
                    evaluator.evaluate(sampleStream);
                }
                catch (IOException e)
                {
                    Console.Error.WriteLine("failed");
                    throw new TerminateToolException(-1, "IO error while reading test data: " + e.Message, e);
                }
                finally
                {
                    try
                    {
                        sampleStream.close();
                    }
                    catch (IOException)
                    {
                        // Best effort: a failing close must not hide the evaluation outcome.
                    }
                }

                Console.WriteLine("done");

                if (reportListener != null)
                {
                    Console.WriteLine("Writing fine-grained report to " + @params.ReportOutputFile.AbsolutePath);
                    reportListener.writeReport();
                }

                Console.WriteLine();

                Console.WriteLine("Accuracy: " + evaluator.WordAccuracy);
            }
            finally
            {
                // Close the report stream on every path, including failures during
                // evaluation or report writing, so the file handle is never leaked.
                if (reportOutputStream != null)
                {
                    try
                    {
                        reportOutputStream.close();
                    }
                    catch (IOException)
                    {
                        // Best effort: nothing sensible can be done if closing fails.
                    }
                }
            }
        }
示例#30
0
        /// <summary>
        /// Builds a dictionary from a census name data file and serializes it to
        /// the requested dictionary output file.
        /// </summary>
        /// <param name="args">Command line arguments, validated and parsed into <c>Parameters</c>.</param>
        /// <exception cref="TerminateToolException">
        /// Thrown when reading the name data fails, when writing the dictionary
        /// fails, or when the output stream cannot be closed.
        /// </exception>
        public override void run(string[] args)
        {
            Parameters options = validateAndParseParams(args, typeof(Parameters));

            File censusFile     = new File(options.CensusData);
            File dictionaryFile = new File(options.Dict);

            // The input file is checked before the output file.
            CmdLineUtil.checkInputFile("Name data", censusFile);
            CmdLineUtil.checkOutputFile("Dictionary file", dictionaryFile);

            FileInputStream           censusIn   = CmdLineUtil.openInFile(censusFile);
            ObjectStream <StringList> nameStream = new NameFinderCensus90NameStream(censusIn, Charset.forName(options.Encoding));

            Dictionary nameDictionary;

            // Step 1: read all names into the dictionary.
            try
            {
                Console.WriteLine("Creating Dictionary...");
                nameDictionary = createDictionary(nameStream);
            }
            catch (IOException readError)
            {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + readError.Message, readError);
            }
            finally
            {
                try
                {
                    nameStream.close();
                }
                catch (IOException)
                {
                    // Closing the input is best effort; a failure here is ignored.
                }
            }

            // Step 2: write the dictionary to the output file.
            Console.WriteLine("Saving Dictionary...");

            OutputStream dictionaryOut = null;

            try
            {
                dictionaryOut = new FileOutputStream(dictionaryFile);
                nameDictionary.serialize(dictionaryOut);
            }
            catch (IOException writeError)
            {
                throw new TerminateToolException(-1, "IO error while writing dictionary file: " + writeError.Message, writeError);
            }
            finally
            {
                if (dictionaryOut != null)
                {
                    try
                    {
                        dictionaryOut.close();
                    }
                    catch (IOException closeError)
                    {
                        // A failed close may leave a damaged file, so it is reported rather than ignored.
                        throw new TerminateToolException(-1, "Attention: Failed to correctly write dictionary:" + closeError.Message, closeError);
                    }
                }
            }
        }