コード例 #1
0
ファイル: Parser.cs プロジェクト: qooba/SharpNL
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <returns>The trained <see cref="ParserModel" /> object.</returns>
        public static ParserModel Train(
            Monitor monitor,
            string languageCode,
            IObjectStream <Parse> samples,
            AbstractHeadRules rules,
            TrainingParameters parameters)
        {
            var dict = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            var manifestInfoEntries = new Dictionary <string, string>();

            // build
            //System.err.println("Training builder");
            var bes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
            var buildReportMap = new Dictionary <string, string>();
            var buildTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);


            var buildModel = buildTrainer.Train(bes);

            MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // tag
            var posTaggerParams = parameters.GetNamespace("tagger");

            if (!posTaggerParams.Contains(Parameters.BeamSize))
            {
                posTaggerParams.Set(Parameters.BeamSize, "10");
            }


            var posModel = POSTaggerME.Train(languageCode, new PosSampleStream(samples),
                                             parameters.GetNamespace("tagger"), new POSTaggerFactory());

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(languageCode,
                                             new ChunkSampleStream(samples),
                                             parameters.GetNamespace("chunker"),
                                             new ParserChunkerFactory());

            samples.Reset();

            // check
            //System.err.println("Training checker");
            var kes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary <string, string>();
            var checkTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);

            MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            return(new ParserModel(languageCode, buildModel, checkModel, posModel, chunkModel, rules, manifestInfoEntries));
        }
コード例 #2
0
        public void Setup()
        {
            var p = new TrainingParameters();

            p.Set(Parameters.Iterations, "70");
            p.Set(Parameters.Cutoff, "1");

            var chunkerModel = ChunkerME.Train("en", CreateSampleStream(), p, new ChunkerFactory());

            chunker = new ChunkerME(chunkerModel);
        }
コード例 #3
0
        private void LoadChunker()
        {
            if (!alreadyLoadChunker)
            {
                java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
                ChunkerModel            chunkerModel   = new ChunkerModel(modelInpStream);
                chunker = new ChunkerME(chunkerModel);

                alreadyLoadChunker = true;
            }
        }
コード例 #4
0
        public static ChunkerModel TrainModel(string path)
        {
            FileStream        fs     = new FileStream(path, FileMode.Open, FileAccess.Read);
            ChunkSampleStream stream = new ChunkSampleStream(new PlainTextByLineStream(fs));

            TrainingParameters trainParams = new TrainingParameters();

            trainParams.Set(Parameters.Iterations, "70");
            trainParams.Set(Parameters.Cutoff, "1");

            return(ChunkerME.Train(TRAINING_LANGUAGE, stream, trainParams, new ChunkerFactory()));
        }
コード例 #5
0
ファイル: ChunkerMETool.cs プロジェクト: quicquam/opennlp4net
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
            }
            else
            {
                ChunkerModel model = (new ChunkerModelLoader()).load(new File(args[0]));

                ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);

                ObjectStream <string> lineStream = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

                PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
                perfMon.start();

                try
                {
                    string line;
                    while ((line = lineStream.read()) != null)
                    {
                        POSSample posSample;
                        try
                        {
                            posSample = POSSample.parse(line);
                        }
                        catch (InvalidFormatException)
                        {
                            Console.Error.WriteLine("Invalid format:");
                            Console.Error.WriteLine(line);
                            continue;
                        }

                        string[] chunks = chunker.chunk(posSample.Sentence, posSample.Tags);

                        Console.WriteLine((new ChunkSample(posSample.Sentence, posSample.Tags, chunks)).nicePrint());

                        perfMon.incrementCounter();
                    }
                }
                catch (IOException e)
                {
                    CmdLineUtil.handleStdinIoError(e);
                }

                perfMon.stopAndPrintFinalResult();
            }
        }
コード例 #6
0
        public void TestTokenProbMinScoreOpenNLP()
        {
            var model = new ChunkerModel(Tests.OpenFile("opennlp/models/en-chunker.bin"));

            Assert.NotNull(model);

            var ckr = new ChunkerME(model);

            Assert.NotNull(ckr);

            var preds = chunker.TopKSequences(toks1, tags1, -5.55);

            Assert.AreEqual(4, preds.Length);
            Assert.AreEqual(expect1.Length, preds[0].Probabilities.Count);
            Assert.True(expect1.SequenceEqual(preds[0].Outcomes));
            Assert.False(expect1.SequenceEqual(preds[1].Outcomes));
        }
コード例 #7
0
        private void LoadModels()
        {
            POSModel posModel;

            using (var modelFile = new FileStream(Path.Combine(configuration.Resources, configuration.NlpModels, "en-pos-maxent.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                posModel = new POSModel(modelFile);
            }

            ChunkerModel chunkerModel;

            using (var modelFile = new FileStream(Path.Combine(configuration.Resources, configuration.NlpModels, "en-chunker.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                chunkerModel = new ChunkerModel(modelFile);
            }

            posTagger = new POSTaggerME(posModel);
            chunker   = new ChunkerME(chunkerModel);
        }
コード例 #8
0
        private void LoadModels(string resourcesFolder)
        {
            POSModel posModel;

            using (var modelFile = new FileStream(Path.Combine(resourcesFolder, @"1.5/en-pos-maxent.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                posModel = new POSModel(modelFile);
            }

            ChunkerModel chunkerModel;

            using (var modelFile = new FileStream(Path.Combine(resourcesFolder, @"1.5/en-chunker.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                chunkerModel = new ChunkerModel(modelFile);
            }

            posTagger = new POSTaggerME(posModel);
            chunker   = new ChunkerME(chunkerModel);
        }
コード例 #9
0
        public NLP()
        {
            //loading sentence detector model
            java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-sent.bin");
            SentenceModel           sentenceModel  = new SentenceModel(modelInpStream);

            sentenceDetector = new SentenceDetectorME(sentenceModel);

            //loading tokenizer model
            modelInpStream = new java.io.FileInputStream("Resources\\en-token.bin");
            TokenizerModel tokenizerModel = new TokenizerModel(modelInpStream);

            tokenizer = new TokenizerME(tokenizerModel);

            modelInpStream = new java.io.FileInputStream("Resources\\en-pos-maxent.bin");
            POSModel posModel = new POSModel(modelInpStream);

            tagger = new POSTaggerME(posModel);

            modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
            ChunkerModel chunkerModel = new ChunkerModel(modelInpStream);

            chunker = new ChunkerME(chunkerModel);

            modelInpStream = new java.io.FileInputStream("Resources\\en-parser-chunking.bin");
            ParserModel parserModel = new ParserModel(modelInpStream);

            parser = ParserFactory.create(parserModel);

            //loading stop words list
            StreamReader sr = new StreamReader("Resources\\english.stop.txt");
            string       line;

            while ((line = sr.ReadLine()) != null)
            {
                stopwords.Add(Stemming(line));
                stopwords.Add(line);
            }
        }
コード例 #10
0
        public void TestDummyFactory()
        {
            var model = TrainModel(new DummyChunkerFactory());

            Assert.IsInstanceOf <DummyChunkerFactory>(model.Factory);
            Assert.IsInstanceOf <DummyChunkerFactory.DummyContextGenerator>(model.Factory.GetContextGenerator());
            Assert.IsInstanceOf <DummyChunkerFactory.DummySequenceValidator>(model.Factory.GetSequenceValidator());


            using (var stream = new MemoryStream()) {
                model.Serialize(new UnclosableStream(stream));
                stream.Seek(0, SeekOrigin.Begin);

                var fromSerialized = new ChunkerModel(stream);
                Assert.IsInstanceOf <DummyChunkerFactory>(model.Factory);
                Assert.IsInstanceOf <DummyChunkerFactory.DummyContextGenerator>(
                    fromSerialized.Factory.GetContextGenerator());
                Assert.IsInstanceOf <DummyChunkerFactory.DummySequenceValidator>(
                    fromSerialized.Factory.GetSequenceValidator());
            }

            var chunker = new ChunkerME(model);

            String[] toks1 =
            {
                "Rockwell", "said", "the",    "agreement", "calls",      "for",
                "it",       "to",   "supply", "200",       "additional", "so-called","shipsets",
                "for",      "the",  "planes", "."
            };

            String[] tags1 =
            {
                "NNP", "VBD", "DT", "NN",  "VBZ", "IN", "PRP", "TO", "VB",
                "CD",  "JJ",  "JJ", "NNS", "IN",  "DT", "NNS", "."
            };

            chunker.Chunk(toks1, tags1);
        }
コード例 #11
0
        public void Setup()
        {
            var sParams = new TrainingParameters();

            sParams.Set(Parameters.Iterations, "70");
            sParams.Set(Parameters.Cutoff, "1");

            var jParams = new opennlp.tools.util.TrainingParameters();

            jParams.put("Iterations", "70");
            jParams.put("Cutoff", "1");

            var sModel = ChunkerME.Train("en", ChunkerMETest.CreateSampleStream(), sParams, new ChunkerFactory());

            var jModel = opennlp.tools.chunker.ChunkerME.train("en", JavaSampleStream(), jParams,
                                                               new opennlp.tools.chunker.ChunkerFactory());

            Assert.NotNull(sModel);
            Assert.NotNull(jModel);

            sChunker = new ChunkerME(sModel);
            jChunker = new opennlp.tools.chunker.ChunkerME(jModel);
        }
コード例 #12
0
        // Constructors and finalizers:
        private Repository()
        {
            _assemblyName = Regex.Match(_assemblyFullName, "^(.*?),.*$").Result("$1");

            _rootDrive = ("/usr/project/xtmp/dp195/Poetix18/").Replace(@"\", Dsc);
            _nlpFolder = ("rhetorica/nlp/").Replace(@"\", Dsc);

            _openNlpModelsFolder = ("OpenNLP/models/").Replace(@"\", Dsc);
            _openNlpModelsPath   = RootDrive + _nlpFolder + _openNlpModelsFolder;

            _wordNetFolder = ("WordNet_3/").Replace(@"\", Dsc);
            _wordNetPath   = RootDrive + _nlpFolder + _wordNetFolder;

            _grammarFolder = ("StanfordParser/grammar/").Replace(@"\", Dsc);
            _grammarPath   = RootDrive + _nlpFolder + _grammarFolder;

            _dataFolder   = ("data/").Replace(@"\", Dsc);
            _nlpTextsPath = RootDrive + _dataFolder;

            string[] localTextDirectoryParts =
            {
                CurrentAssemblyDirectoryPath,
                "..",                        "..","..", "data"
                //"..", "..", "text"
            };
            _localTextPath = Path.Combine(localTextDirectoryParts) + "/"; // For development use

            // WordNet engine:
            Console.Write("Loading WordNet engine.... ");
            _wordNetEngine = new WordNetEngine(WordNetPath, true);
            Console.WriteLine("Done.");

            // OpenNLP sentence detector:
            Console.Write("Loading OpenNLP sentence detector.... ");
            java.io.FileInputStream modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-sent.bin");
            _sentenceModel = new SentenceModel(modelInputStream);
            modelInputStream.close();
            _sentenceDetector = new SentenceDetectorME(_sentenceModel);
            Console.WriteLine("Done.");

            // OpenNLP tokenizer:
            Console.Write("Loading OpenNLP tokenizer.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-token.bin");
            _tokenizerModel  = new opennlp.tools.tokenize.TokenizerModel(modelInputStream);
            modelInputStream.close();
            _tokenizer = new opennlp.tools.tokenize.TokenizerME(_tokenizerModel);
            Console.WriteLine("Done.");

            // OpenNLP name finder:
            Console.Write("Loading OpenNLP name finder.... ");
            modelInputStream      = new java.io.FileInputStream(OpenNlpModelsPath + "en-ner-person.bin");
            _tokenNameFinderModel = new TokenNameFinderModel(modelInputStream);
            modelInputStream.close();
            _nameFinder = new NameFinderME(_tokenNameFinderModel);
            Console.WriteLine("Done.");

            // OpenNLP POS tagger:
            Console.Write("Loading OpenNLP POS tagger.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-pos-maxent.bin");
            _posModel        = new POSModel(modelInputStream);
            modelInputStream.close();
            _tagger = new POSTaggerME(_posModel);
            Console.WriteLine("Done.");

            // OpenNLP chunker:
            Console.Write("Loading OpenNLP chunker.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-chunker.bin");
            _chunkerModel    = new ChunkerModel(modelInputStream);
            modelInputStream.close();
            _chunker = new ChunkerME(_chunkerModel);
            Console.WriteLine("Done.");

            // OpenNLP parser:
            if (_loadParser)
            {
                Console.Write("Loading OpenNLP parser.... ");
                modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-parser-chunking.bin");
                _parserModel     = new ParserModel(modelInputStream);
                modelInputStream.close();
                _parser = ParserFactory.create(_parserModel);
                Console.WriteLine("Done.");
            }

            // Stanford parser:
            //_stanfordParser = new LexicalizedParser(GrammarPath + "englishPCFG.ser.gz"); // Obsolete method
            _stanfordParser = LexicalizedParser.loadModel(GrammarPath + "englishPCFG.ser.gz");

            // Porter stemmer:
            _porterStemmer = new PorterStemmer();
        }
コード例 #13
0
ファイル: TextProcessor.cs プロジェクト: baio/d-mill
        public static IEnumerable<IEnumerable<ChunkItem>> GetChunks(IEnumerable<string> Sentences)
        {
            var posModelStream = new java.io.ByteArrayInputStream(Resource.en_pos_maxent);//new java.io.FileInputStream(@"C:\dev\d-mill\TextProcessing\OpenNLP\Models\en-pos-maxent.bin");

            var posModel = new POSModel(posModelStream);

            var pos = new POSTaggerME(posModel);

            var modelStream = new java.io.ByteArrayInputStream(Resource.en_token); //java.io.FileInputStream(@"C:\dev\d-mill\TextProcessing\OpenNLP\Models\en-token.bin");

            var model = new TokenizerModel(modelStream);

            var tokenizer = new TokenizerME(model);

            var chunkerModelStream = new java.io.ByteArrayInputStream(Resource.en_chunker);

            var chunkerModel = new ChunkerModel(chunkerModelStream);

            var chunker = new ChunkerME(chunkerModel);

            return Sentences.Select(p => {

                var tokens = tokenizer.tokenize(p);

                var tags = pos.tag(tokens);

                var chunks = chunker.chunk(tokens, tags);

                var res = new List<ChunkItem>();

                for (var i = 0; i < chunks.Length; i++)
                {
                    res.Add(new ChunkItem { token = tokens[i], tag = tags[i], chunk = chunks[i] });
                }

                return res;
            });
        }
コード例 #14
0
 public Chunker()
 {
     this.chunker = new ChunkerME(TrainModel(Environment.CurrentDirectory + TRAINING_FILE_PATH));
 }
コード例 #15
0
        /// <summary>
        /// Trains a parser model with the given parameters.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="samples">The data samples.</param>
        /// <param name="rules">The head rules.</param>
        /// <param name="parameters">The machine learnable parameters.</param>
        /// <param name="monitor">
        /// A evaluation monitor that can be used to listen the messages during the training or it can cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The trained <see cref="ParserModel"/> object.</returns>
        /// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
        public static ParserModel Train(
            string languageCode,
            IObjectStream <Parse> samples,
            AbstractHeadRules rules,
            TrainingParameters parameters,
            Monitor monitor)
        {
            var manifestInfoEntries = new Dictionary <string, string>();

            System.Diagnostics.Debug.Print("Building dictionary");

            var dictionary = BuildDictionary(samples, rules, parameters);

            samples.Reset();

            // tag
            var posModel = POSTaggerME.Train(
                languageCode,
                new PosSampleStream(samples),
                parameters.GetNamespace("tagger"),
                new POSTaggerFactory(), monitor);

            samples.Reset();

            // chunk
            var chunkModel = ChunkerME.Train(
                languageCode,
                new ChunkSampleStream(samples),
                parameters.GetNamespace("chunker"),
                new ChunkerFactory(), monitor);

            samples.Reset();

            // build
            System.Diagnostics.Debug.Print("Training builder");
            var bes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dictionary);
            var buildReportMap = new Dictionary <string, string>();
            var buildTrainer   = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);

            var buildModel = buildTrainer.Train(bes);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

            samples.Reset();

            // check
            System.Diagnostics.Debug.Print("Training checker");
            var kes            = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
            var checkReportMap = new Dictionary <string, string>();

            var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);

            var checkModel = checkTrainer.Train(kes);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

            samples.Reset();

            // attach
            System.Diagnostics.Debug.Print("Training attacher");
            var attachEvents    = new ParserEventStream(samples, rules, ParserEventTypeEnum.Attach);
            var attachReportMap = new Dictionary <string, string>();

            var attachTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("attach"), attachReportMap, monitor);

            var attachModel = attachTrainer.Train(attachEvents);

            Chunking.Parser.MergeReportIntoManifest(manifestInfoEntries, attachReportMap, "attach");

            return(new ParserModel(
                       languageCode,
                       buildModel,
                       checkModel,
                       attachModel,
                       posModel,
                       chunkModel,
                       rules,
                       ParserType.TreeInsert,
                       manifestInfoEntries));
        }
コード例 #16
0
        public Chunker(FileStream modelStream)
        {
            ChunkerModel model = new ChunkerModel(modelStream);

            this.chunker = new ChunkerME(model);
        }
コード例 #17
0
 public Chunker(ChunkerModel model)
 {
     this.chunker = new ChunkerME(model);
 }
コード例 #18
0
 public NLPChunkerOp(ChunkerModel chunkerModel)
 {
     chunker = new ChunkerME(chunkerModel);
 }
コード例 #19
0
 private static ChunkerModel TrainModel(ChunkerFactory factory)
 {
     return(ChunkerME.Train("en", ChunkerMETest.CreateSampleStream(), TrainingParameters.DefaultParameters(), factory));
 }