/// <summary>
/// Trains a chunking parser model with the given parameters.
/// </summary>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training, or to cancel the
/// training operation. This argument can be a <c>null</c> value.
/// </param>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples. The stream is reset between the individual training passes.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">
/// The machine learnable parameters. The <c>build</c>, <c>tagger</c>, <c>chunker</c> and <c>check</c>
/// namespaces are read from it.
/// </param>
/// <returns>The trained <see cref="ParserModel" /> object.</returns>
/// <remarks>
/// When the <c>tagger</c> namespace does not define a beam size, a default of <c>10</c> is used for the
/// POS tagger training.
/// </remarks>
public static ParserModel Train(
    Monitor monitor,
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters) {

    var dict = BuildDictionary(samples, rules, parameters);

    samples.Reset();

    var manifestInfoEntries = new Dictionary<string, string>();

    // build
    var bes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dict);
    var buildReportMap = new Dictionary<string, string>();
    var buildTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("build"), buildReportMap, monitor);
    var buildModel = buildTrainer.Train(bes);

    MergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build");

    samples.Reset();

    // tag
    var posTaggerParams = parameters.GetNamespace("tagger");
    if (!posTaggerParams.Contains(Parameters.BeamSize)) {
        posTaggerParams.Set(Parameters.BeamSize, "10");
    }

    // Pass the object the default beam size was applied to. Fetching the namespace a second time
    // could hand the trainer a fresh instance that does not carry the default.
    var posModel = POSTaggerME.Train(
        languageCode,
        new PosSampleStream(samples),
        posTaggerParams,
        new POSTaggerFactory());

    samples.Reset();

    // chunk
    var chunkModel = ChunkerME.Train(
        languageCode,
        new ChunkSampleStream(samples),
        parameters.GetNamespace("chunker"),
        new ParserChunkerFactory());

    samples.Reset();

    // check
    var kes = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
    var checkReportMap = new Dictionary<string, string>();
    var checkTrainer = TrainerFactory.GetEventTrainer(parameters.GetNamespace("check"), checkReportMap, monitor);
    var checkModel = checkTrainer.Train(kes);

    MergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");

    return new ParserModel(
        languageCode,
        buildModel,
        checkModel,
        posModel,
        chunkModel,
        rules,
        manifestInfoEntries);
}
// Trains a chunker on the sample stream (70 iterations, cutoff 1) and stores it in the `chunker` member.
public void Setup() {
    var trainingParams = new TrainingParameters();
    trainingParams.Set(Parameters.Iterations, "70");
    trainingParams.Set(Parameters.Cutoff, "1");

    chunker = new ChunkerME(
        ChunkerME.Train("en", CreateSampleStream(), trainingParams, new ChunkerFactory()));
}
/// <summary>
/// Loads the chunker from <c>Resources\en-chunker.bin</c> the first time it is called;
/// subsequent calls return immediately.
/// </summary>
private void LoadChunker()
{
    if (alreadyLoadChunker)
    {
        return;
    }

    java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
    try
    {
        ChunkerModel chunkerModel = new ChunkerModel(modelInpStream);
        chunker = new ChunkerME(chunkerModel);
    }
    finally
    {
        // The model has been constructed (or construction failed); release the file handle either way.
        modelInpStream.close();
    }

    // Only mark as loaded once the chunker was created successfully.
    alreadyLoadChunker = true;
}
/// <summary>
/// Trains a chunker model from the training file at <paramref name="path"/>,
/// using 70 iterations and a cutoff of 1.
/// </summary>
/// <param name="path">Path of the training data file; it is opened read-only.</param>
/// <returns>The trained <see cref="ChunkerModel"/>.</returns>
public static ChunkerModel TrainModel(string path)
{
    // Dispose the file handle as soon as training has finished (or failed).
    using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        ChunkSampleStream stream = new ChunkSampleStream(new PlainTextByLineStream(fs));

        TrainingParameters trainParams = new TrainingParameters();
        trainParams.Set(Parameters.Iterations, "70");
        trainParams.Set(Parameters.Cutoff, "1");

        return ChunkerME.Train(TRAINING_LANGUAGE, stream, trainParams, new ChunkerFactory());
    }
}
/// <summary>
/// Command line entry point: loads the chunker model named by the single argument, reads POS-tagged
/// sentences line by line and prints each one with its chunks.
/// </summary>
/// <param name="args">The command line arguments; exactly one (the model file) is expected.</param>
public override void run(string[] args)
{
    // Anything other than exactly one argument only prints the help text.
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
        return;
    }

    ChunkerModel model = (new ChunkerModelLoader()).load(new File(args[0]));

    ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);

    // NOTE(review): `Console.OpenStandardInput` (no call parentheses) and `System.err` look like
    // leftovers of a Java-to-C# conversion - confirm they resolve in this project.
    ObjectStream<string> lineStream = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    perfMon.start();

    try
    {
        for (string line = lineStream.read(); line != null; line = lineStream.read())
        {
            POSSample posSample;
            try
            {
                posSample = POSSample.parse(line);
            }
            catch (InvalidFormatException)
            {
                // Report the offending line and carry on with the next one.
                Console.Error.WriteLine("Invalid format:");
                Console.Error.WriteLine(line);
                continue;
            }

            string[] chunks = chunker.chunk(posSample.Sentence, posSample.Tags);

            ChunkSample chunkSample = new ChunkSample(posSample.Sentence, posSample.Tags, chunks);
            Console.WriteLine(chunkSample.nicePrint());

            perfMon.incrementCounter();
        }
    }
    catch (IOException e)
    {
        CmdLineUtil.handleStdinIoError(e);
    }

    perfMon.stopAndPrintFinalResult();
}
// Loads the OpenNLP English chunker model and checks the sequences returned by TopKSequences
// for a minimum score of -5.55.
public void TestTokenProbMinScoreOpenNLP() {
    var openNlpModel = new ChunkerModel(Tests.OpenFile("opennlp/models/en-chunker.bin"));
    Assert.NotNull(openNlpModel);

    var ckr = new ChunkerME(openNlpModel);
    Assert.NotNull(ckr);

    // NOTE(review): the predictions are taken from the `chunker` member, not from `ckr` created above,
    // so the loaded OpenNLP model is never used for prediction - confirm whether that is intended.
    var sequences = chunker.TopKSequences(toks1, tags1, -5.55);

    Assert.AreEqual(4, sequences.Length);

    var best = sequences[0];
    Assert.AreEqual(expect1.Length, best.Probabilities.Count);
    Assert.True(expect1.SequenceEqual(best.Outcomes));

    var runnerUp = sequences[1];
    Assert.False(expect1.SequenceEqual(runnerUp.Outcomes));
}
/// <summary>
/// Reads the English POS and chunker models from the configured model folder and creates
/// the <c>posTagger</c> and <c>chunker</c> instances from them.
/// </summary>
private void LoadModels()
{
    // Part-of-speech model.
    var posModelPath = Path.Combine(configuration.Resources, configuration.NlpModels, "en-pos-maxent.bin");
    POSModel loadedPosModel;
    using (var posStream = new FileStream(posModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        loadedPosModel = new POSModel(posStream);
    }

    // Chunker model.
    var chunkerModelPath = Path.Combine(configuration.Resources, configuration.NlpModels, "en-chunker.bin");
    ChunkerModel loadedChunkerModel;
    using (var chunkerStream = new FileStream(chunkerModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        loadedChunkerModel = new ChunkerModel(chunkerStream);
    }

    // The tools are only created once both model files were read successfully.
    posTagger = new POSTaggerME(loadedPosModel);
    chunker = new ChunkerME(loadedChunkerModel);
}
/// <summary>
/// Reads the English POS and chunker models from the <c>1.5</c> sub-folder of
/// <paramref name="resourcesFolder"/> and creates the <c>posTagger</c> and <c>chunker</c> instances.
/// </summary>
/// <param name="resourcesFolder">Folder that contains the <c>1.5</c> model directory.</param>
private void LoadModels(string resourcesFolder)
{
    // Part-of-speech model.
    var posModelPath = Path.Combine(resourcesFolder, @"1.5/en-pos-maxent.bin");
    POSModel loadedPosModel;
    using (var posStream = new FileStream(posModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        loadedPosModel = new POSModel(posStream);
    }

    // Chunker model.
    var chunkerModelPath = Path.Combine(resourcesFolder, @"1.5/en-chunker.bin");
    ChunkerModel loadedChunkerModel;
    using (var chunkerStream = new FileStream(chunkerModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        loadedChunkerModel = new ChunkerModel(chunkerStream);
    }

    // The tools are only created once both model files were read successfully.
    posTagger = new POSTaggerME(loadedPosModel);
    chunker = new ChunkerME(loadedChunkerModel);
}
/// <summary>
/// Creates the NLP tool chain: sentence detector, tokenizer, POS tagger, chunker and parser are loaded
/// from the model files in the <c>Resources</c> folder, followed by the stop word list.
/// </summary>
/// <remarks>
/// Every model stream is closed as soon as its model has been constructed, also when construction fails.
/// </remarks>
public NLP()
{
    java.io.FileInputStream modelInpStream;

    //loading sentence detector model
    modelInpStream = new java.io.FileInputStream("Resources\\en-sent.bin");
    try
    {
        SentenceModel sentenceModel = new SentenceModel(modelInpStream);
        sentenceDetector = new SentenceDetectorME(sentenceModel);
    }
    finally
    {
        modelInpStream.close();
    }

    //loading tokenizer model
    modelInpStream = new java.io.FileInputStream("Resources\\en-token.bin");
    try
    {
        TokenizerModel tokenizerModel = new TokenizerModel(modelInpStream);
        tokenizer = new TokenizerME(tokenizerModel);
    }
    finally
    {
        modelInpStream.close();
    }

    //loading POS tagger model
    modelInpStream = new java.io.FileInputStream("Resources\\en-pos-maxent.bin");
    try
    {
        POSModel posModel = new POSModel(modelInpStream);
        tagger = new POSTaggerME(posModel);
    }
    finally
    {
        modelInpStream.close();
    }

    //loading chunker model
    modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
    try
    {
        ChunkerModel chunkerModel = new ChunkerModel(modelInpStream);
        chunker = new ChunkerME(chunkerModel);
    }
    finally
    {
        modelInpStream.close();
    }

    //loading parser model
    modelInpStream = new java.io.FileInputStream("Resources\\en-parser-chunking.bin");
    try
    {
        ParserModel parserModel = new ParserModel(modelInpStream);
        parser = ParserFactory.create(parserModel);
    }
    finally
    {
        modelInpStream.close();
    }

    //loading stop words list
    using (StreamReader sr = new StreamReader("Resources\\english.stop.txt"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // Each stop word is stored both in stemmed and in original form.
            stopwords.Add(Stemming(line));
            stopwords.Add(line);
        }
    }
}
// Trains a model with the dummy factory, checks the factory and the objects it creates,
// round-trips the model through serialization, repeats the checks on the deserialized model
// and finally runs the chunker once on a sample sentence.
public void TestDummyFactory() {
    var model = TrainModel(new DummyChunkerFactory());

    Assert.IsInstanceOf<DummyChunkerFactory>(model.Factory);
    Assert.IsInstanceOf<DummyChunkerFactory.DummyContextGenerator>(model.Factory.GetContextGenerator());
    Assert.IsInstanceOf<DummyChunkerFactory.DummySequenceValidator>(model.Factory.GetSequenceValidator());

    using (var stream = new MemoryStream()) {
        model.Serialize(new UnclosableStream(stream));

        stream.Seek(0, SeekOrigin.Begin);

        var fromSerialized = new ChunkerModel(stream);

        // Check the factory of the deserialized model; asserting on model.Factory again
        // would not exercise the round trip.
        Assert.IsInstanceOf<DummyChunkerFactory>(fromSerialized.Factory);
        Assert.IsInstanceOf<DummyChunkerFactory.DummyContextGenerator>(
            fromSerialized.Factory.GetContextGenerator());
        Assert.IsInstanceOf<DummyChunkerFactory.DummySequenceValidator>(
            fromSerialized.Factory.GetSequenceValidator());
    }

    var chunker = new ChunkerME(model);

    String[] toks1 = {
        "Rockwell", "said", "the", "agreement", "calls", "for",
        "it", "to", "supply", "200", "additional", "so-called", "shipsets",
        "for", "the", "planes", "."
    };

    String[] tags1 = {
        "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
        "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "."
    };

    // No assertion on the result: the call only has to complete.
    chunker.Chunk(toks1, tags1);
}
// Trains one chunker with the SharpNL implementation and one with the Java OpenNLP implementation,
// both with 70 iterations and a cutoff of 1, and stores them in sChunker / jChunker.
public void Setup() {
    // SharpNL model
    var sharpParams = new TrainingParameters();
    sharpParams.Set(Parameters.Iterations, "70");
    sharpParams.Set(Parameters.Cutoff, "1");

    var sharpModel = ChunkerME.Train(
        "en",
        ChunkerMETest.CreateSampleStream(),
        sharpParams,
        new ChunkerFactory());

    // Java OpenNLP model
    var javaParams = new opennlp.tools.util.TrainingParameters();
    javaParams.put("Iterations", "70");
    javaParams.put("Cutoff", "1");

    var javaModel = opennlp.tools.chunker.ChunkerME.train(
        "en",
        JavaSampleStream(),
        javaParams,
        new opennlp.tools.chunker.ChunkerFactory());

    Assert.NotNull(sharpModel);
    Assert.NotNull(javaModel);

    sChunker = new ChunkerME(sharpModel);
    jChunker = new opennlp.tools.chunker.ChunkerME(javaModel);
}
// Constructors and finalizers:

/// <summary>
/// Initializes the repository: derives the folder and path members, then loads the WordNet engine,
/// the OpenNLP sentence detector, tokenizer, name finder, POS tagger, chunker and (when
/// <c>_loadParser</c> is set) parser, the Stanford parser and the Porter stemmer.
/// Progress messages are written to the console.
/// </summary>
/// <remarks>
/// Each OpenNLP model stream is closed in a <c>finally</c> block so that the file handle is released
/// even when constructing the model throws.
/// </remarks>
private Repository()
{
    _assemblyName = Regex.Match(_assemblyFullName, "^(.*?),.*$").Result("$1");

    _rootDrive = ("/usr/project/xtmp/dp195/Poetix18/").Replace(@"\", Dsc);
    _nlpFolder = ("rhetorica/nlp/").Replace(@"\", Dsc);

    _openNlpModelsFolder = ("OpenNLP/models/").Replace(@"\", Dsc);
    _openNlpModelsPath = RootDrive + _nlpFolder + _openNlpModelsFolder;

    _wordNetFolder = ("WordNet_3/").Replace(@"\", Dsc);
    _wordNetPath = RootDrive + _nlpFolder + _wordNetFolder;

    _grammarFolder = ("StanfordParser/grammar/").Replace(@"\", Dsc);
    _grammarPath = RootDrive + _nlpFolder + _grammarFolder;

    _dataFolder = ("data/").Replace(@"\", Dsc);
    _nlpTextsPath = RootDrive + _dataFolder;

    string[] localTextDirectoryParts = {
        CurrentAssemblyDirectoryPath,
        "..", "..", "..", "data"
        //"..", "..", "text"
    };
    _localTextPath = Path.Combine(localTextDirectoryParts) + "/"; // For development use

    // WordNet engine:
    Console.Write("Loading WordNet engine.... ");
    _wordNetEngine = new WordNetEngine(WordNetPath, true);
    Console.WriteLine("Done.");

    // OpenNLP sentence detector:
    Console.Write("Loading OpenNLP sentence detector.... ");
    java.io.FileInputStream modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-sent.bin");
    try
    {
        _sentenceModel = new SentenceModel(modelInputStream);
    }
    finally
    {
        modelInputStream.close();
    }
    _sentenceDetector = new SentenceDetectorME(_sentenceModel);
    Console.WriteLine("Done.");

    // OpenNLP tokenizer:
    Console.Write("Loading OpenNLP tokenizer.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-token.bin");
    try
    {
        _tokenizerModel = new opennlp.tools.tokenize.TokenizerModel(modelInputStream);
    }
    finally
    {
        modelInputStream.close();
    }
    _tokenizer = new opennlp.tools.tokenize.TokenizerME(_tokenizerModel);
    Console.WriteLine("Done.");

    // OpenNLP name finder:
    Console.Write("Loading OpenNLP name finder.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-ner-person.bin");
    try
    {
        _tokenNameFinderModel = new TokenNameFinderModel(modelInputStream);
    }
    finally
    {
        modelInputStream.close();
    }
    _nameFinder = new NameFinderME(_tokenNameFinderModel);
    Console.WriteLine("Done.");

    // OpenNLP POS tagger:
    Console.Write("Loading OpenNLP POS tagger.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-pos-maxent.bin");
    try
    {
        _posModel = new POSModel(modelInputStream);
    }
    finally
    {
        modelInputStream.close();
    }
    _tagger = new POSTaggerME(_posModel);
    Console.WriteLine("Done.");

    // OpenNLP chunker:
    Console.Write("Loading OpenNLP chunker.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-chunker.bin");
    try
    {
        _chunkerModel = new ChunkerModel(modelInputStream);
    }
    finally
    {
        modelInputStream.close();
    }
    _chunker = new ChunkerME(_chunkerModel);
    Console.WriteLine("Done.");

    // OpenNLP parser:
    if (_loadParser)
    {
        Console.Write("Loading OpenNLP parser.... ");
        modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-parser-chunking.bin");
        try
        {
            _parserModel = new ParserModel(modelInputStream);
        }
        finally
        {
            modelInputStream.close();
        }
        _parser = ParserFactory.create(_parserModel);
        Console.WriteLine("Done.");
    }

    // Stanford parser:
    //_stanfordParser = new LexicalizedParser(GrammarPath + "englishPCFG.ser.gz"); // Obsolete method
    _stanfordParser = LexicalizedParser.loadModel(GrammarPath + "englishPCFG.ser.gz");

    // Porter stemmer:
    _porterStemmer = new PorterStemmer();
}
/// <summary>
/// Tokenizes, POS-tags and chunks every sentence and returns, per sentence, one
/// <see cref="ChunkItem"/> for each chunk label together with the token and tag at the same index.
/// </summary>
/// <param name="Sentences">The sentences to process.</param>
/// <returns>
/// A sequence with one list of chunk items per input sentence. The sequence is produced by
/// <c>Select</c>, so the sentences are processed when it is enumerated.
/// </returns>
public static IEnumerable<IEnumerable<ChunkItem>> GetChunks(IEnumerable<string> Sentences)
{
    // All three models are read from the embedded resources on every call.
    var posModelStream = new java.io.ByteArrayInputStream(Resource.en_pos_maxent);
    var pos = new POSTaggerME(new POSModel(posModelStream));

    var modelStream = new java.io.ByteArrayInputStream(Resource.en_token);
    var tokenizer = new TokenizerME(new TokenizerModel(modelStream));

    var chunkerModelStream = new java.io.ByteArrayInputStream(Resource.en_chunker);
    var chunker = new ChunkerME(new ChunkerModel(chunkerModelStream));

    return Sentences.Select(sentence =>
    {
        var tokens = tokenizer.tokenize(sentence);
        var tags = pos.tag(tokens);
        var chunks = chunker.chunk(tokens, tags);

        // One item per chunk label, paired with the token and tag at the same position.
        return Enumerable.Range(0, chunks.Length)
            .Select(i => new ChunkItem { token = tokens[i], tag = tags[i], chunk = chunks[i] })
            .ToList();
    });
}
/// <summary>
/// Creates a chunker from a model trained on the file found at the current directory
/// followed by <c>TRAINING_FILE_PATH</c>.
/// </summary>
public Chunker()
{
    var trainingFile = Environment.CurrentDirectory + TRAINING_FILE_PATH;
    var trainedModel = TrainModel(trainingFile);

    this.chunker = new ChunkerME(trainedModel);
}
/// <summary>
/// Trains a tree-insert parser model with the given parameters.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="samples">The data samples. The stream is reset between the individual training passes.</param>
/// <param name="rules">The head rules.</param>
/// <param name="parameters">
/// The machine learnable parameters. The <c>tagger</c>, <c>chunker</c>, <c>build</c>, <c>check</c>
/// and <c>attach</c> namespaces are read from it.
/// </param>
/// <param name="monitor">
/// An evaluation monitor that can be used to listen to the messages during the training, or to cancel the
/// training operation. This argument can be a <c>null</c> value.
/// </param>
/// <returns>The trained <see cref="ParserModel"/> object.</returns>
/// <exception cref="System.NotSupportedException">Trainer type is not supported.</exception>
public static ParserModel Train(
    string languageCode,
    IObjectStream<Parse> samples,
    AbstractHeadRules rules,
    TrainingParameters parameters,
    Monitor monitor) {

    var manifest = new Dictionary<string, string>();

    System.Diagnostics.Debug.Print("Building dictionary");
    var dictionary = BuildDictionary(samples, rules, parameters);
    samples.Reset();

    // Part-of-speech tagger.
    var taggerSamples = new PosSampleStream(samples);
    var taggerParams = parameters.GetNamespace("tagger");
    var posModel = POSTaggerME.Train(languageCode, taggerSamples, taggerParams, new POSTaggerFactory(), monitor);
    samples.Reset();

    // Chunker.
    var chunkerSamples = new ChunkSampleStream(samples);
    var chunkerParams = parameters.GetNamespace("chunker");
    var chunkModel = ChunkerME.Train(languageCode, chunkerSamples, chunkerParams, new ChunkerFactory(), monitor);
    samples.Reset();

    // Build model.
    System.Diagnostics.Debug.Print("Training builder");
    var buildEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Build, dictionary);
    var buildReport = new Dictionary<string, string>();
    var buildModel = TrainerFactory
        .GetEventTrainer(parameters.GetNamespace("build"), buildReport, monitor)
        .Train(buildEvents);
    Chunking.Parser.MergeReportIntoManifest(manifest, buildReport, "build");
    samples.Reset();

    // Check model.
    System.Diagnostics.Debug.Print("Training checker");
    var checkEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Check);
    var checkReport = new Dictionary<string, string>();
    var checkModel = TrainerFactory
        .GetEventTrainer(parameters.GetNamespace("check"), checkReport, monitor)
        .Train(checkEvents);
    Chunking.Parser.MergeReportIntoManifest(manifest, checkReport, "check");
    samples.Reset();

    // Attach model.
    System.Diagnostics.Debug.Print("Training attacher");
    var attachEvents = new ParserEventStream(samples, rules, ParserEventTypeEnum.Attach);
    var attachReport = new Dictionary<string, string>();
    var attachModel = TrainerFactory
        .GetEventTrainer(parameters.GetNamespace("attach"), attachReport, monitor)
        .Train(attachEvents);
    Chunking.Parser.MergeReportIntoManifest(manifest, attachReport, "attach");

    return new ParserModel(
        languageCode,
        buildModel,
        checkModel,
        attachModel,
        posModel,
        chunkModel,
        rules,
        ParserType.TreeInsert,
        manifest);
}
/// <summary>
/// Creates a chunker from a model read from <paramref name="modelStream"/>.
/// </summary>
/// <param name="modelStream">The stream the chunker model is read from; it is not closed here.</param>
public Chunker(FileStream modelStream)
{
    this.chunker = new ChunkerME(new ChunkerModel(modelStream));
}
/// <summary>
/// Creates a chunker that uses the given, already loaded model.
/// </summary>
/// <param name="model">The chunker model.</param>
public Chunker(ChunkerModel model)
{
    var engine = new ChunkerME(model);
    this.chunker = engine;
}
/// <summary>
/// Creates the chunker operation on top of the given model.
/// </summary>
/// <param name="chunkerModel">The chunker model used to create the underlying <c>ChunkerME</c>.</param>
public NLPChunkerOp(ChunkerModel chunkerModel)
{
    var engine = new ChunkerME(chunkerModel);
    this.chunker = engine;
}
/// <summary>
/// Trains an English chunker model on the shared test sample stream with the default
/// training parameters and the given factory.
/// </summary>
/// <param name="factory">The chunker factory to train the model with.</param>
/// <returns>The trained <see cref="ChunkerModel"/>.</returns>
private static ChunkerModel TrainModel(ChunkerFactory factory) {
    var samples = ChunkerMETest.CreateSampleStream();
    var defaults = TrainingParameters.DefaultParameters();

    return ChunkerME.Train("en", samples, defaults, factory);
}