/// <summary>
/// Loads the chunker model and initializes the <c>chunker</c> field; on any
/// read failure the chunker is left <c>null</c> (best-effort load).
/// </summary>
/// <remarks>
/// NOTE(review): Java-interop (IKVM) stream handling. <c>ChunkerModel</c> passed to
/// <c>FileInputStream</c> is a member (presumably the model file path) that shadows
/// the type of the same name — confirm. Callers are expected to null-check
/// <c>chunker</c> before use — TODO confirm.
/// </remarks>
private void InitializeChunker()
{
    InputStream modelIn = null;
    try
    {
        modelIn = new FileInputStream(ChunkerModel);
        ChunkerModel model = new ChunkerModel(modelIn);
        chunker = new ChunkerME(model);
    }
    catch (IOException ex)
    {
        // Model could not be read; leave the chunker unavailable.
        chunker = null;
    }
    finally
    {
        if (modelIn != null)
        {
            try
            {
                modelIn.close();
            }
            catch (IOException ex)
            {
                // Ignore close failures; the stream is no longer needed.
            }
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models
/// and head rules, without an attach model (it is passed as <c>null</c> to the primary constructor).
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="type">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
public ParserModel(string languageCode, IMaxentModel buildModel, IMaxentModel checkModel, POSModel parserTagger,
    ChunkerModel chunkerTagger, AbstractHeadRules headRules, ParserType type, Dictionary<string, string> manifestInfoEntries)
    : this(languageCode, buildModel, checkModel, null, parserTagger, chunkerTagger, headRules, type, manifestInfoEntries)
{
}
/// <summary>
/// Initializes the current instance with the specified model and the specified beam size.
/// </summary>
/// <param name="model">The model for this chunker.</param>
/// <param name="beamSize">The size of the beam that should be used when decoding sequences.</param>
/// <param name="sequenceValidator">The <see cref="ISequenceValidator{String}"/> that determines whether an outcome is valid for the preceding sequence. This can be used to implement constraints on what sequences are valid.</param>
/// <param name="contextGenerator">The context generator.</param>
internal ChunkerME(ChunkerModel model, int beamSize, ISequenceValidator<string> sequenceValidator, IChunkerContextGenerator contextGenerator)
{
    // This method is marked as deprecated in the OpenNLP, but it is required in the Parser,
    // I could change the cg in the factory, but its not ideal in this situation (i think) :P
    this.sequenceValidator = sequenceValidator;
    this.contextGenerator = contextGenerator;
    // Prefer the model's own sequence model; fall back to a beam search over the maxent model.
    this.model = model.ChunkerSequenceModel ?? new BeamSearch(beamSize, model.MaxentModel);
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and
/// head rules, without manifest information entries (passed as <c>null</c> to the primary constructor).
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
public ParserModel(string languageCode, IMaxentModel buildModel, IMaxentModel checkModel, IMaxentModel attachModel,
    POSModel parserTagger, ChunkerModel chunkerTagger, AbstractHeadRules headRules, ParserType modelType)
    : this(languageCode, buildModel, checkModel, attachModel, parserTagger, chunkerTagger, headRules, modelType, null)
{
}
/// <summary>
/// Registers all serializers for their artifact file name extensions. Override this method to register custom file extensions.
/// </summary>
/// <seealso href="https://msdn.microsoft.com/en-us/library/ms182331.aspx" />
/// <remarks>The subclasses should invoke the <see cref="ArtifactProvider.RegisterArtifactType" /> to register
/// the proper serialization/deserialization methods for an new extension.
/// Warning: This method is called in constructor of the base class!! Be aware that this method is ONLY designed to register serializers.</remarks>
protected override void CreateArtifactSerializers()
{
    base.CreateArtifactSerializers();
    // note from OpenNLP (for future adaptations)
    // In 1.6.x the headrules artifact is serialized with the new API
    // which uses the Serializable interface
    // This change is not backward compatible with the 1.5.x models.
    // In order to load 1.5.x model the English headrules serializer must be
    // put on the serializer map.
    RegisterArtifactType(".headrules",
        (artifact, stream) => HeadRulesManager.Serialize(artifact as AbstractHeadRules, stream),
        stream => HeadRulesManager.Deserialize(Language, stream));
    RegisterArtifactType(".postagger", (artifact, stream) => {
        var model = artifact as POSModel;
        if (model == null)
        {
            // Artifact of the wrong type was registered under this extension.
            throw new InvalidOperationException();
        }
        model.Serialize(stream);
    }, stream => {
        var model = new POSModel(stream);
        // The 1.6.x models write the non-default beam size into the model itself.
        // In 1.5.x the parser configured the beam size when the model was loaded,
        // this is not possible anymore with the new APIs
        if (model.Version.Major == 1 && model.Version.Minor == 5 && !model.Manifest.Contains(Parameters.BeamSize))
        {
            // Re-create the 1.5.x model with the parser's historical beam size of 10.
            return(new POSModel(model.Language, model.MaxentModel, 10, null, model.Factory));
        }
        return(model);
    });
    RegisterArtifactType(".chunker", (artifact, stream) => {
        var model = artifact as ChunkerModel;
        if (model == null)
        {
            throw new InvalidOperationException();
        }
        model.Serialize(stream);
    }, stream => {
        var model = new ChunkerModel(stream);
        if (model.Version.Major == 1 && model.Version.Minor == 5)
        {
            // 1.5.x chunker models need the parser-specific chunker factory.
            return(new ChunkerModel(model.Language, model.MaxentModel, new ParserChunkerFactory()));
        }
        return(model);
    });
}
/// <summary>
/// Lazily loads the English chunker model from the resources folder and
/// initializes the <c>chunker</c> field. Subsequent calls are no-ops.
/// </summary>
private void LoadChunker()
{
    if (alreadyLoadChunker)
    {
        return;
    }
    java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
    try
    {
        ChunkerModel chunkerModel = new ChunkerModel(modelInpStream);
        chunker = new ChunkerME(chunkerModel);
    }
    finally
    {
        // Fix: the original never closed the stream, leaking the file handle.
        modelInpStream.close();
    }
    alreadyLoadChunker = true;
}
/// <summary>
/// Returns the chunker model registered under <paramref name="modelName"/>,
/// loading it through <paramref name="loader"/> and caching it on first use.
/// </summary>
/// <param name="modelName">The resource name of the model.</param>
/// <param name="loader">The loader used to open the model resource.</param>
/// <returns>The cached or freshly loaded <see cref="ChunkerModel"/>.</returns>
public static ChunkerModel GetChunkerModel(string modelName, IResourceLoader loader)
{
    // Fast path: serve a previously cached (non-null) model.
    if (chunkerModels.TryGetValue(modelName, out ChunkerModel cached) && cached != null)
    {
        return cached;
    }

    ChunkerModel loaded;
    using (Stream resource = loader.OpenResource(modelName))
    {
        loaded = new ChunkerModel(new ikvm.io.InputStreamWrapper(resource));
    }
    chunkerModels[modelName] = loaded;
    return loaded;
}
/// <summary>
/// CLI entry point: loads a chunker model from <c>args[0]</c>, reads POS-tagged
/// sentences line by line from stdin, chunks each one and prints the result.
/// </summary>
/// <param name="args">Expects exactly one element: the model file path; otherwise the help text is printed.</param>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
    }
    else
    {
        ChunkerModel model = (new ChunkerModelLoader()).load(new File(args[0]));
        ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);
        // NOTE(review): Console.OpenStandardInput appears to be missing "()" —
        // confirm this compiles as intended (it may be a porting artifact).
        ObjectStream<string> lineStream = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));
        PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
        perfMon.start();
        try
        {
            string line;
            while ((line = lineStream.read()) != null)
            {
                POSSample posSample;
                try
                {
                    posSample = POSSample.parse(line);
                }
                catch (InvalidFormatException)
                {
                    // Skip malformed input lines instead of aborting the whole run.
                    Console.Error.WriteLine("Invalid format:");
                    Console.Error.WriteLine(line);
                    continue;
                }
                string[] chunks = chunker.chunk(posSample.Sentence, posSample.Tags);
                Console.WriteLine((new ChunkSample(posSample.Sentence, posSample.Tags, chunks)).nicePrint());
                perfMon.incrementCounter();
            }
        }
        catch (IOException e)
        {
            CmdLineUtil.handleStdinIoError(e);
        }
        perfMon.stopAndPrintFinalResult();
    }
}
/// <summary>
/// Builds the continuation-to-start outcome map used when validating chunk tag
/// sequences: each CONT-prefixed outcome is mapped to its START-prefixed form.
/// </summary>
/// <param name="model">The chunker model whose outcomes are inspected.</param>
public ParserChunkerSequenceValidator(ChunkerModel model)
{
    continueStartMap = new Dictionary<string, string>();
    foreach (var outcome in model.ChunkerSequenceModel.GetOutcomes())
    {
        if (!outcome.StartsWith(AbstractBottomUpParser.CONT))
        {
            continue;
        }
        // Strip the CONT prefix and re-prefix with START.
        var label = outcome.Substring(AbstractBottomUpParser.CONT.Length);
        continueStartMap.Add(outcome, AbstractBottomUpParser.START + label);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="System.ArgumentException">
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
/// or
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary<string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries)
{
    if (modelType == ParserType.Chunking)
    {
        // A chunking parser carries no attach model.
        if (attachModel != null)
        {
            throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
        }
        Manifest[ParserTypeParameter] = "CHUNKING";
    }
    else if (modelType == ParserType.TreeInsert)
    {
        // A tree-insert parser requires an attach model.
        if (attachModel == null)
        {
            throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
        }
        Manifest[ParserTypeParameter] = "TREEINSERT";
        artifactMap[EntryAttachModel] = attachModel;
    }
    else
    {
        throw new ArgumentOutOfRangeException(nameof(modelType), "Unknown model type");
    }

    artifactMap[EntryBuildModel] = buildModel;
    artifactMap[EntryCheckModel] = checkModel;
    artifactMap[EntryParserTaggerModel] = parserTagger;
    artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
    artifactMap[EntryHeadRules] = headRules;
    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model; must be <c>null</c> for chunking and non-null for tree-insert parsers.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="ArgumentException">When the attach model does not match the model type, or the model type is unknown.</exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary<string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries)
{
    switch (modelType)
    {
        case ParserType.Chunking:
            if (attachModel != null)
            {
                // nameof keeps the parameter name refactor-safe; same runtime string.
                throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
            }
            Manifest[PARSER_TYPE] = "CHUNKING";
            break;
        case ParserType.TreeInsert:
            if (attachModel == null)
            {
                throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
            }
            Manifest[PARSER_TYPE] = "TREEINSERT";
            artifactMap[ATTACH_MODEL_ENTRY_NAME] = attachModel;
            break;
        default:
            // Fix: message previously read "Unknown mode type."
            throw new ArgumentException(@"Unknown model type.", nameof(modelType));
    }
    artifactMap[BUILD_MODEL_ENTRY_NAME] = buildModel;
    artifactMap[CHECK_MODEL_ENTRY_NAME] = checkModel;
    artifactMap[PARSER_TAGGER_MODEL_ENTRY_NAME] = parserTagger;
    artifactMap[CHUNKER_TAGGER_MODEL_ENTRY_NAME] = chunkerTagger;
    artifactMap[HEAD_RULES_MODEL_ENTRY_NAME] = headRules;
    CheckArtifactMap();
}
/// <summary>
/// Verifies TopKSequences with a minimum score cutoff against the OpenNLP
/// English chunker model: 4 predictions, with the best one matching expect1.
/// </summary>
public void TestTokenProbMinScoreOpenNLP()
{
    var model = new ChunkerModel(Tests.OpenFile("opennlp/models/en-chunker.bin"));
    Assert.NotNull(model);

    var ckr = new ChunkerME(model);
    Assert.NotNull(ckr);

    // Fix: the original built "ckr" but then queried an unrelated "chunker"
    // member — exercise the instance created from the loaded model.
    var preds = ckr.TopKSequences(toks1, tags1, -5.55);

    Assert.AreEqual(4, preds.Length);
    Assert.AreEqual(expect1.Length, preds[0].Probabilities.Count);
    Assert.True(expect1.SequenceEqual(preds[0].Outcomes));
    Assert.False(expect1.SequenceEqual(preds[1].Outcomes));
}
/// <summary>
/// A model trained with the default factory must expose the default context
/// generator and sequence validator, both before and after a serialization
/// round-trip.
/// </summary>
public void TestDefaultFactory()
{
    var trained = TrainModel(new ChunkerFactory());

    Assert.IsInstanceOf<DefaultChunkerContextGenerator>(trained.Factory.GetContextGenerator());
    Assert.IsInstanceOf<DefaultChunkerSequenceValidator>(trained.Factory.GetSequenceValidator());

    using (var buffer = new MemoryStream())
    {
        // Round-trip through an in-memory stream; UnclosableStream keeps the
        // buffer readable after Serialize.
        trained.Serialize(new UnclosableStream(buffer));
        buffer.Seek(0, SeekOrigin.Begin);

        var roundTripped = new ChunkerModel(buffer);
        Assert.IsInstanceOf<DefaultChunkerContextGenerator>(roundTripped.Factory.GetContextGenerator());
        Assert.IsInstanceOf<DefaultChunkerSequenceValidator>(roundTripped.Factory.GetSequenceValidator());
    }
}
/// <summary>
/// Loads the POS tagger and chunker models from the configured resources
/// folder and initializes the corresponding components.
/// </summary>
private void LoadModels()
{
    var modelDirectory = Path.Combine(configuration.Resources, configuration.NlpModels);

    POSModel posModel;
    using (var file = File.Open(Path.Combine(modelDirectory, "en-pos-maxent.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        posModel = new POSModel(file);
    }

    ChunkerModel chunkerModel;
    using (var file = File.Open(Path.Combine(modelDirectory, "en-chunker.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        chunkerModel = new ChunkerModel(file);
    }

    posTagger = new POSTaggerME(posModel);
    chunker = new ChunkerME(chunkerModel);
}
/// <summary>
/// Loads the 1.5-format POS tagger and chunker models from the given
/// resources folder and initializes the corresponding components.
/// </summary>
/// <param name="resourcesFolder">Root folder containing the "1.5" model subfolder.</param>
private void LoadModels(string resourcesFolder)
{
    POSModel posModel;
    using (var file = File.Open(Path.Combine(resourcesFolder, @"1.5/en-pos-maxent.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        posModel = new POSModel(file);
    }

    ChunkerModel chunkerModel;
    using (var file = File.Open(Path.Combine(resourcesFolder, @"1.5/en-chunker.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        chunkerModel = new ChunkerModel(file);
    }

    posTagger = new POSTaggerME(posModel);
    chunker = new ChunkerME(chunkerModel);
}
/// <summary>
/// Checks that the default chunker factory yields the default context
/// generator and sequence validator, and that this survives serialization.
/// </summary>
public void TestDefaultFactory()
{
    var model = TrainModel(new ChunkerFactory());
    var factory = model.Factory;
    Assert.IsInstanceOf<DefaultChunkerContextGenerator>(factory.GetContextGenerator());
    Assert.IsInstanceOf<DefaultChunkerSequenceValidator>(factory.GetSequenceValidator());

    using (var stream = new MemoryStream())
    {
        model.Serialize(new UnclosableStream(stream));
        stream.Seek(0, SeekOrigin.Begin);

        // Deserialize and verify the factory defaults are preserved.
        var deserializedFactory = new ChunkerModel(stream).Factory;
        Assert.IsInstanceOf<DefaultChunkerContextGenerator>(deserializedFactory.GetContextGenerator());
        Assert.IsInstanceOf<DefaultChunkerSequenceValidator>(deserializedFactory.GetSequenceValidator());
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="System.ArgumentException">
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
/// or
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary<string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries)
{
    switch (modelType)
    {
        case ParserType.Chunking:
            // Consistency: use nameof (as the sibling overload does) instead of
            // the string literal "attachModel"; runtime behavior is identical.
            if (attachModel != null)
            {
                throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
            }
            Manifest[ParserTypeParameter] = "CHUNKING";
            break;
        case ParserType.TreeInsert:
            if (attachModel == null)
            {
                throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
            }
            Manifest[ParserTypeParameter] = "TREEINSERT";
            artifactMap[EntryAttachModel] = attachModel;
            break;
        default:
            throw new ArgumentOutOfRangeException(nameof(modelType), "Unknown model type");
    }
    artifactMap[EntryBuildModel] = buildModel;
    artifactMap[EntryCheckModel] = checkModel;
    artifactMap[EntryParserTaggerModel] = parserTagger;
    artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
    artifactMap[EntryHeadRules] = headRules;
    CheckArtifactMap();
}
/// <summary>
/// Loads every OpenNLP model used by this pipeline (sentence detector,
/// tokenizer, POS tagger, chunker, parser) and the stop-word list.
/// </summary>
/// <remarks>
/// Fix: each model stream is now closed after the model is read, and the
/// stop-word reader is disposed — the original leaked all of these handles.
/// </remarks>
public NLP()
{
    //loading sentence detector model
    java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-sent.bin");
    SentenceModel sentenceModel = new SentenceModel(modelInpStream);
    sentenceDetector = new SentenceDetectorME(sentenceModel);
    modelInpStream.close();

    //loading tokenizer model
    modelInpStream = new java.io.FileInputStream("Resources\\en-token.bin");
    TokenizerModel tokenizerModel = new TokenizerModel(modelInpStream);
    tokenizer = new TokenizerME(tokenizerModel);
    modelInpStream.close();

    // POS tagger model
    modelInpStream = new java.io.FileInputStream("Resources\\en-pos-maxent.bin");
    POSModel posModel = new POSModel(modelInpStream);
    tagger = new POSTaggerME(posModel);
    modelInpStream.close();

    // chunker model
    modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
    ChunkerModel chunkerModel = new ChunkerModel(modelInpStream);
    chunker = new ChunkerME(chunkerModel);
    modelInpStream.close();

    // parser model
    modelInpStream = new java.io.FileInputStream("Resources\\en-parser-chunking.bin");
    ParserModel parserModel = new ParserModel(modelInpStream);
    parser = ParserFactory.create(parserModel);
    modelInpStream.close();

    //loading stop words list
    using (StreamReader sr = new StreamReader("Resources\\english.stop.txt"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // Store both the stemmed and the raw form for lookups.
            stopwords.Add(Stemming(line));
            stopwords.Add(line);
        }
    }
}
/// <summary>
/// Verifies that a custom (dummy) chunker factory and its context generator /
/// sequence validator survive a serialization round-trip, and that the
/// resulting model can chunk a sentence.
/// </summary>
public void TestDummyFactory()
{
    var model = TrainModel(new DummyChunkerFactory());

    Assert.IsInstanceOf<DummyChunkerFactory>(model.Factory);
    Assert.IsInstanceOf<DummyChunkerFactory.DummyContextGenerator>(model.Factory.GetContextGenerator());
    Assert.IsInstanceOf<DummyChunkerFactory.DummySequenceValidator>(model.Factory.GetSequenceValidator());

    using (var stream = new MemoryStream())
    {
        model.Serialize(new UnclosableStream(stream));
        stream.Seek(0, SeekOrigin.Begin);

        var fromSerialized = new ChunkerModel(stream);

        // Fix: the round-trip factory assertion previously re-checked
        // "model.Factory" instead of the deserialized model's factory.
        Assert.IsInstanceOf<DummyChunkerFactory>(fromSerialized.Factory);
        Assert.IsInstanceOf<DummyChunkerFactory.DummyContextGenerator>(
            fromSerialized.Factory.GetContextGenerator());
        Assert.IsInstanceOf<DummyChunkerFactory.DummySequenceValidator>(
            fromSerialized.Factory.GetSequenceValidator());
    }

    var chunker = new ChunkerME(model);
    String[] toks1 = { "Rockwell", "said", "the", "agreement", "calls", "for", "it", "to", "supply", "200", "additional", "so-called", "shipsets", "for", "the", "planes", "." };
    String[] tags1 = { "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB", "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "." };
    chunker.Chunk(toks1, tags1);
}
/// <summary>
/// Verifies that the dummy chunker factory, context generator and sequence
/// validator are preserved across serialization, and the model can chunk.
/// </summary>
public void TestDummyFactory()
{
    var model = TrainModel(new DummyChunkerFactory());

    Assert.IsInstanceOf<DummyChunkerFactory>(model.Factory);
    Assert.IsInstanceOf<DummyChunkerFactory.DummyContextGenerator>(model.Factory.GetContextGenerator());
    Assert.IsInstanceOf<DummyChunkerFactory.DummySequenceValidator>(model.Factory.GetSequenceValidator());

    using (var stream = new MemoryStream())
    {
        model.Serialize(new UnclosableStream(stream));
        stream.Seek(0, SeekOrigin.Begin);

        var fromSerialized = new ChunkerModel(stream);

        // Fix: assert on the deserialized model's factory, not the original's.
        Assert.IsInstanceOf<DummyChunkerFactory>(fromSerialized.Factory);
        Assert.IsInstanceOf<DummyChunkerFactory.DummyContextGenerator>(
            fromSerialized.Factory.GetContextGenerator());
        Assert.IsInstanceOf<DummyChunkerFactory.DummySequenceValidator>(
            fromSerialized.Factory.GetSequenceValidator());
    }

    var chunker = new ChunkerME(model);
    String[] toks1 = { "Rockwell", "said", "the", "agreement", "calls", "for", "it", "to", "supply", "200", "additional", "so-called", "shipsets", "for", "the", "planes", "." };
    String[] tags1 = { "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB", "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "." };
    chunker.Chunk(toks1, tags1);
}
/// <summary>
/// Creates a copy of this parser model with the chunker model replaced.
/// </summary>
/// <param name="chunkModel">The replacement chunker model.</param>
/// <returns>A new <see cref="ParserModel"/> sharing every other artifact of this instance.</returns>
public ParserModel UpdateChunkerModel(ChunkerModel chunkModel)
{
    return new ParserModel(Language, BuildModel, CheckModel, AttachModel, ParserTaggerModel, chunkModel, HeadRules, ParserType);
}
/// <summary>
/// Tokenizes, POS-tags and chunks each sentence, yielding one
/// <see cref="ChunkItem"/> per token.
/// </summary>
/// <param name="Sentences">The sentences to process.</param>
/// <returns>For each input sentence, the sequence of (token, tag, chunk) items.</returns>
/// <remarks>
/// The models are loaded eagerly from embedded resources; the returned
/// sequence itself is evaluated lazily per sentence.
/// </remarks>
public static IEnumerable<IEnumerable<ChunkItem>> GetChunks(IEnumerable<string> Sentences)
{
    var posTagger = new POSTaggerME(new POSModel(new java.io.ByteArrayInputStream(Resource.en_pos_maxent)));
    var tokenizer = new TokenizerME(new TokenizerModel(new java.io.ByteArrayInputStream(Resource.en_token)));
    var chunker = new ChunkerME(new ChunkerModel(new java.io.ByteArrayInputStream(Resource.en_chunker)));

    return Sentences.Select(sentence =>
    {
        var tokens = tokenizer.tokenize(sentence);
        var tags = posTagger.tag(tokens);
        var chunks = chunker.chunk(tokens, tags);
        // Zip the three parallel arrays into one item per token.
        return tokens
            .Select((token, i) => new ChunkItem { token = token, tag = tags[i], chunk = chunks[i] })
            .ToList();
    });
}
/// <summary>
/// Creates a chunker by reading a <see cref="ChunkerModel"/> from the given
/// stream. The caller retains ownership of the stream.
/// </summary>
/// <param name="modelStream">An open, readable stream positioned at the model data.</param>
public Chunker(FileStream modelStream)
{
    this.chunker = new ChunkerME(new ChunkerModel(modelStream));
}
// Constructors and finalizers:

/// <summary>
/// Singleton constructor: resolves all resource paths, then eagerly loads the
/// WordNet engine, the OpenNLP models (sentence detector, tokenizer, person
/// name finder, POS tagger, chunker and — when enabled — the parser), the
/// Stanford parser grammar and the Porter stemmer.
/// </summary>
/// <remarks>
/// NOTE(review): paths are hard-coded for a specific cluster layout — confirm
/// before reuse. Each java.io.FileInputStream is closed immediately after its
/// model is read.
/// </remarks>
private Repository()
{
    // Short assembly name: text before the first comma of the full name.
    _assemblyName = Regex.Match(_assemblyFullName, "^(.*?),.*$").Result("$1");

    // Resource path roots; Replace(@"\", Dsc) normalizes directory separators.
    _rootDrive = ("/usr/project/xtmp/dp195/Poetix18/").Replace(@"\", Dsc);
    _nlpFolder = ("rhetorica/nlp/").Replace(@"\", Dsc);

    _openNlpModelsFolder = ("OpenNLP/models/").Replace(@"\", Dsc);
    _openNlpModelsPath = RootDrive + _nlpFolder + _openNlpModelsFolder;

    _wordNetFolder = ("WordNet_3/").Replace(@"\", Dsc);
    _wordNetPath = RootDrive + _nlpFolder + _wordNetFolder;

    _grammarFolder = ("StanfordParser/grammar/").Replace(@"\", Dsc);
    _grammarPath = RootDrive + _nlpFolder + _grammarFolder;

    _dataFolder = ("data/").Replace(@"\", Dsc);
    _nlpTextsPath = RootDrive + _dataFolder;

    string[] localTextDirectoryParts = {
        CurrentAssemblyDirectoryPath, "..", "..", "..", "data"
        //"..", "..", "text"
    };
    _localTextPath = Path.Combine(localTextDirectoryParts) + "/"; // For development use

    // WordNet engine:
    Console.Write("Loading WordNet engine.... ");
    _wordNetEngine = new WordNetEngine(WordNetPath, true);
    Console.WriteLine("Done.");

    // OpenNLP sentence detector:
    Console.Write("Loading OpenNLP sentence detector.... ");
    java.io.FileInputStream modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-sent.bin");
    _sentenceModel = new SentenceModel(modelInputStream);
    modelInputStream.close();
    _sentenceDetector = new SentenceDetectorME(_sentenceModel);
    Console.WriteLine("Done.");

    // OpenNLP tokenizer:
    Console.Write("Loading OpenNLP tokenizer.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-token.bin");
    _tokenizerModel = new opennlp.tools.tokenize.TokenizerModel(modelInputStream);
    modelInputStream.close();
    _tokenizer = new opennlp.tools.tokenize.TokenizerME(_tokenizerModel);
    Console.WriteLine("Done.");

    // OpenNLP name finder:
    Console.Write("Loading OpenNLP name finder.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-ner-person.bin");
    _tokenNameFinderModel = new TokenNameFinderModel(modelInputStream);
    modelInputStream.close();
    _nameFinder = new NameFinderME(_tokenNameFinderModel);
    Console.WriteLine("Done.");

    // OpenNLP POS tagger:
    Console.Write("Loading OpenNLP POS tagger.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-pos-maxent.bin");
    _posModel = new POSModel(modelInputStream);
    modelInputStream.close();
    _tagger = new POSTaggerME(_posModel);
    Console.WriteLine("Done.");

    // OpenNLP chunker:
    Console.Write("Loading OpenNLP chunker.... ");
    modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-chunker.bin");
    _chunkerModel = new ChunkerModel(modelInputStream);
    modelInputStream.close();
    _chunker = new ChunkerME(_chunkerModel);
    Console.WriteLine("Done.");

    // OpenNLP parser (optional; heavyweight, guarded by _loadParser):
    if (_loadParser)
    {
        Console.Write("Loading OpenNLP parser.... ");
        modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-parser-chunking.bin");
        _parserModel = new ParserModel(modelInputStream);
        modelInputStream.close();
        _parser = ParserFactory.create(_parserModel);
        Console.WriteLine("Done.");
    }

    // Stanford parser:
    //_stanfordParser = new LexicalizedParser(GrammarPath + "englishPCFG.ser.gz"); // Obsolete method
    _stanfordParser = LexicalizedParser.loadModel(GrammarPath + "englishPCFG.ser.gz");

    // Porter stemmer:
    _porterStemmer = new PorterStemmer();
}
/// <summary>
/// Creates a chunker wrapping a <see cref="ChunkerME"/> built from the supplied model.
/// </summary>
/// <param name="model">The chunker model to use.</param>
public Chunker(ChunkerModel model)
{
    this.chunker = new ChunkerME(model);
}
/// <summary>
/// CLI entry point: evaluates a chunker model against the configured test
/// data, optionally printing misclassified samples and a detailed F-measure
/// report depending on the command-line flags.
/// </summary>
/// <param name="format">The input format name (handled by the base class).</param>
/// <param name="args">The remaining command-line arguments (handled by the base class).</param>
public override void run(string format, string[] args)
{
    base.run(format, args);

    ChunkerModel model = (new ChunkerModelLoader()).load(@params.Model);

    // Optional evaluation listeners selected by command-line flags.
    IList<EvaluationMonitor<ChunkSample>> listeners = new LinkedList<EvaluationMonitor<ChunkSample>>();
    ChunkerDetailedFMeasureListener detailedFMeasureListener = null;
    if (@params.Misclassified.Value)
    {
        listeners.Add(new ChunkEvaluationErrorListener());
    }
    if (@params.DetailedF.Value)
    {
        detailedFMeasureListener = new ChunkerDetailedFMeasureListener();
        listeners.Add(detailedFMeasureListener);
    }

    ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE), listeners.ToArray());

    PerformanceMonitor monitor = new PerformanceMonitor("sent");
    ObjectStream<ChunkSample> measuredSampleStream = new ObjectStreamAnonymousInnerClassHelper(this, monitor);
    monitor.startAndPrintThroughput();
    try
    {
        evaluator.evaluate(measuredSampleStream);
    }
    catch (IOException e)
    {
        Console.Error.WriteLine("failed");
        throw new TerminateToolException(-1, "IO error while reading test data: " + e.Message, e);
    }
    finally
    {
        try
        {
            measuredSampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }
    monitor.stopAndPrintFinalResult();
    Console.WriteLine();
    // Without the detailed listener, print the plain F-measure summary.
    if (detailedFMeasureListener == null)
    {
        Console.WriteLine(evaluator.FMeasure);
    }
    else
    {
        Console.WriteLine(detailedFMeasureListener.ToString());
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules without manifest information entries.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <exception cref="System.ArgumentException">
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
/// or
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
/// <remarks>Delegates to the primary constructor with <c>null</c> manifest entries.</remarks>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType) : this(languageCode, buildModel, checkModel, attachModel, parserTagger, chunkerTagger, headRules, modelType, null)
{
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules using the model type as chunking.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <remarks>Delegates to the primary constructor with a <c>null</c> attach model and <see cref="ParserType.Chunking"/>.</remarks>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    Dictionary<string, string> manifestInfoEntries) : this(languageCode, buildModel, checkModel, null, parserTagger, chunkerTagger, headRules, ParserType.Chunking, manifestInfoEntries)
{
}
/// <summary>
/// Returns a new <see cref="ParserModel"/> identical to this one except for
/// the replaced chunker model.
/// </summary>
/// <param name="chunkModel">The replacement chunker model.</param>
/// <returns>The new parser model instance.</returns>
public ParserModel UpdateChunkerModel(ChunkerModel chunkModel) =>
    new ParserModel(Language, BuildModel, CheckModel, AttachModel, ParserTaggerModel, chunkModel, HeadRules, ParserType);
/// <summary>
/// Registers all serializers for their artifact file name extensions. Override this method to register custom file extensions.
/// </summary>
/// <seealso href="https://msdn.microsoft.com/en-us/library/ms182331.aspx" />
/// <remarks>The subclasses should invoke the <see cref="ArtifactProvider.RegisterArtifactType" /> to register
/// the proper serialization/deserialization methods for an new extension.
/// Warning: This method is called in constructor of the base class!! Be aware that this method is ONLY designed to register serializers.</remarks>
protected override void CreateArtifactSerializers()
{
    base.CreateArtifactSerializers();
    // note from OpenNLP (for future adaptations)
    // In 1.6.x the headrules artifact is serialized with the new API
    // which uses the Serializable interface
    // This change is not backward compatible with the 1.5.x models.
    // In order to load 1.5.x model the English headrules serializer must be
    // put on the serializer map.
    RegisterArtifactType(".headrules",
        (artifact, stream) => HeadRulesManager.Serialize(artifact as AbstractHeadRules, stream),
        stream => HeadRulesManager.Deserialize(Language, stream));
    RegisterArtifactType(".postagger", (artifact, stream) => {
        var model = artifact as POSModel;
        // Artifact of the wrong type registered under this extension.
        if (model == null)
            throw new InvalidOperationException();
        model.Serialize(stream);
    }, stream => {
        var model = new POSModel(stream);
        // The 1.6.x models write the non-default beam size into the model itself.
        // In 1.5.x the parser configured the beam size when the model was loaded,
        // this is not possible anymore with the new APIs
        if (model.Version.Major == 1 && model.Version.Minor == 5 && !model.Manifest.Contains(Parameters.BeamSize))
            // Re-create the 1.5.x model with the parser's historical beam size of 10.
            return new POSModel(model.Language, model.MaxentModel, 10, null, model.Factory);
        return model;
    });
    RegisterArtifactType(".chunker", (artifact, stream) => {
        var model = artifact as ChunkerModel;
        if (model == null)
            throw new InvalidOperationException();
        model.Serialize(stream);
    }, stream => {
        var model = new ChunkerModel(stream);
        if (model.Version.Major == 1 && model.Version.Minor == 5)
        {
            // 1.5.x chunker models need the parser-specific chunker factory.
            return new ChunkerModel(model.Language, model.MaxentModel, new ParserChunkerFactory());
        }
        return model;
    });
}
/// <summary>
/// Creates the chunker operation wrapping a <see cref="ChunkerME"/> built from the given model.
/// </summary>
/// <param name="chunkerModel">The chunker model to load.</param>
public NLPChunkerOp(ChunkerModel chunkerModel)
{
    chunker = new ChunkerME(chunkerModel);
}
/// <summary>
/// Initializes a new instance of the <see cref="ChunkerME"/> with the specified
/// <see cref="ChunkerModel"/>, taking the context generator and sequence
/// validator from the model's factory.
/// </summary>
/// <param name="model">The chunker model.</param>
public ChunkerME(ChunkerModel model)
{
    var factory = model.Factory;
    sequenceValidator = factory.GetSequenceValidator();
    contextGenerator = factory.GetContextGenerator();
    // Prefer the model's own sequence model; otherwise decode with a beam search
    // over the maxent model using the model's configured beam size.
    this.model = model.ChunkerSequenceModel ?? new BeamSearch(model.BeamSize, model.MaxentModel);
}