Example #1
        private void InitializeChunker()
        {
            InputStream modelIn = null;

            try
            {
                modelIn = new FileInputStream(ChunkerModel);
                ChunkerModel model = new ChunkerModel(modelIn);
                chunker = new ChunkerME(model);
            }
            catch (IOException ex)
            {
                chunker = null;
            }
            finally
            {
                if (modelIn != null)
                {
                    try
                    {
                        modelIn.close();
                    }
                    catch (IOException ex)
                    {
                    }
                }
            }
        }
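The same chunker can be initialized with managed .NET streams when the SharpNL ChunkerModel/ChunkerME types used later on this page are available; a minimal sketch, assuming a hypothetical ChunkerModelPath field that holds the path to en-chunker.bin:

private ChunkerME chunker;

private void InitializeChunkerManaged()
{
    try
    {
        // A FileStream is accepted directly by the SharpNL ChunkerModel constructor,
        // as in the LoadModels examples further down this page.
        using (var modelStream = new FileStream(ChunkerModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            chunker = new ChunkerME(new ChunkerModel(modelStream));
        }
    }
    catch (IOException)
    {
        chunker = null; // mirror the error handling of the Java-stream version above
    }
}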
Example #2
 public ParserModel(string languageCode, IMaxentModel buildModel, IMaxentModel checkModel, POSModel parserTagger,
                    ChunkerModel chunkerTagger, AbstractHeadRules headRules, ParserType type,
                    Dictionary <string, string> manifestInfoEntries)
     : this(
         languageCode, buildModel, checkModel, null, parserTagger, chunkerTagger, headRules, type,
         manifestInfoEntries)
 {
 }
Example #3
        /// <summary>
        /// Initializes the current instance with the specified model and the specified beam size.
        /// </summary>
        /// <param name="model">The model for this chunker</param>
        /// <param name="beamSize">The size of the beam that should be used when decoding sequences.</param>
        /// <param name="sequenceValidator">The <see cref="ISequenceValidator{String}"/> to determines whether the outcome is valid for the preceding sequence. This can be used to implement constraints on what sequences are valid..</param>
        /// <param name="contextGenerator">The context generator.</param>
        internal ChunkerME(ChunkerModel model, int beamSize, ISequenceValidator<string> sequenceValidator, IChunkerContextGenerator contextGenerator) {
            // This constructor is marked as deprecated in OpenNLP, but it is still required by the Parser.
            // The context generator could instead be changed in the factory, but that is not ideal in this situation.

            this.sequenceValidator = sequenceValidator;
            this.contextGenerator = contextGenerator;
            this.model = model.ChunkerSequenceModel ?? new BeamSearch(beamSize, model.MaxentModel);
        }
Example #4
 public ParserModel(string languageCode, IMaxentModel buildModel, IMaxentModel checkModel,
                    IMaxentModel attachModel, POSModel parserTagger, ChunkerModel chunkerTagger, AbstractHeadRules headRules,
                    ParserType modelType)
     : this(
         languageCode, buildModel, checkModel, attachModel, parserTagger, chunkerTagger, headRules, modelType,
         null)
 {
 }
Example #5
        /// <summary>
        /// Registers all serializers for their artifact file name extensions. Override this method to register custom file extensions.
        /// </summary>
        /// <seealso href="https://msdn.microsoft.com/en-us/library/ms182331.aspx" />
        /// <remarks>Subclasses should invoke <see cref="ArtifactProvider.RegisterArtifactType" /> to register
        /// the proper serialization/deserialization methods for a new extension.
        /// Warning: This method is called in the constructor of the base class! Be aware that it is ONLY designed to register serializers.</remarks>
        protected override void CreateArtifactSerializers()
        {
            base.CreateArtifactSerializers();
            // note from OpenNLP (for future adaptations)

            // In 1.6.x the headrules artifact is serialized with the new API,
            // which uses the Serializable interface.
            // This change is not backward compatible with the 1.5.x models.
            // In order to load a 1.5.x model, the English headrules serializer must be
            // put on the serializer map.

            RegisterArtifactType(".headrules",
                                 (artifact, stream) => HeadRulesManager.Serialize(artifact as AbstractHeadRules, stream),
                                 stream => HeadRulesManager.Deserialize(Language, stream));

            RegisterArtifactType(".postagger", (artifact, stream) => {
                var model = artifact as POSModel;
                if (model == null)
                {
                    throw new InvalidOperationException();
                }

                model.Serialize(stream);
            }, stream => {
                var model = new POSModel(stream);

                // The 1.6.x models write the non-default beam size into the model itself.
                // In 1.5.x the parser configured the beam size when the model was loaded;
                // this is no longer possible with the new APIs.
                if (model.Version.Major == 1 && model.Version.Minor == 5 && !model.Manifest.Contains(Parameters.BeamSize))
                {
                    return(new POSModel(model.Language, model.MaxentModel, 10, null, model.Factory));
                }

                return(model);
            });

            RegisterArtifactType(".chunker", (artifact, stream) => {
                var model = artifact as ChunkerModel;
                if (model == null)
                {
                    throw new InvalidOperationException();
                }

                model.Serialize(stream);
            }, stream => {
                var model = new ChunkerModel(stream);

                if (model.Version.Major == 1 && model.Version.Minor == 5)
                {
                    return(new ChunkerModel(model.Language, model.MaxentModel, new ParserChunkerFactory()));
                }

                return(model);
            });
        }
Example #6
        private void LoadChunker()
        {
            if (!alreadyLoadChunker)
            {
                java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
                ChunkerModel            chunkerModel   = new ChunkerModel(modelInpStream);
                chunker = new ChunkerME(chunkerModel);
                modelInpStream.close(); // release the model file once the chunker is built

                alreadyLoadChunker = true;
            }
        }
Example #7
 public static ChunkerModel GetChunkerModel(string modelName, IResourceLoader loader)
 {
     if (!chunkerModels.TryGetValue(modelName, out ChunkerModel model) || model == null)
     {
         using (Stream resource = loader.OpenResource(modelName))
         {
             model = new ChunkerModel(new ikvm.io.InputStreamWrapper(resource));
         }
         chunkerModels[modelName] = model;
     }
     return(model);
 }
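A hedged usage sketch for the caching helper above; the IResourceLoader is assumed to be supplied by the hosting analysis framework, the model file name is illustrative, and NLPChunkerOp is the wrapper shown in Example #32:

// Illustrative only: the second lookup is served from the chunkerModels cache.
public static NLPChunkerOp CreateChunkerOp(IResourceLoader loader)
{
    ChunkerModel model  = GetChunkerModel("en-chunker.bin", loader); // reads the resource and caches it
    ChunkerModel cached = GetChunkerModel("en-chunker.bin", loader); // same instance, no second read
    return new NLPChunkerOp(cached);
}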
Example #8
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
            }
            else
            {
                ChunkerModel model = (new ChunkerModelLoader()).load(new File(args[0]));

                ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);

                ObjectStream <string> lineStream = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput()));

                PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
                perfMon.start();

                try
                {
                    string line;
                    while ((line = lineStream.read()) != null)
                    {
                        POSSample posSample;
                        try
                        {
                            posSample = POSSample.parse(line);
                        }
                        catch (InvalidFormatException)
                        {
                            Console.Error.WriteLine("Invalid format:");
                            Console.Error.WriteLine(line);
                            continue;
                        }

                        string[] chunks = chunker.chunk(posSample.Sentence, posSample.Tags);

                        Console.WriteLine((new ChunkSample(posSample.Sentence, posSample.Tags, chunks)).nicePrint());

                        perfMon.incrementCounter();
                    }
                }
                catch (IOException e)
                {
                    CmdLineUtil.handleStdinIoError(e);
                }

                perfMon.stopAndPrintFinalResult();
            }
        }
Example #9
        public ParserChunkerSequenceValidator(ChunkerModel model)
        {
            var seqModel = model.ChunkerSequenceModel;
            var outcomes = seqModel.GetOutcomes();

            continueStartMap = new Dictionary <string, string>();

            foreach (var outcome in outcomes)
            {
                if (outcome.StartsWith(AbstractBottomUpParser.CONT))
                {
                    continueStartMap.Add(outcome,
                                         AbstractBottomUpParser.START + outcome.Substring(AbstractBottomUpParser.CONT.Length));
                }
            }
        }
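continueStartMap pairs each continuation outcome with the start outcome it is allowed to follow. A small illustrative sketch of that mapping, assuming the conventional OpenNLP prefixes START = "S-" and CONT = "C-":

// Illustration only; real outcomes come from the chunker sequence model.
var outcomes = new[] { "S-NP", "C-NP", "S-VP", "C-VP", "O" };
var continueStartMap = new Dictionary<string, string>();

foreach (var outcome in outcomes)
{
    if (outcome.StartsWith("C-"))
        continueStartMap.Add(outcome, "S-" + outcome.Substring(2));
}

// Result: { "C-NP" -> "S-NP", "C-VP" -> "S-VP" }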
Example #10
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="attachModel">The attach model.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">Type of the model.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <exception cref="System.ArgumentException">
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
        /// or
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Unknown <paramref name="modelType"/> value.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType,
            Dictionary <string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries)
        {
            switch (modelType)
            {
            case ParserType.Chunking:
                if (attachModel != null)
                {
                    throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
                }

                Manifest[ParserTypeParameter] = "CHUNKING";
                break;

            case ParserType.TreeInsert:
                if (attachModel == null)
                {
                    throw new ArgumentException(@"attachModel must not be null for treeinsert parser!",
                                                nameof(attachModel));
                }

                Manifest[ParserTypeParameter] = "TREEINSERT";

                artifactMap[EntryAttachModel] = attachModel;

                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(modelType), "Unknown model type");
            }

            artifactMap[EntryBuildModel]         = buildModel;
            artifactMap[EntryCheckModel]         = checkModel;
            artifactMap[EntryParserTaggerModel]  = parserTagger;
            artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
            artifactMap[EntryHeadRules]          = headRules;

            CheckArtifactMap();
        }
Example #11
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType,
            Dictionary <string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries)
        {
            switch (modelType)
            {
            case ParserType.Chunking:
                if (attachModel != null)
                {
                    throw new ArgumentException(@"attachModel must be null for chunking parser!", "attachModel");
                }

                Manifest[PARSER_TYPE] = "CHUNKING";
                break;

            case ParserType.TreeInsert:
                if (attachModel == null)
                {
                    throw new ArgumentException(@"attachModel must not be null for treeinsert parser!",
                                                "attachModel");
                }

                Manifest[PARSER_TYPE] = "TREEINSERT";

                artifactMap[ATTACH_MODEL_ENTRY_NAME] = attachModel;

                break;

            default:
                throw new ArgumentException(@"Unknown mode type.", "modelType");
            }

            artifactMap[BUILD_MODEL_ENTRY_NAME]          = buildModel;
            artifactMap[CHECK_MODEL_ENTRY_NAME]          = checkModel;
            artifactMap[PARSER_TAGGER_MODEL_ENTRY_NAME]  = parserTagger;
            artifactMap[CHUNKER_TAGGER_MODEL_ENTRY_NAME] = chunkerTagger;
            artifactMap[HEAD_RULES_MODEL_ENTRY_NAME]     = headRules;

            CheckArtifactMap();
        }
Example #12
        public void TestTokenProbMinScoreOpenNLP()
        {
            var model = new ChunkerModel(Tests.OpenFile("opennlp/models/en-chunker.bin"));

            Assert.NotNull(model);

            var ckr = new ChunkerME(model);

            Assert.NotNull(ckr);

            var preds = chunker.TopKSequences(toks1, tags1, -5.55);

            Assert.AreEqual(4, preds.Length);
            Assert.AreEqual(expect1.Length, preds[0].Probabilities.Count);
            Assert.True(expect1.SequenceEqual(preds[0].Outcomes));
            Assert.False(expect1.SequenceEqual(preds[1].Outcomes));
        }
Example #13
        public void TestDefaultFactory()
        {
            var model = TrainModel(new ChunkerFactory());

            Assert.IsInstanceOf <DefaultChunkerContextGenerator>(model.Factory.GetContextGenerator());
            Assert.IsInstanceOf <DefaultChunkerSequenceValidator>(model.Factory.GetSequenceValidator());

            using (var stream = new MemoryStream()) {
                model.Serialize(new UnclosableStream(stream));

                stream.Seek(0, SeekOrigin.Begin);

                var fromSerialized = new ChunkerModel(stream);

                Assert.IsInstanceOf <DefaultChunkerContextGenerator>(fromSerialized.Factory.GetContextGenerator());
                Assert.IsInstanceOf <DefaultChunkerSequenceValidator>(fromSerialized.Factory.GetSequenceValidator());
            }
        }
Example #14
        private void LoadModels()
        {
            POSModel posModel;

            using (var modelFile = new FileStream(Path.Combine(configuration.Resources, configuration.NlpModels, "en-pos-maxent.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                posModel = new POSModel(modelFile);
            }

            ChunkerModel chunkerModel;

            using (var modelFile = new FileStream(Path.Combine(configuration.Resources, configuration.NlpModels, "en-chunker.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                chunkerModel = new ChunkerModel(modelFile);
            }

            posTagger = new POSTaggerME(posModel);
            chunker   = new ChunkerME(chunkerModel);
        }
Example #15
        private void LoadModels(string resourcesFolder)
        {
            POSModel posModel;

            using (var modelFile = new FileStream(Path.Combine(resourcesFolder, @"1.5/en-pos-maxent.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                posModel = new POSModel(modelFile);
            }

            ChunkerModel chunkerModel;

            using (var modelFile = new FileStream(Path.Combine(resourcesFolder, @"1.5/en-chunker.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                chunkerModel = new ChunkerModel(modelFile);
            }

            posTagger = new POSTaggerME(posModel);
            chunker   = new ChunkerME(chunkerModel);
        }
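Once both models are loaded, the tagger and chunker are normally run back to back on a tokenized sentence. A minimal sketch using the posTagger and chunker fields above; Tag is assumed to be the SharpNL POSTaggerME counterpart of OpenNLP's tag method, while Chunk appears in the ChunkerME tests below.

private string[] ChunkSentence(string[] tokens)
{
    string[] tags = posTagger.Tag(tokens); // assumed SharpNL POSTaggerME API
    return chunker.Chunk(tokens, tags);    // one chunk label per input token
}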
Example #16
        public void TestDefaultFactory() {

            var model = TrainModel(new ChunkerFactory());

            Assert.IsInstanceOf<DefaultChunkerContextGenerator>(model.Factory.GetContextGenerator());
            Assert.IsInstanceOf<DefaultChunkerSequenceValidator>(model.Factory.GetSequenceValidator());

            using (var stream = new MemoryStream()) {
                
                model.Serialize(new UnclosableStream(stream));

                stream.Seek(0, SeekOrigin.Begin);

                var fromSerialized = new ChunkerModel(stream);

                Assert.IsInstanceOf<DefaultChunkerContextGenerator>(fromSerialized.Factory.GetContextGenerator());
                Assert.IsInstanceOf<DefaultChunkerSequenceValidator>(fromSerialized.Factory.GetSequenceValidator());

            }
        }
Example #17
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="attachModel">The attach model.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">Type of the model.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <exception cref="System.ArgumentException">
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
        /// or
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Unknown <paramref name="modelType"/> value.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType,
            Dictionary<string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries) {

            switch (modelType) {
                case ParserType.Chunking:
                    if (attachModel != null)
                        throw new ArgumentException(@"attachModel must be null for chunking parser!", "attachModel");

                    Manifest[ParserTypeParameter] = "CHUNKING";
                    break;
                case ParserType.TreeInsert:
                    if (attachModel == null)
                        throw new ArgumentException(@"attachModel must not be null for treeinsert parser!",
                            "attachModel");

                    Manifest[ParserTypeParameter] = "TREEINSERT";

                    artifactMap[EntryAttachModel] = attachModel;

                    break;
                default:
                    throw new ArgumentOutOfRangeException("modelType", "Unknown model type");
            }

            artifactMap[EntryBuildModel] = buildModel;
            artifactMap[EntryCheckModel] = checkModel;
            artifactMap[EntryParserTaggerModel] = parserTagger;
            artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
            artifactMap[EntryHeadRules] = headRules;

            CheckArtifactMap();
        }
Example #18
        public NLP()
        {
            //loading sentence detector model
            java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-sent.bin");
            SentenceModel           sentenceModel  = new SentenceModel(modelInpStream);

            sentenceDetector = new SentenceDetectorME(sentenceModel);

            //loading tokenizer model
            modelInpStream = new java.io.FileInputStream("Resources\\en-token.bin");
            TokenizerModel tokenizerModel = new TokenizerModel(modelInpStream);

            tokenizer = new TokenizerME(tokenizerModel);

            modelInpStream = new java.io.FileInputStream("Resources\\en-pos-maxent.bin");
            POSModel posModel = new POSModel(modelInpStream);

            tagger = new POSTaggerME(posModel);

            modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
            ChunkerModel chunkerModel = new ChunkerModel(modelInpStream);

            chunker = new ChunkerME(chunkerModel);

            modelInpStream = new java.io.FileInputStream("Resources\\en-parser-chunking.bin");
            ParserModel parserModel = new ParserModel(modelInpStream);

            parser = ParserFactory.create(parserModel);

            //loading stop words list
            StreamReader sr = new StreamReader("Resources\\english.stop.txt");
            string       line;

            while ((line = sr.ReadLine()) != null)
            {
                stopwords.Add(Stemming(line));
                stopwords.Add(line);
            }
        }
Example #19
        public void TestDummyFactory()
        {
            var model = TrainModel(new DummyChunkerFactory());

            Assert.IsInstanceOf <DummyChunkerFactory>(model.Factory);
            Assert.IsInstanceOf <DummyChunkerFactory.DummyContextGenerator>(model.Factory.GetContextGenerator());
            Assert.IsInstanceOf <DummyChunkerFactory.DummySequenceValidator>(model.Factory.GetSequenceValidator());


            using (var stream = new MemoryStream()) {
                model.Serialize(new UnclosableStream(stream));
                stream.Seek(0, SeekOrigin.Begin);

                var fromSerialized = new ChunkerModel(stream);
                Assert.IsInstanceOf <DummyChunkerFactory>(model.Factory);
                Assert.IsInstanceOf <DummyChunkerFactory.DummyContextGenerator>(
                    fromSerialized.Factory.GetContextGenerator());
                Assert.IsInstanceOf <DummyChunkerFactory.DummySequenceValidator>(
                    fromSerialized.Factory.GetSequenceValidator());
            }

            var chunker = new ChunkerME(model);

            String[] toks1 =
            {
                "Rockwell", "said", "the", "agreement", "calls", "for",
                "it", "to", "supply", "200", "additional", "so-called", "shipsets",
                "for", "the", "planes", "."
            };

            String[] tags1 =
            {
                "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
                "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "."
            };

            chunker.Chunk(toks1, tags1);
        }
Example #20
        public void TestDummyFactory() {

            var model = TrainModel(new DummyChunkerFactory());

            Assert.IsInstanceOf<DummyChunkerFactory>(model.Factory);
            Assert.IsInstanceOf<DummyChunkerFactory.DummyContextGenerator>(model.Factory.GetContextGenerator());
            Assert.IsInstanceOf<DummyChunkerFactory.DummySequenceValidator>(model.Factory.GetSequenceValidator());


            using (var stream = new MemoryStream()) {
                model.Serialize(new UnclosableStream(stream));
                stream.Seek(0, SeekOrigin.Begin);

                var fromSerialized = new ChunkerModel(stream);
                Assert.IsInstanceOf<DummyChunkerFactory>(model.Factory);
                Assert.IsInstanceOf<DummyChunkerFactory.DummyContextGenerator>(
                    fromSerialized.Factory.GetContextGenerator());
                Assert.IsInstanceOf<DummyChunkerFactory.DummySequenceValidator>(
                    fromSerialized.Factory.GetSequenceValidator());
            }

            var chunker = new ChunkerME(model);

            String[] toks1 = {
                "Rockwell", "said", "the", "agreement", "calls", "for",
                "it", "to", "supply", "200", "additional", "so-called", "shipsets",
                "for", "the", "planes", "."
            };

            String[] tags1 = {
                "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
                "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "."
            };

            chunker.Chunk(toks1, tags1);
        }
Example #21
 public ParserModel UpdateChunkerModel(ChunkerModel chunkModel)
 {
     return(new ParserModel(Language, BuildModel, CheckModel, AttachModel, ParserTaggerModel, chunkModel, HeadRules, ParserType));
 }
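UpdateChunkerModel rebuilds the parser model around a replacement chunker while keeping every other artifact. A hedged sketch of swapping the chunker of a previously saved parser model; the file paths are hypothetical and ParserModel is assumed to deserialize from a Stream like the other models on this page.

public ParserModel WithNewChunker(string parserModelPath, string chunkerModelPath)
{
    ParserModel parserModel;
    using (var file = new FileStream(parserModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
        parserModel = new ParserModel(file); // assumed Stream constructor

    ChunkerModel newChunker;
    using (var file = new FileStream(chunkerModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
        newChunker = new ChunkerModel(file);

    // Everything except the chunker artifact is carried over from parserModel.
    return parserModel.UpdateChunkerModel(newChunker);
}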
Example #22
        public static IEnumerable<IEnumerable<ChunkItem>> GetChunks(IEnumerable<string> Sentences)
        {
            var posModelStream = new java.io.ByteArrayInputStream(Resource.en_pos_maxent);//new java.io.FileInputStream(@"C:\dev\d-mill\TextProcessing\OpenNLP\Models\en-pos-maxent.bin");

            var posModel = new POSModel(posModelStream);

            var pos = new POSTaggerME(posModel);

            var modelStream = new java.io.ByteArrayInputStream(Resource.en_token); //java.io.FileInputStream(@"C:\dev\d-mill\TextProcessing\OpenNLP\Models\en-token.bin");

            var model = new TokenizerModel(modelStream);

            var tokenizer = new TokenizerME(model);

            var chunkerModelStream = new java.io.ByteArrayInputStream(Resource.en_chunker);

            var chunkerModel = new ChunkerModel(chunkerModelStream);

            var chunker = new ChunkerME(chunkerModel);

            return Sentences.Select(p => {

                var tokens = tokenizer.tokenize(p);

                var tags = pos.tag(tokens);

                var chunks = chunker.chunk(tokens, tags);

                var res = new List<ChunkItem>();

                for (var i = 0; i < chunks.Length; i++)
                {
                    res.Add(new ChunkItem { token = tokens[i], tag = tags[i], chunk = chunks[i] });
                }

                return res;
            });
        }
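A short usage sketch for GetChunks; the input sentence is illustrative and the token/tag/chunk fields are the ones populated above.

var sentences = new[] { "Rockwell said the agreement calls for it to supply 200 additional shipsets for the planes." };

foreach (var sentence in GetChunks(sentences))
{
    foreach (var item in sentence)
    {
        // e.g. "Rockwell  NNP  B-NP"
        Console.WriteLine("{0}\t{1}\t{2}", item.token, item.tag, item.chunk);
    }
}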
Example #23
        public Chunker(FileStream modelStream)
        {
            ChunkerModel model = new ChunkerModel(modelStream);

            this.chunker = new ChunkerME(model);
        }
Example #24
        // Constructors and finalizers:
        private Repository()
        {
            _assemblyName = Regex.Match(_assemblyFullName, "^(.*?),.*$").Result("$1");

            _rootDrive = ("/usr/project/xtmp/dp195/Poetix18/").Replace(@"\", Dsc);
            _nlpFolder = ("rhetorica/nlp/").Replace(@"\", Dsc);

            _openNlpModelsFolder = ("OpenNLP/models/").Replace(@"\", Dsc);
            _openNlpModelsPath   = RootDrive + _nlpFolder + _openNlpModelsFolder;

            _wordNetFolder = ("WordNet_3/").Replace(@"\", Dsc);
            _wordNetPath   = RootDrive + _nlpFolder + _wordNetFolder;

            _grammarFolder = ("StanfordParser/grammar/").Replace(@"\", Dsc);
            _grammarPath   = RootDrive + _nlpFolder + _grammarFolder;

            _dataFolder   = ("data/").Replace(@"\", Dsc);
            _nlpTextsPath = RootDrive + _dataFolder;

            string[] localTextDirectoryParts =
            {
                CurrentAssemblyDirectoryPath,
                "..", "..", "..", "data"
                //"..", "..", "text"
            };
            _localTextPath = Path.Combine(localTextDirectoryParts) + "/"; // For development use

            // WordNet engine:
            Console.Write("Loading WordNet engine.... ");
            _wordNetEngine = new WordNetEngine(WordNetPath, true);
            Console.WriteLine("Done.");

            // OpenNLP sentence detector:
            Console.Write("Loading OpenNLP sentence detector.... ");
            java.io.FileInputStream modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-sent.bin");
            _sentenceModel = new SentenceModel(modelInputStream);
            modelInputStream.close();
            _sentenceDetector = new SentenceDetectorME(_sentenceModel);
            Console.WriteLine("Done.");

            // OpenNLP tokenizer:
            Console.Write("Loading OpenNLP tokenizer.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-token.bin");
            _tokenizerModel  = new opennlp.tools.tokenize.TokenizerModel(modelInputStream);
            modelInputStream.close();
            _tokenizer = new opennlp.tools.tokenize.TokenizerME(_tokenizerModel);
            Console.WriteLine("Done.");

            // OpenNLP name finder:
            Console.Write("Loading OpenNLP name finder.... ");
            modelInputStream      = new java.io.FileInputStream(OpenNlpModelsPath + "en-ner-person.bin");
            _tokenNameFinderModel = new TokenNameFinderModel(modelInputStream);
            modelInputStream.close();
            _nameFinder = new NameFinderME(_tokenNameFinderModel);
            Console.WriteLine("Done.");

            // OpenNLP POS tagger:
            Console.Write("Loading OpenNLP POS tagger.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-pos-maxent.bin");
            _posModel        = new POSModel(modelInputStream);
            modelInputStream.close();
            _tagger = new POSTaggerME(_posModel);
            Console.WriteLine("Done.");

            // OpenNLP chunker:
            Console.Write("Loading OpenNLP chunker.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-chunker.bin");
            _chunkerModel    = new ChunkerModel(modelInputStream);
            modelInputStream.close();
            _chunker = new ChunkerME(_chunkerModel);
            Console.WriteLine("Done.");

            // OpenNLP parser:
            if (_loadParser)
            {
                Console.Write("Loading OpenNLP parser.... ");
                modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-parser-chunking.bin");
                _parserModel     = new ParserModel(modelInputStream);
                modelInputStream.close();
                _parser = ParserFactory.create(_parserModel);
                Console.WriteLine("Done.");
            }

            // Stanford parser:
            //_stanfordParser = new LexicalizedParser(GrammarPath + "englishPCFG.ser.gz"); // Obsolete method
            _stanfordParser = LexicalizedParser.loadModel(GrammarPath + "englishPCFG.ser.gz");

            // Porter stemmer:
            _porterStemmer = new PorterStemmer();
        }
Example #25
 public Chunker(ChunkerModel model)
 {
     this.chunker = new ChunkerME(model);
 }
Example #26
        public void TestTokenProbMinScoreOpenNLP() {

            var model = new ChunkerModel(Tests.OpenFile("opennlp/models/en-chunker.bin"));

            Assert.NotNull(model);

            var ckr = new ChunkerME(model);

            Assert.NotNull(ckr);

            var preds = chunker.TopKSequences(toks1, tags1, -5.55);

            Assert.AreEqual(4, preds.Length);
            Assert.AreEqual(expect1.Length, preds[0].Probabilities.Count);
            Assert.True(expect1.SequenceEqual(preds[0].Outcomes));
            Assert.False(expect1.SequenceEqual(preds[1].Outcomes));
        }
Example #27
        public override void run(string format, string[] args)
        {
            base.run(format, args);

            ChunkerModel model = (new ChunkerModelLoader()).load(@params.Model);

            IList <EvaluationMonitor <ChunkSample> > listeners = new LinkedList <EvaluationMonitor <ChunkSample> >();
            ChunkerDetailedFMeasureListener          detailedFMeasureListener = null;

            if (@params.Misclassified.Value)
            {
                listeners.Add(new ChunkEvaluationErrorListener());
            }
            if (@params.DetailedF.Value)
            {
                detailedFMeasureListener = new ChunkerDetailedFMeasureListener();
                listeners.Add(detailedFMeasureListener);
            }

            ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE), listeners.ToArray());

            PerformanceMonitor monitor = new PerformanceMonitor("sent");

            ObjectStream <ChunkSample> measuredSampleStream = new ObjectStreamAnonymousInnerClassHelper(this, monitor);

            monitor.startAndPrintThroughput();

            try
            {
                evaluator.evaluate(measuredSampleStream);
            }
            catch (IOException e)
            {
                Console.Error.WriteLine("failed");
                throw new TerminateToolException(-1, "IO error while reading test data: " + e.Message, e);
            }
            finally
            {
                try
                {
                    measuredSampleStream.close();
                }
                catch (IOException)
                {
                    // sorry that this can fail
                }
            }

            monitor.stopAndPrintFinalResult();

            Console.WriteLine();

            if (detailedFMeasureListener == null)
            {
                Console.WriteLine(evaluator.FMeasure);
            }
            else
            {
                Console.WriteLine(detailedFMeasureListener.ToString());
            }
        }
Example #28
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules without manifest information entries.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="attachModel">The attach model.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">Type of the model.</param>
        /// <exception cref="System.ArgumentException">
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
        /// or
        /// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Unknown <paramref name="modelType"/> value.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType) : this(

                languageCode,
                buildModel,
                checkModel,
                attachModel,
                parserTagger,
                chunkerTagger,
                headRules,
                modelType,
                null) {

        }
Example #29
        /// <summary>
        /// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules, with the model type set to chunking.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model to assign constituent labels.</param>
        /// <param name="checkModel">The model to determine a constituent is complete.</param>
        /// <param name="parserTagger">The model to assign pos-tags.</param>
        /// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            Dictionary<string, string> manifestInfoEntries) : this(

                languageCode,
                buildModel,
                checkModel,
                null,
                parserTagger,
                chunkerTagger,
                headRules,
                ParserType.Chunking,
                manifestInfoEntries) {

        }
Example #30
 public ParserModel UpdateChunkerModel(ChunkerModel chunkModel) {
     return new ParserModel(Language, BuildModel, CheckModel, AttachModel, ParserTaggerModel, chunkModel, HeadRules, ParserType);
 }
Example #31
        /// <summary>
        /// Registers all serializers for their artifact file name extensions. Override this method to register custom file extensions.
        /// </summary>
        /// <seealso href="https://msdn.microsoft.com/en-us/library/ms182331.aspx" />
        /// <remarks>Subclasses should invoke <see cref="ArtifactProvider.RegisterArtifactType" /> to register
        /// the proper serialization/deserialization methods for a new extension.
        /// Warning: This method is called in the constructor of the base class! Be aware that it is ONLY designed to register serializers.</remarks>
        protected override void CreateArtifactSerializers() {
            base.CreateArtifactSerializers();
            // note from OpenNLP (for future adaptations)

            // In 1.6.x the headrules artifact is serialized with the new API,
            // which uses the Serializable interface.
            // This change is not backward compatible with the 1.5.x models.
            // In order to load a 1.5.x model, the English headrules serializer must be
            // put on the serializer map.

            RegisterArtifactType(".headrules",
                (artifact, stream) => HeadRulesManager.Serialize(artifact as AbstractHeadRules, stream),
                stream => HeadRulesManager.Deserialize(Language, stream));

            RegisterArtifactType(".postagger", (artifact, stream) => {
                var model = artifact as POSModel;
                if (model == null)
                    throw new InvalidOperationException();

                model.Serialize(stream);
            }, stream => {
                var model = new POSModel(stream);

                // The 1.6.x models write the non-default beam size into the model itself.
                // In 1.5.x the parser configured the beam size when the model was loaded;
                // this is no longer possible with the new APIs.
                if (model.Version.Major == 1 && model.Version.Minor == 5 && !model.Manifest.Contains(Parameters.BeamSize))
                    return new POSModel(model.Language, model.MaxentModel, 10, null, model.Factory);

                return model;
            });

            RegisterArtifactType(".chunker", (artifact, stream) => {
                var model = artifact as ChunkerModel;
                if (model == null)
                    throw new InvalidOperationException();

                model.Serialize(stream);
            }, stream => {
                var model = new ChunkerModel(stream);

                if (model.Version.Major == 1 && model.Version.Minor == 5) {
                    return new ChunkerModel(model.Language, model.MaxentModel, new ParserChunkerFactory());
                }

                return model;
            });
        }
Example #32
 public NLPChunkerOp(ChunkerModel chunkerModel)
 {
     chunker = new ChunkerME(chunkerModel);
 }
Example #33
        /// <summary>
        /// Initializes a new instance of the <see cref="ChunkerME"/> with the specified <see cref="ChunkerModel"/>.
        /// </summary>
        /// <param name="model">The chunker model.</param>
        public ChunkerME(ChunkerModel model) {
            contextGenerator = model.Factory.GetContextGenerator();
            sequenceValidator = model.Factory.GetSequenceValidator();

            this.model = model.ChunkerSequenceModel ?? new BeamSearch(model.BeamSize, model.MaxentModel);
        }
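Putting the constructor above to use: load a chunker model, build the ChunkerME, and chunk a POS-tagged sentence, as the factory tests earlier on this page do. The model path is illustrative.

ChunkerModel model;
using (var file = new FileStream("en-chunker.bin", FileMode.Open, FileAccess.Read, FileShare.Read))
    model = new ChunkerModel(file);

var chunker = new ChunkerME(model);

string[] tokens = { "Rockwell", "said", "the", "agreement", "calls", "for", "it", "." };
string[] tags   = { "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "." };

string[] chunks = chunker.Chunk(tokens, tags); // one chunk tag (e.g. B-NP, I-VP, O) per token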