Beispiel #1
0
        /// <summary>
        /// Splits <paramref name="paragraph"/> into sentences, tokenizes every sentence and
        /// produces one <see cref="Tag"/> per token holding its part-of-speech category.
        /// </summary>
        /// <param name="paragraph">The text to analyse.</param>
        /// <returns>One tag per token, in the order the tokens occur.</returns>
        public List <Tag> GetTags(string paragraph)
        {
            var       bin    = GetFileStream("en-pos-maxent.bin");
            POSModel  model  = new POSModel(bin);
            POSTagger tagger = new POSTaggerME(model);

            var        sentenceSpans = SentPosDetect(paragraph);
            List <Tag> tagsResult    = new List <Tag>();

            foreach (var sentenceSpan in sentenceSpans)
            {
                var sentence = sentenceSpan.getCoveredText(paragraph).toString();
                var start    = sentenceSpan.getStart();
                var end      = sentenceSpan.getEnd();

                var tokenSpans = GetTokens(sentence);
                if (tokenSpans.Length == 0)
                {
                    // Nothing to tag in this sentence.
                    continue;
                }

                var tokens = new string[tokenSpans.Length];
                for (var i = 0; i < tokens.Length; i++)
                {
                    tokens[i] = tokenSpans[i].getCoveredText(sentence).toString();
                }

                // Tag the sentence in a single call so the tagger sees every token together
                // with its neighbours instead of a series of isolated one-word "sentences".
                var tags = tagger.tag(tokens);

                for (var i = 0; i < tokens.Length; i++)
                {
                    // NOTE(review): startIndex/endIndex carry the span of the enclosing
                    // sentence, not of the individual token - confirm this is intended.
                    tagsResult.Add(new Tag
                    {
                        startIndex = start,
                        endIndex   = end,
                        category   = tags[i]
                    });
                }
            }

            return tagsResult;
        }
Beispiel #2
0
        /// <summary>
        /// Runs the <c>sp_UpdatePurchaseOrder</c> stored procedure with the order name,
        /// description and id taken from <paramref name="posModel"/>.
        /// </summary>
        /// <param name="posModel">Purchase order whose values are sent to the procedure.</param>
        /// <returns>
        /// The scalar returned by the procedure converted to <see cref="int"/>,
        /// or -1 when an exception was caught.
        /// </returns>
        public int Update(POSModel posModel)
        {
            ErrorMessage errorInfo = new ErrorMessage();

            try
            {
                baseDal.ConnectName = "POSConnection";
                baseDal.SQLConnect();
                baseDal.ClearParameters();

                baseDal.AddParameter("@in_OrderName", posModel.OrderName, false);
                baseDal.AddParameter("@in_Description", posModel.Description, false);
                baseDal.AddParameter("@in_Id", posModel.Id, false);

                var scalar = baseDal.ExecuteScalar("sp_UpdatePurchaseOrder");
                return Convert.ToInt32(scalar);
            }
            catch (Exception failure)
            {
                // The failure is recorded through AddApplicationError and reported as -1.
                errorInfo = AddApplicationError(failure);
                return -1;
            }
            finally
            {
                // The connection is released on every path.
                baseDal.SQLDisconnect();
            }
        }
Beispiel #3
0
        /// <summary>
        /// Creates the POS tagger from the model file named by <c>POSModel</c>.
        /// When an <c>IOException</c> occurs the tagger is set to <c>null</c>.
        /// </summary>
        private void InitializePOSTagger()
        {
            InputStream modelStream = null;

            try
            {
                modelStream = new FileInputStream(POSModel);
                tagger = new POSTaggerME(new POSModel(modelStream));
            }
            catch (IOException)
            {
                tagger = null;
            }
            finally
            {
                if (modelStream != null)
                {
                    try
                    {
                        modelStream.close();
                    }
                    catch (IOException)
                    {
                        // Best effort: a failure while closing is ignored.
                    }
                }
            }
        }
Beispiel #4
0
 /// <summary>
 /// Creates a parser model by forwarding every argument to the nine-argument
 /// constructor and passing <c>null</c> as its last argument.
 /// </summary>
 public ParserModel(
     string languageCode,
     IMaxentModel buildModel,
     IMaxentModel checkModel,
     IMaxentModel attachModel,
     POSModel parserTagger,
     ChunkerModel chunkerTagger,
     AbstractHeadRules headRules,
     ParserType modelType)
     : this(languageCode, buildModel, checkModel, attachModel, parserTagger, chunkerTagger, headRules, modelType, null)
 {
 }
Beispiel #5
0
        /// <summary>
        /// Loads the POS model from <c>en-pos-maxent.bin</c> and creates the tagger.
        /// </summary>
        public DeterminerPartOfSpeech()
        {
            POSModel posModel;

            // Open read-only: FileStream(path, FileMode.Open) asks for read/write access,
            // which fails when the model file is read-only.
            using (var modelFile = new FileStream("en-pos-maxent.bin", FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                posModel = new POSModel(modelFile);
            }

            posTagger = new POSTaggerME(posModel);
        }
        /// <summary>
        /// Loads the POS model from <c>en-pos-maxent.bin</c> and creates the tagger.
        /// </summary>
        public PartOfSpeechRecognizer()
        {
            POSModel posModel;

            // Open read-only: FileStream(path, FileMode.Open) asks for read/write access,
            // which fails when the model file is read-only.
            using (var modelFile = new FileStream("en-pos-maxent.bin", FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                posModel = new POSModel(modelFile);
            }

            PosTagger = new POSTaggerME(posModel);
        }
Beispiel #7
0
        /// <summary>
        /// Registers the serializers of the base class and then those for the
        /// <c>.headrules</c>, <c>.postagger</c> and <c>.chunker</c> artifact extensions.
        /// </summary>
        /// <seealso href="https://msdn.microsoft.com/en-us/library/ms182331.aspx" />
        /// <remarks>
        /// Subclasses should call <see cref="ArtifactProvider.RegisterArtifactType" /> to register the
        /// serialization/deserialization methods for a new extension.
        /// Warning: this method is called from the constructor of the base class, so it must do
        /// nothing except register serializers.
        /// </remarks>
        protected override void CreateArtifactSerializers()
        {
            base.CreateArtifactSerializers();

            // Note from OpenNLP (for future adaptations):
            // in 1.6.x the headrules artifact is serialized with the new API, which uses the
            // Serializable interface. That change is not backward compatible with 1.5.x models;
            // to load a 1.5.x model the English headrules serializer must be put on the
            // serializer map.
            RegisterArtifactType(
                ".headrules",
                (artifact, stream) => HeadRulesManager.Serialize(artifact as AbstractHeadRules, stream),
                stream => HeadRulesManager.Deserialize(Language, stream));

            RegisterArtifactType(
                ".postagger",
                (artifact, stream) => {
                    var posModel = artifact as POSModel;
                    if (posModel == null)
                    {
                        throw new InvalidOperationException();
                    }

                    posModel.Serialize(stream);
                },
                stream => {
                    var loaded = new POSModel(stream);

                    // 1.6.x models store a non-default beam size in the model itself. In 1.5.x the
                    // parser configured the beam size while loading, which the new APIs no longer
                    // allow, so only 1.5 models without a stored beam size are rebuilt with 10.
                    if (loaded.Version.Major != 1 || loaded.Version.Minor != 5 || loaded.Manifest.Contains(Parameters.BeamSize))
                    {
                        return loaded;
                    }

                    return new POSModel(loaded.Language, loaded.MaxentModel, 10, null, loaded.Factory);
                });

            RegisterArtifactType(
                ".chunker",
                (artifact, stream) => {
                    var chunkerModel = artifact as ChunkerModel;
                    if (chunkerModel == null)
                    {
                        throw new InvalidOperationException();
                    }

                    chunkerModel.Serialize(stream);
                },
                stream => {
                    var loaded = new ChunkerModel(stream);

                    // Anything that is not a 1.5 model is returned as loaded.
                    if (loaded.Version.Major != 1 || loaded.Version.Minor != 5)
                    {
                        return loaded;
                    }

                    return new ChunkerModel(loaded.Language, loaded.MaxentModel, new ParserChunkerFactory());
                });
        }
        /// <summary>
        /// Creates the POS tagger from <c>Resources\en-pos-maxent.bin</c> the first time it is
        /// called; later calls return immediately.
        /// </summary>
        private void LoadTagger()
        {
            if (alreadyLoadTokenizer)
            {
                return;
            }

            java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-pos-maxent.bin");
            try
            {
                POSModel posModel = new POSModel(modelInpStream);
                tagger = new POSTaggerME(posModel);
            }
            finally
            {
                // Release the file handle whether or not the model could be read.
                modelInpStream.close();
            }

            alreadyLoadTokenizer = true;
        }
Beispiel #9
0
 /// <summary>
 /// Returns the POS model stored under <paramref name="modelName"/> in
 /// <c>posTaggerModels</c>; when no non-null entry exists the model is read through
 /// <paramref name="loader"/> and stored before it is returned.
 /// </summary>
 /// <param name="modelName">Key of the model and name of the resource to open.</param>
 /// <param name="loader">Resource loader used to open the model.</param>
 /// <returns>The stored or newly loaded model.</returns>
 public static POSModel GetPOSTaggerModel(string modelName, IResourceLoader loader)
 {
     if (posTaggerModels.TryGetValue(modelName, out POSModel cached) && cached != null)
     {
         return cached;
     }

     POSModel loaded;
     using (Stream resource = loader.OpenResource(modelName))
     {
         loaded = new POSModel(new ikvm.io.InputStreamWrapper(resource));
     }

     posTaggerModels[modelName] = loaded;
     return loaded;
 }
Beispiel #10
0
        /// <summary>
        /// Tags <paramref name="tokens"/> with a POS tagger created from the
        /// <c>en-pos-maxent.zip</c> model located under <c>modelPath</c>.
        /// </summary>
        /// <param name="tokens">Tokens to tag.</param>
        /// <returns>The array returned by the tagger for <paramref name="tokens"/>.</returns>
        string[] POSTagger(string[] tokens)
        {
            POSModel model;

            InputStream modelIn = new FileInputStream(modelPath + "en-pos-maxent.zip");
            try
            {
                model = new POSModel(modelIn);
            }
            finally
            {
                // Release the file handle whether or not the model could be read.
                modelIn.close();
            }

            POSTaggerME tagger = new POSTaggerME(model);

            return tagger.tag(tokens);
        }
Beispiel #11
0
        /// <summary>
        /// Creates a parser model from the given models and head rules, records the parser
        /// type in the manifest and validates the resulting artifact map.
        /// </summary>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="attachModel"/> is not <c>null</c> for a chunking parser, is
        /// <c>null</c> for a tree-insert parser, or <paramref name="modelType"/> is neither.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType,
            Dictionary <string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries)
        {
            if (modelType == ParserType.Chunking)
            {
                if (attachModel != null)
                {
                    throw new ArgumentException(@"attachModel must be null for chunking parser!", "attachModel");
                }

                Manifest[PARSER_TYPE] = "CHUNKING";
            }
            else if (modelType == ParserType.TreeInsert)
            {
                if (attachModel == null)
                {
                    throw new ArgumentException(@"attachModel must not be null for treeinsert parser!",
                                                "attachModel");
                }

                Manifest[PARSER_TYPE] = "TREEINSERT";

                // Only the tree-insert parser stores an attach model.
                artifactMap[ATTACH_MODEL_ENTRY_NAME] = attachModel;
            }
            else
            {
                throw new ArgumentException(@"Unknown mode type.", "modelType");
            }

            artifactMap[BUILD_MODEL_ENTRY_NAME]          = buildModel;
            artifactMap[CHECK_MODEL_ENTRY_NAME]          = checkModel;
            artifactMap[PARSER_TAGGER_MODEL_ENTRY_NAME]  = parserTagger;
            artifactMap[CHUNKER_TAGGER_MODEL_ENTRY_NAME] = chunkerTagger;
            artifactMap[HEAD_RULES_MODEL_ENTRY_NAME]     = headRules;

            CheckArtifactMap();
        }
Beispiel #12
0
        /// <summary>
        /// Creates a <see cref="ParserModel"/> from the supplied models and head rules.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model that assigns constituent labels.</param>
        /// <param name="checkModel">The model that decides whether a constituent is complete.</param>
        /// <param name="attachModel">The attach model.</param>
        /// <param name="parserTagger">The model that assigns pos-tags.</param>
        /// <param name="chunkerTagger">The model that assigns flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="modelType">The type of the model.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="modelType"/> is <see cref="Parser.ParserType.Chunking"/> and <paramref name="attachModel"/> is not <c>null</c>,
        /// or
        /// <paramref name="modelType"/> is <see cref="Parser.ParserType.TreeInsert"/> and <paramref name="attachModel"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="modelType"/> has an unknown value.
        /// </exception>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            IMaxentModel attachModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            ParserType modelType,
            Dictionary <string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries)
        {
            if (modelType == ParserType.Chunking)
            {
                if (attachModel != null)
                {
                    throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
                }

                Manifest[ParserTypeParameter] = "CHUNKING";
            }
            else if (modelType == ParserType.TreeInsert)
            {
                if (attachModel == null)
                {
                    throw new ArgumentException(@"attachModel must not be null for treeinsert parser!",
                                                nameof(attachModel));
                }

                Manifest[ParserTypeParameter] = "TREEINSERT";

                // Only the tree-insert parser stores an attach model.
                artifactMap[EntryAttachModel] = attachModel;
            }
            else
            {
                throw new ArgumentOutOfRangeException(nameof(modelType), "Unknown model type");
            }

            artifactMap[EntryBuildModel]         = buildModel;
            artifactMap[EntryCheckModel]         = checkModel;
            artifactMap[EntryParserTaggerModel]  = parserTagger;
            artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
            artifactMap[EntryHeadRules]          = headRules;

            CheckArtifactMap();
        }
        /// <summary>
        /// Splits the text of <c>textBox1</c> on white space, tags the resulting words and shows
        /// the tags in <c>label1</c>, each followed by ", ".
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            POSModel model;

            InputStream modelIn = new FileInputStream("en-pos-maxent.bin");
            try
            {
                model = new POSModel(modelIn);
            }
            finally
            {
                // Release the file handle whether or not the model could be read.
                modelIn.close();
            }

            // initialize POSTaggerME
            POSTaggerME tagger = new POSTaggerME(model);

            words = textBox1.Text.Split();
            String[] result = tagger.tag(words);

            // Build the text once and assign it once instead of updating the control per tag.
            var text = new System.Text.StringBuilder();
            for (int i = 0; i < result.Length; i++)
            {
                text.Append(result[i]).Append(", ");
            }

            label1.Text = text.ToString();
        }
Beispiel #14
0
        /// <summary>
        /// Serializes a perceptron POS model into memory, reads it back and compares language,
        /// manifest, sequence model type and factory type with the original.
        /// </summary>
        public void TestPOSModelSerializationPerceptron()
        {
            var trained = POSTaggerMETest.TrainPOSModel(ModelType.Perceptron);

            using (var buffer = new MemoryStream()) {
                trained.Serialize(new UnclosableStream(buffer));

                // Rewind so the model is read from the first byte.
                buffer.Seek(0, SeekOrigin.Begin);

                var roundTripped = new POSModel(buffer);

                Assert.AreEqual(trained.Language, roundTripped.Language);
                Assert.AreEqual(trained.Manifest, roundTripped.Manifest);
                Assert.AreEqual(trained.PosSequenceModel.GetType(), roundTripped.PosSequenceModel.GetType());
                Assert.AreEqual(trained.Factory.GetType(), roundTripped.Factory.GetType());
            }
        }
Beispiel #15
0
        /// <summary>
        /// Runs <c>sp_GetPurchaseOrder</c> for the id of <paramref name="posModel"/> and returns
        /// the result.
        /// </summary>
        /// <param name="posModel">Model whose <c>Id</c> identifies the purchase order.</param>
        /// <returns>The purchase order returned by the data layer.</returns>
        public POSModel GetPurchaseOrder(POSModel posModel)
        {
            // No catch block: exceptions reach the caller with their original stack trace
            // (the previous "throw ex;" discarded it), and the connection is still released.
            try
            {
                OpenConnection("POSConnection");
                baseDal.AddParameter("@in_Id", posModel.Id, false);

                return baseDal.Get <POSModel>("sp_GetPurchaseOrder");
            }
            finally
            {
                baseDal.SQLDisconnect();
            }
        }
Beispiel #16
0
        /// <summary>
        /// Copies <paramref name="words"/> into the <c>words</c> and <c>tempWords</c> arrays,
        /// creates the POS tagger from <c>en-pos-maxent.bin</c> and stores the result of
        /// <c>getPOS_Tags()</c> in <c>POS_Tags</c>.
        /// </summary>
        /// <param name="words">The words of the sentence.</param>
        public SentenceProcessor(String[] words)
        {
            int count = words.Length;

            this.words = new String[count];
            tempWords  = new String[count];
            POS_Tags   = new String[count];

            Array.Copy(words, this.words, count);
            Array.Copy(words, tempWords, count);

            // NOTE(review): modelIn is kept in a field and is not closed in this constructor.
            modelIn = new FileInputStream("en-pos-maxent.bin");
            model   = new POSModel(modelIn);
            tagger  = new POSTaggerME(model);

            POS_Tags = getPOS_Tags();
        }
        /// <summary>
        /// Loads the parser model named by the first argument and the tagger model named by the
        /// second, replaces the tagger model of the parser model and writes the result back to
        /// the parser model file. Prints the help text unless exactly two arguments are given.
        /// </summary>
        /// <param name="args">Parser model path followed by tagger model path.</param>
        public override void run(string[] args)
        {
            if (args.Length != 2)
            {
                Console.WriteLine(Help);
                return;
            }

            File        parserFile     = new File(args[0]);
            ParserModel originalParser = (new ParserModelLoader()).load(parserFile);

            File     taggerFile  = new File(args[1]);
            POSModel replacement = (new POSModelLoader()).load(taggerFile);

            ParserModel patchedParser = originalParser.updateTaggerModel(replacement);

            CmdLineUtil.writeModel("parser", parserFile, patchedParser);
        }
        /// <summary>
        /// Reads the POS and chunker models from the configured NLP model folder and creates
        /// the tagger and the chunker from them.
        /// </summary>
        private void LoadModels()
        {
            POSModel taggerModel;
            string   taggerPath = Path.Combine(configuration.Resources, configuration.NlpModels, "en-pos-maxent.bin");

            using (var taggerStream = new FileStream(taggerPath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                taggerModel = new POSModel(taggerStream);
            }

            ChunkerModel phraseModel;
            string       chunkerPath = Path.Combine(configuration.Resources, configuration.NlpModels, "en-chunker.bin");

            using (var chunkerStream = new FileStream(chunkerPath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                phraseModel = new ChunkerModel(chunkerStream);
            }

            // Both fields are assigned only after both models were read.
            posTagger = new POSTaggerME(taggerModel);
            chunker   = new ChunkerME(phraseModel);
        }
        /// <summary>
        /// Reads the 1.5 POS and chunker models found under <paramref name="resourcesFolder"/>
        /// and creates the tagger and the chunker from them.
        /// </summary>
        /// <param name="resourcesFolder">Folder containing the <c>1.5</c> model directory.</param>
        private void LoadModels(string resourcesFolder)
        {
            POSModel taggerModel;
            string   taggerPath = Path.Combine(resourcesFolder, @"1.5/en-pos-maxent.bin");

            using (var taggerStream = new FileStream(taggerPath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                taggerModel = new POSModel(taggerStream);
            }

            ChunkerModel phraseModel;
            string       chunkerPath = Path.Combine(resourcesFolder, @"1.5/en-chunker.bin");

            using (var chunkerStream = new FileStream(chunkerPath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                phraseModel = new ChunkerModel(chunkerStream);
            }

            // Both fields are assigned only after both models were read.
            posTagger = new POSTaggerME(taggerModel);
            chunker   = new ChunkerME(phraseModel);
        }
Beispiel #20
0
        /// <summary>
        /// Runs <c>sp_DeletePurchaseOrder</c> for the id of <paramref name="posModel"/>.
        /// </summary>
        /// <param name="posModel">Model whose <c>Id</c> identifies the purchase order.</param>
        /// <returns>1 when the call completed, -1 when any exception was caught.</returns>
        public int Delete(POSModel posModel)
        {
            int outcome;

            try
            {
                OpenConnection("POSConnection");
                baseDal.AddParameter("@in_Id", posModel.Id, false);

                baseDal.Get <POSModel>("sp_DeletePurchaseOrder");
                outcome = 1;
            }
            catch
            {
                // Every failure is reported to the caller as -1.
                outcome = -1;
            }
            finally
            {
                baseDal.SQLDisconnect();
            }

            return outcome;
        }
Beispiel #21
0
        /// <summary>
        /// Creates a chunking <see cref="ParserModel"/>: forwards to the full constructor with a
        /// <c>null</c> attach model and <see cref="ParserType.Chunking"/> as the model type.
        /// </summary>
        /// <param name="languageCode">The language code.</param>
        /// <param name="buildModel">The model that assigns constituent labels.</param>
        /// <param name="checkModel">The model that decides whether a constituent is complete.</param>
        /// <param name="parserTagger">The model that assigns pos-tags.</param>
        /// <param name="chunkerTagger">The model that assigns flat constituent labels.</param>
        /// <param name="headRules">The head rules.</param>
        /// <param name="manifestInfoEntries">The manifest information entries.</param>
        public ParserModel(
            string languageCode,
            IMaxentModel buildModel,
            IMaxentModel checkModel,
            POSModel parserTagger,
            ChunkerModel chunkerTagger,
            AbstractHeadRules headRules,
            Dictionary <string, string> manifestInfoEntries)
            : this(languageCode, buildModel, checkModel, null, parserTagger, chunkerTagger, headRules, ParserType.Chunking, manifestInfoEntries)
        {
        }
Beispiel #22
0
        /// <summary>
        /// Loads the sentence detector, tokenizer, POS tagger, chunker and parser models from the
        /// <c>Resources</c> folder and fills the stop-word list from <c>english.stop.txt</c>.
        /// Every model stream is closed as soon as its model has been read.
        /// </summary>
        public NLP()
        {
            //loading sentence detector model
            java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-sent.bin");
            try
            {
                SentenceModel sentenceModel = new SentenceModel(modelInpStream);
                sentenceDetector = new SentenceDetectorME(sentenceModel);
            }
            finally
            {
                modelInpStream.close();
            }

            //loading tokenizer model
            modelInpStream = new java.io.FileInputStream("Resources\\en-token.bin");
            try
            {
                TokenizerModel tokenizerModel = new TokenizerModel(modelInpStream);
                tokenizer = new TokenizerME(tokenizerModel);
            }
            finally
            {
                modelInpStream.close();
            }

            //loading part-of-speech model
            modelInpStream = new java.io.FileInputStream("Resources\\en-pos-maxent.bin");
            try
            {
                POSModel posModel = new POSModel(modelInpStream);
                tagger = new POSTaggerME(posModel);
            }
            finally
            {
                modelInpStream.close();
            }

            //loading chunker model
            modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
            try
            {
                ChunkerModel chunkerModel = new ChunkerModel(modelInpStream);
                chunker = new ChunkerME(chunkerModel);
            }
            finally
            {
                modelInpStream.close();
            }

            //loading parser model
            modelInpStream = new java.io.FileInputStream("Resources\\en-parser-chunking.bin");
            try
            {
                ParserModel parserModel = new ParserModel(modelInpStream);
                parser = ParserFactory.create(parserModel);
            }
            finally
            {
                modelInpStream.close();
            }

            //loading stop words list
            using (StreamReader sr = new StreamReader("Resources\\english.stop.txt"))
            {
                string line;

                while ((line = sr.ReadLine()) != null)
                {
                    // Each line is stored both stemmed and as read.
                    stopwords.Add(Stemming(line));
                    stopwords.Add(line);
                }
            }
        }
Beispiel #23
0
        /// <summary>
        /// Loads the POS model named by the single argument, then reads lines from the input
        /// stream, tokenizes each on white space, tags it and prints the tagged sample. Prints
        /// the help text unless exactly one argument is given.
        /// </summary>
        /// <param name="args">The path of the POS model file.</param>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            POSModel    model  = (new POSModelLoader()).load(new File(args[0]));
            POSTaggerME tagger = new POSTaggerME(model);

            ObjectStream <string> lineStream = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
            perfMon.start();

            try
            {
                for (string line = lineStream.read(); line != null; line = lineStream.read())
                {
                    string[] words = WhitespaceTokenizer.INSTANCE.tokenize(line);
                    string[] tags  = tagger.tag(words);

                    Console.WriteLine(new POSSample(words, tags).ToString());

                    // One counter increment per processed line.
                    perfMon.incrementCounter();
                }
            }
            catch (IOException e)
            {
                CmdLineUtil.handleStdinIoError(e);
            }

            perfMon.stopAndPrintFinalResult();
        }
Beispiel #24
0
        /// <summary>
        /// Tokenizes, tags and chunks every sentence, yielding for each sentence a list that
        /// pairs each token with its tag and chunk label. The models are read from the embedded
        /// <c>Resource</c> byte arrays; the returned sequence is evaluated lazily.
        /// </summary>
        /// <param name="Sentences">The sentences to process.</param>
        /// <returns>One collection of <c>ChunkItem</c> per sentence.</returns>
        public static IEnumerable<IEnumerable<ChunkItem>> GetChunks(IEnumerable<string> Sentences)
        {
            var pos       = new POSTaggerME(new POSModel(new java.io.ByteArrayInputStream(Resource.en_pos_maxent)));
            var tokenizer = new TokenizerME(new TokenizerModel(new java.io.ByteArrayInputStream(Resource.en_token)));
            var chunker   = new ChunkerME(new ChunkerModel(new java.io.ByteArrayInputStream(Resource.en_chunker)));

            return Sentences.Select(sentence =>
            {
                var tokens = tokenizer.tokenize(sentence);
                var tags   = pos.tag(tokens);
                var chunks = chunker.chunk(tokens, tags);

                // One item per chunk label, combined with the token and tag at the same index.
                return Enumerable.Range(0, chunks.Length)
                                 .Select(i => new ChunkItem { token = tokens[i], tag = tags[i], chunk = chunks[i] })
                                 .ToList();
            });
        }
 /// <summary>
 /// Passes <paramref name="posModel"/> to <c>POSDal.Create</c> and redirects to the
 /// <c>Read</c> action.
 /// </summary>
 /// <param name="posModel">The model handed to the data layer.</param>
 /// <returns>A redirect to the <c>Read</c> action.</returns>
 public ActionResult Create(POSModel posModel)
 {
     POSDal.Create(posModel);

     return RedirectToAction("Read");
 }
Beispiel #26
0
 /// <summary>
 /// Creates a new <see cref="ParserModel"/> from the values of this instance, with
 /// <paramref name="taggerModel"/> in place of the tagger model.
 /// </summary>
 /// <param name="taggerModel">The tagger model passed to the new parser model.</param>
 /// <returns>The newly created parser model.</returns>
 public ParserModel UpdateTaggerModel(POSModel taggerModel)
 {
     return new ParserModel(
         Language,
         BuildModel,
         CheckModel,
         AttachModel,
         taggerModel,
         ParserChunkerModel,
         HeadRules,
         ParserType);
 }
 /// <summary>
 /// Passes <paramref name="posModel"/> to <c>POSDal.Update</c> and redirects to the
 /// <c>Read</c> action.
 /// </summary>
 /// <param name="posModel">The model handed to the data layer.</param>
 /// <returns>A redirect to the <c>Read</c> action.</returns>
 public ActionResult Edit(POSModel posModel)
 {
     POSDal.Update(posModel);

     return RedirectToAction("Read");
 }
        /// <summary>
        /// Evaluates a POS tagger model against the sample stream and prints the word accuracy;
        /// optionally writes a fine-grained report to the configured report file.
        /// </summary>
        /// <param name="format">Forwarded to the base implementation.</param>
        /// <param name="args">Forwarded to the base implementation.</param>
        public override void run(string format, string[] args)
        {
            // NOTE(review): @params and sampleStream are not declared in this method;
            // presumably base.run prepares them - confirm.
            base.run(format, args);

            POSModel model = (new POSModelLoader()).load(@params.Model);

            // Stays null unless misclassified output was requested.
            POSTaggerEvaluationMonitor missclassifiedListener = null;

            if (@params.Misclassified.Value)
            {
                missclassifiedListener = new POSEvaluationErrorListener();
            }

            // The report listener and its stream exist only when a report file was given.
            POSTaggerFineGrainedReportListener reportListener = null;
            File         reportFile         = @params.ReportOutputFile;
            OutputStream reportOutputStream = null;

            if (reportFile != null)
            {
                CmdLineUtil.checkOutputFile("Report Output File", reportFile);
                try
                {
                    reportOutputStream = new FileOutputStream(reportFile);
                    reportListener     = new POSTaggerFineGrainedReportListener(reportOutputStream);
                }
                catch (FileNotFoundException e)
                {
                    // Only the message is carried over; the original exception is not attached.
                    throw new TerminateToolException(-1, "IO error while creating POS Tagger fine-grained report file: " + e.Message);
                }
            }

            POSEvaluator evaluator = new POSEvaluator(new opennlp.tools.postag.POSTaggerME(model), missclassifiedListener, reportListener);

            Console.Write("Evaluating ... ");
            try
            {
                evaluator.evaluate(sampleStream);
            }
            catch (IOException e)
            {
                Console.Error.WriteLine("failed");
                throw new TerminateToolException(-1, "IO error while reading test data: " + e.Message, e);
            }
            finally
            {
                // The sample stream is closed whether or not the evaluation succeeded.
                try
                {
                    sampleStream.close();
                }
                catch (IOException)
                {
                    // sorry that this can fail
                }
            }

            Console.WriteLine("done");

            if (reportListener != null)
            {
                Console.WriteLine("Writing fine-grained report to " + @params.ReportOutputFile.AbsolutePath);
                reportListener.writeReport();

                // NOTE(review): this close is reached only when evaluate() and writeReport()
                // did not throw; on those failure paths the report stream stays open.
                try
                {
                    // TODO: is it a problem to close the stream now?
                    reportOutputStream.close();
                }
                catch (IOException)
                {
                    // nothing to do
                }
            }

            Console.WriteLine();

            Console.WriteLine("Accuracy: " + evaluator.WordAccuracy);
        }
        /// <summary>
        /// Creates the tagger from a POS model read from <paramref name="modelStream"/>.
        /// The stream is not closed by this constructor.
        /// </summary>
        /// <param name="modelStream">Stream positioned at the start of the model data.</param>
        public POSTagger(FileStream modelStream)
        {
            var loadedModel = new POSModel(modelStream);

            tagger = new POSTaggerME(loadedModel);
        }
        /// <summary>
        /// Creates a POS tagger from the model file <c>Models\{language}-pos-maxent.bin</c>,
        /// where the language comes from <c>_language</c>.
        /// </summary>
        /// <returns>A tagger backed by the loaded model.</returns>
        private POSTaggerME PreparePosTagger()
        {
            var model = string.Format(@"Models\{0}-pos-maxent.bin", _language);

            POSModel posModel;
            var posModelStream = new FileInputStream(model);
            try
            {
                posModel = new POSModel(posModelStream);
            }
            finally
            {
                // Close the file even when reading the model fails.
                posModelStream.close();
            }

            return new POSTaggerME(posModel);
        }
 /// <summary>
 /// Creates the tagger from an already loaded POS model.
 /// </summary>
 /// <param name="model">The model the tagger is created from.</param>
 public POSTagger(POSModel model)
 {
     var modelBackedTagger = new POSTaggerME(model);

     this.tagger = modelBackedTagger;
 }
Beispiel #32
0
 /// <summary>
 /// Creates the operation with a tagger built from <paramref name="model"/>.
 /// </summary>
 /// <param name="model">The model the tagger is created from.</param>
 public NLPPOSTaggerOp(POSModel model)
 {
     var modelBackedTagger = new POSTaggerME(model);

     tagger = modelBackedTagger;
 }