/// <summary>
/// Splits the paragraph into sentences, tokenizes each sentence, and returns one
/// <see cref="Tag"/> per token holding its part-of-speech category.
/// </summary>
/// <param name="paragraph">Raw text to sentence-split, tokenize and tag.</param>
/// <returns>
/// One <see cref="Tag"/> per token; startIndex/endIndex are the enclosing
/// sentence's offsets within <paramref name="paragraph"/> (unchanged from the
/// original behavior).
/// </returns>
public List <Tag> GetTags(string paragraph)
{
    var bin = GetFileStream("en-pos-maxent.bin");
    POSModel model = new POSModel(bin);
    POSTagger tagger = new POSTaggerME(model);
    var sentenceSpans = SentPosDetect(paragraph);
    List <Tag> tagsResult = new List <Tag>();

    foreach (var sentenceSpan in sentenceSpans)
    {
        var sentence = sentenceSpan.getCoveredText(paragraph).toString();
        var start = sentenceSpan.getStart();
        var end = sentenceSpan.getEnd();

        var tokenSpans = GetTokens(sentence);
        var tokens = new string[tokenSpans.Length];
        for (var i = 0; i < tokens.Length; i++)
        {
            tokens[i] = tokenSpans[i].getCoveredText(sentence).toString();
        }

        // Tag the whole sentence in a single call: a maxent POS tagger uses the
        // surrounding tokens as context, so the previous per-token tag() calls
        // (one-element arrays) silently degraded tagging accuracy.
        var tags = tagger.tag(tokens);
        for (var i = 0; i < tags.Length; i++)
        {
            tagsResult.Add(new Tag { startIndex = start, endIndex = end, category = tags[i] });
        }
    }

    return tagsResult;
}
/// <summary>
/// Updates the PO for the given id.
/// </summary>
/// <param name="posModel">PO model object.</param>
/// <returns>The stored procedure's scalar result as an int; -1 on failure.</returns>
public int Update(POSModel posModel)
{
    try
    {
        baseDal.ConnectName = "POSConnection";
        baseDal.SQLConnect();
        baseDal.ClearParameters();
        baseDal.AddParameter("@in_OrderName", posModel.OrderName, false);
        baseDal.AddParameter("@in_Description", posModel.Description, false);
        baseDal.AddParameter("@in_Id", posModel.Id, false);
        return Convert.ToInt32(baseDal.ExecuteScalar("sp_UpdatePurchaseOrder"));
    }
    catch (Exception ex)
    {
        // Record the failure and signal it with the sentinel -1.
        // (The previous version eagerly allocated an ErrorMessage that was
        // assigned but never read; that dead allocation has been removed.)
        AddApplicationError(ex);
        return -1;
    }
    finally
    {
        // Always release the connection, on success and failure alike.
        baseDal.SQLDisconnect();
    }
}
/// <summary>
/// Loads the POS model from the path in <c>POSModel</c> and builds the tagger.
/// On any I/O failure the tagger field is left null; the stream is always
/// closed on a best-effort basis.
/// </summary>
private void InitializePOSTagger()
{
    InputStream stream = null;
    try
    {
        stream = new FileInputStream(POSModel);
        tagger = new POSTaggerME(new POSModel(stream));
    }
    catch (IOException)
    {
        // Model could not be read; leave the tagger unavailable.
        tagger = null;
    }
    finally
    {
        if (stream != null)
        {
            try
            {
                stream.close();
            }
            catch (IOException)
            {
                // Best-effort close; nothing useful to do here.
            }
        }
    }
}
/// <summary>
/// Initializes a new instance of <see cref="ParserModel"/> with no additional
/// manifest entries; delegates to the full constructor passing <c>null</c> for
/// the manifest dictionary.
/// </summary>
public ParserModel(string languageCode, IMaxentModel buildModel, IMaxentModel checkModel,
    IMaxentModel attachModel, POSModel parserTagger, ChunkerModel chunkerTagger,
    AbstractHeadRules headRules, ParserType modelType)
    : this(languageCode, buildModel, checkModel, attachModel, parserTagger,
        chunkerTagger, headRules, modelType, null)
{
}
/// <summary>
/// Loads the English maxent POS model from the working directory and builds
/// the tagger once for this instance.
/// </summary>
public DeterminerPartOfSpeech()
{
    POSModel loadedModel;
    using (var stream = new FileStream("en-pos-maxent.bin", FileMode.Open))
    {
        loadedModel = new POSModel(stream);
    }
    posTagger = new POSTaggerME(loadedModel);
}
/// <summary>
/// Loads the English maxent POS model from the working directory and builds
/// the tagger once for this recognizer.
/// </summary>
public PartOfSpeechRecognizer()
{
    POSModel loadedModel;
    using (var stream = new FileStream("en-pos-maxent.bin", FileMode.Open))
    {
        loadedModel = new POSModel(stream);
    }
    PosTagger = new POSTaggerME(loadedModel);
}
/// <summary>
/// Registers all serializers for their artifact file name extensions. Override this method to register custom file extensions.
/// </summary>
/// <seealso href="https://msdn.microsoft.com/en-us/library/ms182331.aspx" />
/// <remarks>The subclasses should invoke the <see cref="ArtifactProvider.RegisterArtifactType" /> to register
/// the proper serialization/deserialization methods for an new extension.
/// Warning: This method is called in constructor of the base class!! Be aware that this method is ONLY designed to register serializers.</remarks>
protected override void CreateArtifactSerializers()
{
    base.CreateArtifactSerializers();
    // note from OpenNLP (for future adaptations)
    // In 1.6.x the headrules artifact is serialized with the new API
    // which uses the Serializable interface
    // This change is not backward compatible with the 1.5.x models.
    // In order to load 1.5.x model the English headrules serializer must be
    // put on the serializer map.
    RegisterArtifactType(".headrules",
        (artifact, stream) => HeadRulesManager.Serialize(artifact as AbstractHeadRules, stream),
        stream => HeadRulesManager.Deserialize(Language, stream));

    // POS tagger artifact: serializer requires the artifact to actually be a
    // POSModel; deserializer applies a 1.5.x beam-size compatibility shim.
    RegisterArtifactType(".postagger", (artifact, stream) => {
        var model = artifact as POSModel;
        if (model == null)
        {
            throw new InvalidOperationException();
        }
        model.Serialize(stream);
    }, stream => {
        var model = new POSModel(stream);
        // The 1.6.x models write the non-default beam size into the model itself.
        // In 1.5.x the parser configured the beam size when the model was loaded,
        // this is not possible anymore with the new APIs
        if (model.Version.Major == 1 && model.Version.Minor == 5 && !model.Manifest.Contains(Parameters.BeamSize))
        {
            // Rebuild the model with the 1.5.x default beam size of 10.
            return(new POSModel(model.Language, model.MaxentModel, 10, null, model.Factory));
        }
        return(model);
    });

    // Chunker artifact: 1.5.x chunker models are rebuilt with the parser's
    // chunker factory so they load under the new API.
    RegisterArtifactType(".chunker", (artifact, stream) => {
        var model = artifact as ChunkerModel;
        if (model == null)
        {
            throw new InvalidOperationException();
        }
        model.Serialize(stream);
    }, stream => {
        var model = new ChunkerModel(stream);
        if (model.Version.Major == 1 && model.Version.Minor == 5)
        {
            return(new ChunkerModel(model.Language, model.MaxentModel, new ParserChunkerFactory()));
        }
        return(model);
    });
}
/// <summary>
/// Lazily loads the English maxent POS model and builds the tagger;
/// subsequent calls are no-ops once <c>alreadyLoadTokenizer</c> is set.
/// </summary>
private void LoadTagger()
{
    if (!alreadyLoadTokenizer)
    {
        java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-pos-maxent.bin");
        try
        {
            POSModel posModel = new POSModel(modelInpStream);
            tagger = new POSTaggerME(posModel);
        }
        finally
        {
            // The previous version never closed the stream, leaking the
            // OS file handle for the lifetime of the process.
            modelInpStream.close();
        }
        alreadyLoadTokenizer = true;
    }
}
/// <summary>
/// Returns the POS tagger model registered under <paramref name="modelName"/>,
/// loading it through <paramref name="loader"/> and caching it on first use.
/// </summary>
/// <param name="modelName">Resource name of the model, also the cache key.</param>
/// <param name="loader">Resource loader used on a cache miss.</param>
/// <returns>The cached or freshly loaded model.</returns>
public static POSModel GetPOSTaggerModel(string modelName, IResourceLoader loader)
{
    posTaggerModels.TryGetValue(modelName, out POSModel cached);
    if (cached == null)
    {
        // Cache miss (or a null placeholder): load and remember the model.
        using (Stream resource = loader.OpenResource(modelName))
        {
            cached = new POSModel(new ikvm.io.InputStreamWrapper(resource));
        }
        posTaggerModels[modelName] = cached;
    }
    return cached;
}
/// <summary>
/// Tags the given tokens with their part-of-speech categories using the
/// maxent model found under <c>modelPath</c>.
/// </summary>
/// <param name="tokens">Tokens of a single sentence.</param>
/// <returns>One POS tag per input token.</returns>
string[] POSTagger(string[] tokens)
{
    InputStream modelIn = new FileInputStream(modelPath + "en-pos-maxent.zip");
    POSModel model;
    try
    {
        model = new POSModel(modelIn);
    }
    finally
    {
        // The previous version never closed the stream (file-handle leak).
        modelIn.close();
    }
    POSTaggerME tagger = new POSTaggerME(model);
    return tagger.tag(tokens);
}
/// <summary>
/// Initializes the parser model from its component models and head rules.
/// For a chunking parser the attach model must be null; for a tree-insert
/// parser it is required.
/// </summary>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary <string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries)
{
    switch (modelType)
    {
        case ParserType.Chunking:
            if (attachModel != null)
            {
                // nameof keeps the parameter reference refactor-safe
                // (the old code used magic string literals here).
                throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
            }
            Manifest[PARSER_TYPE] = "CHUNKING";
            break;
        case ParserType.TreeInsert:
            if (attachModel == null)
            {
                throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
            }
            Manifest[PARSER_TYPE] = "TREEINSERT";
            artifactMap[ATTACH_MODEL_ENTRY_NAME] = attachModel;
            break;
        default:
            throw new ArgumentException(@"Unknown mode type.", nameof(modelType));
    }
    artifactMap[BUILD_MODEL_ENTRY_NAME] = buildModel;
    artifactMap[CHECK_MODEL_ENTRY_NAME] = checkModel;
    artifactMap[PARSER_TAGGER_MODEL_ENTRY_NAME] = parserTagger;
    artifactMap[CHUNKER_TAGGER_MODEL_ENTRY_NAME] = chunkerTagger;
    artifactMap[HEAD_RULES_MODEL_ENTRY_NAME] = headRules;
    // Validate that every required artifact is present.
    CheckArtifactMap();
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="attachModel">The attach model.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="modelType">Type of the model.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <exception cref="System.ArgumentException">
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.Chunking"/> the <paramref name="attachModel"/> must be <c>null</c>.
/// or
/// If the <paramref name="modelType"/> is equal to <see cref="Parser.ParserType.TreeInsert"/> the <paramref name="attachModel"/> must not be <c>null</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Unknown <paramref name="modelType"/> value.
/// </exception>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    IMaxentModel attachModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    ParserType modelType,
    Dictionary <string, string> manifestInfoEntries) : base(ComponentName, languageCode, manifestInfoEntries)
{
    switch (modelType)
    {
        case ParserType.Chunking:
            // Chunking parsers have no attach model by definition.
            if (attachModel != null)
            {
                throw new ArgumentException(@"attachModel must be null for chunking parser!", nameof(attachModel));
            }
            Manifest[ParserTypeParameter] = "CHUNKING";
            break;
        case ParserType.TreeInsert:
            // Tree-insert parsers require the attach model.
            if (attachModel == null)
            {
                throw new ArgumentException(@"attachModel must not be null for treeinsert parser!", nameof(attachModel));
            }
            Manifest[ParserTypeParameter] = "TREEINSERT";
            artifactMap[EntryAttachModel] = attachModel;
            break;
        default:
            throw new ArgumentOutOfRangeException(nameof(modelType), "Unknown model type");
    }
    // Register the remaining component models shared by both parser types.
    artifactMap[EntryBuildModel] = buildModel;
    artifactMap[EntryCheckModel] = checkModel;
    artifactMap[EntryParserTaggerModel] = parserTagger;
    artifactMap[EntryChunkerTaggerModel] = chunkerTagger;
    artifactMap[EntryHeadRules] = headRules;
    CheckArtifactMap();
}
/// <summary>
/// Tags the whitespace-separated words in textBox1 and shows the resulting
/// POS tags, comma-separated, in label1.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    // Load the model, then close the stream (the old code leaked the handle
    // on every click).
    InputStream modelIn = new FileInputStream("en-pos-maxent.bin");
    POSModel model;
    try
    {
        model = new POSModel(modelIn);
    }
    finally
    {
        modelIn.close();
    }

    // initialize POSTaggerME
    POSTaggerME tagger = new POSTaggerME(model);
    words = textBox1.Text.Split();
    String[] result = tagger.tag(words);

    // Same output as the old += loop (every tag followed by ", "), without
    // the quadratic string concatenation.
    label1.Text = result.Length > 0 ? string.Join(", ", result) + ", " : "";
}
/// <summary>
/// Round-trips a perceptron-trained POS model through an in-memory stream and
/// verifies the recreated model matches the original.
/// </summary>
public void TestPOSModelSerializationPerceptron()
{
    var original = POSTaggerMETest.TrainPOSModel(ModelType.Perceptron);
    using (var buffer = new MemoryStream())
    {
        // UnclosableStream keeps the MemoryStream readable after Serialize.
        original.Serialize(new UnclosableStream(buffer));
        buffer.Seek(0, SeekOrigin.Begin);

        var roundTripped = new POSModel(buffer);

        Assert.AreEqual(original.Language, roundTripped.Language);
        Assert.AreEqual(original.Manifest, roundTripped.Manifest);
        Assert.AreEqual(original.PosSequenceModel.GetType(), roundTripped.PosSequenceModel.GetType());
        Assert.AreEqual(original.Factory.GetType(), roundTripped.Factory.GetType());
    }
}
/// <summary>
/// Gets PO details for the given id.
/// </summary>
/// <param name="posModel">POS model carrying the id to look up.</param>
/// <returns>Returns PO details.</returns>
public POSModel GetPurchaseOrder(POSModel posModel)
{
    try
    {
        OpenConnection("POSConnection");
        baseDal.AddParameter("@in_Id", posModel.Id, false);
        return baseDal.Get <POSModel>("sp_GetPurchaseOrder");
    }
    catch (Exception)
    {
        // `throw;` preserves the original stack trace; the previous
        // `throw ex;` reset it, hiding the real failure site.
        throw;
    }
    finally
    {
        baseDal.SQLDisconnect();
    }
}
/// <summary>
/// Copies the input words, loads the English maxent POS model, and computes
/// the POS tags for the sentence.
/// </summary>
/// <param name="words">Tokens of the sentence to process.</param>
public SentenceProcessor(String[] words)
{
    // Defensive copies via Clone() replace the old element-by-element loop.
    this.words = (String[])words.Clone();
    tempWords = (String[])words.Clone();
    // Pre-sized placeholder; kept in case getPOS_Tags() reads the field
    // before the final assignment below — TODO confirm against getPOS_Tags.
    POS_Tags = new String[words.Length];

    modelIn = new FileInputStream("en-pos-maxent.bin");
    model = new POSModel(modelIn);
    tagger = new POSTaggerME(model);
    POS_Tags = getPOS_Tags();
}
/// <summary>
/// CLI entry point: replaces the tagger model inside a parser model file.
/// Expects two arguments: the parser model path and the tagger model path.
/// </summary>
public override void run(string[] args)
{
    // Guard clause: wrong argument count just prints usage.
    if (args.Length != 2)
    {
        Console.WriteLine(Help);
        return;
    }

    File parserModelInFile = new File(args[0]);
    ParserModel parserModel = (new ParserModelLoader()).load(parserModelInFile);

    File taggerModelInFile = new File(args[1]);
    POSModel taggerModel = (new POSModelLoader()).load(taggerModelInFile);

    // Swap the tagger into the parser model and write it back in place.
    ParserModel updatedParserModel = parserModel.updateTaggerModel(taggerModel);
    CmdLineUtil.writeModel("parser", parserModelInFile, updatedParserModel);
}
/// <summary>
/// Loads the POS and chunker models from the configured resources folder and
/// builds the corresponding taggers.
/// </summary>
private void LoadModels()
{
    var modelsDir = Path.Combine(configuration.Resources, configuration.NlpModels);

    POSModel posModel;
    using (var stream = new FileStream(Path.Combine(modelsDir, "en-pos-maxent.bin"),
        FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        posModel = new POSModel(stream);
    }

    ChunkerModel chunkerModel;
    using (var stream = new FileStream(Path.Combine(modelsDir, "en-chunker.bin"),
        FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        chunkerModel = new ChunkerModel(stream);
    }

    posTagger = new POSTaggerME(posModel);
    chunker = new ChunkerME(chunkerModel);
}
/// <summary>
/// Loads the 1.5-format POS and chunker models from the given resources
/// folder and builds the corresponding taggers.
/// </summary>
/// <param name="resourcesFolder">Root folder containing the 1.5 model files.</param>
private void LoadModels(string resourcesFolder)
{
    POSModel posModel;
    using (var stream = new FileStream(Path.Combine(resourcesFolder, @"1.5/en-pos-maxent.bin"),
        FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        posModel = new POSModel(stream);
    }

    ChunkerModel chunkerModel;
    using (var stream = new FileStream(Path.Combine(resourcesFolder, @"1.5/en-chunker.bin"),
        FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        chunkerModel = new ChunkerModel(stream);
    }

    posTagger = new POSTaggerME(posModel);
    chunker = new ChunkerME(chunkerModel);
}
/// <summary>
/// Deletes the purchase order for the given id.
/// </summary>
/// <param name="posModel">POSModel carrying the id to delete.</param>
/// <returns>Returns 1 if deleted, else returns -1.</returns>
public int Delete(POSModel posModel)
{
    try
    {
        OpenConnection("POSConnection");
        baseDal.AddParameter("@in_Id", posModel.Id, false);
        baseDal.Get <POSModel>("sp_DeletePurchaseOrder");
        return 1;
    }
    catch
    {
        // Deliberate best-effort contract: any failure maps to the
        // documented sentinel -1.
        return -1;
    }
    finally
    {
        baseDal.SQLDisconnect();
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="ParserModel"/> using the specified models and head rules using the model type as chunking.
/// </summary>
/// <param name="languageCode">The language code.</param>
/// <param name="buildModel">The model to assign constituent labels.</param>
/// <param name="checkModel">The model to determine a constituent is complete.</param>
/// <param name="parserTagger">The model to assign pos-tags.</param>
/// <param name="chunkerTagger">The model to assign flat constituent labels.</param>
/// <param name="headRules">The head rules.</param>
/// <param name="manifestInfoEntries">The manifest information entries.</param>
/// <remarks>Delegates to the full constructor with a null attach model, as required for chunking parsers.</remarks>
public ParserModel(
    string languageCode,
    IMaxentModel buildModel,
    IMaxentModel checkModel,
    POSModel parserTagger,
    ChunkerModel chunkerTagger,
    AbstractHeadRules headRules,
    Dictionary <string, string> manifestInfoEntries) : this(
        languageCode, buildModel, checkModel, null, parserTagger, chunkerTagger,
        headRules, ParserType.Chunking, manifestInfoEntries)
{
}
/// <summary>
/// Loads every OpenNLP model (sentence detector, tokenizer, POS tagger,
/// chunker, full parser) plus the stop-word list from the Resources folder.
/// </summary>
public NLP()
{
    // Each OpenNLP model constructor reads the stream fully, so the stream is
    // closed right after construction — the previous version never closed any
    // of these, leaking five file handles. (Assumes the models do not keep the
    // stream open — TODO confirm against the OpenNLP/IKVM build in use.)

    //loading sentence detector model
    java.io.FileInputStream modelInpStream = new java.io.FileInputStream("Resources\\en-sent.bin");
    SentenceModel sentenceModel = new SentenceModel(modelInpStream);
    sentenceDetector = new SentenceDetectorME(sentenceModel);
    modelInpStream.close();

    //loading tokenizer model
    modelInpStream = new java.io.FileInputStream("Resources\\en-token.bin");
    TokenizerModel tokenizerModel = new TokenizerModel(modelInpStream);
    tokenizer = new TokenizerME(tokenizerModel);
    modelInpStream.close();

    //loading POS tagger model
    modelInpStream = new java.io.FileInputStream("Resources\\en-pos-maxent.bin");
    POSModel posModel = new POSModel(modelInpStream);
    tagger = new POSTaggerME(posModel);
    modelInpStream.close();

    //loading chunker model
    modelInpStream = new java.io.FileInputStream("Resources\\en-chunker.bin");
    ChunkerModel chunkerModel = new ChunkerModel(modelInpStream);
    chunker = new ChunkerME(chunkerModel);
    modelInpStream.close();

    //loading parser model
    modelInpStream = new java.io.FileInputStream("Resources\\en-parser-chunking.bin");
    ParserModel parserModel = new ParserModel(modelInpStream);
    parser = ParserFactory.create(parserModel);
    modelInpStream.close();

    //loading stop words list; `using` fixes the StreamReader leak.
    using (StreamReader sr = new StreamReader("Resources\\english.stop.txt"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // Store both the stemmed and the raw form of each stop word.
            stopwords.Add(Stemming(line));
            stopwords.Add(line);
        }
    }
}
/// <summary>
/// CLI entry point: loads a POS model (single argument) and tags each line
/// read from standard input, printing the tagged sample and throughput stats.
/// </summary>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        Console.WriteLine(Help);
    }
    else
    {
        POSModel model = (new POSModelLoader()).load(new File(args[0]));
        POSTaggerME tagger = new POSTaggerME(model);
        // Read stdin one line at a time; each line is treated as one sentence.
        ObjectStream <string> lineStream = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));
        // Throughput monitor reports "sent" (sentences) per second to stderr.
        PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
        perfMon.start();
        try
        {
            string line;
            while ((line = lineStream.read()) != null)
            {
                // Whitespace tokenization only — no model-based tokenizer here.
                string[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);
                string[] tags = tagger.tag(whitespaceTokenizerLine);
                POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
                Console.WriteLine(sample.ToString());
                perfMon.incrementCounter();
            }
        }
        catch (IOException e)
        {
            CmdLineUtil.handleStdinIoError(e);
        }
        perfMon.stopAndPrintFinalResult();
    }
}
/// <summary>
/// Lazily chunks each sentence: tokenize, POS-tag, chunk, then pair the three
/// parallel arrays into <see cref="ChunkItem"/> records.
/// </summary>
/// <param name="Sentences">Sentences to process; evaluation is deferred.</param>
/// <returns>For each sentence, its sequence of token/tag/chunk triples.</returns>
public static IEnumerable<IEnumerable<ChunkItem>> GetChunks(IEnumerable<string> Sentences)
{
    // Build the OpenNLP pipeline once, from the embedded model resources.
    var posTagger = new POSTaggerME(new POSModel(new java.io.ByteArrayInputStream(Resource.en_pos_maxent)));
    var tokenizer = new TokenizerME(new TokenizerModel(new java.io.ByteArrayInputStream(Resource.en_token)));
    var chunker = new ChunkerME(new ChunkerModel(new java.io.ByteArrayInputStream(Resource.en_chunker)));

    // Deferred: each sentence is processed only when the result is enumerated.
    return Sentences.Select(sentence =>
    {
        var tokens = tokenizer.tokenize(sentence);
        var tags = posTagger.tag(tokens);
        var chunks = chunker.chunk(tokens, tags);

        var items = new List<ChunkItem>();
        for (var index = 0; index < chunks.Length; index++)
        {
            items.Add(new ChunkItem { token = tokens[index], tag = tags[index], chunk = chunks[index] });
        }
        return items;
    });
}
/// <summary>
/// Persists the new purchase order, then redirects to the listing.
/// </summary>
/// <param name="posModel">Purchase order to create.</param>
/// <returns>Redirect to the Read action.</returns>
public ActionResult Create(POSModel posModel)
{
    POSDal.Create(posModel);
    return RedirectToAction("Read");
}
/// <summary>
/// Returns a copy of this parser model with its POS tagger model replaced by
/// <paramref name="taggerModel"/>; all other components are carried over.
/// </summary>
public ParserModel UpdateTaggerModel(POSModel taggerModel) =>
    new ParserModel(Language, BuildModel, CheckModel, AttachModel, taggerModel,
        ParserChunkerModel, HeadRules, ParserType);
/// <summary>
/// Updates the purchase order details for the given id.
/// </summary>
/// <param name="posModel">POSModel object with the updated values.</param>
/// <returns>Redirects to the Read action after a successful update.</returns>
public ActionResult Edit(POSModel posModel)
{
    POSDal.Update(posModel);
    return RedirectToAction("Read");
}
/// <summary>
/// CLI entry point: evaluates a POS tagger model against the configured sample
/// stream, optionally logging misclassifications and writing a fine-grained
/// report, then prints the word accuracy.
/// </summary>
public override void run(string format, string[] args)
{
    base.run(format, args);
    POSModel model = (new POSModelLoader()).load(@params.Model);

    // Optional listener that echoes each misclassified sample.
    POSTaggerEvaluationMonitor missclassifiedListener = null;
    if (@params.Misclassified.Value)
    {
        missclassifiedListener = new POSEvaluationErrorListener();
    }

    // Optional fine-grained report written to the file given in the params.
    POSTaggerFineGrainedReportListener reportListener = null;
    File reportFile = @params.ReportOutputFile;
    OutputStream reportOutputStream = null;
    if (reportFile != null)
    {
        CmdLineUtil.checkOutputFile("Report Output File", reportFile);
        try
        {
            reportOutputStream = new FileOutputStream(reportFile);
            reportListener = new POSTaggerFineGrainedReportListener(reportOutputStream);
        }
        catch (FileNotFoundException e)
        {
            throw new TerminateToolException(-1, "IO error while creating POS Tagger fine-grained report file: " + e.Message);
        }
    }

    POSEvaluator evaluator = new POSEvaluator(new opennlp.tools.postag.POSTaggerME(model), missclassifiedListener, reportListener);
    Console.Write("Evaluating ... ");
    try
    {
        evaluator.evaluate(sampleStream);
    }
    catch (IOException e)
    {
        Console.Error.WriteLine("failed");
        throw new TerminateToolException(-1, "IO error while reading test data: " + e.Message, e);
    }
    finally
    {
        // Always close the sample stream, even when evaluation failed.
        try
        {
            sampleStream.close();
        }
        catch (IOException)
        {
            // sorry that this can fail
        }
    }
    Console.WriteLine("done");

    if (reportListener != null)
    {
        Console.WriteLine("Writing fine-grained report to " + @params.ReportOutputFile.AbsolutePath);
        reportListener.writeReport();
        try
        {
            // TODO: is it a problem to close the stream now?
            reportOutputStream.close();
        }
        catch (IOException)
        {
            // nothing to do
        }
    }

    Console.WriteLine();
    Console.WriteLine("Accuracy: " + evaluator.WordAccuracy);
}
/// <summary>
/// Builds the underlying maxent tagger directly from the supplied model stream.
/// </summary>
/// <param name="modelStream">Open stream positioned at a serialized POS model.</param>
public POSTagger(FileStream modelStream)
{
    this.tagger = new POSTaggerME(new POSModel(modelStream));
}
/// <summary>
/// Loads the language-specific maxent POS model (e.g. Models\en-pos-maxent.bin
/// for <c>_language</c> == "en") and returns a tagger built from it.
/// </summary>
/// <returns>A ready-to-use POS tagger for the configured language.</returns>
private POSTaggerME PreparePosTagger()
{
    // Interpolated string replaces the string.Format call (same output).
    var model = $@"Models\{_language}-pos-maxent.bin";
    var posModelStream = new FileInputStream(model);
    var posModel = new POSModel(posModelStream);
    // Safe to close here: the model constructor has already consumed the stream.
    posModelStream.close();
    return new POSTaggerME(posModel);
}
/// <summary>
/// Wraps an already-loaded POS model in a maxent tagger.
/// </summary>
/// <param name="model">The loaded POS model to tag with.</param>
public POSTagger(POSModel model)
{
    this.tagger = new POSTaggerME(model);
}
/// <summary>
/// Creates the tagger operation around a pre-loaded POS model.
/// </summary>
/// <param name="model">The loaded POS model to tag with.</param>
public NLPPOSTaggerOp(POSModel model)
{
    tagger = new POSTaggerME(model);
}