/// <summary>
/// POS-tags every token of every sentence in <paramref name="paragraph"/>
/// using the en-pos-maxent model.
/// </summary>
/// <param name="paragraph">Raw text; sentence-split, then tokenized per sentence.</param>
/// <returns>One Tag per token, in document order; category holds the POS label.</returns>
public List<Tag> GetTags(string paragraph)
{
    var bin = GetFileStream("en-pos-maxent.bin");
    POSModel model = new POSModel(bin);
    POSTagger tagger = new POSTaggerME(model);

    var tagsResult = new List<Tag>();
    foreach (var sentenceSpan in SentPosDetect(paragraph))
    {
        var sentence = sentenceSpan.getCoveredText(paragraph).toString();
        var start = sentenceSpan.getStart();
        var end = sentenceSpan.getEnd();

        var tokenSpans = GetTokens(sentence);
        var tokens = new string[tokenSpans.Length];
        for (var i = 0; i < tokens.Length; i++)
        {
            tokens[i] = tokenSpans[i].getCoveredText(sentence).toString();
        }

        // BUG FIX: tag the whole sentence in one call. The original invoked
        // tagger.tag() once per single-token array, which strips the sentence
        // context a maxent POS tagger depends on — every token was tagged as
        // if it were a one-word sentence, badly degrading accuracy. It also
        // built the `tokens` array and then never used it.
        var tags = tagger.tag(tokens);
        for (var i = 0; i < tags.Length; i++)
        {
            // NOTE(review): start/end are the *sentence* span, as in the
            // original. Per-token offsets would be
            // start + tokenSpans[i].getStart()/getEnd() — confirm which
            // semantics callers expect before changing.
            tagsResult.Add(new Tag { startIndex = start, endIndex = end, category = tags[i] });
        }
    }
    return tagsResult;
}
/// <summary>
/// POS-tags the given tokens with a maxent model loaded from
/// <c>modelPath + "en-pos-maxent.zip"</c>.
/// </summary>
/// <param name="tokens">Tokens of a single sentence, in order.</param>
/// <returns>One POS tag per token, aligned by index.</returns>
string[] POSTagger(string[] tokens)
{
    InputStream modelIn = new FileInputStream(modelPath + "en-pos-maxent.zip");
    try
    {
        POSModel model = new POSModel(modelIn);
        POSTaggerME tagger = new POSTaggerME(model);
        return tagger.tag(tokens);
    }
    finally
    {
        // BUG FIX: the original never closed the model stream, leaking a
        // file handle on every call. (Dead commented-out debug loop removed.)
        modelIn.close();
    }
}
/// <summary>
/// Tags the whitespace-split contents of textBox1 and shows the resulting
/// POS labels (each followed by ", ") in label1.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    InputStream modelIn = new FileInputStream("en-pos-maxent.bin");
    try
    {
        POSModel model = new POSModel(modelIn);
        // initialize POSTaggerME
        POSTaggerME tagger = new POSTaggerME(model);
        words = textBox1.Text.Split();
        String[] result = tagger.tag(words);

        // FIX: build the text once instead of repeatedly concatenating onto
        // label1.Text (O(n^2) allocations plus n UI property writes).
        // The original emitted a trailing ", " after every tag, including
        // the last one — preserved byte-for-byte.
        var sb = new System.Text.StringBuilder();
        foreach (var tag in result)
        {
            sb.Append(tag).Append(", ");
        }
        label1.Text = sb.ToString();
    }
    finally
    {
        // BUG FIX: the original leaked the model file handle.
        modelIn.close();
    }
}
/// <summary>
/// CLI entry point: POS-tags whitespace-tokenized sentences read line-by-line
/// from stdin using the model file given as args[0], printing each tagged
/// sample to stdout and tracking throughput.
/// </summary>
/// <param name="args">Exactly one argument: the path to the POS model file.</param>
public override void run(string[] args)
{
    if (args.Length != 1)
    {
        // Wrong arity: print usage text and bail out.
        Console.WriteLine(Help);
    }
    else
    {
        POSModel model = (new POSModelLoader()).load(new File(args[0]));
        POSTaggerME tagger = new POSTaggerME(model);
        // Line-oriented reader over stdin; each line is one pre-tokenized sentence.
        ObjectStream <string> lineStream = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));
        // NOTE(review): `System.err` here appears to resolve to the Java (IKVM)
        // stderr stream, not a .NET type — this method looks like a direct port
        // of OpenNLP's POSTaggerTool; confirm it compiles against the intended
        // `System` symbol in this project.
        PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
        perfMon.start();
        try
        {
            string line;
            // read() returns null at end of stream.
            while ((line = lineStream.read()) != null)
            {
                // Input is assumed already tokenized: split on whitespace only.
                string[] whitespaceTokenizerLine = WhitespaceTokenizer.INSTANCE.tokenize(line);
                string[] tags = tagger.tag(whitespaceTokenizerLine);
                POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
                Console.WriteLine(sample.ToString());
                perfMon.incrementCounter();
            }
        }
        catch (IOException e)
        {
            // Delegate stdin I/O failures to the shared CLI error handler.
            CmdLineUtil.handleStdinIoError(e);
        }
        // Runs even after a handled IOException, so partial stats still print.
        perfMon.stopAndPrintFinalResult();
    }
}
/// <summary>
/// English POS tagging via OpenNLP. Model: en-pos-maxent.bin.
/// </summary>
/// <param name="tokens">The tokens to tag.</param>
/// <returns>POS tag array, aligned with <paramref name="tokens"/> by index.</returns>
public string[] POS(string[] tokens)
{
    // Presumably ensures the shared tagger field is initialized before use —
    // TODO(review): confirm LoadTagger() is idempotent / lazy-loading.
    LoadTagger();
    return(tagger.tag(tokens));
}
/// <summary>
/// Runs the POS tagger over the previously stored word array.
/// </summary>
/// <returns>One POS tag per entry in <c>words</c>, aligned by index.</returns>
public String[] getPOS_Tags()
{
    return tagger.tag(words);
}
/// <summary>
/// Chunk-parses the given tokens: assigns POS tags first, then delegates to
/// the two-argument Chunking overload.
/// </summary>
/// <param name="tokens">Tokens of a single sentence, in order.</param>
/// <returns>One chunk label per token.</returns>
public string[] Chunking(string[] tokens)
{
    var posTags = tagger.tag(tokens);
    return Chunking(tokens, posTags);
}
/// <summary>
/// Tokenizes, POS-tags, and chunk-parses each sentence using the OpenNLP
/// models embedded as assembly resources (loaded once per call).
/// </summary>
/// <param name="Sentences">Raw sentences to process.</param>
/// <returns>For each input sentence, its token/tag/chunk triples in order.</returns>
public static IEnumerable<IEnumerable<ChunkItem>> GetChunks(IEnumerable<string> Sentences)
{
    // Load all three models up front from embedded resources.
    var posModelStream = new java.io.ByteArrayInputStream(Resource.en_pos_maxent);
    var posModel = new POSModel(posModelStream);
    var pos = new POSTaggerME(posModel);

    var modelStream = new java.io.ByteArrayInputStream(Resource.en_token);
    var model = new TokenizerModel(modelStream);
    var tokenizer = new TokenizerME(model);

    var chunkerModelStream = new java.io.ByteArrayInputStream(Resource.en_chunker);
    var chunkerModel = new ChunkerModel(chunkerModelStream);
    var chunker = new ChunkerME(chunkerModel);

    // BUG FIX: materialize with ToList(). The original returned a deferred
    // LINQ query, so every enumeration of the result re-tokenized, re-tagged
    // and re-chunked all sentences through the captured model instances.
    // (Dead commented-out file-path alternatives removed.)
    return Sentences.Select(sentence =>
    {
        var tokens = tokenizer.tokenize(sentence);
        var tags = pos.tag(tokens);
        var chunks = chunker.chunk(tokens, tags);

        // Presize: one ChunkItem per chunk label.
        var items = new List<ChunkItem>(chunks.Length);
        for (var i = 0; i < chunks.Length; i++)
        {
            items.Add(new ChunkItem { token = tokens[i], tag = tags[i], chunk = chunks[i] });
        }
        return (IEnumerable<ChunkItem>)items;
    }).ToList();
}