Exemplo n.º 1
0
        /// <summary>
        /// Splits <paramref name="paragraph"/> into sentences, tokenizes every sentence and
        /// POS-tags it, producing one <see cref="Tag"/> per token.
        /// </summary>
        /// <param name="paragraph">Text to split, tokenize and tag.</param>
        /// <returns>
        /// One <see cref="Tag"/> per token, in document order. <c>startIndex</c> and
        /// <c>endIndex</c> hold the start/end of the sentence span that contains the token;
        /// <c>category</c> holds the tag returned by the tagger for that token.
        /// </returns>
        public List<Tag> GetTags(string paragraph)
        {
            POSTagger tagger;
            var bin = GetFileStream("en-pos-maxent.bin");
            try
            {
                tagger = new POSTaggerME(new POSModel(bin));
            }
            finally
            {
                // The stream is only needed while the model is being constructed.
                if (bin != null)
                {
                    bin.close();
                }
            }

            var sentenceSpans = SentPosDetect(paragraph);
            var tagsResult    = new List<Tag>();

            foreach (var sentenceSpan in sentenceSpans)
            {
                var sentence = sentenceSpan.getCoveredText(paragraph).toString();
                var start    = sentenceSpan.getStart();
                var end      = sentenceSpan.getEnd();

                var tokenSpans = GetTokens(sentence);
                if (tokenSpans.Length == 0)
                {
                    continue;
                }

                var tokens = new string[tokenSpans.Length];
                for (var i = 0; i < tokens.Length; i++)
                {
                    tokens[i] = tokenSpans[i].getCoveredText(sentence).toString();
                }

                // Tag the whole sentence in a single call so the tagger sees the neighbouring
                // tokens, instead of tagging every token as an isolated one-word sequence.
                string[] tags = tagger.tag(tokens);

                for (var i = 0; i < tokens.Length; i++)
                {
                    // NOTE(review): indices are the sentence bounds, as in the original code;
                    // confirm whether per-token offsets were intended.
                    tagsResult.Add(new Tag
                    {
                        startIndex = start,
                        endIndex   = end,
                        category   = tags[i]
                    });
                }
            }

            return tagsResult;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Loads the POS model from <c>modelPath + "en-pos-maxent.zip"</c> and tags the given tokens.
        /// </summary>
        /// <param name="tokens">Tokens to tag, passed to the tagger as one sequence.</param>
        /// <returns>The array returned by the tagger for <paramref name="tokens"/>.</returns>
        string[] POSTagger(string[] tokens)
        {
            POSModel    model;
            InputStream modelIn = new FileInputStream(modelPath + "en-pos-maxent.zip");
            try
            {
                model = new POSModel(modelIn);
            }
            finally
            {
                // Release the file handle whether or not the model loaded successfully.
                modelIn.close();
            }

            POSTaggerME tagger = new POSTaggerME(model);
            return tagger.tag(tokens);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Click handler: splits the text of <c>textBox1</c>, POS-tags the resulting words with
        /// the model in "en-pos-maxent.bin" and shows the tags in <c>label1</c>, each followed
        /// by ", ".
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            POSModel    model;
            InputStream modelIn = new FileInputStream("en-pos-maxent.bin");
            try
            {
                model = new POSModel(modelIn);
            }
            finally
            {
                // Release the file handle whether or not the model loaded successfully.
                modelIn.close();
            }

            POSTaggerME tagger = new POSTaggerME(model);

            words = textBox1.Text.Split();
            String[] result = tagger.tag(words);

            // Build the text once instead of re-assigning label1.Text on every iteration.
            var text = new System.Text.StringBuilder();
            for (int i = 0; i < result.Length; i++)
            {
                text.Append(result[i]).Append(", ");
            }

            label1.Text = text.ToString();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reads lines from the line stream built over standard input, whitespace-tokenizes and
        /// POS-tags each one, and writes the resulting <c>POSSample</c> to the console.
        /// </summary>
        /// <param name="args">
        /// Must contain exactly one element, which is passed as a <c>File</c> to
        /// <c>POSModelLoader.load</c>; otherwise <c>Help</c> is printed and nothing else happens.
        /// </param>
        public override void run(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine(Help);
                return;
            }

            POSModel    posModel  = (new POSModelLoader()).load(new File(args[0]));
            POSTaggerME posTagger = new POSTaggerME(posModel);

            ObjectStream <string> input = new PlainTextByLineStream(new InputStreamReader(Console.OpenStandardInput));

            PerformanceMonitor monitor = new PerformanceMonitor(System.err, "sent");
            monitor.start();

            try
            {
                for (string text = input.read(); text != null; text = input.read())
                {
                    string[] words    = WhitespaceTokenizer.INSTANCE.tokenize(text);
                    string[] wordTags = posTagger.tag(words);

                    Console.WriteLine(new POSSample(words, wordTags).ToString());

                    // One counter increment per processed line.
                    monitor.incrementCounter();
                }
            }
            catch (IOException ioError)
            {
                CmdLineUtil.handleStdinIoError(ioError);
            }

            monitor.stopAndPrintFinalResult();
        }
Exemplo n.º 5
0
 /// <summary>
 /// Performs English part-of-speech tagging with OpenNLP. Model: en-pos-maxent.bin.
 /// </summary>
 /// <param name="tokens">The tokens to be tagged.</param>
 /// <returns>The POS array returned by the tagger for <paramref name="tokens"/>.</returns>
 public string[] POS(string[] tokens)
 {
     // LoadTagger() is called before every use of the tagger field.
     LoadTagger();

     string[] posTags = tagger.tag(tokens);
     return posTags;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Tags the <c>words</c> field with the <c>tagger</c> field.
        /// </summary>
        /// <returns>The array returned by <c>tagger.tag(words)</c>.</returns>
        public String[] getPOS_Tags()
        {
            return tagger.tag(words);
        }
Exemplo n.º 7
0
 /// <summary>
 /// Convenience overload: POS-tags <paramref name="tokens"/> with the <c>tagger</c> field and
 /// forwards both arrays to the two-argument <c>Chunking</c> overload.
 /// </summary>
 /// <param name="tokens">Tokens to tag and chunk.</param>
 /// <returns>Whatever the two-argument <c>Chunking</c> overload returns.</returns>
 public string[] Chunking(string[] tokens)
 {
     return Chunking(tokens, tagger.tag(tokens));
 }
Exemplo n.º 8
0
        /// <summary>
        /// Tokenizes, POS-tags and chunks every sentence, using models loaded from the embedded
        /// <c>Resource</c> byte arrays (<c>en_pos_maxent</c>, <c>en_token</c>, <c>en_chunker</c>).
        /// </summary>
        /// <param name="Sentences">Sentences to process.</param>
        /// <returns>
        /// A lazily evaluated sequence with one list per input sentence; each list holds one
        /// <see cref="ChunkItem"/> (token, tag, chunk) per chunker output position.
        /// </returns>
        public static IEnumerable<IEnumerable<ChunkItem>> GetChunks(IEnumerable<string> Sentences)
        {
            // The three models are built eagerly, in the same order as before: POS, tokenizer, chunker.
            var posTagger = new POSTaggerME(
                new POSModel(new java.io.ByteArrayInputStream(Resource.en_pos_maxent)));

            var sentenceTokenizer = new TokenizerME(
                new TokenizerModel(new java.io.ByteArrayInputStream(Resource.en_token)));

            var phraseChunker = new ChunkerME(
                new ChunkerModel(new java.io.ByteArrayInputStream(Resource.en_chunker)));

            // Per-sentence work is deferred until the returned sequence is enumerated.
            return Sentences.Select(sentence =>
            {
                var words  = sentenceTokenizer.tokenize(sentence);
                var labels = posTagger.tag(words);
                var spans  = phraseChunker.chunk(words, labels);

                return Enumerable.Range(0, spans.Length)
                                 .Select(idx => new ChunkItem { token = words[idx], tag = labels[idx], chunk = spans[idx] })
                                 .ToList();
            });
        }