Code example #1
        public Span[] GetTokens(string paragraph)
        {
            // Load the pre-trained English tokenizer model.
            var bin = GetFileStream("en-token.bin");

            try
            {
                TokenizerModel model     = new TokenizerModel(bin);
                TokenizerME    tokenizer = new TokenizerME(model);

                // tokenizePos returns the start/end character offsets of each
                // token in the paragraph rather than the token strings.
                return tokenizer.tokenizePos(paragraph);
            }
            finally
            {
                // Close the model stream even if tokenization throws.
                bin.close();
            }
        }
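The GetFileStream helper used above is not shown. A minimal sketch of what it might look like, assuming the IKVM-based OpenNLP binding (whose TokenizerModel constructor takes a java.io.InputStream) and a hypothetical local model directory:

        // Hypothetical helper assumed by GetTokens: opens the model file as a
        // java.io.InputStream for the TokenizerModel constructor.
        private java.io.InputStream GetFileStream(string fileName)
        {
            string modelPath = System.IO.Path.Combine("models", fileName); // assumed location
            return new java.io.FileInputStream(modelPath);
        }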
Code example #2
        public string[] Tokenization(string str)
        {
            // Get the offsets of each token, then slice the original string
            // to recover the token text.
            Span[]       tokenSpans = tokenizer.tokenizePos(str);
            List<string> list       = new List<string>();

            foreach (Span span in tokenSpans)
            {
                list.Add(str.Substring(span.getStart(), span.getEnd() - span.getStart()));
            }

            return list.ToArray();
        }
Code example #3
        /// <summary>
        /// Tokenizes English text with openNLP. Model: en-token.bin.
        /// </summary>
        /// <param name="sentence">The English sentence to tokenize.</param>
        /// <returns>Each resulting token.</returns>
        public string[] Tokenize(string sentence)
        {
            // Make sure the shared tokenizer has been initialized.
            LoadTokenizer();

            Span[]       tokenSpans = tokenizer.tokenizePos(sentence);
            List<string> list       = new List<string>();

            foreach (Span span in tokenSpans)
            {
                list.Add(sentence.Substring(span.getStart(), span.getEnd() - span.getStart()));
            }

            return list.ToArray();
        }
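LoadTokenizer is likewise referenced but not included in the example. One possible lazy initializer, sketched under the assumption that the class keeps a shared TokenizerME field named tokenizer and that en-token.bin sits next to the executable:

        // Hypothetical lazy loader: builds the shared TokenizerME from
        // en-token.bin the first time it is needed.
        private void LoadTokenizer()
        {
            if (tokenizer != null)
            {
                return;
            }

            var modelStream = new java.io.FileInputStream("en-token.bin"); // assumed model path
            try
            {
                tokenizer = new TokenizerME(new TokenizerModel(modelStream));
            }
            finally
            {
                modelStream.close();
            }
        }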
Code example #4
 public virtual Span[] GetTerms(string sentence)
 {
     lock (this)
     {
         // With no tokenizer loaded, fall back to a single span
         // covering the whole sentence.
         if (tokenizer == null)
         {
             Span[] span1 = new Span[1];
             span1[0] = new Span(0, sentence.Length);
             return span1;
         }

         return tokenizer.tokenizePos(sentence);
     }
 }
Code example #5
 public virtual Span[] GetTerms(string sentence)
 {
     // Same logic as above, but synchronized with Lucene.NET's
     // UninterruptableMonitor instead of a plain lock statement.
     UninterruptableMonitor.Enter(this);
     try
     {
         if (tokenizer is null)
         {
             Span[] span1 = new Span[1];
             span1[0] = new Span(0, sentence.Length);
             return span1;
         }

         return tokenizer.tokenizePos(sentence);
     }
     finally
     {
         UninterruptableMonitor.Exit(this);
     }
 }
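Code examples #4 and #5 implement the same fallback, returning one span that covers the whole sentence when no tokenizer is loaded; the second just swaps lock (this) for Lucene.NET's UninterruptableMonitor. A small usage sketch, where analyzer is a hypothetical instance of the class exposing GetTerms:

 // Hypothetical caller: each Span carries start/end offsets into the
 // original sentence, so the token text is recovered by substring.
 string sentence = "OpenNLP reports token offsets, not token strings.";
 foreach (Span span in analyzer.GetTerms(sentence))
 {
     Console.WriteLine(sentence.Substring(span.getStart(), span.getEnd() - span.getStart()));
 }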