Example #1
        public void TestTokenizeWithSpace()
        {
            var seg = new JiebaSegmenter();

            var s      = "永和服装饰品有限公司";
            var tokens = seg.Tokenize(s).ToList();

            Assert.That(tokens.Count, Is.EqualTo(4));
            Assert.That(tokens.Last().EndIndex, Is.EqualTo(s.Length));

            s      = "永和服装饰品 有限公司";
            tokens = seg.Tokenize(s).ToList();
            Assert.That(tokens.Count, Is.EqualTo(5));
            Assert.That(tokens.Last().EndIndex, Is.EqualTo(s.Length));
        }
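The assertions above check only the token count and the final EndIndex. A sketch of a stronger check that could be appended inside the same test, reusing the s and tokens variables already in scope (it assumes default-mode tokens cover the input contiguously, which is what the EndIndex assertion relies on):

            // Sketch: default-mode tokens should tile the input with no gaps,
            // and each Word should match the substring its indices describe.
            var expectedStart = 0;
            foreach (var token in tokens)
            {
                Assert.That(token.StartIndex, Is.EqualTo(expectedStart));
                Assert.That(token.Word, Is.EqualTo(s.Substring(token.StartIndex, token.EndIndex - token.StartIndex)));
                expectedStart = token.EndIndex;
            }
            Assert.That(expectedStart, Is.EqualTo(s.Length));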
Example #2
        public void TestTokenize()
        {
            var seg = new JiebaSegmenter();

            foreach (var token in seg.Tokenize("小明最近在学习机器学习、自然语言处理、云计算和大数据"))
            {
                Console.WriteLine(token);
            }
            Console.WriteLine();

            foreach (var token in seg.Tokenize("小明最近在学习机器学习、自然语言处理、云计算和大数据", TokenizerMode.Search))
            {
                Console.WriteLine(token);
            }
        }
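A small comparison sketch using the same JiebaSegmenter API makes the difference between the two calls concrete: search mode additionally emits the parts of longer compounds, so it usually produces more tokens for the same input.

            // Sketch: compare token counts between default mode and search mode.
            var segmenter = new JiebaSegmenter();
            var text = "小明最近在学习机器学习、自然语言处理、云计算和大数据";
            var defaultCount = segmenter.Tokenize(text).Count();
            var searchCount  = segmenter.Tokenize(text, TokenizerMode.Search).Count();
            Console.WriteLine("Default mode: {0} tokens, Search mode: {1} tokens", defaultCount, searchCount);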
Example #3
        public static IEnumerable<string> SegmentFor(string segmenterString)
        {
            var jiebaSegmenter = new JiebaSegmenter();

            return jiebaSegmenter.Tokenize(segmenterString, TokenizerMode.Search)
                                 .Select(x => x.Word);
        }
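A hypothetical call site for SegmentFor, joining the search-mode words with a slash for display (the input string is illustrative):

            // Hypothetical usage of SegmentFor defined above.
            var words = SegmentFor("永和服装饰品有限公司");
            Console.WriteLine(string.Join("/", words));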
Example #4
 public JiebaTokenizer(JiebaSegmenter seg, string input)
 {
     segmenter = seg;
     termAtt   = AddAttribute<ITermAttribute>();
     offsetAtt = AddAttribute<IOffsetAttribute>();
     typeAtt   = AddAttribute<ITypeAttribute>();
     tokens    = segmenter.Tokenize(input, TokenizerMode.Search).ToList();
 }
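The constructor above pre-tokenizes the whole input in search mode, so the matching IncrementToken typically just replays that list. The following is a hedged sketch against the Lucene.Net 3.x attribute API; the position field is an assumption, and the exact attribute calls can differ between Lucene.Net versions:

 // Sketch: emit one pre-computed jieba token per IncrementToken call.
 // Assumes a 'private int position = -1;' field next to the fields set in the constructor.
 public override bool IncrementToken()
 {
     ClearAttributes();
     position++;
     if (position < tokens.Count)
     {
         var token = tokens[position];
         termAtt.SetTermBuffer(token.Word);                     // term text
         offsetAtt.SetOffset(token.StartIndex, token.EndIndex); // character offsets from jieba
         typeAtt.Type = "Jieba";
         return true;
     }
     return false;
 }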
Example #5
        public override void Reset()
        {
            base.Reset();

            _inputText = ReadToEnd(base.m_input);
            RemoveStopWords(_segmenter.Tokenize(_inputText, _mode));

            _iter = _wordList.GetEnumerator();
        }
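Reset depends on two helpers that are not shown in this snippet. A minimal sketch of what they might look like; the _stopWords field, the element type of _wordList, and the exact filtering rule are all assumptions:

        // Hypothetical helpers assumed by Reset(); names and logic are illustrative only.
        private static string ReadToEnd(System.IO.TextReader input)
        {
            return input.ReadToEnd();
        }

        private void RemoveStopWords(IEnumerable<JiebaNet.Segmenter.Token> tokens)
        {
            _wordList.Clear();
            foreach (var token in tokens)
            {
                if (!_stopWords.Contains(token.Word))
                {
                    _wordList.Add(token);
                }
            }
        }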
Example #6
 public void TokenizeSearchDemo()
 {
     var segmenter = new JiebaSegmenter();
     var s = "永和服装饰品有限公司";
     var tokens = segmenter.Tokenize(s, TokenizerMode.Search);
     foreach (var token in tokens)
     {
         Console.WriteLine("word {0,-12} start: {1,-3} end: {2,-3}", token.Word, token.StartIndex, token.EndIndex);
     }
 }
Example #8
        public override void Reset()
        {
            base.Reset();

            _InputText = ReadToEnd(base.m_input);
            RemoveStopWords(segmenter.Tokenize(_InputText, mode));

            start = 0;
            iter  = _WordList.GetEnumerator();
        }
Example #9
        /// <summary>
        /// Search mode
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        protected void btnSearch_Click(object sender, EventArgs e)
        {
            var segmenter = new JiebaSegmenter();
            var s         = txtInput.Text;
            var tokens    = segmenter.Tokenize(s, TokenizerMode.Search);

            foreach (var token in tokens)
            {
                Response.Write(string.Format("word {0,-12} start: {1,-3} end: {2,-3}<br>", token.Word, token.StartIndex, token.EndIndex));
            }
        }
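Writing token.Word straight into the response is fine for a demo, but the text comes from a user-supplied input control, so it should be HTML-encoded on output. A hedged variant of the loop using HttpUtility.HtmlEncode from System.Web:

            // Sketch: HTML-encode the word before writing it into the page.
            foreach (var token in tokens)
            {
                Response.Write(string.Format("word {0,-12} start: {1,-3} end: {2,-3}<br>",
                                             HttpUtility.HtmlEncode(token.Word), token.StartIndex, token.EndIndex));
            }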
Example #10
        public override void Reset()
        {
            base.Reset();

            _InputText = ReadToEnd(base.m_input);
            var jiebaTokens = segmenter.Tokenize(_InputText);

            RemoveStopWords(jiebaTokens);

            start = 0;
            iter  = _WordList.GetEnumerator();
        }
Example #11
 public IEnumerable<Segmenter.Token> Tokenize(string text, TokenizerMode mode = TokenizerMode.Search)
 {
     return segmenter.Tokenize(text, mode);
 }
Example #12
        public void TestTokenize()
        {
            var seg = new JiebaSegmenter();
            seg.AddWord("机器学习");
            seg.AddWord("自然语言处理");
            foreach (var token in seg.Tokenize("小明最近在学习机器学习、自然语言处理、云计算和大数据"))
            {
                Console.WriteLine(token);
            }

            foreach (var token in seg.Tokenize("小明最近在学习机器学习、自然语言处理、云计算和大数据", TokenizerMode.Search))
            {
                Console.WriteLine(token);
            }
        }
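A quick way to see what the AddWord calls change is to tokenize the same sentence before and after registering a custom term; a sketch against the same API (the actual counts depend on the dictionary that ships with the library):

            // Sketch: compare token counts before and after AddWord for a custom term.
            var segmenter = new JiebaSegmenter();
            var text = "小明最近在学习自然语言处理";
            var before = segmenter.Tokenize(text).Count();
            segmenter.AddWord("自然语言处理");
            var after = segmenter.Tokenize(text).Count();
            Console.WriteLine("Before AddWord: {0} tokens, after: {1} tokens", before, after);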
Example #13
 /// <summary>
 /// Specifies the tokenization method
 /// </summary>
 /// <returns></returns>
 protected virtual IEnumerable<SegmenterToken> Segmenter(string segmenterString)
 {
     return jiebaSegmenter.Tokenize(segmenterString, TokenizerMode.Search)
            .Select(x => new SegmenterToken(x.Word, x.StartIndex));
 }
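Because the method is virtual, a derived class can swap in a different tokenization mode. A hypothetical override that relies on Tokenize's default mode by omitting the TokenizerMode argument (it assumes the jiebaSegmenter field is visible to subclasses):

 // Hypothetical override in a derived class: use default-mode tokenization instead of search mode.
 protected override IEnumerable<SegmenterToken> Segmenter(string segmenterString)
 {
     return jiebaSegmenter.Tokenize(segmenterString)
            .Select(x => new SegmenterToken(x.Word, x.StartIndex));
 }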