예제 #1
0
        /// <summary>
        /// Advances <c>_iter</c> by one element and converts that element into a Lucene token.
        /// </summary>
        /// <returns>
        /// A token built from the current element's <c>Word</c>, <c>StartIndex</c> and <c>EndIndex</c>,
        /// or <c>null</c> when the iterator cannot advance or its current element is <c>null</c>.
        /// </returns>
        private Lucene.Net.Analysis.Token Next()
        {
            if (_iter.MoveNext())
            {
                var current = _iter.Current;

                if (current != null)
                {
                    return new Lucene.Net.Analysis.Token(current.Word, current.StartIndex, current.EndIndex);
                }
            }

            // Either the iterator is exhausted or it yielded a null element.
            return null;
        }
예제 #2
0
        /// <summary>
        /// Advances <c>_iter</c> by one element and converts that element into a Lucene token,
        /// optionally writing a diagnostic line to the console when <c>Settings.Log</c> is set.
        /// </summary>
        /// <returns>
        /// A token built from the current element's <c>Word</c>, <c>StartIndex</c> and <c>EndIndex</c>,
        /// or <c>null</c> when the iterator cannot advance or its current element is <c>null</c>.
        /// </returns>
        private Lucene.Net.Analysis.Token Next()
        {
            if (!_iter.MoveNext())
            {
                return null;
            }

            var word = _iter.Current;

            // Guard against a null element instead of failing with a NullReferenceException below.
            if (word == null)
            {
                return null;
            }

            var token = new Lucene.Net.Analysis.Token(word.Word, word.StartIndex, word.EndIndex);

            if (Settings.Log)
            {
                const int len = 10;

                // Count the characters matched by the pattern (CJK ideographs in U+4E00..U+9FA5 and
                // anything outside U+0000..U+00FF) and shrink the padding width by that count.
                // The static Regex.Matches overload reuses the cached parsed pattern rather than
                // building a new Regex instance for every token.
                var offset = Regex.Matches(word.Word, @"[\u4e00-\u9fa5]|[^\x00-\xff]").Count;

                // When more than `len` characters match, fall back to the full padding width.
                offset = offset > len ? 0 : offset;

                Console.WriteLine($"==分词:{ word.Word.PadRight(len - offset, '=') }==起始位置:{ word.StartIndex.ToString().PadLeft(3, '=') }==结束位置{ word.EndIndex.ToString().PadLeft(3, '=') }");
            }

            return token;
        }