/// <summary>
/// Advances the underlying iterator and wraps its current element in a Lucene token.
/// </summary>
/// <returns>
/// A token built from the current element's <c>Word</c>, <c>StartIndex</c> and
/// <c>EndIndex</c>; <c>null</c> when the iterator cannot advance or its current
/// element is <c>null</c>.
/// </returns>
private Lucene.Net.Analysis.Token Next()
{
    // Nothing left to read from the iterator.
    if (!_iter.MoveNext())
    {
        return null;
    }

    var segment = _iter.Current;

    // A null element is reported to the caller the same way as exhaustion.
    return segment == null
        ? null
        : new Lucene.Net.Analysis.Token(segment.Word, segment.StartIndex, segment.EndIndex);
}
/// <summary>
/// Advances the underlying iterator and wraps its current element in a Lucene token.
/// When <c>Settings.Log</c> is set, a one-line trace of the element (text, start
/// index, end index) is also written to the console.
/// </summary>
/// <returns>
/// A token built from the current element's <c>Word</c>, <c>StartIndex</c> and
/// <c>EndIndex</c>; <c>null</c> when the iterator cannot advance or its current
/// element is <c>null</c>.
/// </returns>
private Lucene.Net.Analysis.Token Next()
{
    if (!_iter.MoveNext())
    {
        return null;
    }

    var word = _iter.Current;

    // Guard against a null element instead of failing with a NullReferenceException below.
    if (word == null)
    {
        return null;
    }

    var token = new Lucene.Net.Analysis.Token(word.Word, word.StartIndex, word.EndIndex);

    if (Settings.Log)
    {
        // Count characters in the CJK range U+4E00..U+9FA5 or outside U+0000..U+00FF.
        // The static Regex.Matches overload reuses the framework's regex cache, so the
        // pattern is not re-parsed for every token.
        // NOTE(review): presumably these characters render double-width in the console,
        // which is why the pad width is reduced by their count - confirm.
        var offset = Regex.Matches(word.Word, @"[\u4e00-\u9fa5]|[^\x00-\xff]").Count;
        var len = 10;

        // If there are more such characters than the column width, skip the adjustment.
        offset = offset > len ? 0 : offset;

        Console.WriteLine($"==分词:{ word.Word.PadRight(len - offset, '=') }==起始位置:{ word.StartIndex.ToString().PadLeft(3, '=') }==结束位置{ word.EndIndex.ToString().PadLeft(3, '=') }");
    }

    return token;
}