TokenStream() public method

public TokenStream ( string fieldName, TextReader reader ) : TokenStream
fieldName string
reader System.IO.TextReader
return TokenStream
示例#1
0
 /// <summary>
 /// Segments <paramref name="content"/> with the PanGu analyzer and returns the terms in stream order.
 /// </summary>
 /// <param name="content">Text to segment.</param>
 /// <returns>The term text of every token produced by the analyzer's token stream.</returns>
 public static string[] SplitWords(string content) {
     // PanGuAnalyzer selects the PanGu word-segmentation algorithm.
     Analyzer panGu = new PanGuAnalyzer();
     TokenStream stream = panGu.TokenStream("", new StringReader(content));
     List<string> words = new List<string>();
     // Next() hands back one token per call; the loop stops at the first null.
     for (Lucene.Net.Analysis.Token current = stream.Next(); current != null; current = stream.Next()) {
         words.Add(current.TermText());
     }
     return words.ToArray();
 }
示例#2
0
 /// <summary>
 /// Runs a string through PanGu word segmentation and returns the resulting terms.
 /// </summary>
 /// <param name="str">Text to segment.</param>
 /// <returns>A list holding the term text of each token, in stream order.</returns>
 public static List<string> ChangeStringToSegment(string str)
 {
     List<string> segments = new List<string>();
     Analyzer panGu = new PanGuAnalyzer();
     TokenStream source = panGu.TokenStream("", new StringReader(str));

     // Pull tokens one at a time until the stream reports the end with null.
     Lucene.Net.Analysis.Token next = source.Next();
     while (next != null)
     {
         segments.Add(next.TermText());
         next = source.Next();
     }
     return segments;
 }
示例#3
0
        /// <summary>
        /// Segments text for indexing with the PanGu analyzer.
        /// </summary>
        /// <param name="str">Text to segment (the original note describes it as the entered search condition).</param>
        /// <returns>The term text of each token, in stream order.</returns>
        public static string[] SqlitIndexWord(string str)
        {
            Analyzer panGu = new PanGuAnalyzer();
            TokenStream stream = panGu.TokenStream("", new StringReader(str));
            List<string> terms = new List<string>();

            for (Lucene.Net.Analysis.Token tok = stream.Next(); tok != null; tok = stream.Next())
            {
                // Each term is echoed to the console as well as collected.
                Console.WriteLine(tok.TermText());
                terms.Add(tok.TermText());
            }

            return terms.ToArray();
        }
示例#4
0
        //public static string[] SplitWords(string content)
        //{
        //    List<string> strList = new List<string>();
        //    Analyzer analyzer = new PanGuAnalyzer();//指定使用盘古 PanGuAnalyzer 分词算法
        //    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content));

        //    Lucene.Net.Analysis.Token token = null;
        //    while ((token = tokenStream.Next()) != null)
        //    { //Next继续分词 直至返回null
        //        strList.Add(token.TermText()); //得到分词后结果
        //    }
        //    return strList.ToArray();
        //}

        #region 分词测试
        /// <summary>
        /// Word-segmentation test: tokenizes <paramref name="keyword"/> with the PanGu analyzer
        /// and returns the terms as a single "|"-terminated string.
        /// </summary>
        /// <param name="keyword">
        /// Text to segment. It is also handed to the analyzer as the field name, exactly as before.
        /// NOTE(review): passing the text as the field name looks unintentional - confirm.
        /// </param>
        /// <returns>
        /// Every term followed by "|", concatenated in stream order; an empty string when the
        /// stream yields no tokens.
        /// </returns>
        public string Token(string keyword)
        {
            // A builder avoids re-allocating the whole result string for every token.
            System.Text.StringBuilder joined = new System.Text.StringBuilder();
            System.IO.StringReader reader = new System.IO.StringReader(keyword);
            // PanGuAnalyzer selects the PanGu word-segmentation algorithm.
            Analyzer analyzer = new PanGuAnalyzer();
            try
            {
                TokenStream ts = analyzer.TokenStream(keyword, reader);
                while (ts.IncrementToken())
                {
                    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
                        ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                    joined.Append(ita.Term).Append("|");
                }
                // The former ts.CloneAttributes() call was dropped: its result was discarded,
                // so it had no effect on the output.
            }
            finally
            {
                // Release the reader and analyzer even when tokenization throws.
                reader.Close();
                analyzer.Close();
            }
            return joined.ToString();
        }
示例#5
0
        /// <summary>
        /// Segments <paramref name="content"/> with the PanGu analyzer using the attribute-based
        /// token stream API.
        /// </summary>
        /// <param name="content">Text to segment.</param>
        /// <returns>The term of every token, in stream order.</returns>
        public static string[] SplitWords(string content)
        {
            // PanGuAnalyzer selects the PanGu word-segmentation algorithm.
            Analyzer panGu = new PanGuAnalyzer();
            TokenStream stream = panGu.TokenStream("", new StringReader(content));
            List<string> words = new List<string>();

            // IncrementToken() advances to the next token and returns false when none remain.
            while (stream.IncrementToken())
            {
                words.Add(stream.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>().Term);
            }

            return words.ToArray();
        }
示例#6
0
 /// <summary>
 /// Segments <paramref name="content"/> with the PanGu analyzer and returns the terms in stream order.
 /// </summary>
 /// <param name="content">Text to segment.</param>
 /// <returns>The term of every token produced by the stream.</returns>
 public static string[] SplitWords(string content)
 {
     List<string> strList = new List<string>();
     // PanGuAnalyzer selects the PanGu word-segmentation algorithm.
     Analyzer analyzer = new PanGuAnalyzer();
     TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content));
     // AddAttribute registers the term attribute on the stream (if it is not already there)
     // and returns the stream's instance, so it can be read directly after each
     // IncrementToken() without a HasAttribute/GetAttribute lookup per token.
     ITermAttribute term = tokenStream.AddAttribute<ITermAttribute>();
     while (tokenStream.IncrementToken())
     {
         strList.Add(term.Term);
     }
     return strList.ToArray();
 }
示例#7
0
        /// <summary>
        /// Segments <paramref name="content"/> with the PanGu analyzer and joins the terms into
        /// one space-separated string.
        /// </summary>
        /// <param name="content">Text to segment.</param>
        /// <returns>
        /// Each term followed by a single space, in stream order (so a non-empty result ends
        /// with a space); an empty string when the stream yields no tokens.
        /// </returns>
        public static string QueryParserWord(string content)
        {
            // PanGuAnalyzer selects the PanGu word-segmentation algorithm.
            Analyzer panGu = new PanGuAnalyzer();
            TokenStream stream = panGu.TokenStream("", new StringReader(content));
            StringBuilder query = new StringBuilder();

            while (stream.IncrementToken())
            {
                Lucene.Net.Analysis.Tokenattributes.ITermAttribute termAttr =
                    stream.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                query.Append(termAttr.Term).Append(" ");
            }

            return query.ToString();
        }
示例#8
0
        /// <summary>
        /// Builds the list of search terms for <paramref name="searchTerm"/>: the PanGu-segmented
        /// terms plus the original input, de-duplicated case-insensitively and passed through
        /// <c>Escape</c>.
        /// </summary>
        /// <param name="searchTerm">Raw search text.</param>
        /// <returns>
        /// A lazily evaluated sequence of escaped terms: the segmented terms first, then the
        /// original text if it was not already present.
        /// </returns>
        private static IEnumerable<string> GetSearchTerms(string searchTerm)
        {
            List<string> result = new List<string>();
            var analyzer = new PanGuAnalyzer();
            StringReader sr = new StringReader(searchTerm);
            try
            {
                TokenStream stream = analyzer.TokenStream(null, sr);
                while (stream.IncrementToken())
                {
                    result.Add(stream.GetAttribute<ITermAttribute>().Term);
                }
                // The former stream.CloneAttributes() call and the unused DateTime.Now
                // local were removed; neither affected the result.
            }
            finally
            {
                // Release the reader and analyzer even when tokenization throws.
                sr.Close();
                analyzer.Dispose();
            }

            // Joining and re-splitting on spaces also breaks up any term that itself contains
            // a space and drops empty terms; kept as in the original.
            var resultString = string.Join(" ", result);

            return resultString.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
                .Concat(new[] { searchTerm })
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .Select(Escape);
        }
示例#9
0
 /// <summary>
 /// Segments <paramref name="content"/> with the PanGu analyzer and returns the terms in stream order.
 /// </summary>
 /// <param name="content">Text to segment.</param>
 /// <returns>The term of every token produced by the stream.</returns>
 private static string[] SplitWords(string content)
 {
     PanGuAnalyzer panGu = new PanGuAnalyzer();
     TokenStream stream = panGu.TokenStream("", new StringReader(content));
     List<string> words = new List<string>();
     // Advance once per iteration; IncrementToken() returning false ends the loop.
     for (bool more = stream.IncrementToken(); more; more = stream.IncrementToken())
     {
         ITermAttribute termAttr = stream.GetAttribute<ITermAttribute>();
         words.Add(termAttr.Term);
     }
     return words.ToArray();
 }
示例#10
0
 /// <summary>
 /// PanGu word segmentation: splits a string into its terms.
 /// </summary>
 /// <param name="str">Text to segment.</param>
 /// <returns>The term text of each token, in stream order.</returns>
 public static string[] PanGuSplit(string str)
 {
     // PanGuAnalyzer selects the PanGu word-segmentation algorithm.
     Analyzer panGu = new PanGuAnalyzer();
     TokenStream stream = panGu.TokenStream("", new StringReader(str));
     List<string> pieces = new List<string>();

     // Next() yields one token per call; the loop stops at the first null.
     Lucene.Net.Analysis.Token tok = stream.Next();
     while (tok != null)
     {
         pieces.Add(tok.TermText());
         tok = stream.Next();
     }
     return pieces.ToArray();
 }