/// <summary>
/// Console demo: tokenizes a fixed Chinese sample string with
/// <c>StandardAnalyzer</c> (<c>LUCENE_30</c>), writes every term to the console
/// on its own line, then waits for a key press.
/// </summary>
/// <remarks>
/// An earlier, commented-out variant of this demo (CJKAnalyzer with the legacy
/// <c>TokenStream.Next()</c> / <c>Token.TermText()</c> calls) was dead code and has been removed.
/// </remarks>
public void v()
{
    string s = "我日中华人民共和国";

    // The using blocks release the analyzer, the reader and the token stream
    // even when tokenization throws; previously they were only closed on the
    // success path and the stream itself was never disposed.
    using (Lucene.Net.Analysis.Standard.StandardAnalyzer a = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    using (System.IO.StringReader reader = new System.IO.StringReader(s))
    // NOTE(review): the sample text is also passed as the first TokenStream
    // argument, exactly as before - confirm whether a real field name is wanted.
    using (Lucene.Net.Analysis.TokenStream ts = a.TokenStream(s, reader))
    {
        while (ts.IncrementToken())
        {
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
                ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
            Console.WriteLine(ita.Term);
        }
    }

    // Keep the console window open until the user presses a key.
    Console.ReadKey();
}
/// <summary>
/// Tokenizes a fixed Chinese sample string with <c>StandardAnalyzer</c>
/// (<c>LUCENE_30</c>), writes every term to the console on its own line and
/// finally writes "over".
/// </summary>
public void TestMethod1()
{
    string s = "我日中华人民共和国";

    // Stacked using blocks dispose the analyzer, reader and token stream on
    // every exit path; the original only closed them when no exception occurred
    // and never disposed the stream (it called CloneAttributes() and dropped the result).
    using (Lucene.Net.Analysis.Standard.StandardAnalyzer a = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    using (System.IO.StringReader reader = new System.IO.StringReader(s))
    using (Lucene.Net.Analysis.TokenStream ts = a.TokenStream(s, reader))
    {
        while (ts.IncrementToken())
        {
            Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
                ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
            Console.WriteLine(ita.Term);
        }

        // Printed while the resources are still open, matching the original order.
        Console.WriteLine("over");
    }
}
/// <summary>
/// Tokenization test: runs <paramref name="keyword"/> through the
/// <c>analyzer</c> member and joins the resulting terms into one string.
/// </summary>
/// <param name="keyword">Text to tokenize.</param>
/// <returns>
/// Every term preceded by a single space (for example <c>" a b"</c>);
/// an empty string when the stream yields no tokens.
/// </returns>
/// <remarks>
/// The <c>analyzer</c> member is closed at the end of every call, as in the
/// original implementation.
/// </remarks>
public string Token(string keyword)
{
    // StringBuilder avoids re-copying the accumulated text for every token.
    System.Text.StringBuilder ret = new System.Text.StringBuilder();
    System.IO.StringReader reader = new System.IO.StringReader(keyword);
    try
    {
        Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(keyword, reader);
        try
        {
            // Legacy pull API: Next() returns null once the stream is exhausted.
            for (Lucene.Net.Analysis.Token token = ts.Next(); token != null; token = ts.Next())
            {
                ret.Append(' ').Append(token.TermText());
            }
        }
        finally
        {
            // Replaces the former ts.CloneAttributes() call, whose result was
            // discarded and which therefore released nothing.
            ts.Close();
        }
    }
    finally
    {
        // Released on every exit path, in the same order as before.
        reader.Close();
        analyzer.Close();
    }

    return ret.ToString();
}
/// <summary>
/// Tokenization test: runs <paramref name="keyword"/> through the
/// <c>analyzer</c> member and joins the resulting terms into one string.
/// </summary>
/// <param name="keyword">Text to tokenize.</param>
/// <returns>
/// Every term followed by a <c>|</c> separator (for example <c>"a|b|"</c>);
/// an empty string when the stream yields no tokens.
/// </returns>
/// <remarks>
/// The <c>analyzer</c> member is closed at the end of every call, as in the
/// original implementation.
/// </remarks>
public string Token(string keyword)
{
    // StringBuilder avoids re-copying the accumulated text for every token.
    System.Text.StringBuilder ret = new System.Text.StringBuilder();
    System.IO.StringReader reader = new System.IO.StringReader(keyword);
    try
    {
        // Disposing the stream replaces the former ts.CloneAttributes() call,
        // whose result was discarded and which therefore released nothing.
        using (Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(keyword, reader))
        {
            while (ts.IncrementToken())
            {
                Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
                    ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                ret.Append(ita.Term).Append('|');
            }
        }
    }
    finally
    {
        // Released on every exit path, in the same order as before.
        reader.Close();
        analyzer.Close();
    }

    return ret.ToString();
}
/// <summary>
/// Tokenizes <paramref name="words"/> with the supplied analyzer and joins the
/// resulting terms into one string.
/// </summary>
/// <param name="words">Text to tokenize.</param>
/// <param name="analyzer">
/// Analyzer that produces the token stream. It is closed by this method before
/// returning, as in the original implementation.
/// </param>
/// <returns>
/// Every term followed by a <c>|</c> separator (for example <c>"a|b|"</c>);
/// an empty string when the stream yields no tokens.
/// </returns>
private string cutWords(string words, Analyzer analyzer)
{
    // StringBuilder avoids re-copying the accumulated text for every token.
    System.Text.StringBuilder resultStr = new System.Text.StringBuilder();
    System.IO.StringReader reader = new System.IO.StringReader(words);
    try
    {
        // Disposing the stream replaces the former ts.CloneAttributes() call,
        // whose result was discarded and which therefore released nothing.
        using (Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(words, reader))
        {
            while (ts.IncrementToken())
            {
                Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
                    ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                resultStr.Append(ita.Term).Append('|');
            }
        }
    }
    finally
    {
        // Released on every exit path, in the same order as before.
        reader.Close();
        analyzer.Close();
    }

    return resultStr.ToString();
}
/// <summary>
/// Splits <paramref name="content"/> into terms using a freshly created
/// <c>PanGuAnalyzer</c>.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <returns>
/// The terms in the order the token stream produced them; an empty list when
/// the stream yields no tokens.
/// </returns>
public static List<string> SplitWords(string content)
{
    List<string> strList = new List<string>();

    using (Analyzer analyzer = new PanGuAnalyzer()) // use the PanGu (PanGuAnalyzer) word-segmentation algorithm
    using (System.IO.StringReader reader = new System.IO.StringReader(content))
    // The stream is now disposed together with the reader and analyzer; the
    // former ts.CloneAttributes() call discarded its result and released nothing.
    using (Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(content, reader))
    {
        while (ts.IncrementToken())
        {
            var ita = ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
            strList.Add(ita.Term);
        }
    }

    return strList;
}