public TokenStream(string fieldName, TextReader reader)

| Parameter | Type       |
|-----------|------------|
| fieldName | string     |
| reader    | TextReader |

Returns: TokenStream
/// <summary>
/// Splits <paramref name="content"/> into words using the PanGu
/// (盘古) Chinese word-segmentation analyzer.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <returns>The segmented terms, in order of appearance.</returns>
public static string[] SplitWords(string content)
{
    List<string> strList = new List<string>();
    // Use the PanGu PanGuAnalyzer segmentation algorithm.
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content));
    try
    {
        Lucene.Net.Analysis.Token token;
        // Next() yields one token per call and returns null when exhausted.
        while ((token = tokenStream.Next()) != null)
        {
            strList.Add(token.TermText()); // collect the segmented term
        }
    }
    finally
    {
        // FIX: the stream and analyzer were never released in the original.
        tokenStream.Close();
        analyzer.Close();
    }
    return strList.ToArray();
}
/// <summary>
/// Runs the input string through the PanGu segmenter and returns the
/// resulting terms as a list.
/// </summary>
/// <param name="str">Text to segment.</param>
/// <returns>List of segmented terms, in order of appearance.</returns>
public static List<string> ChangeStringToSegment(string str)
{
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
    List<string> list = new List<string>();
    try
    {
        Lucene.Net.Analysis.Token token;
        // Next() returns null once the stream is exhausted.
        while ((token = tokenStream.Next()) != null)
        {
            list.Add(token.TermText());
        }
    }
    finally
    {
        // FIX: release the stream and analyzer (previously leaked).
        tokenStream.Close();
        analyzer.Close();
    }
    return list;
}
/// <summary>
/// Segments an index/search string with the PanGu analyzer.
/// </summary>
/// <param name="str">The search text to segment.</param>
/// <returns>The segmented terms as an array.</returns>
public static string[] SqlitIndexWord(string str)
{
    // PanGu segmentation: tokenize the incoming search condition.
    List<string> list = new List<string>();
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = tokenStream.Next()) != null)
        {
            // FIX: TermText() was called twice per iteration; cache it once.
            string term = token.TermText();
            // NOTE(review): debug output left in place to preserve behavior;
            // consider removing or routing through a logger.
            Console.WriteLine(term);
            list.Add(term);
        }
    }
    finally
    {
        // FIX: release the stream and analyzer (previously leaked).
        tokenStream.Close();
        analyzer.Close();
    }
    return list.ToArray();
}
#region 分词测试
/// <summary>
/// Segmentation smoke test: runs <paramref name="keyword"/> through the
/// PanGu analyzer and returns all terms joined by '|'.
/// </summary>
/// <param name="keyword">Text to segment.</param>
/// <returns>The terms concatenated as "t1|t2|...|" (note the trailing '|').</returns>
public string Token(string keyword)
{
    System.IO.StringReader reader = new System.IO.StringReader(keyword);
    // Use the PanGu PanGuAnalyzer segmentation algorithm.
    Analyzer analyzer = new PanGuAnalyzer();
    // NOTE(review): the keyword is also passed as the field name; the field
    // name does not affect PanGu tokenization here — preserved as-is.
    TokenStream ts = analyzer.TokenStream(keyword, reader);

    // The stream exposes one shared term-attribute instance that is refreshed
    // by each IncrementToken call, so fetch it once before the loop.
    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
        ts.AddAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();

    // FIX: string concatenation in a loop is O(n^2); use a StringBuilder.
    StringBuilder sb = new StringBuilder();
    while (ts.IncrementToken())
    {
        sb.Append(ita.Term);
        sb.Append("|");
    }

    // FIX: removed the pointless ts.CloneAttributes() call whose result was
    // discarded, and deleted the commented-out duplicate of SplitWords that
    // preceded this region.
    reader.Close();
    analyzer.Close();
    return sb.ToString();
}
/// <summary>
/// Splits <paramref name="content"/> into words using the PanGu analyzer
/// (attribute-based Lucene.Net token API).
/// </summary>
/// <param name="content">Text to segment.</param>
/// <returns>The segmented terms, in order of appearance.</returns>
public static string[] SplitWords(string content)
{
    List<string> strList = new List<string>();
    // Use the PanGu PanGuAnalyzer segmentation algorithm.
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content));
    // FIX: the original re-fetched the attribute on every iteration and used
    // an awkward pre-loop IncrementToken/hasNext shape; the stream returns the
    // same shared attribute instance each time, so fetch it once.
    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
        tokenStream.AddAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (tokenStream.IncrementToken())
    {
        strList.Add(ita.Term);
    }
    return strList.ToArray();
}
/// <summary>
/// Splits <paramref name="content"/> into words using the PanGu analyzer.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <returns>The segmented terms, in order of appearance.</returns>
public static string[] SplitWords(string content)
{
    List<string> strList = new List<string>();
    // Use the PanGu PanGuAnalyzer segmentation algorithm.
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content));
    // FIX: the original left the AddAttribute result ('term') unused and then
    // re-checked HasAttribute + GetAttribute on every iteration. AddAttribute
    // returns the shared instance that IncrementToken refreshes, so read it
    // directly inside the loop.
    ITermAttribute term = tokenStream.AddAttribute<ITermAttribute>();
    while (tokenStream.IncrementToken())
    {
        strList.Add(term.Term); // term produced by this IncrementToken call
    }
    return strList.ToArray();
}
/// <summary>
/// Segments <paramref name="content"/> with the PanGu analyzer and joins the
/// terms with single spaces (suitable for feeding a query parser).
/// </summary>
/// <param name="content">Text to segment.</param>
/// <returns>Space-separated terms; note the trailing space is preserved.</returns>
public static string QueryParserWord(string content)
{
    StringBuilder sb = new StringBuilder();
    // Use the PanGu PanGuAnalyzer segmentation algorithm.
    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content));
    // FIX: fetch the shared term attribute once instead of calling
    // GetAttribute inside the loop with a pre-loop hasNext flag.
    Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
        tokenStream.AddAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
    while (tokenStream.IncrementToken())
    {
        sb.Append(ita.Term);
        sb.Append(" "); // original emits a trailing space after every term
    }
    return sb.ToString();
}
/// <summary>
/// Tokenizes <paramref name="searchTerm"/> with the PanGu analyzer and
/// returns the distinct terms plus the original search term, each escaped.
/// </summary>
/// <param name="searchTerm">Raw user search input.</param>
/// <returns>Escaped, case-insensitively distinct terms.</returns>
private static IEnumerable<string> GetSearchTerms(string searchTerm)
{
    List<string> result = new List<string>();
    var analyzer = new PanGuAnalyzer();
    StringReader sr = new StringReader(searchTerm);
    TokenStream stream = analyzer.TokenStream(null, sr);
    // Shared attribute instance, refreshed by each IncrementToken call.
    ITermAttribute ita = stream.AddAttribute<ITermAttribute>();
    while (stream.IncrementToken())
    {
        result.Add(ita.Term);
    }
    // FIX: removed the unused 'start' DateTime variable and the discarded
    // stream.CloneAttributes() call from the original.
    sr.Close();
    analyzer.Dispose();

    // FIX: dropped the join-then-split round-trip; filtering empty terms
    // preserves the old RemoveEmptyEntries behavior (assumes analyzer terms
    // contain no spaces — true for PanGu word segments; verify if in doubt).
    return result.Where(t => t.Length > 0)
                 .Concat(new[] { searchTerm })
                 .Distinct(StringComparer.OrdinalIgnoreCase)
                 .Select(Escape);
}
/// <summary>
/// Splits <paramref name="content"/> into PanGu-segmented terms.
/// </summary>
/// <param name="content">Text to segment.</param>
/// <returns>The segmented terms, in order of appearance.</returns>
private static string[] SplitWords(string content)
{
    List<string> strList = new List<string>();
    // FIX: the analyzer and token stream were never disposed; 'using'
    // guarantees release even if tokenization throws.
    using (PanGuAnalyzer analyzer = new PanGuAnalyzer())
    using (TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content)))
    {
        // Fetch the shared term attribute once; IncrementToken refreshes it.
        ITermAttribute ita = tokenStream.AddAttribute<ITermAttribute>();
        while (tokenStream.IncrementToken())
        {
            strList.Add(ita.Term);
        }
    }
    return strList.ToArray();
}
/// <summary>
/// Segments <paramref name="str"/> using the PanGu word-segmentation
/// algorithm.
/// </summary>
/// <param name="str">Text to segment.</param>
/// <returns>The segmented terms as an array.</returns>
public static string[] PanGuSplit(string str)
{
    // FIX: the XML docs described event-handler parameters (sender/e) left
    // over from a copy-paste; corrected to match the actual signature.
    Analyzer analyzer = new PanGuAnalyzer(); // PanGu segmentation algorithm
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
    List<string> list = new List<string>();
    try
    {
        Lucene.Net.Analysis.Token token;
        // Next() returns null once the stream is exhausted.
        while ((token = tokenStream.Next()) != null)
        {
            list.Add(token.TermText());
        }
    }
    finally
    {
        // FIX: release the stream and analyzer (previously leaked).
        tokenStream.Close();
        analyzer.Close();
    }
    return list.ToArray();
}