/// <summary>
/// Segments <paramref name="keywords"/> with the supplied tokenizer and joins the
/// resulting words into a single weighted string.
/// </summary>
/// <param name="keywords">Text to segment.</param>
/// <param name="ktTokenizer">Tokenizer whose <c>SegmentToWordInfos</c> produces the words.</param>
/// <returns>
/// Every non-null word rendered as <c>word^N.0</c> followed by a space, where N is
/// 3 raised to the word's <c>Rank</c> cast to <c>int</c>; the combined text is trimmed.
/// </returns>
public static string SplitWords(string keywords, PanGuTokenizer ktTokenizer)
{
    ICollection<WordInfo> segments = ktTokenizer.SegmentToWordInfos(keywords);
    var buffer = new StringBuilder();

    foreach (WordInfo segment in segments)
    {
        if (segment != null)
        {
            // NOTE(review): the "^N.0" suffix looks like a query boost factor - confirm with the consumer.
            int weight = (int)Math.Pow(3, segment.Rank);
            buffer.AppendFormat("{0}^{1}.0 ", segment.Word, weight);
        }
    }

    // Each entry ends with a space, so trim the trailing one.
    string joined = buffer.ToString();
    return joined.Trim();
}
/// <summary>
/// Keyword segmentation: splits <paramref name="keywords"/> into words and returns them
/// as one space-separated string in which each word carries a weight suffix.
/// </summary>
/// <param name="keywords">The keywords to segment.</param>
/// <param name="ktTokenizer">The tokenizer used to perform the segmentation.</param>
/// <returns>
/// The non-null words, each formatted as <c>word^N.0</c> followed by a space (N is 3 to the
/// power of the word's <c>Rank</c>, cast to <c>int</c>), with surrounding whitespace trimmed.
/// </returns>
public static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    StringBuilder output = new StringBuilder();

    foreach (WordInfo info in ktTokenizer.SegmentToWordInfos(keywords))
    {
        // Null entries contribute nothing to the output.
        if (info == null)
        {
            continue;
        }

        int power = (int)Math.Pow(3, info.Rank);
        output.AppendFormat("{0}^{1}.0 ", info.Word, power);
    }

    return output.ToString().Trim();
}
/// <summary>
/// Segments <paramref name="keywords"/> with the supplied tokenizer and concatenates the
/// resulting words, either bare or with a weight suffix.
/// </summary>
/// <param name="keywords">Text to segment.</param>
/// <param name="ktTokenizer">Tokenizer whose <c>SegmentToWordInfos</c> produces the words.</param>
/// <param name="isLike">
/// When <c>true</c>, the words are appended back-to-back with no separator and no weight;
/// when <c>false</c> (the default), each word is written as <c>word^N.0</c> followed by a
/// space, where N is 3 raised to the word's <c>Rank</c> cast to <c>int</c>.
/// </param>
/// <returns>The concatenated text with leading and trailing whitespace trimmed.</returns>
public static string SplitWords(string keywords, PanGuTokenizer ktTokenizer, bool isLike = false)
{
    ICollection<WordInfo> tokens = ktTokenizer.SegmentToWordInfos(keywords);
    StringBuilder text = new StringBuilder();

    foreach (WordInfo token in tokens)
    {
        if (token == null)
        {
            continue;
        }

        if (!isLike)
        {
            // Weighted form: one "word^N.0 " entry per segment.
            text.AppendFormat("{0}^{1}.0 ", token.Word, (int)Math.Pow(3, token.Rank));
        }
        else
        {
            // Plain form: words are glued together without any separator.
            text.AppendFormat("{0}", token.Word);
        }
    }

    string combined = text.ToString();
    return combined.Trim();
}