/// <summary>
/// Builds the analysis chain for a field: a <see cref="PanGuTokenizer"/> reading from
/// <paramref name="reader"/>, wrapped in a <see cref="LowerCaseFilter"/>.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>The lower-case filter wrapped around the PanGu tokenizer.</returns>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // The tokenizer is configured from this analyzer's own fields.
    TokenStream tokenizer = new PanGuTokenizer(reader, _OriginalResult, _options, _parameters);
    return new LowerCaseFilter(tokenizer);
}
/// <summary>
/// Creates the tokenizer/filter pair for a field: a <see cref="PanGuTokenizer"/> as the
/// source and a <see cref="LowerCaseFilter"/> (created with <c>LVERSION.LUCENE_48</c>) as the
/// final stream.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>Components holding the tokenizer and the lower-casing stream built on top of it.</returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    PanGuTokenizer tokenizer = new PanGuTokenizer(reader, _originalResult, _options, _parameters);

    // The same tokenizer instance is both the source component and the input of the filter.
    return new TokenStreamComponents(tokenizer, new LowerCaseFilter(LVERSION.LUCENE_48, tokenizer));
}
/// <summary>
/// Segments <paramref name="keywords"/> with the given tokenizer and joins the resulting
/// words into one string, each word written as <c>word^N.0</c> followed by a space, where
/// N is 3 raised to the word's rank.
/// </summary>
/// <param name="keywords">Text to segment.</param>
/// <param name="ktTokenizer">Tokenizer whose <c>SegmentToWordInfos</c> performs the segmentation.</param>
/// <returns>The weighted words separated by spaces, with surrounding whitespace trimmed.</returns>
/// <remarks>
/// NOTE(review): the <c>word^N.0</c> shape looks like query-parser boost syntax; confirm against the caller.
/// </remarks>
public static string SplitWords(string keywords, PanGuTokenizer ktTokenizer)
{
    var builder = new StringBuilder();

    foreach (WordInfo info in ktTokenizer.SegmentToWordInfos(keywords))
    {
        // Null entries in the segmentation result are ignored.
        if (info != null)
        {
            int weight = (int)Math.Pow(3, info.Rank);
            builder.AppendFormat("{0}^{1}.0 ", info.Word, weight);
        }
    }

    // Trim removes the trailing separator left by the last appended word.
    return builder.ToString().Trim();
}
/// <summary>
/// Segments the keywords into words and returns them as a space-separated string, each
/// word written as <c>word^N.0</c> where N is 3 raised to the word's rank.
/// </summary>
/// <param name="keywords">Keywords to segment.</param>
/// <param name="ktTokenizer">Tokenizer used to perform the segmentation.</param>
/// <returns>The weighted words separated by spaces, with surrounding whitespace trimmed.</returns>
public static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    ICollection<WordInfo> segments = ktTokenizer.SegmentToWordInfos(keywords);
    StringBuilder output = new StringBuilder();

    foreach (WordInfo segment in segments)
    {
        // Null entries in the segmentation result are ignored.
        if (segment != null)
        {
            int weight = (int)Math.Pow(3, segment.Rank);
            output.AppendFormat("{0}^{1}.0 ", segment.Word, weight);
        }
    }

    // Trim removes the trailing separator left by the last appended word.
    return output.ToString().Trim();
}
/// <summary>
/// Segments <paramref name="keywords"/> with the given tokenizer and joins the resulting words.
/// </summary>
/// <param name="keywords">Text to segment.</param>
/// <param name="ktTokenizer">Tokenizer whose <c>SegmentToWordInfos</c> performs the segmentation.</param>
/// <param name="isLike">
/// When <c>true</c>, the words are concatenated as-is with no separator and no weight suffix;
/// when <c>false</c> (the default), each word is written as <c>word^N.0</c> followed by a
/// space, where N is 3 raised to the word's rank.
/// </param>
/// <returns>The joined words, with surrounding whitespace trimmed.</returns>
public static string SplitWords(string keywords, PanGuTokenizer ktTokenizer, bool isLike = false)
{
    StringBuilder output = new StringBuilder();

    foreach (WordInfo segment in ktTokenizer.SegmentToWordInfos(keywords))
    {
        // Null entries in the segmentation result are ignored.
        if (segment == null)
        {
            continue;
        }

        if (!isLike)
        {
            int weight = (int)Math.Pow(3, segment.Rank);
            output.AppendFormat("{0}^{1}.0 ", segment.Word, weight);
        }
        else
        {
            output.AppendFormat("{0}", segment.Word);
        }
    }

    return output.ToString().Trim();
}
/// <summary>
/// Returns the token stream for a field: text from <paramref name="reader"/> is tokenized
/// by a <see cref="PanGuTokenizer"/> and then passed through a <see cref="LowerCaseFilter"/>.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>The lower-case filter wrapped around the PanGu tokenizer.</returns>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Tokenizer settings come from this analyzer's own fields.
    return new LowerCaseFilter(
        new PanGuTokenizer(reader, _OriginalResult, _options, _parameters));
}