Наследование: Lucene.Net.Analysis.Tokenizer
        /// <summary>
        /// Builds the token stream for a field: a <c>PanGuTokenizer</c> reading from
        /// <paramref name="reader"/>, wrapped in a <c>LowerCaseFilter</c>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The lower-case filter wrapping the PanGu tokenizer.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            // The tokenizer is configured from this instance's fields.
            TokenStream tokenizer = new PanGuTokenizer(reader, _OriginalResult, _options, _parameters);

            return new LowerCaseFilter(tokenizer);
        }
Пример #2
0
        /// <summary>
        /// Creates the analysis components for a field: a <c>PanGuTokenizer</c> reading from
        /// <paramref name="reader"/>, and a <c>LowerCaseFilter</c> applied on top of it.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>Components built from the tokenizer and the lower-case filter that wraps it.</returns>
        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            var tokenizer = new PanGuTokenizer(reader, _originalResult, _options, _parameters);
            var lowerCased = new LowerCaseFilter(LVERSION.LUCENE_48, tokenizer);

            return new TokenStreamComponents(tokenizer, lowerCased);
        }
Пример #3
0
        /// <summary>
        /// Segments <paramref name="keywords"/> with the given tokenizer and renders every
        /// word as <c>word^weight.0</c>, separated by single spaces.
        /// </summary>
        /// <param name="keywords">Text to segment.</param>
        /// <param name="ktTokenizer">Tokenizer whose <c>SegmentToWordInfos</c> performs the segmentation.</param>
        /// <returns>
        /// The weighted terms joined by spaces, with leading and trailing whitespace trimmed.
        /// The weight is 3 raised to the word's rank, truncated to an integer.
        /// </returns>
        public static string SplitWords(string keywords, PanGuTokenizer ktTokenizer)
        {
            var builder = new StringBuilder();

            foreach (WordInfo info in ktTokenizer.SegmentToWordInfos(keywords))
            {
                // Null entries in the segmentation result are ignored.
                if (info != null)
                {
                    int weight = (int)Math.Pow(3, info.Rank);
                    builder.AppendFormat("{0}^{1}.0 ", info.Word, weight);
                }
            }

            // Each term is followed by a space; Trim removes the trailing one.
            return builder.ToString().Trim();
        }
Пример #4
0
 /// <summary>
 /// Segments the keywords into words and renders every word as <c>word^weight.0</c>,
 /// separated by single spaces.
 /// </summary>
 /// <param name="keywords">Keywords to segment.</param>
 /// <param name="ktTokenizer">Tokenizer whose <c>SegmentToWordInfos</c> performs the segmentation.</param>
 /// <returns>
 /// The weighted terms joined by spaces, with leading and trailing whitespace trimmed.
 /// The weight is 3 raised to the word's rank, truncated to an integer.
 /// </returns>
 public static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
 {
     StringBuilder terms = new StringBuilder();

     foreach (WordInfo segment in ktTokenizer.SegmentToWordInfos(keywords))
     {
         // Null entries in the segmentation result are ignored.
         if (segment != null)
         {
             int weight = (int)Math.Pow(3, segment.Rank);
             terms.AppendFormat("{0}^{1}.0 ", segment.Word, weight);
         }
     }

     // Each term is followed by a space; Trim removes the trailing one.
     return terms.ToString().Trim();
 }
Пример #5
0
 /// <summary>
 /// Segments <paramref name="keywords"/> with the given tokenizer and joins the words
 /// into one string, either weighted or plain.
 /// </summary>
 /// <param name="keywords">Text to segment.</param>
 /// <param name="ktTokenizer">Tokenizer whose <c>SegmentToWordInfos</c> performs the segmentation.</param>
 /// <param name="isLike">
 /// When <c>false</c> (the default) every word is written as <c>word^weight.0</c> followed by
 /// a space, where the weight is 3 raised to the word's rank, truncated to an integer.
 /// When <c>true</c> the words are written back to back with no weight and no separator.
 /// </param>
 /// <returns>The assembled string with leading and trailing whitespace trimmed.</returns>
 public static string SplitWords(string keywords, PanGuTokenizer ktTokenizer,bool isLike=false)
 {
     StringBuilder output = new StringBuilder();

     foreach (WordInfo piece in ktTokenizer.SegmentToWordInfos(keywords))
     {
         // Null entries in the segmentation result are ignored.
         if (piece != null)
         {
             if (!isLike)
             {
                 int weight = (int)Math.Pow(3, piece.Rank);
                 output.AppendFormat("{0}^{1}.0 ", piece.Word, weight);
             }
             else
             {
                 output.AppendFormat("{0}", piece.Word);
             }
         }
     }

     return output.ToString().Trim();
 }
 /// <summary>
 /// Builds the token stream for a field: a <c>PanGuTokenizer</c> reading from
 /// <paramref name="reader"/>, wrapped in a <c>LowerCaseFilter</c>.
 /// </summary>
 /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
 /// <param name="reader">Source of the text to tokenize.</param>
 /// <returns>The lower-case filter wrapping the PanGu tokenizer.</returns>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     // The tokenizer is configured from this instance's fields.
     TokenStream source = new PanGuTokenizer(reader, _OriginalResult, _options, _parameters);

     return new LowerCaseFilter(source);
 }