/// <summary>
/// Builds the token analysis chain for a field: jieba Chinese segmentation,
/// then lower-casing, then stop-word removal.
/// </summary>
/// <param name="fieldName">Name of the field being analyzed (unused by this chain).</param>
/// <param name="reader">Reader supplying the raw text to tokenize.</param>
/// <returns>The fully composed <see cref="TokenStream"/>.</returns>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    var segmenter = new JiebaSegmenter();
    TokenStream stream = new JiebaTokenizer(segmenter, reader);
    // The query parser lower-cases query terms, so indexed tokens must be
    // lower-cased as well or they will never match.
    stream = new LowerCaseFilter(stream);
    stream = new StopFilter(true, stream, StopWords);
    return stream;
}
/// <summary>
/// Segments <paramref name="keywords"/> with jieba and rebuilds them as a
/// space-separated list of wildcard (prefix-match) query terms: each token is
/// hyphen-split, trimmed, suffixed with "*", and joined with single spaces.
/// </summary>
/// <param name="keywords">Raw user keyword input; may contain Chinese text.</param>
/// <returns>Space-joined wildcard terms, or an empty string when nothing remains.</returns>
protected string GetKeyWordsSplitBySpace(string keywords)
{
    var tokenizer = new JiebaTokenizer(new JiebaSegmenter(), keywords);
    // Previous version built an intermediate space-joined string with a
    // StringBuilder only to split it on spaces again (and Trim()ed it twice);
    // this pipeline produces the same terms directly.
    var terms = tokenizer.Tokenize(keywords)
        .Select(token => token.Word)
        .Where(word => !string.IsNullOrWhiteSpace(word))
        // Treat hyphens as term separators, matching the original Replace("-", " ").Split(' ').
        .SelectMany(word => word.Replace("-", " ").Split(' '))
        .Where(part => !string.IsNullOrEmpty(part))
        // "*" makes each term a prefix (wildcard) match in the query parser.
        .Select(part => part.Trim() + "*");
    return string.Join(" ", terms);
}