/// <summary>
/// Segments <paramref name="keywords"/> with PanGu and builds a Lucene query
/// string where each word is boosted by 3 raised to its rank ("word^N.0").
/// </summary>
/// <param name="keywords">Raw keyword text to segment.</param>
/// <param name="ktTokenizer">Tokenizer that performs the segmentation.</param>
/// <returns>Space-separated boosted terms, trimmed of the trailing space.</returns>
public static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    StringBuilder buffer = new StringBuilder();
    foreach (WordInfo info in ktTokenizer.SegmentToWordInfos(keywords))
    {
        if (info == null)
        {
            continue;
        }
        // Boost weight grows exponentially with the word's rank.
        buffer.AppendFormat("{0}^{1}.0 ", info.Word, (int)Math.Pow(3, info.Rank));
    }
    return buffer.ToString().Trim();
}
/// <summary>
/// PanGu word segmentation: splits <paramref name="keywords"/> into words and
/// returns a Lucene query string with each word boosted by 3^rank ("word^N.0").
/// </summary>
/// <param name="keywords">Keyword text to segment.</param>
/// <returns>Space-separated boosted terms, trimmed of the trailing space.</returns>
/// <remarks>Created 2013-3-8 by Yang Hao.</remarks>
public static string Participle(string keywords)
{
    StringBuilder result = new StringBuilder();
    PanGuTokenizer ktTokenizer = new PanGuTokenizer();
    ICollection<PanGu.WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);
    // Filter nulls ONCE up front. The original called words.Max() on the raw
    // collection while only the output loop filtered nulls; if every element
    // was null (with Count != 0), Max() returned null and ".Word" threw a
    // NullReferenceException.
    var validWords = words.Where(word => word != null).ToList();
    if (validWords.Count != 0)
    {
        _keyword = validWords.Max().Word; // record the user's query keyword
    }
    foreach (var word in validWords)
    {
        result.AppendFormat("{0}^{1}.0 ", word.Word, (int)Math.Pow(3, word.Rank));
    }
    return result.ToString().Trim();
}
/// <summary>
/// Segments a keyword string with PanGu and concatenates every word followed
/// by '/' (one keyword may split into several words or single characters).
/// </summary>
/// <param name="keywords">Keyword text to segment.</param>
/// <param name="ktTokenizer">Tokenizer that performs the segmentation.</param>
/// <returns>Slash-separated words, e.g. "foo/bar/", trimmed of whitespace.</returns>
private static string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
{
    StringBuilder joined = new StringBuilder();
    // Run the segmentation: one keyword can split into multiple words and
    // individual characters.
    foreach (WordInfo token in ktTokenizer.SegmentToWordInfos(keywords))
    {
        if (token != null)
        {
            joined.AppendFormat("{0}/", token.Word);
        }
    }
    return joined.ToString().Trim();
}
/// <summary>
/// Segments a string and returns the distinct multi-character words, in
/// first-occurrence order, joined by single spaces.
/// </summary>
/// <param name="keywords">String to segment.</param>
/// <param name="ktTokenizer">Tokenizer that performs the segmentation.</param>
/// <returns>Space-joined distinct words whose length is greater than 1.</returns>
public string GetKeyWordsSplitFilter(string keywords, PanGuTokenizer ktTokenizer)
{
    List<string> ordered = new List<string>();
    // HashSet gives O(1) membership checks; the original List.Contains call
    // inside the loop made deduplication O(n^2) on long inputs. The list
    // still records first-occurrence order for the final join.
    HashSet<string> seen = new HashSet<string>();
    foreach (WordInfo word in ktTokenizer.SegmentToWordInfos(keywords))
    {
        // Skip null entries and single-character words, as before.
        if (word == null || word.Word.Length <= 1)
        {
            continue;
        }
        if (seen.Add(word.Word))
        {
            ordered.Add(word.Word);
        }
    }
    return string.Join(" ", ordered.ToArray());
}
/// <summary>
/// Exercises the PanGu tokenizer over a mixed Latin/Chinese string and
/// accumulates the emitted terms as "term/ term/ ..." for inspection.
/// </summary>
public void ContainAnalyzerTest()
{
    LuceneEngine engine = new LuceneEngine();
    engine.Init();
    //string text = "三国演义";
    //string text = "a|f|g|i|m";
    string text = "NBA常规赛-快船vs凯尔特人";
    TextReader tr = new StringReader(text);
    PanGuTokenizer ct = new PanGuTokenizer(tr);
    int end = 0;
    Lucene.Net.Analysis.Token t;
    string ss = string.Empty;
    while (end < text.Length)
    {
        t = ct.Next();
        if (t == null)
        {
            // The token stream can finish before EndOffset reaches
            // text.Length (e.g. trailing separators); without this guard
            // the original dereferenced a null token and never exited.
            break;
        }
        end = t.EndOffset();
        ss = ss + t.TermText() + "/ ";
    }
    string xxx = ss; // inspection point for the accumulated terms
}
/// <summary>
/// Scratch test probing several segmentation entry points (spelling
/// conversion, Segment.DoSegment, keyword segmentation) and the raw
/// PanGu tokenizer loop; results are kept in locals for inspection.
/// </summary>
public void SpellTest()
{
    LuceneEngine engine = new LuceneEngine();
    engine.Init();
    var firsts = SpellUtils.GetCnSegment("NBA常规赛-快船vs凯尔特人");
    var spells = SpellUtils.GetSpellSegment("战重警和");
    var segment = new Segment();
    var collection = segment.DoSegment("国际足球100509K联赛釜山-大田");
    var list = new List<string>();
    foreach (WordInfo word in collection)
    {
        if (word == null)
        {
            continue;
        }
        list.Add(word.Word);
    }
    //var list = SpellUtils.GetSpellSegment("战警");
    string sss = "san国yan义";
    string ssss = Synacast.LuceneNetSearcher.Searcher.Searcher.SegmentKeyWord(sss);
    string text = "重庆";
    TextReader tr = new StringReader(text);
    PanGuTokenizer ct = new PanGuTokenizer(tr);
    int end = 0;
    Lucene.Net.Analysis.Token t;
    string ss = string.Empty;
    while (end < text.Length)
    {
        t = ct.Next();
        if (t == null)
        {
            // Next() returns null at end of stream; the original
            // dereferenced it when the stream ended before EndOffset
            // reached text.Length.
            break;
        }
        end = t.EndOffset();
        ss = ss + t.TermText() + "/ ";
    }
}