/// <summary>
/// Segments the given text into words using a freshly created <c>JiebaSegmenter</c>.
/// </summary>
/// <param name="objStr">The text to segment.</param>
/// <param name="type">
/// The segmentation mode. Any value other than <c>Default</c>, <c>CutAll</c> or
/// <c>CutForSearch</c> falls back to accurate mode without HMM.
/// </param>
/// <returns>The sequence of tokens produced by the segmenter.</returns>
public static IEnumerable<string> GetSplitWords(string objStr, JiebaTypeEnum type = JiebaTypeEnum.Default)
{
    var segmenter = new JiebaSegmenter();

    // Accurate mode, with HMM.
    if (type == JiebaTypeEnum.Default)
    {
        return segmenter.Cut(objStr);
    }

    // Full mode.
    if (type == JiebaTypeEnum.CutAll)
    {
        return segmenter.Cut(objStr, cutAll: true);
    }

    // Search-engine mode.
    if (type == JiebaTypeEnum.CutForSearch)
    {
        return segmenter.CutForSearch(objStr);
    }

    // Accurate mode, without HMM.
    return segmenter.Cut(objStr, false, false);
}
/// <summary>
/// Segments the string and returns the distinct tokens that are two or more characters long.
/// </summary>
/// <param name="objStr">The text to segment.</param>
/// <param name="type">The segmentation mode forwarded to <c>GetSplitWords</c>.</param>
/// <returns>
/// <c>null</c> when segmentation yields no tokens at all; otherwise the distinct tokens
/// of length two or more, in first-occurrence order (possibly an empty sequence).
/// </returns>
public static IEnumerable<string> GetSplitWordList(this string objStr, JiebaTypeEnum type = JiebaTypeEnum.Default)
{
    var segmented = GetSplitWords(objStr, type);
    if (segmented == null)
    {
        return null;
    }

    // Materialize once so the segmenter output is not re-enumerated repeatedly.
    var tokens = segmented.ToList();
    if (tokens.Count == 0)
    {
        // Kept as null (not an empty sequence) so existing null checks in callers still work.
        return null;
    }

    // Filter in a single pass. Removing by index while advancing the index skips the
    // element that shifts into the freed slot, letting adjacent short tokens through.
    return tokens
        .Where(token => token.Length >= 2)
        .Distinct();
}
/// <summary>
/// Segments the string and returns the distinct tokens that are two or more characters
/// long, joined with commas.
/// </summary>
/// <param name="objStr">The text to segment.</param>
/// <param name="type">The segmentation mode forwarded to <c>GetSplitWords</c>.</param>
/// <returns>
/// An empty string when segmentation yields no tokens (or none survive the length filter);
/// otherwise the surviving tokens, in first-occurrence order, separated by ",".
/// </returns>
public static string GetSplitWordStr(this string objStr, JiebaTypeEnum type = JiebaTypeEnum.Default)
{
    var segmented = GetSplitWords(objStr, type);
    if (segmented == null)
    {
        return string.Empty;
    }

    // Filter in a single pass. Removing by index while advancing the index skips the
    // element that shifts into the freed slot, letting adjacent short tokens through.
    var keywords = segmented
        .Where(token => token.Length >= 2)
        .Distinct();

    // string.Join over an empty sequence yields string.Empty, covering the no-result case.
    return string.Join(",", keywords);
}
/// <summary>
/// Segments the string and returns the result of passing the tokens to <c>JoinKeyWords</c>.
/// </summary>
/// <remarks>
/// NOTE(review): another <c>GetSplitWordStr</c> with the same parameter list appears in this
/// file; if both live in the same class they will conflict. Confirm which one is intended.
/// </remarks>
/// <param name="objStr">The text to segment.</param>
/// <param name="type">The segmentation mode forwarded to <c>GetSplitWords</c>.</param>
/// <returns>The string produced by <c>JoinKeyWords</c> for the segmented tokens.</returns>
public static string GetSplitWordStr(this string objStr, JiebaTypeEnum type = JiebaTypeEnum.Default)
{
    return JoinKeyWords(GetSplitWords(objStr, type));
}