/// <summary>
/// Segments <paramref name="sentence"/> with the HL word splitter and returns the
/// words that qualify as labels.
/// </summary>
/// <param name="sentence">
/// Text to segment. A null value yields an empty list without touching the splitter.
/// </param>
/// <returns>
/// The words longer than one character whose part-of-speech tag is ".n", ".v",
/// ".ns", ".m", ".mq" or ".nr", in segmentation order; <c>null</c> when the
/// dictionary cannot be loaded, no splitter handle can be opened, or the
/// segmentation call reports failure.
/// </returns>
public List<string> GetLabels(string sentence)
{
    if (sentence == null)
    {
        // Nothing to segment; mirrors the null guard in IsHasPunctuation.
        return new List<string>();
    }

    if (!HLSplitInit(@"C:\dll\"))
    {
        Console.WriteLine("初始化分词字典失败!");
        return null;
    }

    IntPtr hHandle = IntPtr.Zero;
    try
    {
        hHandle = HLOpenSplit(); // create the splitter handle
        if (hHandle == IntPtr.Zero)
        {
            Console.WriteLine("创建分词句柄失败!");
            return null;
        }

        // 0 = no extra calculation; HL_CAL_OPT_* flags could be OR-ed in here.
        // NOTE(review): s_dwPOS is read below although HL_CAL_OPT_POS is not set —
        // confirm the library fills the POS tag by default.
        short iExtraCalcFlag = 0;

        // UtcNow so a local-time (DST) shift cannot skew the measured duration.
        DateTime startedAt = DateTime.UtcNow;
        bool succeeded = HLSplitWord(hHandle, sentence, iExtraCalcFlag);
        TimeSpan elapsed = DateTime.UtcNow - startedAt;
        Console.WriteLine("get labels---->" + elapsed);

        if (!succeeded)
        {
            Console.WriteLine("分词失败!");
            return null;
        }

        List<string> labels = new List<string>();
        int wordCount = HLGetWordCnt(hHandle);
        for (int i = 0; i < wordCount; i++)
        {
            IntPtr wordPtr = HLGetWordAt(hHandle, i);
            if (wordPtr == IntPtr.Zero)
            {
                // PtrToStructure throws on a null pointer; skip the missing entry instead.
                continue;
            }

            SHLSegWord word = (SHLSegWord)Marshal.PtrToStructure(wordPtr, typeof(SHLSegWord));

            // Resolve the tag once instead of once per comparison.
            string nature = GetNatureString(word.s_dwPOS);
            bool isLabelNature =
                nature == ".n" ||   // noun
                nature == ".v" ||   // verb
                nature == ".ns" ||  // place name
                nature == ".m" ||   // numeral
                nature == ".mq" ||  // numeral-classifier compound
                nature == ".nr";

            if (isLabelNature && word.s_szWord != null && word.s_szWord.Length > 1)
            {
                labels.Add(word.s_szWord);
            }
        }

        return labels;
    }
    finally
    {
        // Runs on every path after a successful init, so the handle is closed and
        // the dictionary unloaded exactly once, even when an exception escapes.
        if (hHandle != IntPtr.Zero)
        {
            HLCloseSplit(hHandle);
        }

        HLFreeSplit();
    }
}
/// <summary>
/// Determines whether the given text contains punctuation, by segmenting it with
/// the HL word splitter and looking for a word tagged ".w".
/// </summary>
/// <param name="str">Text to inspect; may be null.</param>
/// <returns>
/// <c>true</c> when at least one segmented word carries the ".w" tag; <c>false</c>
/// when none does, when <paramref name="str"/> is null, or when the dictionary
/// cannot be loaded, no splitter handle can be opened, or segmentation fails.
/// </returns>
public bool IsHasPunctuation(string str)
{
    if (str == null)
    {
        return false;
    }

    if (!HLSplitInit(@"C:\dll\"))
    {
        // Stop here: going on would drive the splitter without a loaded dictionary.
        Console.WriteLine("初始化分词字典失败!");
        return false;
    }

    IntPtr hHandle = IntPtr.Zero;
    try
    {
        hHandle = HLOpenSplit(); // create the splitter handle
        if (hHandle == IntPtr.Zero)
        {
            Console.WriteLine("创建分词句柄失败!");
            return false;
        }

        // 0 = no extra calculation; HL_CAL_OPT_* flags could be OR-ed in here.
        // NOTE(review): s_dwPOS is read below although HL_CAL_OPT_POS is not set —
        // confirm the library fills the POS tag by default.
        short iExtraCalcFlag = 0;

        // UtcNow so a local-time (DST) shift cannot skew the measured duration.
        DateTime startedAt = DateTime.UtcNow;
        bool succeeded = HLSplitWord(hHandle, str, iExtraCalcFlag);
        TimeSpan elapsed = DateTime.UtcNow - startedAt;
        Console.WriteLine("get names----->" + elapsed);

        if (!succeeded)
        {
            Console.WriteLine("分词失败!");
            return false;
        }

        int wordCount = HLGetWordCnt(hHandle);

        // int counter: a short would overflow once the text yields more than 32767 words.
        for (int i = 0; i < wordCount; i++)
        {
            IntPtr wordPtr = HLGetWordAt(hHandle, i);
            if (wordPtr == IntPtr.Zero)
            {
                // PtrToStructure throws on a null pointer; skip the missing entry instead.
                continue;
            }

            SHLSegWord word = (SHLSegWord)Marshal.PtrToStructure(wordPtr, typeof(SHLSegWord));
            if (GetNatureString(word.s_dwPOS) == ".w")
            {
                // One hit settles the answer; no need to marshal the remaining words.
                return true;
            }
        }

        return false;
    }
    finally
    {
        // Runs on every path after a successful init, so the handle is closed and
        // the dictionary unloaded exactly once, even when an exception escapes.
        if (hHandle != IntPtr.Zero)
        {
            HLCloseSplit(hHandle);
        }

        HLFreeSplit();
    }
}