Пример #1
0
        /// <summary>
        /// Segments <paramref name="sentence"/> with the HL word splitter and collects every word
        /// that is longer than one character and whose part-of-speech string is one of
        /// ".n", ".v", ".ns", ".m", ".mq" or ".nr".
        /// </summary>
        /// <param name="sentence">Text to segment.</param>
        /// <returns>
        /// The matching words in segmentation order, or <c>null</c> when the dictionary cannot be
        /// initialised, the splitter handle cannot be created, or segmentation fails.
        /// </returns>
        public List <string> GetLabels(string sentence)
        {
            if (!HLSplitInit(@"C:\dll\"))
            {
                Console.WriteLine("初始化分词字典失败!");
                return(null);
            }

            try
            {
                IntPtr hHandle = HLOpenSplit();
                if (hHandle == IntPtr.Zero)
                {
                    Console.WriteLine("创建分词句柄失败!");
                    return(null);
                }

                try
                {
                    // Extra calculation flags: part of speech, keywords and fingerprint.
                    // The search option (HL_CAL_OPT_SEARCH) is intentionally left off.
                    short iExtraCalcFlag = 0;
                    iExtraCalcFlag |= HL_CAL_OPT_POS;
                    iExtraCalcFlag |= HL_CAL_OPT_KEYWORD;
                    iExtraCalcFlag |= HL_CAL_OPT_FINGER;

                    DateTime startedAt = DateTime.Now;
                    bool     bSuccess  = HLSplitWord(hHandle, sentence, iExtraCalcFlag);
                    TimeSpan elapsed   = DateTime.Now - startedAt;
                    Console.WriteLine("get labels---->" + elapsed);

                    if (!bSuccess)
                    {
                        Console.WriteLine("分词失败!");
                        return(null);
                    }

                    List <string> labels     = new List <string>();
                    int           nResultCnt = HLGetWordCnt(hHandle);
                    for (int i = 0; i < nResultCnt; i++)
                    {
                        IntPtr pNative = HLGetWordAt(hHandle, i);
                        if (pNative == IntPtr.Zero)
                        {
                            // Marshal.PtrToStructure throws on a null pointer; skip such entries.
                            continue;
                        }

                        SHLSegWord pWord = (SHLSegWord)Marshal.PtrToStructure(pNative, typeof(SHLSegWord));

                        // Look the part-of-speech string up once instead of once per comparison.
                        string nature   = GetNatureString(pWord.s_dwPOS);
                        bool   isWanted = nature.Equals(".n") ||   // noun
                                          nature.Equals(".v") ||   // verb
                                          nature.Equals(".ns") ||  // place name
                                          nature.Equals(".m") ||   // numeral
                                          nature.Equals(".mq") ||  // numeral-quantifier
                                          nature.Equals(".nr");

                        if (isWanted && pWord.s_szWord.Length > 1)
                        {
                            labels.Add(pWord.s_szWord);
                        }
                    }
                    return(labels);
                }
                finally
                {
                    // Close the splitter handle exactly once on every path, including exceptions.
                    HLCloseSplit(hHandle);
                }
            }
            finally
            {
                // Unload the dictionary exactly once; the original unloaded it twice on failure.
                HLFreeSplit();
            }
        }
Пример #2
0
        /// <summary>
        /// Determines whether <paramref name="str"/> contains punctuation, i.e. whether the HL word
        /// splitter yields at least one token whose part-of-speech string is ".w".
        /// </summary>
        /// <param name="str">Text to inspect; <c>null</c> is treated as containing no punctuation.</param>
        /// <returns>
        /// <c>true</c> when a ".w" token is found; <c>false</c> otherwise, including when the
        /// dictionary cannot be initialised, the splitter handle cannot be created, or
        /// segmentation fails.
        /// </returns>
        public bool IsHasPunctuation(string str)
        {
            if (str == null)
            {
                return(false);
            }

            if (!HLSplitInit(@"C:\dll\"))
            {
                Console.WriteLine("初始化分词字典失败!");
                // Nothing can be segmented without a dictionary; do not continue with it unloaded.
                return(false);
            }

            try
            {
                IntPtr hHandle = HLOpenSplit();
                if (hHandle == IntPtr.Zero)
                {
                    Console.WriteLine("创建分词句柄失败!");
                    // Never hand a zero handle to the native splitter functions.
                    return(false);
                }

                try
                {
                    // Extra calculation flags: part of speech, keywords and fingerprint.
                    // The search option (HL_CAL_OPT_SEARCH) is intentionally left off.
                    short iExtraCalcFlag = 0;
                    iExtraCalcFlag |= HL_CAL_OPT_POS;
                    iExtraCalcFlag |= HL_CAL_OPT_KEYWORD;
                    iExtraCalcFlag |= HL_CAL_OPT_FINGER;

                    DateTime startedAt = DateTime.Now;
                    bool     bSuccess  = HLSplitWord(hHandle, str, iExtraCalcFlag);
                    TimeSpan elapsed   = DateTime.Now - startedAt;
                    Console.WriteLine("get names----->" + elapsed);

                    if (!bSuccess)
                    {
                        Console.WriteLine("分词失败!");
                        return(false);
                    }

                    int nResultCnt = HLGetWordCnt(hHandle);
                    // int index: a short counter wraps negative once the count exceeds 32767.
                    for (int i = 0; i < nResultCnt; i++)
                    {
                        IntPtr pNative = HLGetWordAt(hHandle, i);
                        if (pNative == IntPtr.Zero)
                        {
                            // Marshal.PtrToStructure throws on a null pointer; skip such entries.
                            continue;
                        }

                        SHLSegWord pWord = (SHLSegWord)Marshal.PtrToStructure(pNative, typeof(SHLSegWord));
                        if (GetNatureString(pWord.s_dwPOS).Equals(".w"))
                        {
                            // One hit settles the answer; cleanup still runs via the finally blocks.
                            return(true);
                        }
                    }
                    return(false);
                }
                finally
                {
                    // Close the splitter handle exactly once on every path, including exceptions.
                    HLCloseSplit(hHandle);
                }
            }
            finally
            {
                // Unload the dictionary exactly once; the original could unload it up to three times.
                HLFreeSplit();
            }
        }