示例#1
0
        /// <summary>
        /// Segments encoded text into words.
        /// </summary>
        /// <param name="bytes">The text to segment, as raw bytes in the encoding given by <paramref name="enc"/>.</param>
        /// <param name="enc">The encoding of <paramref name="bytes"/>; it selects the code type passed to the segmenter and decodes each word.</param>
        /// <returns>One <see cref="ResultTerm"/> for every result slot with a non-zero length, in slot order.</returns>
        public List<ResultTerm> Segment(byte[] bytes, Encoding enc)
        {
            // Number of leading sPos bytes scanned for the terminating zero byte.
            const int posTagWidth = 8;

            // The result buffer gets one slot per input byte.
            result_t[] result = new result_t[bytes.Length];
            int nWrdCnt = ICTCLAS_ParagraphProcessAW_B(bytes, result, getCodeType(enc), true);

            List<ResultTerm> returnResult = new List<ResultTerm>(nWrdCnt);

            for (int i = 0; i < result.Length; i++)
            {
                result_t r = result[i];

                // Slots with a zero length carry no word.
                if (r.length == 0)
                {
                    continue;
                }

                // The tag ends at the first zero byte. If none is found, the tag fills
                // the whole scanned width, so default to that width rather than zero
                // (zero would silently yield an empty POSStr).
                int posStrArrLen = posTagWidth;
                for (int l = 0; l < posTagWidth; l++)
                {
                    if (r.sPos[l] == 0)
                    {
                        posStrArrLen = l;
                        break;
                    }
                }

                ResultTerm word = new ResultTerm();
                // start/length are offsets into the input byte array, not character indexes.
                word.Word = enc.GetString(bytes, r.start, r.length);
                word.POS = r.POS_id;
                word.POSStr = Encoding.ASCII.GetString(r.sPos, 0, posStrArrLen);
                returnResult.Add(word);
            }

            return returnResult;
        }
示例#2
0
        /// <summary>
        /// Segments a string into words. The string is encoded as UTF-8 before being
        /// handed to the segmenter.
        /// </summary>
        /// <param name="str">The text to segment.</param>
        /// <returns>One <see cref="ResultTerm"/> for every result slot with a non-zero length, in slot order.</returns>
        public List<ResultTerm> Segment(string str)
        {
            // Number of leading sPos bytes scanned for the terminating zero byte.
            const int posTagWidth = 8;

            byte[] bytes = Encoding.UTF8.GetBytes(str);

            // Sized by the UTF-8 byte count (not the UTF-16 length of str) so the buffer
            // is measured in the same unit as the data passed to the segmenter, matching
            // the byte[] overload.
            result_t[] result = new result_t[bytes.Length];
            int nWrdCnt = ICTCLAS_ParagraphProcessAW_B(bytes, result, eCodeType.CODE_TYPE_UTF8, true);

            List<ResultTerm> returnResult = new List<ResultTerm>(nWrdCnt);

            for (int i = 0; i < result.Length; i++)
            {
                result_t r = result[i];

                // Slots with a zero length carry no word.
                if (r.length == 0)
                {
                    continue;
                }

                // The tag ends at the first zero byte. If none is found, the tag fills
                // the whole scanned width, so default to that width rather than zero
                // (zero would silently yield an empty POSStr).
                int posStrArrLen = posTagWidth;
                for (int l = 0; l < posTagWidth; l++)
                {
                    if (r.sPos[l] == 0)
                    {
                        posStrArrLen = l;
                        break;
                    }
                }

                ResultTerm word = new ResultTerm();
                // start/length are offsets into the UTF-8 byte array, not indexes into str.
                word.Word = Encoding.UTF8.GetString(bytes, r.start, r.length);
                word.POS = r.POS_id;
                word.POSStr = Encoding.ASCII.GetString(r.sPos, 0, posStrArrLen);
                returnResult.Add(word);
            }

            return returnResult;
        }
示例#3
0
        /// <summary>
        /// Segments encoded text into words.
        /// </summary>
        /// <param name="bytes">The text to segment, as raw bytes in the encoding given by <paramref name="enc"/>.</param>
        /// <param name="enc">The encoding of <paramref name="bytes"/>; it selects the code type passed to the segmenter and decodes each word.</param>
        /// <returns>One <see cref="ResultTerm"/> for every result slot with a non-zero length, in slot order.</returns>
        public List<ResultTerm> Segment(byte[] bytes, Encoding enc)
        {
            // Number of leading sPos bytes scanned for the terminating zero byte.
            const int posTagWidth = 8;

            // The result buffer gets one slot per input byte.
            result_t[] result = new result_t[bytes.Length];
            int nWrdCnt = ICTCLAS_ParagraphProcessAW_B(bytes, result, getCodeType(enc), true);

            List<ResultTerm> returnResult = new List<ResultTerm>(nWrdCnt);

            for (int i = 0; i < result.Length; i++)
            {
                result_t r = result[i];

                // Slots with a zero length carry no word.
                if (r.length == 0)
                {
                    continue;
                }

                // The tag ends at the first zero byte. If none is found, the tag fills
                // the whole scanned width, so default to that width rather than zero
                // (zero would silently yield an empty POSStr).
                int posStrArrLen = posTagWidth;
                for (int l = 0; l < posTagWidth; l++)
                {
                    if (r.sPos[l] == 0)
                    {
                        posStrArrLen = l;
                        break;
                    }
                }

                ResultTerm word = new ResultTerm();
                // start/length are offsets into the input byte array, not character indexes.
                word.Word = enc.GetString(bytes, r.start, r.length);
                word.POS = r.POS_id;
                word.POSStr = Encoding.ASCII.GetString(r.sPos, 0, posStrArrLen);
                returnResult.Add(word);
            }

            return returnResult;
        }
示例#4
0
        /// <summary>
        /// Segments a string into words. The string is encoded as UTF-8 before being
        /// handed to the segmenter.
        /// </summary>
        /// <param name="str">The text to segment.</param>
        /// <returns>One <see cref="ResultTerm"/> for every result slot with a non-zero length, in slot order.</returns>
        public List<ResultTerm> Segment(string str)
        {
            // Number of leading sPos bytes scanned for the terminating zero byte.
            const int posTagWidth = 8;

            byte[] bytes = Encoding.UTF8.GetBytes(str);

            // Sized by the UTF-8 byte count (not the UTF-16 length of str) so the buffer
            // is measured in the same unit as the data passed to the segmenter, matching
            // the byte[] overload.
            result_t[] result = new result_t[bytes.Length];
            int nWrdCnt = ICTCLAS_ParagraphProcessAW_B(bytes, result, eCodeType.CODE_TYPE_UTF8, true);

            List<ResultTerm> returnResult = new List<ResultTerm>(nWrdCnt);

            for (int i = 0; i < result.Length; i++)
            {
                result_t r = result[i];

                // Slots with a zero length carry no word.
                if (r.length == 0)
                {
                    continue;
                }

                // The tag ends at the first zero byte. If none is found, the tag fills
                // the whole scanned width, so default to that width rather than zero
                // (zero would silently yield an empty POSStr).
                int posStrArrLen = posTagWidth;
                for (int l = 0; l < posTagWidth; l++)
                {
                    if (r.sPos[l] == 0)
                    {
                        posStrArrLen = l;
                        break;
                    }
                }

                ResultTerm word = new ResultTerm();
                // start/length are offsets into the UTF-8 byte array, not indexes into str.
                word.Word = Encoding.UTF8.GetString(bytes, r.start, r.length);
                word.POS = r.POS_id;
                word.POSStr = Encoding.ASCII.GetString(r.sPos, 0, posStrArrLen);
                returnResult.Add(word);
            }

            return returnResult;
        }