예제 #1
0
        /*public void SetSentence (string str)
         * {
         *      context.ClearResult ();
         *      byte[] sen = System.Text.Encoding.UTF8.GetBytes (str);
         *      context.mSentence = sen;
         * }*/

        /// <summary>
        /// Scans <c>context.mSentence</c> from the current <c>context.mParseIndex</c>,
        /// choosing a parser from the value of each lead byte and handing the context to it.
        /// </summary>
        /// <remarks>
        /// Scanning stops at the end of the buffer or at the first value that selects no
        /// parser (anything outside 0x00..0x7F that is also below 0xC0).
        /// NOTE(review): the loop relies on <c>Interprete</c> advancing
        /// <c>context.mParseIndex</c>; that is not visible from here - confirm.
        /// </remarks>
        public void ParseSentence()
        {
            int total = context.mSentence.GetLength(0);

            while (context.mParseIndex < total)
            {
                int lead = (int)(context.mSentence[context.mParseIndex]);
                ParseBase parser = null;

                // Ranges are tested from the highest lead byte downwards.
                if (lead >= 0xf0)
                {
                    parser = new FourByteParse();
                }
                else if (lead >= 0xe0)
                {
                    parser = new ThreeByteParse();
                }
                else if (lead >= 0xc0)
                {
                    parser = new TwoByteParse();
                }
                else if (lead >= 0 && lead <= 0x7f)
                {
                    parser = new OneByteParse();
                }

                // No parser was selected for this value: stop scanning.
                if (parser == null)
                {
                    break;
                }

                parser.Interprete(context);
            }
        }
예제 #2
0
        /// <summary>
        /// Resets the parse index to 0 and scans <c>context.mSentence</c> character by
        /// character, stopping before the first character whose end would pass
        /// <paramref name="maxLen"/>.
        /// </summary>
        /// <param name="maxLen">
        /// Upper bound compared against the parse index plus the parser's
        /// <c>GetByteNum()</c> for the next character.
        /// </param>
        /// <returns>
        /// The value of <c>context.mParseIndex</c> when scanning stopped: at the end of
        /// the buffer, at a value that selects no parser, or before the character that
        /// would exceed <paramref name="maxLen"/>.
        /// </returns>
        /// <remarks>
        /// NOTE(review): the loop relies on <c>Interprete</c> advancing
        /// <c>context.mParseIndex</c>; that is not visible from here - confirm.
        /// </remarks>
        public int GetSubWordsLen(int maxLen)
        {
            context.mParseIndex = 0;

            int total = context.mSentence.GetLength(0);

            while (context.mParseIndex < total)
            {
                int lead = (int)(context.mSentence[context.mParseIndex]);
                ParseBase parser = null;

                // Ranges are tested from the highest lead byte downwards.
                if (lead >= 0xf0)
                {
                    parser = new FourByteParse();
                }
                else if (lead >= 0xe0)
                {
                    parser = new ThreeByteParse();
                }
                else if (lead >= 0xc0)
                {
                    parser = new TwoByteParse();
                }
                else if (lead >= 0 && lead <= 0x7f)
                {
                    parser = new OneByteParse();
                }

                // No parser was selected for this value: stop scanning.
                if (parser == null)
                {
                    break;
                }

                // The next character would end past the limit: stop before it.
                if (context.mParseIndex + parser.GetByteNum() > maxLen)
                {
                    break;
                }

                parser.Interprete(context);
            }

            return context.mParseIndex;
        }
예제 #3
0
        /// <summary>
        /// Returns a weighted length for the sentence: every character that starts with a
        /// multi-byte lead byte adds 1, and every single-byte (0x00..0x7F) character adds 0.7.
        /// </summary>
        /// <returns>The accumulated weighted length as a <c>float</c>.</returns>
        /// <remarks>
        /// The earlier comment on this method stated a weight of 0.5 for single-byte
        /// characters, but the implementation has always added 0.7; the documentation now
        /// matches the code.
        /// Scanning begins at the current <c>context.mParseIndex</c>, which is not reset
        /// here, and stops at the end of the buffer or at the first value that selects no
        /// parser (anything outside 0x00..0x7F that is also below 0xC0).
        /// NOTE(review): the loop relies on <c>Interprete</c> advancing
        /// <c>context.mParseIndex</c>; that is not visible from here - confirm.
        /// </remarks>
        public float GetWordLenByRule()
        {
            const float SingleByteWeight = 0.7f;
            const float MultiByteWeight  = 1f;

            float weighted = 0;
            int   total    = context.mSentence.GetLength(0);

            while (context.mParseIndex < total)
            {
                int       lead   = (int)(context.mSentence[context.mParseIndex]);
                ParseBase parser = null;
                float     weight = MultiByteWeight;

                // Ranges are tested from the highest lead byte downwards.
                if (lead >= 0xf0)
                {
                    parser = new FourByteParse();
                }
                else if (lead >= 0xe0)
                {
                    parser = new ThreeByteParse();
                }
                else if (lead >= 0xc0)
                {
                    parser = new TwoByteParse();
                }
                else if (lead >= 0 && lead <= 0x7f)
                {
                    parser = new OneByteParse();
                    weight = SingleByteWeight;
                }

                // No parser was selected for this value: stop scanning.
                if (parser == null)
                {
                    break;
                }

                weighted += weight;
                parser.Interprete(context);
            }

            return weighted;
        }