Пример #1
0
        /// <summary>
        /// Produces one candidate n-gram for every start position in <paramref name="text"/>.
        /// Each n-gram is the longest run, starting at that position and at most <c>_n</c>
        /// characters long, of characters accepted by
        /// <c>CharacterUtil.IsCjkUnifiedIdeographs</c>. Positions whose first character is
        /// rejected contribute nothing, and n-grams near the end of the text may be shorter
        /// than <c>_n</c>.
        /// </summary>
        /// <param name="text">The text to split; <c>null</c> is treated like an empty string.</param>
        /// <returns>The n-grams in order of their start position (possibly empty, never null).</returns>
        public List <string> Analyze(string text)
        {
            List <string> list = new List <string>();

            // Nothing to tokenize; also avoids dereferencing a null input.
            if (String.IsNullOrEmpty(text))
            {
                return(list);
            }

            for (int start = 0; start < text.Length; start++)
            {
                // Never read past the end of the text: the last few n-grams are truncated.
                int limit = Math.Min(_n, text.Length - start);

                // Extend the run until the size limit is reached or a rejected character
                // (e.g. punctuation) terminates this n-gram.
                int length = 0;
                while (length < limit && CharacterUtil.IsCjkUnifiedIdeographs(text[start + length]))
                {
                    length++;
                }

                if (length > 0)
                {
                    // One allocation per n-gram instead of one per appended character.
                    list.Add(text.Substring(start, length));
                }
            }
            return(list);
        }
Пример #2
0
 /// <summary>
 /// Determines whether <paramref name="ch"/> occurs in any of the numeral tables
 /// <c>chnGenText</c>, <c>chnRMBText</c> or <c>chnFullCharText</c>.
 /// </summary>
 /// <param name="ch">The character to classify.</param>
 /// <returns>
 /// <c>false</c> if <c>NumeralUtil.IsArabicNumeral</c> or <c>CharacterUtil.IsLantingLetter</c>
 /// accepts the character; otherwise <c>true</c> exactly when the character is found in one
 /// of the three tables.
 /// </returns>
 public static bool IsChineseNumeral(char ch)
 {
     // Characters claimed by the other classifiers are never reported as Chinese numerals,
     // regardless of the table contents.
     if (NumeralUtil.IsArabicNumeral(ch) || CharacterUtil.IsLantingLetter(ch))
     {
         return(false);
     }

     // Each table is scanned with its own length as the bound, so tables of different
     // sizes neither cause an out-of-range access nor leave trailing entries unchecked.
     for (int j = 0; j < chnGenText.Length; j++)
     {
         if (ch == chnGenText[j])
         {
             return(true);
         }
     }
     for (int j = 0; j < chnRMBText.Length; j++)
     {
         if (ch == chnRMBText[j])
         {
             return(true);
         }
     }
     for (int j = 0; j < chnFullCharText.Length; j++)
     {
         if (ch == chnFullCharText[j])
         {
             return(true);
         }
     }
     return(false);
 }