/// <summary>
/// Breaks <paramref name="text"/> into overlapping character n-grams.
/// One gram is started at every character position; it grows up to
/// <c>_n</c> characters (the gram size field declared outside this method)
/// and is cut short at the end of the text or at the first character that
/// <c>CharacterUtil.IsCjkUnifiedIdeographs</c> rejects (e.g. punctuation).
/// </summary>
/// <param name="text">The text to analyze. May be null or empty.</param>
/// <returns>
/// The non-empty grams in order of their start position. The list is empty
/// when <paramref name="text"/> is null, empty, or has no accepted characters.
/// </returns>
public List<string> Analyze(string text)
{
    List<string> list = new List<string>();

    // Nothing to split; avoids a NullReferenceException on null input.
    if (String.IsNullOrEmpty(text))
    {
        return list;
    }

    // Deliberately iterate over every start position (not just up to
    // text.Length - _n + 1) so that trailing positions still yield grams,
    // which are then shorter than _n.
    for (int start = 0; start < text.Length; start++)
    {
        // The window is _n characters wide, clipped at the end of the text.
        int remaining = text.Length - start;
        int limit = remaining > _n ? _n : remaining;

        // Measure the run of accepted characters; the gram stops at the
        // first rejected character.
        int length = 0;
        while (length < limit && CharacterUtil.IsCjkUnifiedIdeographs(text[start + length]))
        {
            length++;
        }

        // Positions that start on a rejected character produce no gram.
        if (length > 0)
        {
            list.Add(text.Substring(start, length));
        }
    }

    return list;
}
/// <summary>
/// Tests whether <paramref name="ch"/> appears in one of the three numeral
/// lookup tables (<c>chnGenText</c>, <c>chnRMBText</c>, <c>chnFullCharText</c>).
/// </summary>
/// <param name="ch">The character to classify.</param>
/// <returns>
/// <c>false</c> when <c>NumeralUtil.IsArabicNumeral</c> or
/// <c>CharacterUtil.IsLantingLetter</c> accepts the character; otherwise
/// <c>true</c> if the character equals an entry of any table at an index
/// below <c>chnGenText.Length</c>, and <c>false</c> if no entry matches.
/// </returns>
public static bool IsChineseNumeral(char ch)
{
    // Characters already classified by the other helpers are never treated
    // as Chinese numerals, whatever the tables contain.
    if (NumeralUtil.IsArabicNumeral(ch) || CharacterUtil.IsLantingLetter(ch))
    {
        return false;
    }

    // NOTE(review): all three tables are scanned with the length of
    // chnGenText only - this presumably relies on the tables having the
    // same length; confirm against their declarations.
    int count = chnGenText.Length;
    for (int index = 0; index < count; index++)
    {
        if (ch == chnGenText[index])
        {
            return true;
        }

        if (ch == chnRMBText[index])
        {
            return true;
        }

        if (ch == chnFullCharText[index])
        {
            return true;
        }
    }

    return false;
}