/// <summary>
/// Converts the Chinese characters in a string to their Hanyu Pinyin spelling.
/// </summary>
/// <param name="strChinese">The string to convert.</param>
/// <param name="options">A bitwise OR combination of <see cref="SpellOptions"/> values.</param>
/// <example>
/// <code>
/// string strChinese = "一只棕色狐狸跳过那只狗";
/// string result = Spell.MakeSpellCode(strChinese, SpellOptions.EnableUnicodeLetter);
/// </code>
/// </example>
/// <returns>
/// The concatenated, upper-cased pinyin of every double-byte character, with single-byte
/// characters upper-cased and copied through (subject to <paramref name="options"/>).
/// </returns>
/// <exception cref="NotImplementedException">
/// The input contains a character whose GB2312 lead byte is 166 (Greek letters); that zone is not implemented.
/// </exception>
public static string MakeSpellCode(string strChinese, SpellOptions options)
{
    Encoding encoding = Encoding.GetEncoding("GB2312");
    byte[] local = encoding.GetBytes(strChinese);

    bool enableUnicodeLetter =
        (options & SpellOptions.EnableUnicodeLetter) == SpellOptions.EnableUnicodeLetter;
    bool unknownAsQuestionMark =
        (options & SpellOptions.TranslateUnknowWordToInterrogation) == SpellOptions.TranslateUnknowWordToInterrogation;
    bool firstLetterOnly =
        (options & SpellOptions.FirstLetterOnly) == SpellOptions.FirstLetterOnly;

    int i = 0;
    var result = new StringBuilder();
    while (i < local.Length)
    {
        // Double-byte (GBK) character: lead byte >= 129 followed by a trail byte >= 64.
        // The length check keeps a dangling lead byte from reading past the buffer.
        if ((local[i] >= 129) && (i + 1 < local.Length) && (local[i + 1] >= 64))
        {
            string tmp;
            switch ((int)local[i])
            {
                case 163: // full-width ASCII
                {
                    // Trail byte minus 128 yields the corresponding half-width character.
                    tmp = ((char)(local[i + 1] - 128)).ToString(CultureInfo.InvariantCulture);
                    bool isAlphanumeric =
                        ((tmp[0] >= 97) && (tmp[0] <= 122)) ||
                        ((tmp[0] >= 65) && (tmp[0] <= 90)) ||
                        ((tmp[0] >= 48) && (tmp[0] <= 57));

                    // NOTE(review): here the flag *suppresses* non-alphanumerics, while the
                    // half-width branch below *keeps* them when the flag is set. Behaviour is
                    // preserved as written; confirm which one is intended.
                    if (enableUnicodeLetter && !isAlphanumeric)
                    {
                        // Do not emit characters that are neither digits nor letters.
                        tmp = "";
                    }
                    break;
                }
                case 162: // Roman numerals
                {
                    if (local[i + 1] > 160)
                    {
                        tmp = _charIndex[local[i + 1] - 160];
                    }
                    else
                    {
                        // Inside the Roman-numeral zone but not a Roman numeral: untranslatable.
                        tmp = unknownAsQuestionMark ? "?" : "";
                    }
                    break;
                }
                case 166: // Greek letters
                {
                    // Still unimplemented; a specific exception type replaces the bare Exception.
                    throw new NotImplementedException("TO DO");
                }
                default:
                {
                    int index = SpellCodeIndex[local[i] - 129, local[i + 1] - 64] - 1;

                    // NOTE(review): 0 is treated as "no such character" as in the original code.
                    // A table entry of 0 would give -1 here, which previously indexed
                    // SpellMusicCode[-1]; negative values are now treated as unknown too.
                    // Confirm which table value really marks an unmapped code point.
                    if (index <= 0)
                    {
                        tmp = unknownAsQuestionMark ? "?" : "";
                    }
                    else if (firstLetterOnly)
                    {
                        // Initial letter only. Invariant casing avoids culture-specific
                        // results such as the Turkish dotted capital I.
                        tmp = SpellMusicCode[index].ToUpperInvariant().Substring(0, 1);
                    }
                    else
                    {
                        tmp = SpellMusicCode[index].ToUpperInvariant();
                    }
                    break;
                }
            }

            result.Append(tmp);
            i += 2;
        }
        else
        {
            // Outside the GBK double-byte range, i.e. a half-width character.
            bool isAlphanumeric =
                ((local[i] >= 97) && (local[i] <= 122)) ||
                ((local[i] >= 65) && (local[i] <= 90)) ||
                ((local[i] >= 48) && (local[i] <= 57));

            if (enableUnicodeLetter || isAlphanumeric)
            {
                result.Append(char.ToUpperInvariant((char)local[i]));
            }
            i++;
        }
    }

    return result.ToString();
}
/// <summary>
/// Converts the Chinese characters in a string to their Hanyu Pinyin spelling.
/// </summary>
/// <param name="strChinese">The string to convert.</param>
/// <param name="options">A bitwise OR combination of <see cref="SpellOptions"/> values.</param>
/// <example>
/// <code>
/// string strChinese = "一只棕色狐狸跳过那只狗";
/// string result = Spell.MakeSpellCode(strChinese, SpellOptions.EnableUnicodeLetter);
/// </code>
/// </example>
/// <returns>
/// The concatenated, upper-cased pinyin of every double-byte character, with single-byte
/// characters upper-cased and copied through (subject to <paramref name="options"/>).
/// </returns>
/// <exception cref="NotImplementedException">
/// The input contains a character whose GB2312 lead byte is 166 (Greek letters); that zone is not implemented.
/// </exception>
public static string MakeSpellCode(string strChinese, SpellOptions options)
{
    Encoding encoding = Encoding.GetEncoding("GB2312");
    byte[] local = encoding.GetBytes(strChinese);

    bool enableUnicodeLetter =
        (options & SpellOptions.EnableUnicodeLetter) == SpellOptions.EnableUnicodeLetter;
    bool unknownAsQuestionMark =
        (options & SpellOptions.TranslateUnknowWordToInterrogation) == SpellOptions.TranslateUnknowWordToInterrogation;
    bool firstLetterOnly =
        (options & SpellOptions.FirstLetterOnly) == SpellOptions.FirstLetterOnly;

    int i = 0;
    var result = new StringBuilder();
    while (i < local.Length)
    {
        // Double-byte (GBK) character: lead byte >= 129 followed by a trail byte >= 64.
        // The length check keeps a dangling lead byte from reading past the buffer.
        if ((local[i] >= 129) && (i + 1 < local.Length) && (local[i + 1] >= 64))
        {
            string tmp;
            switch ((int)local[i])
            {
                case 163: // full-width ASCII
                {
                    // Trail byte minus 128 yields the corresponding half-width character.
                    tmp = ((char)(local[i + 1] - 128)).ToString(CultureInfo.InvariantCulture);
                    bool isAlphanumeric =
                        ((tmp[0] >= 97) && (tmp[0] <= 122)) ||
                        ((tmp[0] >= 65) && (tmp[0] <= 90)) ||
                        ((tmp[0] >= 48) && (tmp[0] <= 57));

                    // NOTE(review): here the flag *suppresses* non-alphanumerics, while the
                    // half-width branch below *keeps* them when the flag is set. Behaviour is
                    // preserved as written; confirm which one is intended.
                    if (enableUnicodeLetter && !isAlphanumeric)
                    {
                        // Do not emit characters that are neither digits nor letters.
                        tmp = "";
                    }
                    break;
                }
                case 162: // Roman numerals
                {
                    if (local[i + 1] > 160)
                    {
                        tmp = _charIndex[local[i + 1] - 160];
                    }
                    else
                    {
                        // Inside the Roman-numeral zone but not a Roman numeral: untranslatable.
                        tmp = unknownAsQuestionMark ? "?" : "";
                    }
                    break;
                }
                case 166: // Greek letters
                {
                    // Still unimplemented; a specific exception type replaces the bare Exception.
                    throw new NotImplementedException("TO DO");
                }
                default:
                {
                    int index = SpellCodeIndex[local[i] - 129, local[i + 1] - 64] - 1;

                    // NOTE(review): 0 is treated as "no such character" as in the original code.
                    // A table entry of 0 would give -1 here, which previously indexed
                    // SpellMusicCode[-1]; negative values are now treated as unknown too.
                    // Confirm which table value really marks an unmapped code point.
                    if (index <= 0)
                    {
                        tmp = unknownAsQuestionMark ? "?" : "";
                    }
                    else if (firstLetterOnly)
                    {
                        // Initial letter only. Invariant casing avoids culture-specific
                        // results such as the Turkish dotted capital I.
                        tmp = SpellMusicCode[index].ToUpperInvariant().Substring(0, 1);
                    }
                    else
                    {
                        tmp = SpellMusicCode[index].ToUpperInvariant();
                    }
                    break;
                }
            }

            result.Append(tmp);
            i += 2;
        }
        else
        {
            // Outside the GBK double-byte range, i.e. a half-width character.
            bool isAlphanumeric =
                ((local[i] >= 97) && (local[i] <= 122)) ||
                ((local[i] >= 65) && (local[i] <= 90)) ||
                ((local[i] >= 48) && (local[i] <= 57));

            if (enableUnicodeLetter || isAlphanumeric)
            {
                result.Append(char.ToUpperInvariant((char)local[i]));
            }
            i++;
        }
    }

    return result.ToString();
}
/// <summary>
/// Initializes a new <see cref="SpellAnalyzer"/> and stores the supplied options.
/// </summary>
/// <param name="options">The <see cref="SpellOptions"/> value kept in the <c>_options</c> field.</param>
public SpellAnalyzer(SpellOptions options)
{
    this._options = options;
}
/// <summary>
/// Converts Chinese text to pinyin.
/// </summary>
/// <param name="strChinese">The Chinese text to convert.</param>
/// <param name="splitChar">
/// Forwarded to <c>AppendString</c> together with every emitted fragment; presumably the
/// separator placed between fragments (confirm against <c>AppendString</c>).
/// </param>
/// <param name="options">Conversion options.</param>
/// <returns>The text accumulated by the <c>AppendString</c> calls.</returns>
public static string MakeSpellCode(string strChinese, string splitChar, SpellOptions options)
{
    byte[] bytes = Encoding.GetEncoding("GB2312").GetBytes(strChinese);
    int length = bytes.Length;
    StringBuilder sb = new StringBuilder();

    bool enableUnicodeLetter =
        (options & SpellOptions.EnableUnicodeLetter) == SpellOptions.EnableUnicodeLetter;
    bool firstLetterOnly =
        (options & SpellOptions.FirstLetterOnly) == SpellOptions.FirstLetterOnly;

    int index = 0;
    while (index < length)
    {
        // A double-byte character needs a lead byte >= 0x81 and a trail byte >= 0x40.
        // The length check keeps a dangling lead byte from reading past the buffer.
        bool isDoubleByte =
            (bytes[index] >= 0x81) && (index + 1 < length) && (bytes[index + 1] >= 0x40);

        if (!isDoubleByte)
        {
            // Single-byte character: passed through unchanged.
            AppendString(sb, ((char)bytes[index]).ToString(), splitChar);
            index++;
            continue;
        }

        byte lead = bytes[index];
        byte trail = bytes[index + 1];
        string append;

        switch (lead)
        {
            case 0xa2:
            {
                // Trail bytes above 160 are looked up in _charIndex; anything else emits nothing.
                if (trail > 160)
                {
                    append = _charIndex[trail - 160];
                }
                else
                {
                    append = "";
                }
                break;
            }
            case 0xa3:
            {
                // Trail byte minus 0x80 yields the corresponding single-byte character.
                append = ((char)(trail - 0x80)).ToString();
                bool isAlphanumeric =
                    ((append[0] >= 'a') && (append[0] <= 'z')) ||
                    ((append[0] >= 'A') && (append[0] <= 'Z')) ||
                    ((append[0] >= '0') && (append[0] <= '9'));
                if (enableUnicodeLetter && !isAlphanumeric)
                {
                    append = "";
                }
                break;
            }
            case 0xa6:
            {
                // Lead byte 0xa6 is not translated; nothing is emitted.
                append = "";
                break;
            }
            default:
            {
                // Table entries are stored off by one; a resulting -1 means "no pinyin available".
                int spellIndex = _spellCodeIndex[lead - 0x81, trail - 0x40] - 1;
                if (spellIndex == -1)
                {
                    append = "";
                }
                else if (firstLetterOnly)
                {
                    append = _spellMusicCode[spellIndex].Substring(0, 1);
                }
                else
                {
                    append = _spellMusicCode[spellIndex];
                }
                break;
            }
        }

        if (!string.IsNullOrEmpty(append))
        {
            AppendString(sb, append, splitChar);
        }
        index += 2;
    }

    return sb.ToString();
}
/// <summary>
/// Converts the Chinese characters in a string to their Hanyu Pinyin spelling.
/// </summary>
/// <param name="chineseStr">The string to convert.</param>
/// <param name="options">A bitwise OR combination of <see cref="SpellOptions"/> values.</param>
/// <returns>
/// The concatenated, upper-cased pinyin of every double-byte character, with single-byte
/// characters upper-cased and copied through, dropped, or replaced by "-" depending on
/// <paramref name="options"/>.
/// </returns>
public static string GetSpellWord(string chineseStr, SpellOptions options)
{
    Encoding encoding = Encoding.GetEncoding("GB2312");
    byte[] local = encoding.GetBytes(chineseStr);

    bool enableUnicodeLetter =
        (options & SpellOptions.EnableUnicodeLetter) == SpellOptions.EnableUnicodeLetter;
    bool unknownAsQuestionMark =
        (options & SpellOptions.TranslateUnknowWordToInterrogation) == SpellOptions.TranslateUnknowWordToInterrogation;
    bool firstLetterOnly =
        (options & SpellOptions.FirstLetterOnly) == SpellOptions.FirstLetterOnly;
    bool specialAsConnector =
        (options & SpellOptions.TranslateSpecialWordToConnect) == SpellOptions.TranslateSpecialWordToConnect;

    StringBuilder result = new StringBuilder();
    int i = 0;
    while (i < local.Length)
    {
        // Double-byte (GBK) character: lead byte >= 129 followed by a trail byte >= 64.
        // The length check keeps a dangling lead byte from reading past the buffer.
        if ((local[i] >= 129) && (i + 1 < local.Length) && (local[i + 1] >= 64))
        {
            // Scoped per character so that no branch can leak the previous character's output.
            string tmp;
            switch ((int)local[i])
            {
                case 163: // full-width ASCII
                {
                    // Trail byte minus 128 yields the corresponding half-width character.
                    tmp = ((char)(local[i + 1] - 128)).ToString();
                    bool isAlphanumeric =
                        ((tmp[0] >= 97) && (tmp[0] <= 122)) ||
                        ((tmp[0] >= 65) && (tmp[0] <= 90)) ||
                        ((tmp[0] >= 48) && (tmp[0] <= 57));

                    // NOTE(review): here the flag *suppresses* non-alphanumerics, while the
                    // half-width branch below *keeps* them when the flag is set. Behaviour is
                    // preserved as written; confirm which one is intended.
                    if (enableUnicodeLetter && !isAlphanumeric)
                    {
                        // Do not emit characters that are neither digits nor letters.
                        tmp = "";
                    }
                    break;
                }
                case 162: // Roman numerals
                {
                    if (local[i + 1] > 160)
                    {
                        tmp = _charIndex[(int)local[i + 1] - 160];
                    }
                    else
                    {
                        // Inside the Roman-numeral zone but not a Roman numeral: untranslatable.
                        tmp = unknownAsQuestionMark ? "?" : "";
                    }
                    break;
                }
                case 166: // Greek letters
                {
                    // TODO: not implemented. Previously this branch left tmp untouched, so the
                    // preceding character's output was appended a second time. Greek letters
                    // are now handled like any other untranslatable character.
                    tmp = unknownAsQuestionMark ? "?" : "";
                    break;
                }
                default:
                {
                    int index = _spellCodeIndex[(int)local[i] - 129, local[i + 1] - 64] - 1;

                    // NOTE(review): 0 is treated as "no such character" as in the original code.
                    // A table entry of 0 would give -1 here, which previously indexed
                    // _spellMusicCode[-1]; negative values are now treated as unknown too.
                    // Confirm which table value really marks an unmapped code point.
                    if (index <= 0)
                    {
                        tmp = unknownAsQuestionMark ? "?" : "";
                    }
                    else if (firstLetterOnly)
                    {
                        // Initial letter only. Invariant casing avoids culture-specific
                        // results such as the Turkish dotted capital I.
                        tmp = _spellMusicCode[index].ToUpperInvariant().Substring(0, 1);
                    }
                    else
                    {
                        tmp = _spellMusicCode[index].ToUpperInvariant();
                    }
                    break;
                }
            }

            result.Append(tmp);
            i += 2;
        }
        else
        {
            // Outside the GBK double-byte range, i.e. a half-width character.
            bool isAlphanumeric =
                ((local[i] >= 97) && (local[i] <= 122)) ||
                ((local[i] >= 65) && (local[i] <= 90)) ||
                ((local[i] >= 48) && (local[i] <= 57));

            if (enableUnicodeLetter || isAlphanumeric)
            {
                result.Append(char.ToUpperInvariant((char)local[i]));
            }
            else if (specialAsConnector)
            {
                // Any other half-width character becomes a connector dash.
                result.Append("-");
            }
            i++;
        }
    }

    return result.ToString();
}