Ejemplo n.º 1
0
        /// <summary>
        /// Gets the Hanyu Pinyin spelling of the Chinese characters in a string.
        /// </summary>
        /// <remarks>
        /// The input is encoded as GB2312 and scanned byte by byte. A byte &gt;= 129 followed by a
        /// byte &gt;= 64 is treated as one double-byte character; everything else is treated as a
        /// single-byte (half-width) character. Letters are upper-cased with invariant-culture
        /// rules so the output does not depend on the current thread culture.
        /// </remarks>
        /// <param name="strChinese">The string to convert.</param>
        /// <param name="options">A bitwise OR combination of <see cref="SpellOptions"/> values.</param>
        /// <example>
        /// <code>
        /// string strChinese = "一只棕色狐狸跳过那只狗"; // sample Chinese input
        /// string result = Spell.MakeSpellCode(strChinese, SpellOptions.EnableUnicodeLetter);
        /// </code>
        /// </example>
        /// <returns>The concatenated, upper-cased output built from the input.</returns>
        /// <exception cref="NotImplementedException">
        /// The input contains a double-byte character whose lead byte is 166 (Greek letters);
        /// that range is not implemented.
        /// </exception>
        public static string MakeSpellCode(string strChinese, SpellOptions options)
        {
            Encoding encoding = Encoding.GetEncoding("GB2312");
            byte[] local = encoding.GetBytes(strChinese);

            // Decode the flag checks once instead of repeating the mask test in every branch.
            bool enableUnicodeLetter =
                (options & SpellOptions.EnableUnicodeLetter) == SpellOptions.EnableUnicodeLetter;
            bool unknownToQuestionMark =
                (options & SpellOptions.TranslateUnknowWordToInterrogation) == SpellOptions.TranslateUnknowWordToInterrogation;
            bool firstLetterOnly =
                (options & SpellOptions.FirstLetterOnly) == SpellOptions.FirstLetterOnly;

            var result = new StringBuilder();
            int i = 0;

            while (i < local.Length)
            {
                // Double-byte (GBK) character? The length check keeps a dangling lead byte at the
                // very end of the buffer from indexing past the array.
                if ((local[i] >= 129)
                    && (i + 1 < local.Length)
                    && (local[i + 1] >= 64))
                {
                    byte lead = local[i];
                    byte trail = local[i + 1];
                    string tmp;

                    switch (lead)
                    {
                        case 163: // full-width ASCII
                            {
                                char ascii = (char)(trail - 128);
                                bool isLetterOrDigit =
                                    ((ascii >= 'a') && (ascii <= 'z'))
                                    || ((ascii >= 'A') && (ascii <= 'Z'))
                                    || ((ascii >= '0') && (ascii <= '9'));

                                // Non-alphanumeric full-width characters are dropped when the flag is set.
                                // NOTE(review): the half-width branch below uses the same flag to *allow*
                                // arbitrary characters; confirm which direction is intended.
                                tmp = (enableUnicodeLetter && !isLetterOrDigit)
                                    ? ""
                                    : ascii.ToString(CultureInfo.InvariantCulture);
                                break;
                            }

                        case 162: // Roman numerals
                            {
                                if (trail > 160)
                                {
                                    tmp = _charIndex[trail - 160];
                                }
                                else
                                {
                                    // Inside the Roman-numeral row but not a Roman numeral: untranslatable.
                                    tmp = unknownToQuestionMark ? "?" : "";
                                }

                                break;
                            }

                        case 166: // Greek letters
                            {
                                throw new NotImplementedException("TO DO");
                            }

                        default:
                            {
                                int index = SpellCodeIndex[lead - 129, trail - 64] - 1;

                                // NOTE(review): the sentinel is tested after subtracting 1, so a raw table
                                // value of 0 yields -1 here, not 0 — verify against the table layout.
                                if (index == 0) // no such character / untranslatable / GBK reserved
                                {
                                    tmp = unknownToQuestionMark ? "?" : "";
                                }
                                else
                                {
                                    string spell = SpellMusicCode[index].ToUpperInvariant();
                                    tmp = firstLetterOnly ? spell.Substring(0, 1) : spell;
                                }

                                break;
                            }
                    }

                    result.Append(tmp);
                    i += 2;
                }
                else // outside the GBK double-byte range, i.e. a half-width character
                {
                    bool isLetterOrDigit =
                        ((local[i] >= 97) && (local[i] <= 122))
                        || ((local[i] >= 65) && (local[i] <= 90))
                        || ((local[i] >= 48) && (local[i] <= 57));

                    if (enableUnicodeLetter || isLetterOrDigit)
                    {
                        result.Append(((char)local[i]).ToString(CultureInfo.InvariantCulture).ToUpperInvariant());
                    }

                    i++;
                }
            }

            return result.ToString();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Gets the Hanyu Pinyin spelling of the Chinese characters in a string.
        /// </summary>
        /// <remarks>
        /// The input is encoded as GB2312 and scanned byte by byte. A byte &gt;= 129 followed by a
        /// byte &gt;= 64 is treated as one double-byte character; everything else is treated as a
        /// single-byte (half-width) character. Letters are upper-cased with invariant-culture
        /// rules so the output does not depend on the current thread culture.
        /// </remarks>
        /// <param name="strChinese">The string to convert.</param>
        /// <param name="options">A bitwise OR combination of <see cref="SpellOptions"/> values.</param>
        /// <example>
        /// <code>
        /// string strChinese = "一只棕色狐狸跳过那只狗"; // sample Chinese input
        /// string result = Spell.MakeSpellCode(strChinese, SpellOptions.EnableUnicodeLetter);
        /// </code>
        /// </example>
        /// <returns>The concatenated, upper-cased output built from the input.</returns>
        /// <exception cref="NotImplementedException">
        /// The input contains a double-byte character whose lead byte is 166 (Greek letters);
        /// that range is not implemented.
        /// </exception>
        public static string MakeSpellCode(string strChinese, SpellOptions options)
        {
            Encoding encoding = Encoding.GetEncoding("GB2312");
            byte[] local = encoding.GetBytes(strChinese);

            // Decode the flag checks once instead of repeating the mask test in every branch.
            bool enableUnicodeLetter =
                (options & SpellOptions.EnableUnicodeLetter) == SpellOptions.EnableUnicodeLetter;
            bool unknownToQuestionMark =
                (options & SpellOptions.TranslateUnknowWordToInterrogation) == SpellOptions.TranslateUnknowWordToInterrogation;
            bool firstLetterOnly =
                (options & SpellOptions.FirstLetterOnly) == SpellOptions.FirstLetterOnly;

            var result = new StringBuilder();
            int i = 0;

            while (i < local.Length)
            {
                // Double-byte (GBK) character? The length check keeps a dangling lead byte at the
                // very end of the buffer from indexing past the array.
                if ((local[i] >= 129)
                    && (i + 1 < local.Length)
                    && (local[i + 1] >= 64))
                {
                    byte lead = local[i];
                    byte trail = local[i + 1];
                    string tmp;

                    switch (lead)
                    {
                        case 163: // full-width ASCII
                            {
                                char ascii = (char)(trail - 128);
                                bool isLetterOrDigit =
                                    ((ascii >= 'a') && (ascii <= 'z'))
                                    || ((ascii >= 'A') && (ascii <= 'Z'))
                                    || ((ascii >= '0') && (ascii <= '9'));

                                // Non-alphanumeric full-width characters are dropped when the flag is set.
                                // NOTE(review): the half-width branch below uses the same flag to *allow*
                                // arbitrary characters; confirm which direction is intended.
                                tmp = (enableUnicodeLetter && !isLetterOrDigit)
                                    ? ""
                                    : ascii.ToString(CultureInfo.InvariantCulture);
                                break;
                            }

                        case 162: // Roman numerals
                            {
                                if (trail > 160)
                                {
                                    tmp = _charIndex[trail - 160];
                                }
                                else
                                {
                                    // Inside the Roman-numeral row but not a Roman numeral: untranslatable.
                                    tmp = unknownToQuestionMark ? "?" : "";
                                }

                                break;
                            }

                        case 166: // Greek letters
                            {
                                throw new NotImplementedException("TO DO");
                            }

                        default:
                            {
                                int index = SpellCodeIndex[lead - 129, trail - 64] - 1;

                                // NOTE(review): the sentinel is tested after subtracting 1, so a raw table
                                // value of 0 yields -1 here, not 0 — verify against the table layout.
                                if (index == 0) // no such character / untranslatable / GBK reserved
                                {
                                    tmp = unknownToQuestionMark ? "?" : "";
                                }
                                else
                                {
                                    string spell = SpellMusicCode[index].ToUpperInvariant();
                                    tmp = firstLetterOnly ? spell.Substring(0, 1) : spell;
                                }

                                break;
                            }
                    }

                    result.Append(tmp);
                    i += 2;
                }
                else // outside the GBK double-byte range, i.e. a half-width character
                {
                    bool isLetterOrDigit =
                        ((local[i] >= 97) && (local[i] <= 122))
                        || ((local[i] >= 65) && (local[i] <= 90))
                        || ((local[i] >= 48) && (local[i] <= 57));

                    if (enableUnicodeLetter || isLetterOrDigit)
                    {
                        result.Append(((char)local[i]).ToString(CultureInfo.InvariantCulture).ToUpperInvariant());
                    }

                    i++;
                }
            }

            return result.ToString();
        }
 /// <summary>
 /// Creates a <c>SpellAnalyzer</c> and stores the supplied options in <c>_options</c>.
 /// </summary>
 /// <param name="options">The <c>SpellOptions</c> value to keep for later use by this analyzer.</param>
 public SpellAnalyzer(SpellOptions options)
 {
     _options = options;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Converts Chinese text to pinyin.
        /// </summary>
        /// <remarks>
        /// The input is encoded as GB2312 and scanned byte by byte. A byte &gt;= 0x81 followed by a
        /// byte &gt;= 0x40 is treated as one double-byte character; any other byte is emitted as a
        /// single character. Every emitted piece is handed to <c>AppendString</c> together with
        /// <paramref name="splitChar"/>; double-byte characters that map to an empty string are skipped.
        /// </remarks>
        /// <param name="strChinese">The Chinese text to convert.</param>
        /// <param name="splitChar">Separator value passed through to <c>AppendString</c> for each emitted piece.</param>
        /// <param name="options">A bitwise combination of <see cref="SpellOptions"/> values.</param>
        /// <returns>The text accumulated by the <c>AppendString</c> calls.</returns>
        public static string MakeSpellCode(string strChinese, string splitChar, SpellOptions options)
        {
            byte[] bytes = Encoding.GetEncoding("GB2312").GetBytes(strChinese);
            int length = bytes.Length;
            StringBuilder sb = new StringBuilder();
            int index = 0;

            while (index < length)
            {
                // The length check keeps a dangling lead byte at the end of the buffer from
                // indexing past the array; such a byte is handled as a single-byte character.
                bool isDoubleByte = (bytes[index] >= 0x81)
                    && (index + 1 < length)
                    && (bytes[index + 1] >= 0x40);

                if (!isDoubleByte)
                {
                    // Single-byte character: emitted unchanged.
                    AppendString(sb, ((char)bytes[index]).ToString(), splitChar);
                    index++;
                    continue;
                }

                byte lead = bytes[index];
                byte trail = bytes[index + 1];
                string append;

                switch (lead)
                {
                    case 0xa2:
                        // Only trail bytes above 160 are looked up in _charIndex; the rest yield nothing.
                        if (trail > 160)
                        {
                            append = _charIndex[trail - 160];
                        }
                        else
                        {
                            append = "";
                        }
                        break;

                    case 0xa3:
                        {
                            // Row 0xa3 maps back to a plain ASCII character by subtracting 0x80.
                            char ascii = (char)(trail - 0x80);
                            bool isLetterOrDigit =
                                ((ascii >= 'a') && (ascii <= 'z'))
                                || ((ascii >= 'A') && (ascii <= 'Z'))
                                || ((ascii >= '0') && (ascii <= '9'));
                            bool enableUnicodeLetter =
                                (options & SpellOptions.EnableUnicodeLetter) == SpellOptions.EnableUnicodeLetter;

                            // With the flag set, anything that is not an ASCII letter or digit is dropped.
                            append = (enableUnicodeLetter && !isLetterOrDigit) ? "" : ascii.ToString();
                            break;
                        }

                    case 0xa6:
                        // Row 0xa6 is not translated.
                        append = "";
                        break;

                    default:
                        {
                            int spellIndex = _spellCodeIndex[lead - 0x81, trail - 0x40] - 1;

                            if (spellIndex == -1)
                            {
                                // A table value of 0 means there is no spelling for this character.
                                append = "";
                            }
                            else if ((options & SpellOptions.FirstLetterOnly) == SpellOptions.FirstLetterOnly)
                            {
                                append = _spellMusicCode[spellIndex].Substring(0, 1);
                            }
                            else
                            {
                                append = _spellMusicCode[spellIndex];
                            }
                            break;
                        }
                }

                if (!string.IsNullOrEmpty(append))
                {
                    AppendString(sb, append, splitChar);
                }

                index += 2;
            }

            return sb.ToString();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Gets the Hanyu Pinyin spelling of the Chinese characters in a string.
        /// </summary>
        /// <remarks>
        /// The input is encoded as GB2312 and scanned byte by byte. A byte &gt;= 129 followed by a
        /// byte &gt;= 64 is treated as one double-byte character; everything else is treated as a
        /// single-byte (half-width) character. Letters are upper-cased with invariant-culture
        /// rules so the output does not depend on the current thread culture.
        /// </remarks>
        /// <param name="chineseStr">The string to convert.</param>
        /// <param name="options">A bitwise OR combination of <see cref="SpellOptions"/> values.</param>
        /// <returns>The concatenated, upper-cased output built from the input.</returns>
        public static string GetSpellWord(string chineseStr, SpellOptions options)
        {
            Encoding encoding = Encoding.GetEncoding("GB2312");
            byte[] local = encoding.GetBytes(chineseStr);

            // Decode the flag checks once instead of repeating the mask test in every branch.
            bool enableUnicodeLetter =
                (options & SpellOptions.EnableUnicodeLetter) == SpellOptions.EnableUnicodeLetter;
            bool unknownToQuestionMark =
                (options & SpellOptions.TranslateUnknowWordToInterrogation) == SpellOptions.TranslateUnknowWordToInterrogation;
            bool firstLetterOnly =
                (options & SpellOptions.FirstLetterOnly) == SpellOptions.FirstLetterOnly;
            bool specialToConnect =
                (options & SpellOptions.TranslateSpecialWordToConnect) == SpellOptions.TranslateSpecialWordToConnect;

            StringBuilder result = new StringBuilder();
            int i = 0;

            while (i < local.Length)
            {
                // Double-byte (GBK) character? The length check keeps a dangling lead byte at the
                // very end of the buffer from indexing past the array.
                if ((local[i] >= 129)
                    && (i + 1 < local.Length)
                    && (local[i + 1] >= 64))
                {
                    byte lead = local[i];
                    byte trail = local[i + 1];

                    // Declared per character so no branch can leak the previous character's output.
                    string tmp;

                    switch (lead)
                    {
                        case 163: // full-width ASCII
                        {
                            char ascii = (char) (trail - 128);
                            bool isLetterOrDigit =
                                ((ascii >= 'a') && (ascii <= 'z'))
                                || ((ascii >= 'A') && (ascii <= 'Z'))
                                || ((ascii >= '0') && (ascii <= '9'));

                            // Non-alphanumeric full-width characters are dropped when the flag is set.
                            // NOTE(review): the half-width branch below uses the same flag to *allow*
                            // arbitrary characters; confirm which direction is intended.
                            tmp = (enableUnicodeLetter && !isLetterOrDigit) ? "" : ascii.ToString();
                            break;
                        }

                        case 162: // Roman numerals
                        {
                            if (trail > 160)
                            {
                                tmp = _charIndex[trail - 160];
                            }
                            else
                            {
                                // Inside the Roman-numeral row but not a Roman numeral: untranslatable.
                                tmp = unknownToQuestionMark ? "?" : "";
                            }

                            break;
                        }

                        case 166: // Greek letters
                        {
                            // TODO: Greek letters are not implemented. Treat them like any other
                            // untranslatable character rather than re-emitting whatever the previous
                            // character produced.
                            tmp = unknownToQuestionMark ? "?" : "";
                            break;
                        }

                        default:
                        {
                            int index = _spellCodeIndex[lead - 129, trail - 64] - 1;

                            // NOTE(review): the sentinel is tested after subtracting 1, so a raw table
                            // value of 0 yields -1 here, not 0 — verify against the table layout.
                            if (index == 0) // no such character / untranslatable / GBK reserved
                            {
                                tmp = unknownToQuestionMark ? "?" : "";
                            }
                            else
                            {
                                string spell = _spellMusicCode[index].ToUpperInvariant();
                                tmp = firstLetterOnly ? spell.Substring(0, 1) : spell;
                            }

                            break;
                        }
                    }

                    result.Append(tmp);
                    i += 2;
                }
                else // outside the GBK double-byte range, i.e. a half-width character
                {
                    bool isLetterOrDigit =
                        ((local[i] >= 97) && (local[i] <= 122))
                        || ((local[i] >= 65) && (local[i] <= 90))
                        || ((local[i] >= 48) && (local[i] <= 57));

                    if (enableUnicodeLetter || isLetterOrDigit)
                    {
                        result.Append(((char) local[i]).ToString().ToUpperInvariant());
                    }
                    else if (specialToConnect)
                    {
                        // Rejected characters become a hyphen when the option asks for it.
                        result.Append("-");
                    }

                    i++;
                }
            }

            return result.ToString();
        }