コード例 #1
0
ファイル: HomeController.cs プロジェクト: zm55/Pinyin4NET
        /// <summary>
        /// Converts the text in <c>request.Key</c> to pinyin. Characters that are not Hanzi are
        /// copied to the output unchanged.
        /// </summary>
        /// <param name="request">
        /// Client request. <c>Key</c> is the text to convert; <c>ToneType</c>, <c>CaseType</c> and
        /// <c>VType</c> build the output format; <c>Multi</c> equal to "first" (case-insensitive)
        /// selects a single reading per Hanzi, any other value (including null) lists all readings.
        /// </param>
        /// <returns>
        /// The converted text. Each Hanzi becomes "pinyin " in "first" mode, or "(p1,p2) " otherwise;
        /// an empty string is returned when <c>Key</c> is null.
        /// </returns>
        private string Hanzi2Pinyin(RequestModel request)
        {
            var result = new StringBuilder();

            // Build the output format from the settings sent by the client.
            PinyinOutputFormat format = new PinyinOutputFormat(request.ToneType,
                                                               request.CaseType, request.VType);

            // Nothing to convert; avoids a NullReferenceException from enumerating a null key.
            if (request.Key == null)
            {
                return(result.ToString());
            }

            // Evaluated once: the mode does not change per character, and the static
            // string.Equals is null-safe, so a request without Multi no longer throws.
            bool firstOnly = string.Equals(request.Multi, "first", StringComparison.OrdinalIgnoreCase);

            foreach (char ch in request.Key)
            {
                if (!PinyinUtil.IsHanzi(ch))
                {// Not a Hanzi: append as-is.
                    result.Append(ch);
                    continue;
                }

                // Only Hanzi reach this point.

                if (firstOnly)
                {
                    // A space follows each pinyin. For polyphonic characters the single
                    // reading returned here may not be the correct one in context.
                    result.AppendFormat("{0} ", Pinyin4Net.GetUniqueOrFirstPinyinWithFormat(ch, format));
                    continue;
                }

                // All readings, comma-separated inside parentheses.
                string[] py = Pinyin4Net.GetPinyinWithFormat(ch, format);
                result.AppendFormat("({0}) ", string.Join(",", py));
            }

            return(result.ToString());
        }
コード例 #2
0
        /**
         * Applies tone, "ü" and case formatting to a raw pinyin string in which tones are
         * written as the digits 1-5 and "ü" is written as "u:".
         *
         * @param pinyinStr
         *            unformatted Hanyu Pinyin string
         * @param outputFormat
         *            given format of Hanyu Pinyin
         * @return formatted Hanyu Pinyin string
         * @throws BadHanyuPinyinOutputFormatCombination
         *             when tone marks are requested together with the "v" or "u:"
         *             spelling of "ü"
         */
        internal static string FormatPinyin(string pinyinStr, PinyinOutputFormat outputFormat)
        {
            // Tone marks are rejected for the "v" and "u:" spellings.
            if ((ToneType.WITH_TONE_MARK == outputFormat.ToneType) &&
                ((VCharType.WITH_V == outputFormat.VCharType) ||
                 (VCharType.WITH_U_AND_COLON == outputFormat.VCharType)))
            {
                throw new BadHanyuPinyinOutputFormatCombination("tone marks cannot be added to v or u:");
            }

            if (ToneType.WITHOUT_TONE == outputFormat.ToneType)
            {
                // Drop every tone digit.
                pinyinStr = pinyinStr.Replace("1", "").Replace("2", "").Replace("3", "").Replace("4", "").Replace("5", "");
            }
            else if (ToneType.WITH_TONE_MARK == outputFormat.ToneType)
            {
                // NOTE(review): presumably the tone-mark converter expects "v" for "ü" - confirm.
                pinyinStr = pinyinStr.Replace("u:", "v");
                pinyinStr = convertToneNumber2ToneMark(pinyinStr);
            }

            if (VCharType.WITH_V == outputFormat.VCharType)
            {
                pinyinStr = pinyinStr.Replace("u:", "v");
            }
            else if (VCharType.WITH_U_UNICODE == outputFormat.VCharType)
            {
                pinyinStr = pinyinStr.Replace("u:", "ü");
            }

            if (CaseType.UPPERCASE == outputFormat.CaseType)
            {
                // Invariant casing: the culture-sensitive ToUpper() maps "i" to "İ" under
                // Turkish/Azeri cultures, which would corrupt syllables such as "li" or "xi".
                pinyinStr = pinyinStr.ToUpperInvariant();
            }
            return(pinyinStr);
        }
コード例 #3
0
ファイル: PinyinDBTest.cs プロジェクト: zm55/Pinyin4NET
        public void FormatTest2()
        {
            // Build the format from enum member names given as strings.
            PinyinOutputFormat parsed = new PinyinOutputFormat("WITHOUT_TONE", "LOWERCASE", "WITH_U_UNICODE");

            // Each string argument must surface as the enum member of the same name.
            Assert.AreEqual(ToneFormat.WITHOUT_TONE, parsed.GetToneFormat);
            Assert.AreEqual(CaseFormat.LOWERCASE, parsed.GetCaseFormat);
            Assert.AreEqual(VCharFormat.WITH_U_UNICODE, parsed.GetVCharFormat);
        }
コード例 #4
0
ファイル: PinyinDBTest.cs プロジェクト: zm55/Pinyin4NET
        public void TestString2()
        {
            // Tone argument left null; case and "ü" style are passed by enum member name.
            string             caseName     = CaseFormat.CAPITALIZE_FIRST_LETTER.ToString();
            string             vCharName    = VCharFormat.WITH_U_UNICODE.ToString();
            PinyinOutputFormat outputFormat = new PinyinOutputFormat(null, caseName, vCharName);

            string actual = Pinyin4Net.GetPinyin("Javascript 爱好者 传说", outputFormat);

            // The Latin word keeps its casing; each pinyin syllable is capitalized.
            Assert.AreEqual("Javascript Ài Hăo Zhĕ  Chuán Shuō", actual);
        }
コード例 #5
0
ファイル: PinyinDBTest.cs プロジェクト: zm55/Pinyin4NET
        public void TestString4()
        {
            // Tone argument left null; case and "ü" style are passed by enum member name.
            string             caseName     = CaseFormat.UPPERCASE.ToString();
            string             vCharName    = VCharFormat.WITH_U_UNICODE.ToString();
            PinyinOutputFormat outputFormat = new PinyinOutputFormat(null, caseName, vCharName);

            // caseSpread = true, firstLetterOnly = false, multiFirstLetter = false
            string actual = Pinyin4Net.GetPinyin("Javascript 爱好者 传说", outputFormat, true, false, false);

            // With case spreading the non-pinyin word is upper-cased as well.
            Assert.AreEqual("JAVASCRIPT ÀI HĂO ZHĔ  CHUÁN SHUŌ", actual);
        }
コード例 #6
0
ファイル: PinyinDBTest.cs プロジェクト: zm55/Pinyin4NET
        public void TestString6()
        {
            // Tone argument left null; case and "ü" style are passed by enum member name.
            string             caseName     = CaseFormat.LOWERCASE.ToString();
            string             vCharName    = VCharFormat.WITH_U_UNICODE.ToString();
            PinyinOutputFormat outputFormat = new PinyinOutputFormat(null, caseName, vCharName);

            // caseSpread = true, firstLetterOnly = false, multiFirstLetter = false
            string actual = Pinyin4Net.GetPinyin("JavaScript 爱好者 传说", outputFormat, true, false, false);

            // With case spreading the mixed-case Latin word is lower-cased as well.
            Assert.AreEqual("javascript ài hăo zhĕ  chuán shuō", actual);
        }
コード例 #7
0
ファイル: PinyinDBTest.cs プロジェクト: zm55/Pinyin4NET
        public void TestString1()
        {
            // Default-constructed format.
            PinyinOutputFormat outputFormat = new PinyinOutputFormat();

            string actual = Pinyin4Net.GetPinyin("Javascript 爱好者 传说", outputFormat);

            // Non-Hanzi text is untouched; Hanzi become lower-case pinyin with tone marks.
            Assert.AreEqual("Javascript ài hăo zhĕ  chuán shuō", actual);
        }
コード例 #8
0
        public void FormatTest2()
        {
            PinyinOutputFormat defaultFormat = new PinyinOutputFormat();

            // { input, expected pinyin } pairs, checked in order with the default format.
            string[][] cases =
            {
                new[] { "李", "lĭ" },
                new[] { "单于", "chán yú" },
                new[] { "乐", "yuè" },
                new[] { "厍", "shè" },
                new[] { "欧", "ōu" },
            };

            foreach (string[] pair in cases)
            {
                PinyinFormatAssert(pair[0], pair[1], defaultFormat);
            }
        }
コード例 #9
0
        public void FormatTest1()
        {
            PinyinOutputFormat explicitFormat =
                new PinyinOutputFormat(ToneFormat.WITH_TONE_MARK, CaseFormat.LOWERCASE, VCharFormat.WITH_U_UNICODE);

            // { input, expected pinyin } pairs, checked in order with the explicit format.
            string[][] cases =
            {
                new[] { "李", "lĭ" },
                new[] { "单于", "chán yú" },
                new[] { "乐", "yuè" },
                new[] { "厍", "shè" },
                new[] { "欧", "ōu" },
            };

            foreach (string[] pair in cases)
            {
                PinyinFormatAssert(pair[0], pair[1], explicitFormat);
            }
        }
コード例 #10
0
        public void FormatTest3()
        {
            // Tone argument left null; case and "ü" style are passed by enum member name.
            string             caseName          = CaseFormat.CAPITALIZE_FIRST_LETTER.ToString();
            string             vCharName         = VCharFormat.WITH_U_UNICODE.ToString();
            PinyinOutputFormat capitalizedFormat = new PinyinOutputFormat(null, caseName, vCharName);

            // { input, expected pinyin } pairs: every syllable starts with a capital.
            string[][] cases =
            {
                new[] { "李", "Lĭ" },
                new[] { "单于", "Chán Yú" },
                new[] { "乐", "Yuè" },
                new[] { "厍", "Shè" },
                new[] { "欧", "Ōu" },
            };

            foreach (string[] pair in cases)
            {
                PinyinFormatAssert(pair[0], pair[1], capitalizedFormat);
            }
        }
コード例 #11
0
        public void FormatTest4()
        {
            // Tone argument left null; case and "ü" style are passed by enum member name.
            string             caseName        = CaseFormat.UPPERCASE.ToString();
            string             vCharName       = VCharFormat.WITH_U_UNICODE.ToString();
            PinyinOutputFormat upperCaseFormat = new PinyinOutputFormat(null, caseName, vCharName);

            // { input, expected pinyin } pairs: fully upper-cased output.
            string[][] cases =
            {
                new[] { "李", "LĬ" },
                new[] { "单于", "CHÁN YÚ" },
                new[] { "乐", "YUÈ" },
                new[] { "厍", "SHÈ" },
                new[] { "欧", "ŌU" },
            };

            foreach (string[] pair in cases)
            {
                PinyinFormatAssert(pair[0], pair[1], upperCaseFormat);
            }
        }
コード例 #12
0
ファイル: Pinyin4Net.cs プロジェクト: zm55/Pinyin4NET
        /// <summary>
        /// Converts every Hanzi in a string to pinyin, letting the caller pick the reading of each
        /// character through <paramref name="pinyinHandler"/>. Non-Hanzi characters are copied unchanged.
        /// </summary>
        /// <param name="text">String whose Hanzi are converted; null or empty yields an empty string.</param>
        /// <param name="format">
        /// Output format. This overload only reads its case setting, and only when
        /// <paramref name="caseSpread"/> is true.
        /// </param>
        /// <param name="caseSpread">
        /// When true, the case setting of <paramref name="format"/> is applied to the whole result,
        /// including characters that are not pinyin.
        /// </param>
        /// <param name="pinyinHandler">
        /// Chooses the pinyin for one Hanzi. When null, the first reading is used.
        /// Arguments:
        /// 1 string[] the readings looked up for the character
        /// 2 char the Hanzi being converted
        /// 3 string the full input text
        /// The returned string is placed in the result as the pinyin of that Hanzi.
        /// </param>
        /// <returns>
        /// The converted text with surrounding whitespace trimmed. After the chosen reading of each
        /// Hanzi, the distinct first letters of all its readings are appended as "[a,b] ".
        /// </returns>
        public static string GetPinyin(string text, PinyinOutputFormat format, bool caseSpread, Func <string[], char, string, string> pinyinHandler)
        {
            if (string.IsNullOrEmpty(text))
            {
                return("");
            }

            var pinyin = new StringBuilder();

            foreach (var item in text)
            {
                if (!PinyinUtil.IsHanzi(item))
                {
                    pinyin.Append(item);
                    continue;
                }

                var pinyinTemp = PinyinDB.Instance.GetPinyin(item);

                pinyin.Append(pinyinHandler == null ?
                              pinyinTemp[0] :
                              pinyinHandler.Invoke(pinyinTemp, item, text));

                // Distinct() keeps each first letter once. The previous Where(...) filter read
                // the very list AddRange was filling, which only de-duplicated because AddRange
                // happens to enumerate lazily.
                // NOTE(review): the bracketed initials are appended after every reading - confirm
                // this is intended for the handler overload.
                var firstLetters = GetPinyin(item)
                                   .Select(py => py[0].ToString())
                                   .Distinct()
                                   .ToArray();

                pinyin.AppendFormat("[{0}] ", string.Join(",", firstLetters));
            }

            #region // Spread the case format over the whole result
            if (!caseSpread)
            {
                return(pinyin.ToString().Trim());
            }

            // Every branch trims, so enabling caseSpread never changes the surrounding whitespace.
            // Invariant casing keeps the output independent of the current culture
            // (e.g. Turkish "i" -> "İ").
            switch (format.GetCaseFormat)
            {
            case CaseFormat.CAPITALIZE_FIRST_LETTER:
                return(CapitalizeFirstLetter(pinyin).Trim());

            case CaseFormat.LOWERCASE:
                return(pinyin.ToString().Trim().ToLowerInvariant());

            case CaseFormat.UPPERCASE:
                return(pinyin.ToString().Trim().ToUpperInvariant());

            default:
                return(pinyin.ToString().Trim());
            }
            #endregion
        }
コード例 #13
0
        /**
         * Returns the first formatted Hanyu Pinyin reading of a character.
         *
         * @param ch
         *            The given Unicode character
         * @param outputFormat
         *            Describes the desired format of the returned Hanyu Pinyin string
         * @return The first entry of the formatted readings of the character; null
         *         when the lookup yields null or an empty array
         *
         * @deprecated The first reading may be the wrong pronunciation in a given
         *             sentence context. The original note announces removal of
         *             this function in the next release.
         */
        private static string getFirstHanyuPinyinString(char ch, PinyinOutputFormat outputFormat)
        {
            string[] readings = getFormattedHanyuPinyinStringArray(ch, outputFormat);

            // No readings available for this character.
            if (readings == null || readings.Length == 0)
            {
                return null;
            }

            return readings[0];
        }
コード例 #14
0
ファイル: PinyinDBTest.cs プロジェクト: zm55/Pinyin4NET
        public void FormatTest3()
        {
            PinyinOutputFormat markedFormat =
                new PinyinOutputFormat(ToneFormat.WITH_TONE_MARK, CaseFormat.LOWERCASE, VCharFormat.WITH_U_UNICODE);

            // Single characters and the tone-marked pinyin expected for each, checked in order.
            var cases = new[]
            {
                new { Hanzi = '啊', Pinyin = "a" },
                new { Hanzi = '俄', Pinyin = "é" },
                new { Hanzi = '李', Pinyin = "lĭ" },
                new { Hanzi = '雨', Pinyin = "yŭ" },
                new { Hanzi = '绿', Pinyin = "lǜ" },
                new { Hanzi = '木', Pinyin = "mù" },
                new { Hanzi = '按', Pinyin = "àn" },
                new { Hanzi = '门', Pinyin = "mén" },
                new { Hanzi = '欧', Pinyin = "ōu" },
            };

            foreach (var testCase in cases)
            {
                PinyinFormatAssert(testCase.Hanzi, testCase.Pinyin, markedFormat);
            }
        }
コード例 #15
0
        /**
         * Looks up the unformatted Hanyu Pinyin readings of a character and formats
         * each of them with the given output format.
         *
         * @param ch
         *            the given Chinese character
         * @param outputFormat
         *            Describes the desired format of the returned Hanyu Pinyin strings
         * @return The array obtained from the unformatted lookup, with every entry
         *         replaced by its formatted form; null if the lookup returns null.
         */
        private static string[] getFormattedHanyuPinyinStringArray(char ch, PinyinOutputFormat outputFormat)
        {
            string[] readings = getUnformattedHanyuPinyinStringArray(ch);

            if (readings == null)
            {
                return null;
            }

            // Entries are overwritten in place; the same array instance is returned.
            int index = 0;
            while (index < readings.Length)
            {
                readings[index] = PinyinFormatter.FormatPinyin(readings[index], outputFormat);
                index++;
            }

            return readings;
        }
コード例 #16
0
        /**
         * Builds a copy of a string in which every character that has a Hanyu
         * Pinyin reading is replaced by its first reading.
         *
         * <p>
         * <b>Special Note</b> (from the original documentation): a result
         * containing "none0" means the character is in the Unicode CJK table but
         * has no pronunciation in Chinese.
         *
         * @param str
         *            A given string containing Chinese characters
         * @param outputFormat
         *            Describes the desired format of the returned Hanyu Pinyin string
         * @param seperater
         *            Appended after each converted character unless that character
         *            is the last one of the string. It is never appended after a
         *            character that was copied through unconverted.
         * @return the input with every convertible character replaced by its first
         *         Hanyu Pinyin reading
         *
         * @deprecated The first reading may be the wrong pronunciation in a given
         *             sentence context. The original note announces removal of
         *             this interface in the next release.
         */
        public static string ToHanyuPinyinString(string str, PinyinOutputFormat outputFormat, string seperater)
        {
            var output    = new StringBuilder();
            int lastIndex = str.Length - 1;

            for (int pos = 0; pos <= lastIndex; pos++)
            {
                char   current = str[pos];
                string reading = getFirstHanyuPinyinString(current, outputFormat);

                if (reading == null)
                {
                    // No reading: keep the original character, without a separator.
                    output.Append(current);
                    continue;
                }

                output.Append(reading);

                // Skip the separator after the final character of the input.
                if (pos < lastIndex)
                {
                    output.Append(seperater);
                }
            }

            return output.ToString();
        }
コード例 #17
0
        /// <summary>
        /// Asserts that formatting the pinyin of a name string yields the expected text.
        /// </summary>
        /// <param name="hanzi">Text passed to <c>Pinyin4Name.GetPinyinWithFormat</c>.</param>
        /// <param name="expected">Expected formatted pinyin.</param>
        /// <param name="format">Output format to apply.</param>
        private void PinyinFormatAssert(string hanzi, string expected, PinyinOutputFormat format)
        {
            string actual = Pinyin4Name.GetPinyinWithFormat(hanzi, format);

            Assert.AreEqual(expected, actual);
        }
コード例 #18
0
ファイル: PinyinDBTest.cs プロジェクト: zm55/Pinyin4NET
        /// <summary>
        /// Asserts that the unique-or-first formatted pinyin of a single character equals the expected text.
        /// </summary>
        /// <param name="hanzi">Character passed to <c>Pinyin4Net.GetUniqueOrFirstPinyinWithFormat</c>.</param>
        /// <param name="expected">Expected formatted pinyin.</param>
        /// <param name="format">Output format to apply.</param>
        private void PinyinFormatAssert(char hanzi, string expected, PinyinOutputFormat format)
        {
            string actual = Pinyin4Net.GetUniqueOrFirstPinyinWithFormat(hanzi, format);

            Assert.AreEqual(expected, actual);
        }
コード例 #19
0
 /**
  * Public entry point that returns all Hanyu Pinyin readings of a single
  * Chinese character, formatted according to the given output format.
  *
  * <p>
  * Example from the original documentation: for '间' the unformatted
  * readings are "jian1" and "jian4"; for '李' the single reading is "li3".
  *
  * <p>
  * <b>Special Note</b> (from the original documentation): a reading of
  * "none0" means the character is in the Unicode CJK table but has no
  * pronunciation in Chinese.
  *
  * @param ch
  *            the given Chinese character
  * @param outputFormat
  *            describes the desired format of the returned Hanyu Pinyin strings
  *
  * @return whatever getFormattedHanyuPinyinStringArray returns for the same
  *         arguments; per the original documentation, null for a
  *         non-Chinese character
  *
  * @throws BadHanyuPinyinOutputFormatCombination
  *             per the original documentation, if certain combinations of
  *             output formats happen
  */
 public static string[] ToHanyuPinyinStringArray(char ch,
                                                 PinyinOutputFormat outputFormat)
 {
     // Pure delegation; the lookup and formatting live in the private helper.
     string[] formattedReadings = getFormattedHanyuPinyinStringArray(ch, outputFormat);

     return formattedReadings;
 }
コード例 #20
0
ファイル: Pinyin4Net.cs プロジェクト: zm55/Pinyin4NET
        /// <summary>
        /// Converts every Hanzi in a string to pinyin (first reading for polyphonic characters) or
        /// to pinyin first letters. Non-Hanzi characters are copied unchanged.
        /// </summary>
        /// <param name="text">String whose Hanzi are converted; null or empty yields an empty string.</param>
        /// <param name="format">Pinyin output format. Not used when <paramref name="firstLetterOnly"/> is true.</param>
        /// <param name="caseSpread">
        /// When true, the case setting of <paramref name="format"/> is also applied to the characters
        /// that are not pinyin. Only effective when <paramref name="firstLetterOnly"/> is false.
        /// </param>
        /// <param name="firstLetterOnly">When true, only pinyin first letters are emitted.</param>
        /// <param name="multiFirstLetter">
        /// Only effective when <paramref name="firstLetterOnly"/> is true: emit the first letters of all
        /// readings of a polyphonic character, keeping each distinct letter once.
        /// </param>
        /// <returns>
        /// The converted text with surrounding whitespace trimmed. Each Hanzi becomes its formatted
        /// pinyin followed by a space, or "[L] " when <paramref name="firstLetterOnly"/> is true, or
        /// "[L,H] " when <paramref name="multiFirstLetter"/> is also true.
        /// </returns>
        public static string GetPinyin(string text, PinyinOutputFormat format, bool caseSpread, bool firstLetterOnly, bool multiFirstLetter)
        {
            if (string.IsNullOrEmpty(text))
            {
                return("");
            }

            var pinyin = new StringBuilder();

            foreach (var item in text)
            {
                if (!PinyinUtil.IsHanzi(item))
                {
                    pinyin.Append(item);
                    continue;
                }

                if (!firstLetterOnly)
                {
                    // Two appends instead of building a temporary "pinyin + space" string.
                    pinyin.Append(GetUniqueOrFirstPinyinWithFormat(item, format)).Append(' ');
                    continue;
                }
                if (!multiFirstLetter)
                {
                    pinyin.AppendFormat("[{0}] ", GetUniqueOrFirstPinyin(item)[0]);
                    continue;
                }

                // Readings of a polyphonic character may share a first letter;
                // Distinct() keeps each letter only once.
                var firstLetters = GetPinyin(item)
                                   .Select(py => py[0].ToString())
                                   .Distinct()
                                   .ToArray();

                pinyin.AppendFormat("[{0}] ", string.Join(",", firstLetters));
            }

            #region // Spread the case format over the whole result
            if (firstLetterOnly || !caseSpread)
            {
                return(pinyin.ToString().Trim());
            }

            // Invariant casing keeps the output independent of the current culture
            // (e.g. Turkish "i" -> "İ").
            switch (format.GetCaseFormat)
            {
            case CaseFormat.CAPITALIZE_FIRST_LETTER:
                // Trimmed like every other branch, and like the handler overload does.
                return(CapitalizeFirstLetter(pinyin).Trim());

            case CaseFormat.LOWERCASE:
                return(pinyin.ToString().Trim().ToLowerInvariant());

            case CaseFormat.UPPERCASE:
                return(pinyin.ToString().Trim().ToUpperInvariant());

            default:
                return(pinyin.ToString().Trim());
            }
            #endregion
        }
コード例 #21
0
ファイル: Pinyin4Net.cs プロジェクト: zm55/Pinyin4NET
 /// <summary>
 /// Returns the formatted pinyin of a Hanzi: its only reading, or the first reading when the
 /// character is polyphonic.
 /// </summary>
 /// <param name="hanzi">Hanzi character to look up.</param>
 /// <param name="format">Pinyin output format to apply.</param>
 /// <see cref="PinyinOutputFormat"/>
 /// <seealso cref="PinyinFormatter"/>
 /// <returns>The formatted unique (or first) pinyin of the character.</returns>
 /// <exception cref="UnsupportedUnicodeException">Thrown when the character is not a Hanzi.</exception>
 public static string GetUniqueOrFirstPinyinWithFormat(char hanzi, PinyinOutputFormat format)
 {
     // Look up the raw reading first, then hand it to the formatter.
     var rawPinyin = GetUniqueOrFirstPinyin(hanzi);

     return PinyinFormatter.Format(rawPinyin, format);
 }
コード例 #22
0
ファイル: Pinyin4Net.cs プロジェクト: zm55/Pinyin4NET
 /// <summary>
 /// Returns every pinyin reading of a Hanzi, each formatted with the given output format.
 /// </summary>
 /// <param name="hanzi">Hanzi character to look up.</param>
 /// <param name="format">Pinyin output format to apply.</param>
 /// <see cref="PinyinOutputFormat"/>
 /// <seealso cref="PinyinFormatter"/>
 /// <returns>The formatted readings, in lookup order.</returns>
 /// <exception cref="UnsupportedUnicodeException">Thrown when the character is not a Hanzi.</exception>
 public static string[] GetPinyinWithFormat(char hanzi, PinyinOutputFormat format)
 {
     var formattedReadings = from reading in GetPinyin(hanzi)
                             select PinyinFormatter.Format(reading, format);

     return formattedReadings.ToArray();
 }
コード例 #23
0
ファイル: Pinyin4Name.cs プロジェクト: zm55/Pinyin4NET
 /// <summary>
 /// Returns the formatted pinyin of a surname.
 /// </summary>
 /// <param name="firstName">Surname to look up.</param>
 /// <param name="format">Pinyin output format to apply.</param>
 /// <see cref="PinyinOutputFormat"/>
 /// <seealso cref="PinyinFormatter"/>
 /// <returns>
 /// The space-separated syllables of the surname's pinyin, each formatted with
 /// <paramref name="format"/>; null when the surname is not found.
 /// </returns>
 /// <exception cref="UnsupportedUnicodeException">Thrown when a character is not a Hanzi.</exception>
 public static string GetPinyinWithFormat(string firstName, PinyinOutputFormat format)
 {
     string unformatted = GetPinyin(firstName);

     // Honor the documented contract: an unknown surname yields null. Splitting the
     // lookup result unguarded would throw a NullReferenceException instead.
     // NOTE(review): assumes GetPinyin(string) signals "not found" with null - confirm.
     if (unformatted == null)
     {
         return(null);
     }

     // Format each syllable on its own, then restore the single-space separators.
     return(string.Join(" ", unformatted.Split(' ').Select(item => PinyinFormatter.Format(item, format))));
 }
コード例 #24
0
        public static string GetPinyin(string input)
        {
            // Fixed format: no tones, lower case, "ü" as the Unicode character.
            return Pinyin4Net.GetPinyin(
                input,
                new PinyinOutputFormat(ToneFormat.WITHOUT_TONE, CaseFormat.LOWERCASE, VCharFormat.WITH_U_UNICODE));
        }
コード例 #25
0
ファイル: Pinyin4Net.cs プロジェクト: zm55/Pinyin4NET
 /// <summary>
 /// Converts every Hanzi in a string to formatted pinyin, using the first reading of polyphonic
 /// characters. The case mode of <paramref name="format"/> is not spread to non-pinyin characters.
 /// </summary>
 /// <param name="text">String whose Hanzi are converted.</param>
 /// <param name="format">Pinyin output format to apply.</param>
 /// <returns>The formatted pinyin string.</returns>
 public static string GetPinyin(string text, PinyinOutputFormat format)
 {
     // Defaults forwarded to the full overload.
     const bool caseSpreadOff       = false;
     const bool firstLetterOnlyOff  = false;
     const bool multiFirstLetterOff = false;

     return GetPinyin(text, format, caseSpreadOff, firstLetterOnlyOff, multiFirstLetterOff);
 }