/// <summary>
/// Converts the text in <c>request.Key</c> to pinyin, character by character.
/// Characters that are not Hanzi are copied through unchanged. Each Hanzi is
/// replaced either by its unique/first pinyin followed by a space (when
/// <c>request.Multi</c> equals "first", ignoring case) or by all of its
/// pinyin readings in the form "(a,b) ".
/// </summary>
/// <param name="request">Request carrying the text and the output format settings sent by the client.</param>
/// <returns>The converted text; an empty string when <c>request.Key</c> is null.</returns>
private string Hanzi2Pinyin(RequestModel request)
{
    // Nothing to convert: avoid a NullReferenceException from enumerating a null key.
    if (request.Key == null)
    {
        return string.Empty;
    }

    var result = new StringBuilder();

    // Build the output format from the settings supplied by the client.
    PinyinOutputFormat format = new PinyinOutputFormat(request.ToneType, request.CaseType, request.VType);

    // The mode does not change while iterating, so evaluate it once.
    // The static string.Equals is null-safe: a missing Multi value means "all readings".
    bool firstOnly = string.Equals(request.Multi, "first", StringComparison.OrdinalIgnoreCase);

    foreach (char ch in request.Key)
    {
        if (!PinyinUtil.IsHanzi(ch))
        {
            // Not a Hanzi: append as-is.
            result.Append(ch);
            continue;
        }

        if (firstOnly)
        {
            // A space is appended after each pinyin. For characters with several
            // readings the first one may not be the right one in context.
            result.AppendFormat("{0} ", Pinyin4Net.GetUniqueOrFirstPinyinWithFormat(ch, format));
            continue;
        }

        string[] py = Pinyin4Net.GetPinyinWithFormat(ch, format);
        result.AppendFormat("({0}) ", string.Join(",", py));
    }

    return result.ToString();
}
/// <summary>
/// Applies the tone, v-character and case settings of <paramref name="outputFormat"/>
/// to an unformatted Hanyu Pinyin string.
/// </summary>
/// <param name="pinyinStr">Unformatted Hanyu Pinyin string.</param>
/// <param name="outputFormat">The requested Hanyu Pinyin format.</param>
/// <returns>The formatted Hanyu Pinyin string.</returns>
/// <exception cref="BadHanyuPinyinOutputFormatCombination">
/// Thrown when tone marks are requested together with the "v" or "u:" representation.
/// </exception>
internal static string FormatPinyin(string pinyinStr, PinyinOutputFormat outputFormat)
{
    if ((ToneType.WITH_TONE_MARK == outputFormat.ToneType) &&
        ((VCharType.WITH_V == outputFormat.VCharType) || (VCharType.WITH_U_AND_COLON == outputFormat.VCharType)))
    {
        throw new BadHanyuPinyinOutputFormatCombination("tone marks cannot be added to v or u:");
    }

    if (ToneType.WITHOUT_TONE == outputFormat.ToneType)
    {
        // Drop the tone digits 1-5.
        pinyinStr = pinyinStr.Replace("1", "").Replace("2", "").Replace("3", "").Replace("4", "").Replace("5", "");
    }
    else if (ToneType.WITH_TONE_MARK == outputFormat.ToneType)
    {
        // NOTE(review): "u:" is rewritten to "v" before the conversion, presumably because
        // convertToneNumber2ToneMark works on the "v" spelling - confirm against that helper.
        pinyinStr = pinyinStr.Replace("u:", "v");
        pinyinStr = convertToneNumber2ToneMark(pinyinStr);
    }

    if (VCharType.WITH_V == outputFormat.VCharType)
    {
        pinyinStr = pinyinStr.Replace("u:", "v");
    }
    else if (VCharType.WITH_U_UNICODE == outputFormat.VCharType)
    {
        pinyinStr = pinyinStr.Replace("u:", "ü");
    }

    if (CaseType.UPPERCASE == outputFormat.CaseType)
    {
        // Pinyin is not locale text: use the invariant culture so that, for example,
        // "i" does not become a dotted capital I under a Turkish current culture.
        pinyinStr = pinyinStr.ToUpperInvariant();
    }

    return pinyinStr;
}
/// <summary>
/// Checks that a format built from the string names of the options exposes
/// the matching enum values through its getters.
/// </summary>
public void FormatTest2()
{
    var parsed = new PinyinOutputFormat("WITHOUT_TONE", "LOWERCASE", "WITH_U_UNICODE");

    var actualTone = parsed.GetToneFormat;
    Assert.AreEqual(ToneFormat.WITHOUT_TONE, actualTone);

    var actualCase = parsed.GetCaseFormat;
    Assert.AreEqual(CaseFormat.LOWERCASE, actualCase);

    var actualVChar = parsed.GetVCharFormat;
    Assert.AreEqual(VCharFormat.WITH_U_UNICODE, actualVChar);
}
/// <summary>
/// Converts a mixed Latin/Hanzi string with the capitalize-first-letter case
/// option, using the two-argument GetPinyin overload.
/// </summary>
public void TestString2()
{
    // Arrange
    string caseName = CaseFormat.CAPITALIZE_FIRST_LETTER.ToString();
    string vCharName = VCharFormat.WITH_U_UNICODE.ToString();
    var outputFormat = new PinyinOutputFormat(null, caseName, vCharName);

    // Act
    var actual = Pinyin4Net.GetPinyin("Javascript 爱好者 传说", outputFormat);

    // Assert
    Assert.AreEqual("Javascript Ài Hăo Zhĕ Chuán Shuō", actual);
}
/// <summary>
/// Converts a mixed Latin/Hanzi string with the uppercase option and
/// caseSpread enabled, so the Latin word is expected in upper case as well.
/// </summary>
public void TestString4()
{
    // Arrange
    string caseName = CaseFormat.UPPERCASE.ToString();
    string vCharName = VCharFormat.WITH_U_UNICODE.ToString();
    var outputFormat = new PinyinOutputFormat(null, caseName, vCharName);

    // Act
    var actual = Pinyin4Net.GetPinyin("Javascript 爱好者 传说", outputFormat, true, false, false);

    // Assert
    Assert.AreEqual("JAVASCRIPT ÀI HĂO ZHĔ CHUÁN SHUŌ", actual);
}
/// <summary>
/// Converts a mixed Latin/Hanzi string with the lowercase option and
/// caseSpread enabled, so the Latin word is expected in lower case as well.
/// </summary>
public void TestString6()
{
    // Arrange
    string caseName = CaseFormat.LOWERCASE.ToString();
    string vCharName = VCharFormat.WITH_U_UNICODE.ToString();
    var outputFormat = new PinyinOutputFormat(null, caseName, vCharName);

    // Act
    var actual = Pinyin4Net.GetPinyin("JavaScript 爱好者 传说", outputFormat, true, false, false);

    // Assert
    Assert.AreEqual("javascript ài hăo zhĕ chuán shuō", actual);
}
/// <summary>
/// Converts a mixed Latin/Hanzi string using a default-constructed output format.
/// </summary>
public void TestString1()
{
    // Arrange
    var defaultFormat = new PinyinOutputFormat();

    // Act
    var actual = Pinyin4Net.GetPinyin("Javascript 爱好者 传说", defaultFormat);

    // Assert
    Assert.AreEqual("Javascript ài hăo zhĕ chuán shuō", actual);
}
/// <summary>
/// Checks the formatted pinyin of several names using a default-constructed output format.
/// </summary>
public void FormatTest2()
{
    var format = new PinyinOutputFormat();

    // Each row is { input, expected pinyin }; rows are checked in order.
    var cases = new[]
    {
        new[] { "李", "lĭ" },
        new[] { "单于", "chán yú" },
        new[] { "乐", "yuè" },
        new[] { "厍", "shè" },
        new[] { "欧", "ōu" },
    };

    foreach (var row in cases)
    {
        PinyinFormatAssert(row[0], row[1], format);
    }
}
/// <summary>
/// Checks the formatted pinyin of several names with tone marks, lower case
/// and the Unicode "ü" representation.
/// </summary>
public void FormatTest1()
{
    var format = new PinyinOutputFormat(ToneFormat.WITH_TONE_MARK, CaseFormat.LOWERCASE, VCharFormat.WITH_U_UNICODE);

    // Each row is { input, expected pinyin }; rows are checked in order.
    var cases = new[]
    {
        new[] { "李", "lĭ" },
        new[] { "单于", "chán yú" },
        new[] { "乐", "yuè" },
        new[] { "厍", "shè" },
        new[] { "欧", "ōu" },
    };

    foreach (var row in cases)
    {
        PinyinFormatAssert(row[0], row[1], format);
    }
}
/// <summary>
/// Checks the formatted pinyin of several names with the capitalize-first-letter case option.
/// </summary>
public void FormatTest3()
{
    string caseName = CaseFormat.CAPITALIZE_FIRST_LETTER.ToString();
    string vCharName = VCharFormat.WITH_U_UNICODE.ToString();
    var format = new PinyinOutputFormat(null, caseName, vCharName);

    // Each row is { input, expected pinyin }; rows are checked in order.
    var cases = new[]
    {
        new[] { "李", "Lĭ" },
        new[] { "单于", "Chán Yú" },
        new[] { "乐", "Yuè" },
        new[] { "厍", "Shè" },
        new[] { "欧", "Ōu" },
    };

    foreach (var row in cases)
    {
        PinyinFormatAssert(row[0], row[1], format);
    }
}
/// <summary>
/// Checks the formatted pinyin of several names with the uppercase case option.
/// </summary>
public void FormatTest4()
{
    string caseName = CaseFormat.UPPERCASE.ToString();
    string vCharName = VCharFormat.WITH_U_UNICODE.ToString();
    var format = new PinyinOutputFormat(null, caseName, vCharName);

    // Each row is { input, expected pinyin }; rows are checked in order.
    var cases = new[]
    {
        new[] { "李", "LĬ" },
        new[] { "单于", "CHÁN YÚ" },
        new[] { "乐", "YUÈ" },
        new[] { "厍", "SHÈ" },
        new[] { "欧", "ŌU" },
    };

    foreach (var row in cases)
    {
        PinyinFormatAssert(row[0], row[1], format);
    }
}
/// <summary>
/// Builds the pinyin of every Hanzi in a string, letting the caller pick the
/// reading of each character through <paramref name="pinyinHandler"/>.
/// Characters that are not Hanzi are copied through unchanged.
/// </summary>
/// <param name="text">The string whose Hanzi are to be converted.</param>
/// <param name="format">
/// Output format; in this overload only its case setting is read, and only
/// when <paramref name="caseSpread"/> is true.
/// </param>
/// <param name="caseSpread">
/// Whether the case setting of <paramref name="format"/> is applied to the whole
/// result, including the characters that are not pinyin.
/// </param>
/// <param name="pinyinHandler">
/// Chooses the pinyin for a character. When null, the first reading is used.
/// Arguments: (1) the array of readings, (2) the current Hanzi, (3) the whole
/// input string. The returned string is placed in the result as that
/// character's pinyin.
/// </param>
/// <returns>
/// An empty string for null or empty input; otherwise the converted text with
/// leading and trailing whitespace removed. After each Hanzi's pinyin the
/// distinct first letters of its readings are appended as "[a,b] ".
/// </returns>
public static string GetPinyin(string text, PinyinOutputFormat format, bool caseSpread, Func<string[], char, string, string> pinyinHandler)
{
    if (string.IsNullOrEmpty(text))
    {
        return "";
    }

    var pinyin = new StringBuilder();

    foreach (var item in text)
    {
        if (!PinyinUtil.IsHanzi(item))
        {
            pinyin.Append(item);
            continue;
        }

        var pinyinTemp = PinyinDB.Instance.GetPinyin(item);
        pinyin.Append(pinyinHandler == null ? pinyinTemp[0] : pinyinHandler.Invoke(pinyinTemp, item, text));

        // NOTE(review): the first-letter group is appended directly after the pinyin of
        // every Hanzi; the original summary did not mention it - confirm this is intended.
        // Readings of one character may share a first letter, so keep each letter once.
        var firstLetters = GetPinyin(item)
            .Select(py => py[0].ToString())
            .Distinct()
            .ToArray();
        pinyin.AppendFormat("[{0}] ", string.Join(",", firstLetters));
    }

    // Apply the case setting to the whole result when requested.
    if (!caseSpread)
    {
        return pinyin.ToString().Trim();
    }

    switch (format.GetCaseFormat)
    {
        case CaseFormat.CAPITALIZE_FIRST_LETTER:
            return CapitalizeFirstLetter(pinyin).Trim();

        // Invariant casing: pinyin must not depend on the current culture.
        case CaseFormat.LOWERCASE:
            return pinyin.ToString().Trim().ToLowerInvariant();

        case CaseFormat.UPPERCASE:
            return pinyin.ToString().Trim().ToUpperInvariant();

        default:
            return pinyin.ToString().Trim();
    }
}
/// <summary>
/// Returns the first formatted Hanyu Pinyin reading of a character.
/// Deprecated: the first reading retrieved may be the wrong pronunciation in
/// a given sentence context. Slated for removal in the next release.
/// </summary>
/// <param name="ch">The Unicode character to look up.</param>
/// <param name="outputFormat">The desired format of the returned Hanyu Pinyin string.</param>
/// <returns>
/// The first entry of the formatted readings, or null when the lookup yields
/// null or an empty array.
/// </returns>
private static string getFirstHanyuPinyinString(char ch, PinyinOutputFormat outputFormat)
{
    string[] candidates = getFormattedHanyuPinyinStringArray(ch, outputFormat);
    bool hasReading = candidates != null && candidates.Length > 0;
    return hasReading ? candidates[0] : null;
}
/// <summary>
/// Checks the formatted pinyin of single characters with tone marks, lower
/// case and the Unicode "ü" representation.
/// </summary>
public void FormatTest3()
{
    var format = new PinyinOutputFormat(ToneFormat.WITH_TONE_MARK, CaseFormat.LOWERCASE, VCharFormat.WITH_U_UNICODE);

    // Parallel tables: expectedPinyin[i] is the expected result for hanzi[i].
    char[] hanzi = { '啊', '俄', '李', '雨', '绿', '木', '按', '门', '欧' };
    string[] expectedPinyin = { "a", "é", "lĭ", "yŭ", "lǜ", "mù", "àn", "mén", "ōu" };

    for (int i = 0; i < hanzi.Length; i++)
    {
        PinyinFormatAssert(hanzi[i], expectedPinyin[i], format);
    }
}
/// <summary>
/// Returns the formatted Hanyu Pinyin readings of the given character
/// (Simplified or Traditional) as an array.
/// </summary>
/// <param name="ch">The character to look up.</param>
/// <param name="outputFormat">The desired format of the returned Hanyu Pinyin strings.</param>
/// <returns>
/// A new array holding each unformatted reading passed through
/// <c>PinyinFormatter.FormatPinyin</c>; null when the unformatted lookup returns null.
/// </returns>
private static string[] getFormattedHanyuPinyinStringArray(char ch, PinyinOutputFormat outputFormat)
{
    string[] unformatted = getUnformattedHanyuPinyinStringArray(ch);
    if (null == unformatted)
    {
        return null;
    }

    // Write into a fresh array instead of overwriting the looked-up one:
    // if the lookup hands out a shared or cached array, formatting it in place
    // would corrupt the source data for every later call.
    string[] formatted = new string[unformatted.Length];
    for (int i = 0; i < unformatted.Length; i++)
    {
        formatted[i] = PinyinFormatter.FormatPinyin(unformatted[i], outputFormat);
    }

    return formatted;
}
/// <summary>
/// Returns a copy of <paramref name="str"/> in which every character that has
/// a pinyin reading is replaced by its first formatted Hanyu Pinyin reading.
/// Deprecated: the first reading retrieved may be the wrong pronunciation in
/// a given sentence context. Slated for removal in the next release.
/// </summary>
/// <param name="str">A string that may contain Chinese characters.</param>
/// <param name="outputFormat">The desired format of the returned Hanyu Pinyin strings.</param>
/// <param name="seperater">
/// Text appended after each converted character, except when that character
/// is the last character of the string. It is never appended after a
/// character that was copied through unconverted.
/// </param>
/// <returns>The converted string.</returns>
static public string ToHanyuPinyinString(string str, PinyinOutputFormat outputFormat, string seperater)
{
    int lastIndex = str.Length - 1;
    var buffer = new StringBuilder();

    for (int pos = 0; pos <= lastIndex; pos++)
    {
        char current = str[pos];
        string firstReading = getFirstHanyuPinyinString(current, outputFormat);

        if (firstReading == null)
        {
            // No reading available: keep the original character.
            buffer.Append(current);
            continue;
        }

        buffer.Append(firstReading);

        // Do not append the separator at the very end of the string.
        if (pos < lastIndex)
        {
            buffer.Append(seperater);
        }
    }

    return buffer.ToString();
}
/// <summary>
/// Asserts that <c>Pinyin4Name.GetPinyinWithFormat</c> produces the expected
/// pinyin for the given text.
/// </summary>
/// <param name="hanzi">The text to convert.</param>
/// <param name="expected">The expected formatted pinyin.</param>
/// <param name="format">The output format to apply.</param>
private void PinyinFormatAssert(string hanzi, string expected, PinyinOutputFormat format)
{
    Assert.AreEqual(expected, Pinyin4Name.GetPinyinWithFormat(hanzi, format));
}
/// <summary>
/// Asserts that <c>Pinyin4Net.GetUniqueOrFirstPinyinWithFormat</c> produces the
/// expected pinyin for the given character.
/// </summary>
/// <param name="hanzi">The character to convert.</param>
/// <param name="expected">The expected formatted pinyin.</param>
/// <param name="format">The output format to apply.</param>
private void PinyinFormatAssert(char hanzi, string expected, PinyinOutputFormat format)
{
    Assert.AreEqual(expected, Pinyin4Net.GetUniqueOrFirstPinyinWithFormat(hanzi, format));
}
/// <summary>
/// Gets all Hanyu Pinyin readings of a single Chinese character (Simplified
/// or Traditional), formatted according to <paramref name="outputFormat"/>.
/// This is the public entry point; it delegates to
/// <c>getFormattedHanyuPinyinStringArray</c>.
/// </summary>
/// <remarks>
/// Example from the original documentation: for '间' the unformatted readings
/// are "jian1" and "jian4"; for '李' the single reading is "li3". A reading of
/// "none0" means the character is in the Unicode CJK table but has no
/// pronunciation in Chinese.
/// </remarks>
/// <param name="ch">The Chinese character to look up.</param>
/// <param name="outputFormat">The desired format of the returned Hanyu Pinyin strings.</param>
/// <returns>Whatever the formatted lookup returns for the character, which may be null.</returns>
public static string[] ToHanyuPinyinStringArray(char ch, PinyinOutputFormat outputFormat)
{
    string[] formattedReadings = getFormattedHanyuPinyinStringArray(ch, outputFormat);
    return formattedReadings;
}
/// <summary>
/// Builds the pinyin of every Hanzi in a string (for characters with several
/// readings the unique-or-first reading is used). Characters that are not
/// Hanzi are copied through unchanged.
/// </summary>
/// <param name="text">The string whose Hanzi are to be converted.</param>
/// <param name="format">Pinyin output format; not applied when <paramref name="firstLetterOnly"/> is true.</param>
/// <param name="caseSpread">
/// Whether the case setting of <paramref name="format"/> is also applied to the
/// characters that are not pinyin. Only effective when
/// <paramref name="firstLetterOnly"/> is false.
/// </param>
/// <param name="firstLetterOnly">Whether only the first letter of each pinyin is emitted.</param>
/// <param name="multiFirstLetter">
/// Only effective when <paramref name="firstLetterOnly"/> is true: whether the first
/// letters of all readings are emitted. Identical first letters are kept once.
/// </param>
/// <returns>
/// An empty string for null or empty input; otherwise the converted text with
/// leading and trailing whitespace removed. Full pinyin is followed by a space;
/// first letters are emitted as "[l] ", or "[l,h] " when
/// <paramref name="multiFirstLetter"/> is true.
/// </returns>
public static string GetPinyin(string text, PinyinOutputFormat format, bool caseSpread, bool firstLetterOnly, bool multiFirstLetter)
{
    if (string.IsNullOrEmpty(text))
    {
        return "";
    }

    var pinyin = new StringBuilder();

    foreach (var item in text)
    {
        if (!PinyinUtil.IsHanzi(item))
        {
            pinyin.Append(item);
            continue;
        }

        if (!firstLetterOnly)
        {
            pinyin.Append(GetUniqueOrFirstPinyinWithFormat(item, format)).Append(' ');
            continue;
        }

        if (!multiFirstLetter)
        {
            pinyin.AppendFormat("[{0}] ", GetUniqueOrFirstPinyin(item)[0]);
            continue;
        }

        // Readings of one character may share a first letter; emit each letter once.
        var firstLetters = GetPinyin(item)
            .Select(py => py[0].ToString())
            .Distinct()
            .ToArray();
        pinyin.AppendFormat("[{0}] ", string.Join(",", firstLetters));
    }

    // Apply the case setting to the whole result when requested.
    if (firstLetterOnly || !caseSpread)
    {
        return pinyin.ToString().Trim();
    }

    switch (format.GetCaseFormat)
    {
        case CaseFormat.CAPITALIZE_FIRST_LETTER:
            // Trimmed like every other branch so no trailing separator leaks out.
            return CapitalizeFirstLetter(pinyin).Trim();

        // Invariant casing: pinyin must not depend on the current culture.
        case CaseFormat.LOWERCASE:
            return pinyin.ToString().Trim().ToLowerInvariant();

        case CaseFormat.UPPERCASE:
            return pinyin.ToString().Trim().ToUpperInvariant();

        default:
            return pinyin.ToString().Trim();
    }
}
/// <summary>
/// Gets the unique pinyin (single-reading character) or the first pinyin
/// (multi-reading character) of a Hanzi, formatted with <paramref name="format"/>.
/// </summary>
/// <param name="hanzi">The Hanzi character to look up.</param>
/// <param name="format">The pinyin output format to apply.</param>
/// <see cref="PinyinOutputFormat"/>
/// <seealso cref="PinyinFormatter"/>
/// <returns>The formatted unique or first pinyin.</returns>
/// <exception cref="UnsupportedUnicodeException">Thrown when the character is not a Hanzi.</exception>
public static string GetUniqueOrFirstPinyinWithFormat(char hanzi, PinyinOutputFormat format)
{
    var rawPinyin = GetUniqueOrFirstPinyin(hanzi);
    return PinyinFormatter.Format(rawPinyin, format);
}
/// <summary>
/// Gets every pinyin reading of a Hanzi, each formatted with <paramref name="format"/>.
/// </summary>
/// <param name="hanzi">The Hanzi character to look up.</param>
/// <param name="format">The pinyin output format to apply.</param>
/// <see cref="PinyinOutputFormat"/>
/// <seealso cref="PinyinFormatter"/>
/// <returns>The formatted readings, in lookup order.</returns>
/// <exception cref="UnsupportedUnicodeException">Thrown when the character is not a Hanzi.</exception>
public static string[] GetPinyinWithFormat(char hanzi, PinyinOutputFormat format)
{
    var formattedReadings =
        from reading in GetPinyin(hanzi)
        select PinyinFormatter.Format(reading, format);

    return formattedReadings.ToArray();
}
/// <summary>
/// Gets the formatted pinyin of a surname. The unformatted pinyin is split on
/// spaces, each part is formatted with <paramref name="format"/>, and the parts
/// are joined back with single spaces.
/// </summary>
/// <param name="firstName">The surname to look up.</param>
/// <param name="format">The pinyin output format to apply.</param>
/// <see cref="PinyinOutputFormat"/>
/// <seealso cref="PinyinFormatter"/>
/// <returns>The formatted pinyin, or null when no pinyin is found for the surname.</returns>
/// <exception cref="UnsupportedUnicodeException">Thrown when a character to convert is not a Hanzi.</exception>
public static string GetPinyinWithFormat(string firstName, PinyinOutputFormat format)
{
    var pinyin = GetPinyin(firstName);

    // Honour the documented contract: an unknown surname yields null
    // rather than a NullReferenceException from Split.
    if (pinyin == null)
    {
        return null;
    }

    return string.Join(" ", pinyin.Split(' ').Select(item => PinyinFormatter.Format(item, format)));
}
/// <summary>
/// Converts the input with <c>Pinyin4Net.GetPinyin</c> using a fixed format:
/// no tone, lower case, and the Unicode "ü" representation.
/// </summary>
/// <param name="input">The text to convert.</param>
/// <returns>The result of <c>Pinyin4Net.GetPinyin</c> for that format.</returns>
public static string GetPinyin(string input)
{
    return Pinyin4Net.GetPinyin(
        input,
        new PinyinOutputFormat(ToneFormat.WITHOUT_TONE, CaseFormat.LOWERCASE, VCharFormat.WITH_U_UNICODE));
}
/// <summary>
/// Builds the formatted pinyin of every Hanzi in a string (first reading for
/// characters with several readings). The case setting of
/// <paramref name="format"/> is not spread to the characters that are not pinyin.
/// </summary>
/// <param name="text">The string whose Hanzi are to be converted.</param>
/// <param name="format">The pinyin output format to apply.</param>
/// <returns>The formatted pinyin string.</returns>
public static string GetPinyin(string text, PinyinOutputFormat format)
{
    // Delegate to the full overload with every optional behaviour switched off.
    var converted = GetPinyin(
        text,
        format,
        false,  // caseSpread
        false,  // firstLetterOnly
        false); // multiFirstLetter
    return converted;
}