/// <summary>
/// Converts the text in the "hanzi" request parameter to Hanyu Pinyin and writes
/// the result to the response as UTF-8 plain text.
/// </summary>
/// <remarks>
/// Request parameters read by this handler:
/// "hanzi" (text to convert), "multi" ("first" or "all"; a missing or empty value
/// is treated as "first"), and "caseType" / "toneType" / "vType", which are passed
/// to the output-format type constructors.
/// If any exception is raised while converting, the exception message is written
/// to the response instead of the converted text.
/// </remarks>
/// <param name="context">The current HTTP context; its Request and Response are used.</param>
public void ProcessRequest(HttpContext context)
{
    HttpRequest req = context.Request;
    HttpResponse res = context.Response;

    // Use UTF-8 for the response to avoid garbled characters.
    res.Charset = "UTF-8";
    res.ContentEncoding = System.Text.Encoding.UTF8;
    res.ContentType = "text/plain";

    string pinyin = string.Empty;
    try
    {
        string hanzi = req["hanzi"];

        // Controls how characters with several readings are returned:
        // "first" -> only the first reading, anything else -> all readings.
        // A missing/empty parameter defaults to "first" (previously this
        // dereferenced null and produced an error message instead of output).
        string multi = req["multi"];
        bool firstOnly = string.IsNullOrEmpty(multi)
            || multi.Equals("first", StringComparison.OrdinalIgnoreCase);

        // Only process when there is something to convert.
        if (!string.IsNullOrEmpty(hanzi))
        {
            // Build the output format from the client-supplied settings.
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
            format.setCaseType(new HanyuPinyinCaseType(req["caseType"]));
            format.setToneType(new HanyuPinyinToneType(req["toneType"]));
            format.setVCharType(new HanyuPinyinVCharType(req["vType"]));

            System.Text.StringBuilder buffer = new System.Text.StringBuilder();
            foreach (char ch in hanzi)
            {
                // Only Chinese characters are looked up; everything else is copied through.
                string[] py = Util.IsHanzi(ch)
                    ? PinyinHelper.toHanyuPinyinStringArray(ch, format)
                    : null;

                if (py == null || py.Length == 0)
                {
                    // Not a Chinese character, or no reading was found for it:
                    // append the character unchanged.
                    buffer.Append(ch);
                }
                else if (firstOnly || py.Length == 1)
                {
                    // Readings are separated by one space. For a character with
                    // several readings the first one may not be the right one
                    // in context.
                    buffer.Append(py[0]).Append(' ');
                }
                else
                {
                    buffer.Append('(').Append(string.Join(",", py)).Append(") ");
                }
            }
            pinyin = buffer.ToString();
        }
    }
    catch (Exception ex)
    {
        pinyin = ex.Message;
    }

    res.Write(pinyin);
}
/**
 * Builds a copy of the given string in which every character that has a
 * Hanyu Pinyin reading is replaced by its first reading.
 *
 * Characters for which getFirstHanyuPinyinString returns null are copied
 * to the result unchanged.
 *
 * Note carried over from the original documentation (not verifiable from
 * this method alone): a result containing "none0" means the character is
 * in the Unicode CJK table but has no pronunciation in Chinese.
 *
 * @param str
 *            the string to convert; must not be null
 * @param outputFormat
 *            describes the desired format of the returned pinyin
 * @param seperater
 *            appended after each converted character unless that character
 *            is the last character of the input; never appended after a
 *            character that was copied unchanged
 * @return the converted string
 *
 * @deprecated Do not use: the first reading of a character may be the
 *             wrong pronunciation in a given sentence. The original
 *             documentation states this interface will be removed in a
 *             later release.
 */
public static string toHanyuPinyinString(string str, HanyuPinyinOutputFormat outputFormat, string seperater)
{
    StringBuilder converted = new StringBuilder();
    int lastIndex = str.Length - 1;

    for (int pos = 0; pos <= lastIndex; pos++)
    {
        char current = str[pos];
        string firstReading = getFirstHanyuPinyinString(current, outputFormat);

        if (firstReading == null)
        {
            // No reading available: keep the original character.
            converted.Append(current);
            continue;
        }

        converted.Append(firstReading);
        if (pos < lastIndex)
        {
            // Do not append the separator after the final character.
            converted.Append(seperater);
        }
    }

    return converted.ToString();
}
/**
 * Returns the Hanyu Pinyin readings of the given character, each passed
 * through PinyinFormatter.formatHanyuPinyin with the given output format.
 *
 * The array obtained from getUnformattedHanyuPinyinStringArray is
 * formatted in place and returned.
 *
 * @param ch
 *            the character to look up
 * @param outputFormat
 *            describes the desired format of the returned pinyin strings
 * @return the formatted readings, or null when
 *         getUnformattedHanyuPinyinStringArray returns null for ch
 */
private static string[] getFormattedHanyuPinyinStringArray(char ch, HanyuPinyinOutputFormat outputFormat)
{
    string[] readings = getUnformattedHanyuPinyinStringArray(ch);
    if (readings == null)
    {
        return null;
    }

    int index = 0;
    while (index < readings.Length)
    {
        string unformatted = readings[index];
        readings[index] = PinyinFormatter.formatHanyuPinyin(unformatted, outputFormat);
        index++;
    }

    return readings;
}
/**
 * Returns the first formatted Hanyu Pinyin reading of the given character.
 *
 * @param ch
 *            the character to look up
 * @param outputFormat
 *            describes the desired format of the returned pinyin string
 * @return the first element of the array produced by
 *         getFormattedHanyuPinyinStringArray, or null when that array is
 *         null or empty
 *
 * @deprecated Do not use: the first reading of a character may be the
 *             wrong pronunciation in a given sentence. The original
 *             documentation states this function will be removed in a
 *             later release.
 */
private static string getFirstHanyuPinyinString(char ch, HanyuPinyinOutputFormat outputFormat)
{
    string[] candidates = getFormattedHanyuPinyinStringArray(ch, outputFormat);
    bool hasReading = candidates != null && candidates.Length > 0;
    return hasReading ? candidates[0] : null;
}
/**
 * Returns all Hanyu Pinyin readings of a single character, formatted
 * according to the given output format. This is the public entry point
 * for getFormattedHanyuPinyinStringArray and returns its result as-is.
 *
 * Examples from the accompanying tests (default output format):
 * '间' yields { "jian1", "jian4" }; '李' yields { "li3" }.
 *
 * Note carried over from the original documentation (not verifiable from
 * this method alone): a reading of "none0" means the character is in the
 * Unicode CJK table but has no pronunciation in Chinese.
 *
 * @param ch
 *            the character to look up
 * @param outputFormat
 *            describes the desired format of the returned pinyin strings
 * @return the formatted readings; null when no readings are available
 *         (the tests expect null for non-Chinese characters)
 *
 * @throws BadHanyuPinyinOutputFormatCombination
 *             per the original documentation, when the output format
 *             holds a disallowed combination of settings (presumably
 *             raised by the formatter; not visible in this method)
 */
public static string[] toHanyuPinyinStringArray(char ch, HanyuPinyinOutputFormat outputFormat)
{
    string[] formattedReadings = getFormattedHanyuPinyinStringArray(ch, outputFormat);
    return formattedReadings;
}
/// <summary>
/// Checks PinyinHelper.toHanyuPinyinStringArray for non-Chinese input,
/// single-reading characters (in several output formats) and
/// multi-reading characters.
/// </summary>
/// <remarks>
/// A BadHanyuPinyinOutputFormatCombination is deliberately not caught here:
/// none of these formats is expected to raise it, so if it is thrown the
/// test must fail rather than log the exception and pass.
/// </remarks>
public void testToHanyuPinyinStringArray()
{
    // Any non-Chinese character returns null.
    char[] nonChinese = new char[] { 'A', 'z', ',', '。' };
    foreach (char ch in nonChinese)
    {
        Assert.IsNull(PinyinHelper.toHanyuPinyinStringArray(ch, new HanyuPinyinOutputFormat()));
    }

    // Chinese characters with a single pronunciation.
    assertPinyinArray(new string[] { "li3" }, '李', new HanyuPinyinOutputFormat());

    HanyuPinyinOutputFormat upperCaseFormat = new HanyuPinyinOutputFormat();
    upperCaseFormat.setCaseType(HanyuPinyinCaseType.UPPERCASE);
    assertPinyinArray(new string[] { "LI3" }, '李', upperCaseFormat);

    assertPinyinArray(new string[] { "lu:3" }, '吕', new HanyuPinyinOutputFormat());

    HanyuPinyinOutputFormat vCharFormat = new HanyuPinyinOutputFormat();
    vCharFormat.setVCharType(HanyuPinyinVCharType.WITH_V);
    assertPinyinArray(new string[] { "lv3" }, '吕', vCharFormat);

    // Chinese characters with multiple pronunciations.
    assertPinyinArray(new string[] { "jian1", "jian4" }, '间', new HanyuPinyinOutputFormat());
    assertPinyinArray(new string[] { "hao3", "hao4" }, '好', new HanyuPinyinOutputFormat());
}

/// <summary>
/// Asserts that converting <paramref name="ch"/> with <paramref name="format"/>
/// yields exactly the readings in <paramref name="expected"/>, in order.
/// </summary>
private static void assertPinyinArray(string[] expected, char ch, HanyuPinyinOutputFormat format)
{
    string[] actual = PinyinHelper.toHanyuPinyinStringArray(ch, format);

    // Fail with an assertion (not a NullReferenceException) when nothing is returned.
    Assert.IsNotNull(actual);
    Assert.AreEqual(expected.Length, actual.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i]);
    }
}
/// <summary>
/// Checks the first reading of '吕' for each exercised combination of case
/// type, v-character type and tone type.
/// </summary>
/// <remarks>
/// Per the original comments, WITH_U_AND_COLON + WITH_TONE_MARK and
/// WITH_V + WITH_TONE_MARK are forbidden combinations and are not exercised.
/// A BadHanyuPinyinOutputFormatCombination is deliberately not caught here:
/// if one of the exercised combinations raises it, the test must fail
/// rather than log the exception and pass.
/// </remarks>
public void testOutputCombination()
{
    HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();

    // Fix the case type to lowercase, then vary VChar and Tone.
    outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);

    assertFirstPinyinOfLv("lu:3", outputFormat, HanyuPinyinVCharType.WITH_U_AND_COLON, HanyuPinyinToneType.WITH_TONE_NUMBER);
    assertFirstPinyinOfLv("lv3", outputFormat, HanyuPinyinVCharType.WITH_V, HanyuPinyinToneType.WITH_TONE_NUMBER);
    assertFirstPinyinOfLv("lü3", outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITH_TONE_NUMBER);

    assertFirstPinyinOfLv("lu:", outputFormat, HanyuPinyinVCharType.WITH_U_AND_COLON, HanyuPinyinToneType.WITHOUT_TONE);
    assertFirstPinyinOfLv("lv", outputFormat, HanyuPinyinVCharType.WITH_V, HanyuPinyinToneType.WITHOUT_TONE);
    assertFirstPinyinOfLv("lü", outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITHOUT_TONE);

    // Only WITH_U_UNICODE is combined with WITH_TONE_MARK (see remarks).
    assertFirstPinyinOfLv("lǚ", outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITH_TONE_MARK);

    // Fix the case type to UPPERCASE, then vary VChar and Tone.
    outputFormat.setCaseType(HanyuPinyinCaseType.UPPERCASE);

    assertFirstPinyinOfLv("LU:3", outputFormat, HanyuPinyinVCharType.WITH_U_AND_COLON, HanyuPinyinToneType.WITH_TONE_NUMBER);
    assertFirstPinyinOfLv("LV3", outputFormat, HanyuPinyinVCharType.WITH_V, HanyuPinyinToneType.WITH_TONE_NUMBER);
    assertFirstPinyinOfLv("LÜ3", outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITH_TONE_NUMBER);

    assertFirstPinyinOfLv("LU:", outputFormat, HanyuPinyinVCharType.WITH_U_AND_COLON, HanyuPinyinToneType.WITHOUT_TONE);
    assertFirstPinyinOfLv("LV", outputFormat, HanyuPinyinVCharType.WITH_V, HanyuPinyinToneType.WITHOUT_TONE);
    assertFirstPinyinOfLv("LÜ", outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITHOUT_TONE);

    // Only WITH_U_UNICODE is combined with WITH_TONE_MARK (see remarks).
    assertFirstPinyinOfLv("LǙ", outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITH_TONE_MARK);
}

/// <summary>
/// Applies the given v-character and tone settings to <paramref name="format"/>
/// (its case type is left as the caller set it) and asserts that the first
/// reading of '吕' equals <paramref name="expected"/>.
/// </summary>
private static void assertFirstPinyinOfLv(string expected, HanyuPinyinOutputFormat format, HanyuPinyinVCharType vCharType, HanyuPinyinToneType toneType)
{
    format.setVCharType(vCharType);
    format.setToneType(toneType);
    Assert.AreEqual(expected, PinyinHelper.toHanyuPinyinStringArray('吕', format)[0]);
}