/// <summary>
/// Builds a copy of <paramref name="str"/> in which every character that has a
/// Hanyu Pinyin reading is replaced by its main (first) reading.
/// </summary>
/// <remarks>
/// Deprecated: the first retrieved pinyin may be the wrong pronunciation in a given
/// sentence context, and this interface is slated for removal.
/// If the result contains "none0", the character is in the Unicode CJK table but
/// has no pronunciation in Chinese.
/// </remarks>
/// <param name="str">A string that contains Chinese characters.</param>
/// <param name="outputFormat">Describes the desired format of the returned pinyin.</param>
/// <param name="seperater">
/// Text appended after each converted character, except when that character sits at
/// the last index of <paramref name="str"/>. It is never appended after a character
/// that was copied through unchanged.
/// </param>
/// <returns>The input with all recognizable Chinese characters converted to pinyin.</returns>
public static String toHanyuPinyinString(String str, HanyuPinyinOutputFormat outputFormat, String seperater)
{
    StringBuilder output = new StringBuilder();
    int lastIndex = str.Length - 1;

    for (int pos = 0; pos <= lastIndex; pos++)
    {
        char current = str[pos];
        String firstPinyin = getFirstHanyuPinyinString(current, outputFormat);

        if (firstPinyin == null)
        {
            // No pinyin available: copy the character through as-is.
            output.Append(current);
            continue;
        }

        output.Append(firstPinyin);
        if (pos < lastIndex)
        {
            // Skip the separator at the very end of the string.
            output.Append(seperater);
        }
    }

    return output.ToString();
}
// Type initializer: prepares the shared output format
// (no tone, "v" for ü, lowercase).
static StringExtensions()
{
    format = new HanyuPinyinOutputFormat
    {
        ToneType = HanyuPinyinToneType.WITHOUT_TONE,
        VCharType = HanyuPinyinVCharType.WITH_V,
        CaseType = HanyuPinyinCaseType.LOWERCASE
    };
}
/// <summary>
/// Handles a pinyin lookup request: reads the "hanzi" request value, converts each
/// Chinese character in it to pinyin and writes the result as UTF-8 plain text.
/// </summary>
/// <remarks>
/// Request values read: "hanzi" (text to convert), "multi" ("first" = first reading
/// only, "all" = every reading; a missing or empty value is treated as "first"),
/// and "caseType" / "toneType" / "vType" (output format settings).
/// If anything throws, the exception message is written as the response body.
/// </remarks>
/// <param name="context">The current HTTP context.</param>
public void ProcessRequest(HttpContext context)
{
    HttpRequest req = context.Request;
    HttpResponse res = context.Response;

    // Force UTF-8 so the pinyin output is not garbled.
    res.Charset = "UTF-8";
    res.ContentEncoding = System.Text.Encoding.UTF8;
    res.ContentType = "text/plain";

    string pinyin = string.Empty;
    try
    {
        string hanzi = req["hanzi"];

        // Controls how polyphonic characters are returned. The parameter is optional,
        // so it must be compared null-safely; absent means "first".
        string multi = req["multi"];
        bool firstOnly = string.IsNullOrEmpty(multi)
            || string.Equals(multi, "first", StringComparison.OrdinalIgnoreCase);

        // Only do work when there is something to convert.
        if (!string.IsNullOrEmpty(hanzi))
        {
            #region Parse the output format settings sent by the client
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
            format.setCaseType(new HanyuPinyinCaseType(req["caseType"]));
            format.setToneType(new HanyuPinyinToneType(req["toneType"]));
            format.setVCharType(new HanyuPinyinVCharType(req["vType"]));
            #endregion

            System.Text.StringBuilder buffer = new System.Text.StringBuilder();
            foreach (char ch in hanzi)
            {
                if (!Util.IsHanzi(ch))
                {
                    // Not a Chinese character: copy it through unchanged.
                    buffer.Append(ch);
                    continue;
                }

                string[] py = PinyinHelper.toHanyuPinyinStringArray(ch, format);
                if (py == null || py.Length == 0)
                {
                    // No reading available: keep the character rather than
                    // failing the whole request.
                    buffer.Append(ch);
                    continue;
                }

                if (firstOnly || py.Length == 1)
                {
                    // A space follows each pinyin. For polyphonic characters the
                    // first reading may not be the right one in context.
                    buffer.Append(py[0]).Append(" ");
                }
                else
                {
                    buffer.Append("(").Append(string.Join(",", py)).Append(") ");
                }
            }

            pinyin = buffer.ToString();
        }
    }
    catch (Exception ex)
    {
        pinyin = ex.Message;
    }

    res.Write(pinyin);
}
// Returns the first pinyin reading of ch, formatted with the given ü representation.
public string TestVCharType(char ch, HanyuPinyinVCharType vcharType)
{
    var format = new HanyuPinyinOutputFormat { VCharType = vcharType };
    string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
    return readings[0];
}
// Requesting tone marks together with the supplied ü representation must be
// rejected with InvalidHanyuPinyinFormatException.
public void TestToneMarkWithUAndColon(char ch, HanyuPinyinVCharType vcharType)
{
    var format = new HanyuPinyinOutputFormat
    {
        ToneType = HanyuPinyinToneType.WITH_TONE_MARK,
        VCharType = vcharType
    };

    Assert.Throws<InvalidHanyuPinyinFormatException>(
        () => PinyinHelper.ToHanyuPinyinStringArray(ch, format));
}
// Returns the first reading of ch rendered with tone marks and a Unicode ü.
public string TestToneMark(char ch)
{
    var format = new HanyuPinyinOutputFormat
    {
        ToneType = HanyuPinyinToneType.WITH_TONE_MARK,
        VCharType = HanyuPinyinVCharType.WITH_U_UNICODE
    };

    string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
    return readings[0];
}
// Runs the conversion with tone marks combined with the supplied ü representation.
// NOTE(review): no assertion here; presumably an expected-exception attribute on
// the method supplies the check - confirm.
public void TestToneMarkWithUAndColon(char ch, HanyuPinyinVCharType vcharType)
{
    var format = new HanyuPinyinOutputFormat
    {
        ToneType = HanyuPinyinToneType.WITH_TONE_MARK,
        VCharType = vcharType
    };

    PinyinHelper.ToHanyuPinyinStringArray(ch, format);
}
// Tone marks combined with the supplied ü representation must raise
// InvalidHanyuPinyinFormatException.
public void TestToneMarkWithUAndColon(char ch, HanyuPinyinVCharType vcharType)
{
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.ToneType = HanyuPinyinToneType.WithToneMark;
    format.VCharType = vcharType;

    Assert.Throws<InvalidHanyuPinyinFormatException>(
        () => PinyinHelper.ToHanyuPinyinStringArray(ch, format));
}
// Returns the first reading of ch rendered with tone marks and a Unicode ü.
public string TestToneMark(char ch)
{
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.ToneType = HanyuPinyinToneType.WithToneMark;
    format.VCharType = HanyuPinyinVCharType.WithUUnicode;

    string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
    return readings[0];
}
// Asserts that the first reading of ch, formatted with the given ü representation,
// equals the expected text.
public void TestVCharType(char ch, HanyuPinyinVCharType vcharType, string result)
{
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.VCharType = vcharType;

    string firstReading = PinyinHelper.ToHanyuPinyinStringArray(ch, format)[0];

    Assert.Equal(result, firstReading);
}
// Returns the first reading of ch in upper case, using the given ü representation.
public string TestCaseType(char ch, HanyuPinyinVCharType vcharType)
{
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.CaseType = HanyuPinyinCaseType.Uppercase;
    format.VCharType = vcharType;

    string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
    return readings[0];
}
// Asserts that the first reading of ch, rendered with tone marks and a Unicode ü,
// equals the expected text.
public void TestToneMark(char ch, string result)
{
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.ToneType = HanyuPinyinToneType.WITH_TONE_MARK;
    format.VCharType = HanyuPinyinVCharType.WITH_U_UNICODE;

    string firstReading = PinyinHelper.ToHanyuPinyinStringArray(ch, format)[0];

    Assert.Equal(result, firstReading);
}
// Returns the first reading of ch with tone numbers, using the given ü
// representation and letter case.
public string TestWithToneNumber(
    char ch,
    HanyuPinyinVCharType vcharType,
    HanyuPinyinCaseType caseType)
{
    var format = new HanyuPinyinOutputFormat
    {
        ToneType = HanyuPinyinToneType.WITH_TONE_NUMBER,
        VCharType = vcharType,
        CaseType = caseType
    };

    string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
    return readings[0];
}
// Returns the first reading of ch with tone numbers, using the given ü
// representation and letter case.
public string TestWithToneNumber(
    char ch,
    HanyuPinyinVCharType vcharType,
    HanyuPinyinCaseType caseType)
{
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.ToneType = HanyuPinyinToneType.WithToneNumber;
    format.VCharType = vcharType;
    format.CaseType = caseType;

    string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
    return readings[0];
}
// Returns every reading of ch, formatted with the supplied tone, ü and case settings.
public string[] TestCharWithMultiplePronouciations(
    char ch,
    HanyuPinyinToneType toneType,
    HanyuPinyinVCharType vcharType,
    HanyuPinyinCaseType caseType)
{
    var format = new HanyuPinyinOutputFormat
    {
        ToneType = toneType,
        VCharType = vcharType,
        CaseType = caseType
    };

    string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
    return readings;
}
// Asserts that the first reading of ch, with tone numbers and the given ü
// representation and letter case, equals the expected text.
public void TestWithToneNumber(
    char ch,
    HanyuPinyinVCharType vcharType,
    HanyuPinyinCaseType caseType,
    string result)
{
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.ToneType = HanyuPinyinToneType.WITH_TONE_NUMBER;
    format.VCharType = vcharType;
    format.CaseType = caseType;

    string firstReading = PinyinHelper.ToHanyuPinyinStringArray(ch, format)[0];

    Assert.Equal(result, firstReading);
}
// Returns every reading of ch, formatted with the supplied tone, ü and case settings.
public string[] TestCharWithMultiplePronunciations(
    char ch,
    HanyuPinyinToneType toneType,
    HanyuPinyinVCharType vcharType,
    HanyuPinyinCaseType caseType)
{
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.ToneType = toneType;
    format.VCharType = vcharType;
    format.CaseType = caseType;

    string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
    return readings;
}
// Asserts that the readings of ch, formatted with the supplied settings, match the
// expected sequence element by element.
public void TestCharWithMultiplePronouciations(
    char ch,
    HanyuPinyinToneType toneType,
    HanyuPinyinVCharType vcharType,
    HanyuPinyinCaseType caseType,
    string[] result)
{
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.ToneType = toneType;
    format.VCharType = vcharType;
    format.CaseType = caseType;

    string[] actual = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
    bool sameSequence = Enumerable.SequenceEqual(result, actual);

    Assert.True(sameSequence);
}
// Looks up the unformatted readings of ch and formats each one in place.
// Returns null when the lookup returns null.
private static string[] GetFomattedHanyuPinyinStringArray(
    char ch,
    HanyuPinyinOutputFormat format)
{
    string[] readings = GetUnformattedHanyuPinyinStringArray(ch);
    if (readings == null)
    {
        return readings;
    }

    // The looked-up array is overwritten slot by slot and then returned.
    for (int index = 0; index < readings.Length; index++)
    {
        string raw = readings[index];
        readings[index] = PinyinFormatter.FormatHanyuPinyin(raw, format);
    }

    return readings;
}
/// <summary>
/// Gets the first formatted Hanyu Pinyin reading of a character.
/// </summary>
/// <remarks>
/// Deprecated: the first retrieved pinyin may be the wrong pronunciation in a given
/// sentence context, and this function is slated for removal.
/// </remarks>
/// <param name="ch">The given Unicode character.</param>
/// <param name="outputFormat">Describes the desired format of the returned pinyin.</param>
/// <returns>
/// The first reading, or null when the lookup yields null or an empty array
/// (for example when the input is not a Chinese character).
/// </returns>
private static String getFirstHanyuPinyinString(char ch, HanyuPinyinOutputFormat outputFormat)
{
    String[] candidates = getFormattedHanyuPinyinStringArray(ch, outputFormat);
    bool hasReading = candidates != null && candidates.Length > 0;
    return hasReading ? candidates[0] : null;
}
/// <summary>
/// Returns the formatted Hanyu Pinyin readings of the given Chinese character
/// (Simplified or Traditional) as an array.
/// </summary>
/// <param name="ch">The given Chinese character.</param>
/// <param name="outputFormat">Describes the desired format of the returned pinyin.</param>
/// <returns>
/// The formatted readings, or null if the unformatted lookup finds no record.
/// </returns>
private static String[] getFormattedHanyuPinyinStringArray(char ch, HanyuPinyinOutputFormat outputFormat)
{
    String[] readings = getUnformattedHanyuPinyinStringArray(ch);
    if (readings == null)
    {
        return null;
    }

    // Each slot of the looked-up array is replaced by its formatted form.
    for (int index = 0; index < readings.Length; index++)
    {
        String raw = readings[index];
        readings[index] = PinyinFormatter.formatHanyuPinyin(raw, outputFormat);
    }

    return readings;
}
/// <summary>
/// Convert Hanyu pinyin to given format
/// </summary>
/// <param name="pinyin">The given Hanyu pinyin string </param>
/// <param name="outputFormat">The given format</param>
/// <returns>The Hanyu pinyin with given format</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="outputFormat"/> is null.
/// </exception>
/// <exception cref="InvalidHanyuPinyinFormatException">
/// Tone marks are requested together with the "v" or "u:" representation of ü.
/// </exception>
internal static string FormatHanyuPinyin(
    string pinyin, HanyuPinyinOutputFormat outputFormat)
{
    if (outputFormat == null)
    {
        // The single-string constructor takes the parameter NAME, not a message,
        // so use the (paramName, message) overload.
        throw new ArgumentNullException(
            "outputFormat", "The parameter outputFormat could not be null.");
    }

    if (outputFormat.ToneType == HanyuPinyinToneType.WITH_TONE_MARK
        && (outputFormat.VCharType == HanyuPinyinVCharType.WITH_U_AND_COLON
            || outputFormat.VCharType == HanyuPinyinVCharType.WITH_V))
    {
        throw new InvalidHanyuPinyinFormatException("Tone marks cannot be added to v or u:");
    }

    // Pinyin is machine data, so case conversion must not depend on the current
    // culture (for example Turkish dotted/dotless i).
    string result = pinyin.ToLowerInvariant();

    if (outputFormat.ToneType == HanyuPinyinToneType.WITHOUT_TONE)
    {
        // Strip the tone digits from the lower-cased text. Running the regex on
        // the raw input would discard the lower-casing above.
        result = Regex.Replace(result, "[0-9]", "");
    }
    else if (outputFormat.ToneType == HanyuPinyinToneType.WITH_TONE_MARK)
    {
        // "u:" is rewritten to "v" before the tone-mark conversion runs.
        result = ConvertToneNumber2ToneMark(result.Replace("u:", "v"));
    }

    if (outputFormat.VCharType == HanyuPinyinVCharType.WITH_V)
    {
        result = result.Replace("u:", "v");
    }
    else if (outputFormat.VCharType == HanyuPinyinVCharType.WITH_U_UNICODE)
    {
        result = result.Replace("u:", "ü");
    }

    if (outputFormat.CaseType == HanyuPinyinCaseType.UPPERCASE)
    {
        result = result.ToUpperInvariant();
    }

    return result;
}
/// <summary>
/// Converts the characters of a string to toneless pinyin.
/// </summary>
/// <param name="self">The text to convert.</param>
/// <returns>
/// <see cref="string.Empty"/> when <c>self.NullEmpty()</c> is true; otherwise the
/// per-character results appended one after another with nothing between characters.
/// </returns>
public static string ToPinYin(this string self)
{
    if (self.NullEmpty())
    {
        return string.Empty;
    }

    var toneless = new HanyuPinyinOutputFormat { ToneType = HanyuPinyinToneType.WITHOUT_TONE };
    var buffer = new StringBuilder();

    for (int i = 0; i < self.Length; i++)
    {
        string[] readings = PinyinHelper.ToHanyuPinyinStringArray(self[i], toneless);
        // NOTE(review): readings may be null if the helper has no entry for this
        // character; confirm JoinBy tolerates a null receiver.
        buffer.Append(readings.JoinBy(" "));
    }

    return buffer.ToString();
}
/// <summary>
/// Converts each character of <paramref name="chineseString"/> to its first
/// toneless pinyin reading and concatenates the results without separators.
/// </summary>
/// <param name="chineseString">The text to convert.</param>
/// <returns>
/// <see cref="string.Empty"/> for null, empty or whitespace-only input; otherwise
/// the converted text. Characters for which no reading is returned are copied
/// through unchanged.
/// </returns>
public static string GetPinYin(string chineseString)
{
    if (string.IsNullOrWhiteSpace(chineseString))
    {
        return string.Empty;
    }

    // The format is the same for every character, so build it once rather than
    // once per character inside the lambda.
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.ToneType = HanyuPinyinToneType.WITHOUT_TONE;

    Func<char, string> getPinYin = c =>
    {
        string[] pinYin = PinyinHelper.ToHanyuPinyinStringArray(c, format);

        // Treat an empty array like a missing one, so indexing [0] cannot throw.
        if (pinYin == null || pinYin.Length == 0)
        {
            return c.ToString();
        }

        return pinYin[0];
    };

    var result = chineseString.Select(getPinYin).ToArray();
    return string.Join("", result);
}
// Converts one Chinese character to its pinyin readings, in lower case with a
// Unicode ü. Tone rendering follows toneType: Without -> no tone, Number -> tone
// numbers, any other value -> tone marks.
public string[] Convert(char chineseCharacter, EToneType toneType)
{
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.CaseType = HanyuPinyinCaseType.Lowercase;
    format.ToneType = HanyuPinyinToneType.WithToneMark;
    format.VCharType = HanyuPinyinVCharType.WithUUnicode;

    // Override the tone-mark default only for the two explicit alternatives.
    if (toneType == EToneType.Without)
    {
        format.ToneType = HanyuPinyinToneType.WithoutTone;
    }
    else if (toneType == EToneType.Number)
    {
        format.ToneType = HanyuPinyinToneType.WithToneNumber;
    }

    string[] readings = PinyinHelper.ToHanyuPinyinStringArray(chineseCharacter, format);
    return readings;
}
/// <summary>
/// Applies the tone, ü and case settings of <paramref name="outputFormat"/> to an
/// unformatted pinyin string.
/// </summary>
/// <param name="pinyinStr">The unformatted pinyin string.</param>
/// <param name="outputFormat">The desired output format.</param>
/// <returns>The formatted pinyin string.</returns>
/// <exception cref="BadHanyuPinyinOutputFormatCombination">
/// Tone marks are requested together with the "v" or "u:" representation of ü.
/// </exception>
internal static String formatHanyuPinyin(String pinyinStr, HanyuPinyinOutputFormat outputFormat)
{
    var toneType = outputFormat.getToneType();
    var vCharType = outputFormat.getVCharType();

    bool wantsToneMark = HanyuPinyinToneType.WITH_TONE_MARK.Equals(toneType);
    bool wantsV = HanyuPinyinVCharType.WITH_V.Equals(vCharType);

    if (wantsToneMark
        && (wantsV || HanyuPinyinVCharType.WITH_U_AND_COLON.Equals(vCharType)))
    {
        throw new BadHanyuPinyinOutputFormatCombination("tone marks cannot be added to v or u:");
    }

    String formatted = pinyinStr;

    if (HanyuPinyinToneType.WITHOUT_TONE.Equals(toneType))
    {
        // Remove the tone digits.
        formatted = Regex.Replace(formatted, "[1-5]", "");
    }
    else if (wantsToneMark)
    {
        // "u:" is rewritten to "v" before the tone-mark conversion runs.
        formatted = convertToneNumber2ToneMark(formatted.Replace("u:", "v"));
    }

    if (wantsV)
    {
        formatted = formatted.Replace("u:", "v");
    }
    else if (HanyuPinyinVCharType.WITH_U_UNICODE.Equals(vCharType))
    {
        formatted = formatted.Replace("u:", "ü");
    }

    if (HanyuPinyinCaseType.UPPERCASE.Equals(outputFormat.getCaseType()))
    {
        formatted = formatted.ToUpper();
    }

    return formatted;
}
/// <summary>
/// Converts the Chinese characters of a string to pinyin.
/// </summary>
/// <remarks>
/// Characters for which <c>Util.IsHanzi</c> is false are skipped. A character with
/// exactly one reading contributes that reading with no trailing space; any other
/// count contributes "(r1,r2,...) " (trailing space included). If the converter
/// returns null for a Chinese character, the whole result is <see cref="string.Empty"/>.
/// </remarks>
/// <param name="hanziStr">The Chinese text to convert.</param>
/// <param name="hanyuPinyinOutputFormat">
/// The output format; a default <c>HanyuPinyinOutputFormat</c> is used when null.
/// </param>
/// <returns>
/// The pinyin text, or <see cref="string.Empty"/> for null or empty input.
/// </returns>
public static string ToHanyuPinyinString(string hanziStr, HanyuPinyinOutputFormat hanyuPinyinOutputFormat = null)
{
    // Null input used to fail with a NullReferenceException in the foreach.
    if (string.IsNullOrEmpty(hanziStr))
    {
        return string.Empty;
    }

    if (hanyuPinyinOutputFormat == null)
    {
        hanyuPinyinOutputFormat = new HanyuPinyinOutputFormat();
    }

    var buffer = new System.Text.StringBuilder();

    foreach (var hanzi in hanziStr)
    {
        if (!Util.IsHanzi(hanzi))
        {
            continue;
        }

        var pinyinStrArray = ChineseToPinyinConvert.GetInstance().GetHanyuPinyinStringArray(hanzi);
        if (pinyinStrArray == null)
        {
            return string.Empty;
        }

        if (pinyinStrArray.Length == 1)
        {
            buffer.Append(PinyinFormatter.FormatHanyuPinyin(pinyinStrArray[0], hanyuPinyinOutputFormat));
            continue;
        }

        // Format into a fresh array instead of overwriting the converter's array.
        // The converter is a singleton and may hand out shared data
        // (NOTE(review): not visible here - confirm), which in-place formatting
        // would corrupt for later calls.
        var formatted = new string[pinyinStrArray.Length];
        for (var i = 0; i < pinyinStrArray.Length; i++)
        {
            formatted[i] = PinyinFormatter.FormatHanyuPinyin(pinyinStrArray[i], hanyuPinyinOutputFormat);
        }

        buffer.Append("(").Append(string.Join(",", formatted)).Append(") ");
    }

    return buffer.ToString();
}
/// <summary>
/// Applies the tone, ü and case settings of <paramref name="outputFormat"/> to an
/// unformatted Hanyu Pinyin string.
/// </summary>
/// <param name="pinyinStr">Unformatted Hanyu Pinyin string.</param>
/// <param name="outputFormat">The desired Hanyu Pinyin format.</param>
/// <returns>The formatted Hanyu Pinyin string.</returns>
/// <exception cref="BadHanyuPinyinOutputFormatCombination">
/// Tone marks are requested together with the "v" or "u:" representation of ü.
/// </exception>
public static string FormatHanyuPinyin(string pinyinStr, HanyuPinyinOutputFormat outputFormat)
{
    var toneType = outputFormat.ToneType;
    var vCharType = outputFormat.VCharType;

    bool wantsToneMark = toneType == HanyuPinyinToneType.WITH_TONE_MARK;
    bool wantsV = vCharType == HanyuPinyinVCharType.WITH_V;

    if (wantsToneMark && (wantsV || vCharType == HanyuPinyinVCharType.WITH_U_AND_COLON))
    {
        throw new BadHanyuPinyinOutputFormatCombination("tone marks cannot be added to v or u:");
    }

    string formatted = pinyinStr;

    if (toneType == HanyuPinyinToneType.WITHOUT_TONE)
    {
        // Remove the tone digits.
        formatted = Regex.Replace(formatted, "[1-5]", "");
    }
    else if (wantsToneMark)
    {
        // "u:" is rewritten to "v" before the tone-mark conversion runs.
        formatted = ConvertToneNumber2ToneMark(formatted.Replace("u:", "v"));
    }

    if (wantsV)
    {
        formatted = formatted.Replace("u:", "v");
    }
    else if (vCharType == HanyuPinyinVCharType.WITH_U_UNICODE)
    {
        formatted = formatted.Replace("u:", "ü");
    }

    if (outputFormat.CaseType == HanyuPinyinCaseType.UPPERCASE)
    {
        formatted = formatted.ToUpper();
    }

    return formatted;
}
/// <summary>
/// Gets all Hanyu Pinyin presentations of a single Chinese character (Simplified
/// or Traditional).
/// </summary>
/// <remarks>
/// For example, for '间' the result holds two strings, "jian1" and "jian4"; for '李'
/// it holds the single string "li3".
/// Special note: a result of "none0" means the character is in the Unicode CJK
/// table but has no pronunciation in Chinese.
/// </remarks>
/// <param name="ch">The given Chinese character.</param>
/// <param name="outputFormat">Describes the desired format of the returned pinyin.</param>
/// <returns>
/// An array of all Hanyu Pinyin presentations; null for a non-Chinese character.
/// </returns>
/// <exception cref="BadHanyuPinyinOutputFormatCombination">
/// A disallowed combination of output format settings is requested.
/// </exception>
public static String[] toHanyuPinyinStringArray(char ch, HanyuPinyinOutputFormat outputFormat)
{
    String[] formatted = getFormattedHanyuPinyinStringArray(ch, outputFormat);
    return formatted;
}
/// <summary>
/// Retrieves every Hanyu pinyin reading of one Chinese character (Simplified or
/// Traditional Chinese), formatted as requested.
/// </summary>
/// <param name="ch">The Chinese character to look up.</param>
/// <param name="format">The output format to apply to each reading.</param>
/// <returns>
/// A string array with all Hanyu pinyin presentations; null for a non-Chinese
/// character.
/// </returns>
public static string[] ToHanyuPinyinStringArray(
    char ch,
    HanyuPinyinOutputFormat format)
{
    string[] formatted = GetFomattedHanyuPinyinStringArray(ch, format);
    return formatted;
}