/// <summary>
/// Converts the Chinese characters of <c>request.Key</c> to pinyin, copying every
/// non-Chinese character through unchanged.
/// </summary>
/// <param name="request">
/// Client request. <c>ToneType</c>, <c>CaseType</c> and <c>VType</c> are parsed and OR-ed into
/// the output format; <c>Multi</c> equal to "first" (case-insensitive) selects a single reading
/// per character.
/// </param>
/// <returns>
/// The converted text. Each converted character is followed by one space; when all readings are
/// requested they are comma-joined and wrapped in parentheses.
/// </returns>
private string Hanzi2Pinyin(RequestModel request)
{
    var result = new StringBuilder();

    // Parse the output-format settings sent by the client into a single flag value.
    PinyinFormat format = PinyinUtil.ParseFormat(request.ToneType)
                          | PinyinUtil.ParseFormat(request.CaseType)
                          | PinyinUtil.ParseFormat(request.VType);

    // Evaluated once instead of per character. The static string.Equals tolerates a null
    // Multi value, which is then treated as "return every reading".
    bool firstOnly = string.Equals(request.Multi, "first", StringComparison.OrdinalIgnoreCase);

    foreach (char ch in request.Key)
    {
        if (!PinyinUtil.IsHanzi(ch))
        {
            // Not a Chinese character: append it as-is.
            result.Append(ch);
            continue;
        }

        if (firstOnly)
        {
            // One space after each pinyin. For characters with several readings the first
            // reading may not be the right one in context.
            result.AppendFormat("{0} ", Pinyin4Net.GetFirstPinyin(ch, format));
            continue;
        }

        string[] py = Pinyin4Net.GetPinyin(ch, format);
        result.AppendFormat("({0}) ", string.Join(",", py));
    }

    return result.ToString();
}
/// <summary>
/// Builds the pinyin of a string, letting the caller decide which reading is used for each
/// Chinese character.
/// </summary>
/// <param name="text">The text to convert. Null or empty input yields an empty string.</param>
/// <param name="format">Output format; in this method it is only forwarded to <c>SpreadCase</c>.</param>
/// <param name="caseSpread">Forwarded to <c>SpreadCase</c> together with <paramref name="format"/>.</param>
/// <param name="pinyinHandler">
/// Chooses the pinyin for one character. Arguments: (1) the readings returned by the pinyin
/// database, (2) the current character, (3) the whole input text. Its return value is appended
/// to the result. When null, the first reading is used.
/// </param>
/// <returns>The assembled string after it has been passed through <c>SpreadCase</c>.</returns>
public static string GetPinyin(string text, PinyinFormat format, bool caseSpread, Func<string[], char, string, string> pinyinHandler)
{
    if (string.IsNullOrEmpty(text))
    {
        return "";
    }

    var buffer = new StringBuilder();
    foreach (var ch in text)
    {
        if (PinyinUtil.IsHanzi(ch))
        {
            var readings = PinyinDB.Instance.GetPinyin(ch);
            var chosen = pinyinHandler != null
                ? pinyinHandler.Invoke(readings, ch, text)
                : readings[0];
            buffer.Append(chosen);
        }
        else
        {
            // Non-Chinese characters are copied through unchanged.
            buffer.Append(ch);
        }
    }

    return SpreadCase(format, caseSpread, false, buffer);
}
/// <summary>
/// Looks up the pinyin of <paramref name="s"/> through <c>p.GetPinyin</c> and returns it
/// formatted by <c>PininFormat.Raw</c>.
/// </summary>
/// <param name="s">The text to convert.</param>
/// <param name="format">
/// Not consulted by this implementation; the raw formatter is always used.
/// NOTE(review): confirm that ignoring the requested format is intended.
/// </param>
/// <returns>The formatted pinyin.</returns>
public string GetPinyin(string s, PinyinFormat format)
{
    var pinyin = p.GetPinyin(s);
    return PininFormat.Raw.Format(pinyin);
}
/// <summary>
/// Formats a single pinyin syllable according to the tone, ü-spelling and letter-case flags
/// contained in <paramref name="format"/>.
/// </summary>
/// <param name="py">The pinyin to format.</param>
/// <param name="format">The combined format flags.</param>
/// <see cref="ToneFormat"/>
/// <see cref="CaseFormat"/>
/// <see cref="VCharFormat"/>
/// <returns>The formatted pinyin.</returns>
/// <exception cref="PinyinException">
/// Thrown when tone marks are requested together with the "v" or "u:" spelling.
/// </exception>
public static string Format(string py, PinyinFormat format)
{
    // "v" and "u:" cannot carry a tone mark, so those combinations are rejected up front.
    if (format.Contains(WITH_TONE_MARK) && (format.Contains(WITH_V) || format.Contains(WITH_U_AND_COLON)))
    {
        throw new PinyinException("\"v\"或\"u:\"不能添加声调");
    }

    var pinyin = py;

    if (format.Contains(WITHOUT_TONE))
    {
        // No tone: strip the tone digits. The static Regex.Replace uses the framework's
        // pattern cache instead of building a new Regex on every call.
        pinyin = Regex.Replace(pinyin, "[1-5]", "");
    }
    else if (format.Contains(WITH_TONE_MARK))
    {
        // Tone marks: "u:" is rewritten to "v" before the tone number is converted.
        pinyin = pinyin.Replace("u:", "v");
        pinyin = convertToneNumber2ToneMark(pinyin);
    }

    if (format.Contains(WITH_V))
    {
        // Output "v".
        pinyin = pinyin.Replace("u:", "v");
    }
    else if (format.Contains(WITH_U_UNICODE))
    {
        // Output "ü".
        pinyin = pinyin.Replace("u:", "ü");
    }

    // Casing is culture-invariant: pinyin is Latin-script data, and culture-sensitive casing
    // would turn "i" into a dotted capital "İ" under e.g. a Turkish current culture.
    if (format.Contains(UPPERCASE))
    {
        pinyin = pinyin.ToUpperInvariant();
    }
    else if (format.Contains(LOWERCASE))
    {
        pinyin = pinyin.ToLowerInvariant();
    }
    else if (format.Contains(CAPITALIZE_FIRST_LETTER))
    {
        // Single-letter pinyin (a, e, o) is left alone; the length check keeps an empty
        // string from reaching Substring(0, 1).
        if (pinyin.Length > 0 && !IGNORE_LIST.Contains(pinyin.ToLowerInvariant()))
        {
            pinyin = pinyin.Substring(0, 1).ToUpperInvariant() + pinyin.Substring(1);
        }
    }

    return pinyin;
}
/// <summary>
/// Returns the only reading of a Chinese character, or the first one when it has several,
/// optionally formatted.
/// </summary>
/// <param name="hanzi">The Chinese character to look up.</param>
/// <param name="format">Output format; <c>PinyinFormat.None</c> returns the reading unformatted.</param>
/// <see cref="PinyinFormat"/>
/// <seealso cref="PinyinUtil"/>
/// <returns>The first reading of the character, formatted when a format is given.</returns>
/// <exception cref="UnsupportedUnicodeException">Thrown when the character is not a Chinese character.</exception>
public static string GetFirstPinyin(char hanzi, PinyinFormat format = PinyinFormat.None)
{
    var firstReading = GetPinyin(hanzi)[0];

    return format == PinyinFormat.None
        ? firstReading
        : PinyinUtil.Format(firstReading, format);
}
/// <summary>
/// Converts <paramref name="s"/> to pinyin and shapes the result for the requested format.
/// </summary>
/// <param name="s">The text to convert.</param>
/// <param name="format">
/// <c>UpperVerticalBar</c> replaces spaces with <c>Pinyin.SeparatorVerticalBar</c> and
/// upper-cases the result; <c>AlphabetSort</c> returns the converted text unchanged.
/// </param>
/// <returns>The converted text, or the original <paramref name="s"/> when conversion reports failure.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown for any other format value, but only when the conversion succeeded.
/// </exception>
string IPinyin.GetPinyin(string s, PinyinFormat format)
{
    // Conversion failed: hand the input back without looking at the format.
    if (!GetPinyin(s, out var str))
    {
        return s;
    }

    string converted = str!;
    switch (format)
    {
        case PinyinFormat.UpperVerticalBar:
            return converted.Replace(' ', Pinyin.SeparatorVerticalBar).ToUpper();

        case PinyinFormat.AlphabetSort:
            return converted;

        default:
            throw new ArgumentOutOfRangeException(nameof(format), format, null);
    }
}
/// <summary>
/// Returns all readings of a Chinese character.
/// </summary>
/// <param name="hanzi">The Chinese character to look up.</param>
/// <param name="format">Output format; <c>PinyinFormat.None</c> returns the readings as stored.</param>
/// <returns>The readings of the character; an empty array when no pinyin is found.</returns>
/// <exception cref="UnsupportedUnicodeException">Thrown when the character is not a Chinese character.</exception>
public static string[] GetPinyin(char hanzi, PinyinFormat format = PinyinFormat.None)
{
    bool isChinese = PinyinUtil.IsHanzi(hanzi);
    if (!isChinese)
    {
        // Not a Chinese character.
        throw new UnsupportedUnicodeException("不支持的字符: 请输入汉字");
    }

    var readings = PinyinDB.Instance.GetPinyin(hanzi);

    return format == PinyinFormat.None
        ? readings
        : (from reading in readings select PinyinUtil.Format(reading, format)).ToArray();
}
/// <summary>
/// Returns the pinyin of a surname; the syllables of a compound surname are separated by spaces.
/// </summary>
/// <param name="firstName">The surname to look up.</param>
/// <param name="format">Output format; <c>PinyinFormat.None</c> returns the pinyin as stored.</param>
/// <returns>The pinyin of the surname, or null when the surname is not found.</returns>
/// <exception cref="UnsupportedUnicodeException">Thrown when any character is not a Chinese character.</exception>
public static string GetPinyin(string firstName, PinyinFormat format = PinyinFormat.None)
{
    if (!firstName.All(PinyinUtil.IsHanzi))
    {
        // Contains a non-Chinese character.
        throw new UnsupportedUnicodeException("不支持的字符: 请输入汉字字符");
    }

    var pinyin = NameDB.Instance.GetPinyin(firstName);

    // An unknown surname is documented to yield null. Returning it here keeps that contract
    // when a format is supplied; previously Split below threw NullReferenceException.
    if (pinyin == null || format == PinyinFormat.None)
    {
        return pinyin;
    }

    // Format each syllable separately, then re-join with the same single-space separator.
    return string.Join(" ", pinyin.Split(' ').Select(item => PinyinUtil.Format(item, format)));
}
/// <summary>
/// Converts every Chinese character of a string to pinyin, or to pinyin initials only.
/// Non-Chinese characters are copied through unchanged.
/// </summary>
/// <param name="text">The text to convert. Null or empty input yields an empty string.</param>
/// <param name="format">Output format; not applied when <paramref name="firstLetterOnly"/> is true.</param>
/// <param name="caseSpread">
/// Whether the letter case of <paramref name="format"/> is extended to non-pinyin characters.
/// Only effective when <paramref name="firstLetterOnly"/> is false.
/// </param>
/// <param name="firstLetterOnly">Whether only the first letter of each pinyin is taken.</param>
/// <param name="multiFirstLetter">
/// Only effective when <paramref name="firstLetterOnly"/> is true: whether the initials of all
/// readings are emitted. Identical initials are emitted once.
/// </param>
/// <returns>
/// Full pinyin: each reading followed by a space. Initials only: "[L] " per character, or
/// "[L,H] " when all initials are requested. The result is passed through <c>SpreadCase</c>.
/// </returns>
public static string GetPinyin(string text, PinyinFormat format, bool caseSpread, bool firstLetterOnly, bool multiFirstLetter)
{
    if (string.IsNullOrEmpty(text))
    {
        return "";
    }

    var output = new StringBuilder();

    foreach (var ch in text)
    {
        if (!PinyinUtil.IsHanzi(ch))
        {
            output.Append(ch);
        }
        else if (!firstLetterOnly)
        {
            // Full pinyin of the first reading, followed by a separating space.
            output.Append(GetFirstPinyin(ch, format)).Append(" ");
        }
        else if (!multiFirstLetter)
        {
            // Initial of the first reading only.
            output.AppendFormat("[{0}] ", GetFirstPinyin(ch)[0]);
        }
        else
        {
            // Several readings may share an initial; Distinct keeps each one once.
            var initials = GetPinyin(ch)
                .Select(py => py[0].ToString())
                .Distinct()
                .ToArray();
            output.AppendFormat("[{0}] ", string.Join(",", initials));
        }
    }

    return SpreadCase(format, caseSpread, firstLetterOnly, output);
}
public static string[] GetSingle(char ch, PinyinFormat format = PinyinFormat.Default, ChineseType chineseType = ChineseType.Simplified) { var lexicon = ChineseLexicon.Current ?? ChineseLexicon.Default; var word = ch.ToString(); var chineseWord = chineseType == ChineseType.Traditional ? lexicon.Words.First(x => x.Traditional == word) : lexicon.Words.First(x => x.Simplified == word); var pinyins = chineseWord.Pinyins.Select(pinyin => { return(format switch { PinyinFormat.Default => pinyin, PinyinFormat.WithoutTone => GetPinyinWithoutTone(pinyin), PinyinFormat.Phonetic => GetPhoneticSymbol(pinyin), PinyinFormat.Code => pinyin.First().ToString(), _ => throw new NotImplementedException(), }); }).ToArray();
/// <summary>
/// Builds one <see cref="PinyinItem"/> per character of the input string.
/// </summary>
/// <param name="text">The text to convert. Null or empty input yields an empty list.</param>
/// <param name="format">Output format applied to each reading.</param>
/// <returns>
/// One item per character in input order. Items for Chinese characters hold all readings of
/// that character; items for other characters get no readings added.
/// </returns>
/// <see cref="PinyinItem"/>
public static List<PinyinItem> GetPinyinArray(string text, PinyinFormat format)
{
    var items = new List<PinyinItem>();

    if (!string.IsNullOrEmpty(text))
    {
        foreach (var ch in text)
        {
            var entry = new PinyinItem(ch);
            if (entry.IsHanzi)
            {
                entry.AddRange(GetPinyin(ch, format));
            }

            items.Add(entry);
        }
    }

    return items;
}
/// <summary>
/// Applies the letter case requested by <paramref name="format"/> to the whole assembled
/// string, including its non-pinyin characters.
/// </summary>
/// <param name="format">Format flags; only the letter-case flags are inspected here.</param>
/// <param name="caseSpread">When false, the text is only trimmed.</param>
/// <param name="firstLetterOnly">When true, the text is only trimmed.</param>
/// <param name="pinyin">The assembled pinyin text.</param>
/// <returns>
/// The trimmed text, lower- or upper-cased when the matching flag is set. For
/// CAPITALIZE_FIRST_LETTER the result of <c>CapitalizeFirstLetter</c> is returned as-is.
/// </returns>
public static string SpreadCase(PinyinFormat format, bool caseSpread, bool firstLetterOnly, StringBuilder pinyin)
{
    if (firstLetterOnly || !caseSpread)
    {
        return pinyin.ToString().Trim();
    }

    if (format.Contains(CAPITALIZE_FIRST_LETTER))
    {
        return CapitalizeFirstLetter(pinyin);
    }

    // Culture-invariant casing: culture-sensitive casing would map "i"/"I" to the Turkish
    // dotted/dotless forms under a tr-TR current culture and corrupt the pinyin.
    if (format.Contains(LOWERCASE))
    {
        return pinyin.ToString().Trim().ToLowerInvariant();
    }

    if (format.Contains(UPPERCASE))
    {
        return pinyin.ToString().Trim().ToUpperInvariant();
    }

    return pinyin.ToString().Trim();
}
/// <summary>
/// Converts a string to pinyin one character at a time, using the first reading that
/// <c>ChineseChar</c> reports for each character.
/// </summary>
/// <param name="chinese">The text to convert. Null or blank input is returned unchanged.</param>
/// <param name="format">
/// <c>Default</c> appends the lower-cased reading, <c>WithoutTone</c> appends it minus its last
/// character, <c>PhoneticSymbol</c> appends the result of <c>GetPhoneticSymbol</c>. Any other
/// value appends nothing for converted characters.
/// </param>
/// <returns>
/// The converted text. Consecutive converted characters are separated by one space; characters
/// that cannot be converted are copied through with no separator before them.
/// </returns>
public static string GetString(string chinese, PinyinFormat format = PinyinFormat.Default)
{
    if (chinese.IsNullOrWhiteSpace())
    {
        return chinese;
    }

    var sb = new StringBuilder();
    var insertSpace = false;

    foreach (var ch in chinese)
    {
        try
        {
            var chineseChar = new ChineseChar(ch);

            // Culture-invariant lower-casing: the culture-sensitive form would turn "I" into a
            // dotless "ı" under a Turkish current culture.
            var pinyin = chineseChar.Pinyins[0].ToString().ToLowerInvariant();

            if (insertSpace)
            {
                sb.Append(" ");
            }

            switch (format)
            {
                case PinyinFormat.Default:
                    sb.Append(pinyin);
                    break;

                case PinyinFormat.WithoutTone:
                    // Drops the last character — presumably the tone digit; confirm against Slice.
                    sb.Append(pinyin.Slice(0, -1));
                    break;

                case PinyinFormat.PhoneticSymbol:
                    sb.Append(GetPhoneticSymbol(pinyin));
                    break;
            }

            insertSpace = true;
        }
        catch
        {
            // Best effort: any failure while converting this character (presumably a
            // non-Chinese character — confirm) falls back to copying it through unchanged.
            sb.Append(ch);
            insertSpace = false;
        }
    }

    return sb.ToString();
}
/// <summary>
/// Returns the pinyin of a Chinese string by forwarding to <c>Pinyin.GetString</c>.
/// </summary>
/// <param name="Chinese">The text to convert.</param>
/// <param name="format">The output format forwarded to <c>Pinyin.GetString</c>.</param>
/// <param name="chineseType">The Chinese script type to match against; defaults to Simplified.</param>
/// <returns>The value returned by <c>Pinyin.GetString</c>.</returns>
public static string CHNPinYin(string Chinese, PinyinFormat format, ChineseTypes chineseType = ChineseTypes.Simplified)
    => Pinyin.GetString(chineseType, Chinese, format);
/// <summary>
/// Asserts that <c>Pinyin4Name.GetPinyin</c> produces the expected text for the given format.
/// </summary>
/// <param name="hanzi">The input passed to <c>Pinyin4Name.GetPinyin</c>.</param>
/// <param name="expected">The expected formatted pinyin.</param>
/// <param name="format">The format under test.</param>
private void PinyinFormatAssert(string hanzi, string expected, PinyinFormat format)
{
    Assert.AreEqual(expected, Pinyin4Name.GetPinyin(hanzi, format));
}
/// <summary>
/// Converts every Chinese character of a string to formatted pinyin, using the first reading
/// for characters that have several. The letter case of <paramref name="format"/> is not
/// extended to non-pinyin characters.
/// </summary>
/// <param name="text">The text to convert.</param>
/// <param name="format">The output format.</param>
/// <returns>The formatted pinyin string.</returns>
public static string GetPinyin(string text, PinyinFormat format)
    // caseSpread, firstLetterOnly and multiFirstLetter are all switched off.
    => GetPinyin(text, format, false, false, false);
/// <summary>
/// Converts a string to pinyin for the given Chinese script type(s).
/// </summary>
/// <param name="chineseTypes">Which script types (Simplified and/or Traditional) are matched.</param>
/// <param name="chinese">The text to convert. Null or blank input is returned unchanged.</param>
/// <param name="format">
/// The output format. With <c>InitialConsonant</c> only the first character of each pinyin is
/// appended and no separating spaces are inserted.
/// </param>
/// <returns>
/// The converted text. Tokens with no match, or whose conversion fails, are copied through
/// unchanged.
/// </returns>
public static string GetString(ChineseTypes chineseTypes, string chinese, PinyinFormat format = PinyinFormat.Default)
{
    // Checked before the tokenizer runs, so null/blank input never reaches SplitWords.
    if (chinese.IsNullOrWhiteSpace())
    {
        return chinese;
    }

    var scope = LexiconScope.Current ?? LexiconScope.Default;
    var lexicon = scope.Lexicon;

    // Without a lexicon the text is walked one character at a time; with one, the tokenizer
    // decides how many characters make up each token.
    var steps = lexicon is null
        ? Enumerable.Repeat(1, chinese.Length)
        : ChineseTokenizer.SplitWords(chineseTypes, chinese).Select(x => x.Length);

    var sb = new StringBuilder();
    var insertSpace = false;
    var ptext = 0;

    foreach (var step in steps)
    {
        var word = chinese.Substring(ptext, step);
        try
        {
            string pinyin = null;

            // Lexicon lookups come first. "lexicon?." matters: dereferencing a null lexicon
            // used to throw into the catch below, so with no lexicon configured the built-in
            // character table was never consulted and nothing was converted.
            if (chineseTypes.HasFlag(ChineseTypes.Simplified))
            {
                pinyin = lexicon?.Words.FirstOrDefault(x => x.Simplified == word)?.SimplifiedPinyin;
            }
            if (pinyin is null && chineseTypes.HasFlag(ChineseTypes.Traditional))
            {
                pinyin = lexicon?.Words.FirstOrDefault(x => x.Traditional == word)?.TraditionalPinyin;
            }

            // Fall back to the built-in single-character table, keyed on the first character.
            if (pinyin is null && chineseTypes.HasFlag(ChineseTypes.Simplified))
            {
                pinyin = Builtin.ChineseChars.FirstOrDefault(x => x.Char == word[0])?.SimplifiedPinyin;
            }
            if (pinyin is null && chineseTypes.HasFlag(ChineseTypes.Traditional))
            {
                pinyin = Builtin.ChineseChars.FirstOrDefault(x => x.Char == word[0])?.TraditionalPinyin;
            }

            if (pinyin is null)
            {
                // No match: copy the token through. Handled inline rather than by throwing an
                // exception for every unmatched token; the output is the same.
                sb.Append(word);
                insertSpace = false;
            }
            else
            {
                if (format != PinyinFormat.InitialConsonant && insertSpace)
                {
                    sb.Append(" ");
                }

                switch (format)
                {
                    case PinyinFormat.Default:
                        sb.Append(pinyin);
                        break;

                    case PinyinFormat.WithoutTone:
                        sb.Append(GetPinyinWithoutTone(pinyin));
                        break;

                    case PinyinFormat.Phonetic:
                        sb.Append(GetPhoneticSymbol(pinyin));
                        break;

                    case PinyinFormat.InitialConsonant:
                        sb.Append(pinyin.First());
                        break;
                }

                insertSpace = true;
            }
        }
        catch
        {
            // Best effort: any failure while converting this token falls back to the raw text.
            sb.Append(word);
            insertSpace = false;
        }

        ptext += step;
    }

    return sb.ToString();
}
/// <summary>
/// Returns the pinyin of a string, treating it as Simplified Chinese.
/// </summary>
/// <param name="chinese">The text to convert.</param>
/// <param name="format">The output format.</param>
/// <returns>The result of the <c>ChineseTypes.Simplified</c> overload of <c>GetString</c>.</returns>
public static string GetString(string chinese, PinyinFormat format = PinyinFormat.Default)
{
    return GetString(ChineseTypes.Simplified, chinese, format);
}
/// <summary>
/// Converts a string to pinyin. Single characters are resolved through <c>ChineseChar</c>;
/// multi-character tokens produced by the tokenizer are resolved through the current lexicon.
/// </summary>
/// <param name="chinese">The text to convert. Null or blank input is returned unchanged.</param>
/// <param name="format">
/// The output format. With <c>Code</c> only the first character of each pinyin is appended and
/// no separating spaces are inserted.
/// </param>
/// <param name="chineseType">Whether lexicon words are matched on their Traditional or Simplified form.</param>
/// <returns>
/// The converted text. Tokens whose conversion fails are copied through unchanged.
/// </returns>
public static string GetString(string chinese, PinyinFormat format = PinyinFormat.Default, ChineseType chineseType = ChineseType.Simplified)
{
    // Checked before the tokenizer runs, so null/blank input never reaches SplitWords.
    if (chinese.IsNullOrWhiteSpace())
    {
        return chinese;
    }

    var lexicon = ChineseLexicon.Current;

    // Without a lexicon every step is one character, so the lexicon branch below is never
    // reached in that case.
    var steps = lexicon is null
        ? Enumerable.Repeat(1, chinese.Length)
        : ChineseTokenizer.SplitWords(chinese, chineseType).Select(x => x.Length);

    var sb = new StringBuilder();
    var insertSpace = false;
    var ptext = 0;

    foreach (var step in steps)
    {
        var word = chinese.Substring(ptext, step);
        try
        {
            string pinyin;
            if (word.Length == 1)
            {
                var chineseChar = new ChineseChar(word[0]);

                // Culture-invariant lower-casing: the culture-sensitive form would turn "I"
                // into a dotless "ı" under a Turkish current culture.
                pinyin = chineseChar.Pinyins[0].ToString().ToLowerInvariant();
            }
            else
            {
                var chineseWord = chineseType == ChineseType.Traditional
                    ? lexicon.Words.First(x => x.Traditional == word)
                    : lexicon.Words.First(x => x.Simplified == word);
                pinyin = chineseWord.Pinyin;
            }

            if (format != PinyinFormat.Code && insertSpace)
            {
                sb.Append(" ");
            }

            switch (format)
            {
                case PinyinFormat.Default:
                    sb.Append(pinyin);
                    break;

                case PinyinFormat.WithoutTone:
                    sb.Append(GetPinyinWithoutTone(pinyin));
                    break;

                case PinyinFormat.Phonetic:
                    sb.Append(GetPhoneticSymbol(pinyin));
                    break;

                case PinyinFormat.Code:
                    sb.Append(pinyin.First());
                    break;
            }

            insertSpace = true;
        }
        catch
        {
            // Best effort: any failure while converting this token (e.g. First finding no
            // lexicon entry) falls back to copying the raw text.
            sb.Append(word);
            insertSpace = false;
        }

        ptext += step;
    }

    return sb.ToString();
}
/// <summary>
/// Asserts that <c>Pinyin4Net.GetFirstPinyin</c> produces the expected text for the given format.
/// </summary>
/// <param name="hanzi">The character passed to <c>Pinyin4Net.GetFirstPinyin</c>.</param>
/// <param name="expected">The expected formatted pinyin.</param>
/// <param name="format">The format under test.</param>
private void PinyinFormatAssert(char hanzi, string expected, PinyinFormat format)
{
    Assert.AreEqual(expected, Pinyin4Net.GetFirstPinyin(hanzi, format));
}
/// <summary>
/// Extension method that tests whether a format value includes every bit of the given flag.
/// </summary>
/// <param name="value">The format value to inspect.</param>
/// <param name="expected">The flag (or flag combination) to look for.</param>
/// <returns>True when all bits of <paramref name="expected"/> are set in <paramref name="value"/>.</returns>
public static bool Contains(this PinyinFormat value, PinyinFormat expected)
    => expected == (value & expected);
string IPinyin.GetPinyin(string s, PinyinFormat format) => format switch {
/// <inheritdoc cref="IPinyin.GetPinyin(string, PinyinFormat)"/>
public static string GetPinyin(string s, PinyinFormat format)
{
    // Static convenience entry point: forwards to the shared IPinyin instance.
    return IPinyin.Instance.GetPinyin(s, format);
}