Exemplo n.º 1
0
        /// <summary>
        /// Converts the text carried by <paramref name="request"/> to pinyin, copying every
        /// non-Hanzi character through unchanged.
        /// </summary>
        /// <param name="request">
        /// Client request providing the text (<c>Key</c>), the output format settings
        /// (<c>ToneType</c>, <c>CaseType</c>, <c>VType</c>) and the polyphone mode (<c>Multi</c>).
        /// </param>
        /// <returns>
        /// The converted text. Each Hanzi becomes its first reading followed by a space when
        /// <c>Multi</c> equals "first" (case-insensitive); otherwise all of its readings are
        /// emitted as "(a,b) ".
        /// </returns>
        private string Hanzi2Pinyin(RequestModel request)
        {
            var result = new StringBuilder();

            // Combine the output format settings sent by the client.
            PinyinFormat format = PinyinUtil.ParseFormat(request.ToneType) |
                                  PinyinUtil.ParseFormat(request.CaseType) |
                                  PinyinUtil.ParseFormat(request.VType);

            // The mode does not change while iterating, so decide once. The static
            // string.Equals also tolerates a missing Multi value (treated as "all readings")
            // instead of throwing NullReferenceException on the first Hanzi.
            bool firstOnly = string.Equals(request.Multi, "first", StringComparison.OrdinalIgnoreCase);

            foreach (char ch in request.Key)
            {
                if (!PinyinUtil.IsHanzi(ch))
                {
                    // Not a Hanzi: append as-is.
                    result.Append(ch);
                    continue;
                }

                if (firstOnly)
                {
                    // One reading per character, separated by a space. For polyphonic
                    // characters the first reading may not be the correct one in context.
                    result.AppendFormat("{0} ", Pinyin4Net.GetFirstPinyin(ch, format));
                    continue;
                }

                // All readings of the character, comma-separated inside parentheses.
                string[] py = Pinyin4Net.GetPinyin(ch, format);
                result.AppendFormat("({0}) ", string.Join(",", py));
            }

            return result.ToString();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Converts every Hanzi in a string to pinyin, letting the caller decide which reading
        /// to use for each character.
        /// </summary>
        /// <param name="text">The string to convert.</param>
        /// <param name="format">Output format; this method only forwards it to <c>SpreadCase</c>.</param>
        /// <param name="caseSpread">Whether the case setting of <paramref name="format"/> should also be applied to the non-pinyin characters; forwarded to <c>SpreadCase</c>.</param>
        /// <param name="pinyinHandler">
        /// Callback that produces the pinyin text for one Hanzi. It receives
        /// (1) the array of readings, (2) the current Hanzi and (3) the whole input string;
        /// its return value is appended to the result. When null, the first reading is used.
        /// </param>
        /// <returns>An empty string for null or empty input; otherwise the converted text as returned by <c>SpreadCase</c>.</returns>
        public static string GetPinyin(string text, PinyinFormat format, bool caseSpread, Func <string[], char, string, string> pinyinHandler)
        {
            if (string.IsNullOrEmpty(text))
            {
                return "";
            }

            var builder = new StringBuilder();

            foreach (var ch in text)
            {
                if (PinyinUtil.IsHanzi(ch))
                {
                    var readings = PinyinDB.Instance.GetPinyin(ch);

                    if (pinyinHandler == null)
                    {
                        // Default policy: take the first reading.
                        builder.Append(readings[0]);
                    }
                    else
                    {
                        builder.Append(pinyinHandler.Invoke(readings, ch, text));
                    }
                }
                else
                {
                    // Anything that is not a Hanzi is copied through untouched.
                    builder.Append(ch);
                }
            }

            return SpreadCase(format, caseSpread, false, builder);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Looks up the pinyin of <paramref name="s"/> and passes it through <c>PininFormat.Raw.Format</c>.
        /// </summary>
        /// <param name="s">Text to convert.</param>
        /// <param name="format">
        /// Requested output format. NOTE(review): this argument is not used by the body; the
        /// result always comes from <c>PininFormat.Raw</c>. Confirm whether that is intended.
        /// </param>
        /// <returns>The formatted pinyin.</returns>
        public string GetPinyin(string s, PinyinFormat format)
        {
            var pinyin = p.GetPinyin(s);

            return PininFormat.Raw.Format(pinyin);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Formats a single pinyin syllable according to the given format flags.
        /// </summary>
        /// <param name="py">The pinyin to format.</param>
        /// <param name="format">Combination of tone, ü-representation and case flags.</param>
        /// <see cref="ToneFormat"/>
        /// <see cref="CaseFormat"/>
        /// <see cref="VCharFormat"/>
        /// <returns>The formatted pinyin.</returns>
        /// <exception cref="PinyinException">
        /// Thrown when tone marks are requested together with the "v" or "u:" representation.
        /// </exception>
        public static string Format(string py, PinyinFormat format)
        {
            // Tone marks cannot be combined with the "v" or "u:" output forms.
            if (format.Contains(WITH_TONE_MARK) && (format.Contains(WITH_V) || format.Contains(WITH_U_AND_COLON)))
            {
                throw new PinyinException("\"v\"或\"u:\"不能添加声调");
            }
            var pinyin = py;

            if (format.Contains(WITHOUT_TONE))
            {
                // No tone: strip the tone digits. The static Regex.Replace avoids building
                // a new Regex instance on every call.
                pinyin = Regex.Replace(pinyin, "[1-5]", "");
            }
            else if (format.Contains(WITH_TONE_MARK))
            {
                // Tone marks: "u:" is rewritten to "v" before the conversion
                // (presumably the form convertToneNumber2ToneMark expects; verify against it).
                pinyin = pinyin.Replace("u:", "v");
                pinyin = convertToneNumber2ToneMark(pinyin);
            }

            if (format.Contains(WITH_V))
            {
                // Output "v" for ü.
                pinyin = pinyin.Replace("u:", "v");
            }
            else if (format.Contains(WITH_U_UNICODE))
            {
                // Output the Unicode character ü.
                pinyin = pinyin.Replace("u:", "ü");
            }

            // Casing is done with the invariant culture: pinyin is not locale text, and
            // culture-sensitive casing would turn "i" into "İ" under e.g. a Turkish culture.
            if (format.Contains(UPPERCASE))
            {
                pinyin = pinyin.ToUpperInvariant();
            }
            else if (format.Contains(LOWERCASE))
            {
                pinyin = pinyin.ToLowerInvariant();
            }
            else if (format.Contains(CAPITALIZE_FIRST_LETTER))
            {
                // Capitalize the first letter, except for the syllables listed in IGNORE_LIST
                // (the stand-alone a / e / o). An empty string has no first letter to capitalize.
                if (pinyin.Length > 0 && !IGNORE_LIST.Contains(pinyin.ToLowerInvariant()))
                {
                    pinyin = pinyin.Substring(0, 1).ToUpperInvariant() + pinyin.Substring(1);
                }
            }

            return pinyin;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Gets the formatted pinyin of a Hanzi: its only reading, or the first one when the
        /// character is polyphonic.
        /// </summary>
        /// <param name="hanzi">The Hanzi to look up.</param>
        /// <param name="format">Output format; <c>PinyinFormat.None</c> returns the reading unformatted.</param>
        /// <see cref="PinyinFormat"/>
        /// <seealso cref="PinyinUtil"/>
        /// <returns>The first reading of <paramref name="hanzi"/>, formatted when a format is given.</returns>
        /// <exception cref="UnsupportedUnicodeException">Thrown when the character is not a Hanzi.</exception>
        public static string GetFirstPinyin(char hanzi, PinyinFormat format = PinyinFormat.None)
        {
            // NOTE(review): indexing [0] assumes the lookup yields at least one reading — confirm.
            var first = GetPinyin(hanzi)[0];

            return format == PinyinFormat.None
                ? first
                : PinyinUtil.Format(first, format);
        }
Exemplo n.º 6
0
 /// <summary>
 /// Converts <paramref name="s"/> to pinyin and shapes the result according to <paramref name="format"/>.
 /// </summary>
 /// <param name="s">Text to convert; returned unchanged when the conversion reports failure.</param>
 /// <param name="format">Either <c>UpperVerticalBar</c> (spaces replaced by the vertical-bar separator, upper-cased) or <c>AlphabetSort</c> (converted text as-is).</param>
 /// <returns>The shaped pinyin, or the original input when the conversion did not succeed.</returns>
 /// <exception cref="ArgumentOutOfRangeException">The conversion succeeded but <paramref name="format"/> is not one of the handled values.</exception>
 string IPinyin.GetPinyin(string s, PinyinFormat format)
 {
     if (!GetPinyin(s, out var converted))
     {
         // Conversion failed: hand the input back untouched.
         return s;
     }

     var pinyin = converted!;

     switch (format)
     {
         case PinyinFormat.UpperVerticalBar:
             return pinyin.Replace(' ', Pinyin.SeparatorVerticalBar).ToUpper();

         case PinyinFormat.AlphabetSort:
             return pinyin;

         default:
             throw new ArgumentOutOfRangeException(nameof(format), format, null);
     }
 }
Exemplo n.º 7
0
        /// <summary>
        /// Gets all pinyin readings of a single Hanzi.
        /// </summary>
        /// <param name="hanzi">The Hanzi to look up.</param>
        /// <param name="format">Output format applied to every reading; <c>PinyinFormat.None</c> returns the readings as stored.</param>
        /// <returns>The readings of <paramref name="hanzi"/>; per the original documentation, an empty array when none is found.</returns>
        /// <exception cref="UnsupportedUnicodeException">Thrown when the character is not a Hanzi.</exception>
        public static string[] GetPinyin(char hanzi, PinyinFormat format = PinyinFormat.None)
        {
            if (!PinyinUtil.IsHanzi(hanzi))
            {
                // Only Hanzi are supported.
                throw new UnsupportedUnicodeException("不支持的字符: 请输入汉字");
            }

            var readings = PinyinDB.Instance.GetPinyin(hanzi);

            if (format != PinyinFormat.None)
            {
                return readings
                       .Select(reading => PinyinUtil.Format(reading, format))
                       .ToArray();
            }

            return readings;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Gets the pinyin of a surname; the syllables of a compound surname are separated by spaces.
        /// </summary>
        /// <param name="firstName">The surname to look up.</param>
        /// <param name="format">Output format applied to each syllable; <c>PinyinFormat.None</c> returns the stored pinyin unchanged.</param>
        /// <returns>The pinyin of the surname, or null when the surname is not found.</returns>
        /// <exception cref="UnsupportedUnicodeException">Thrown when any character of the input is not a Hanzi.</exception>
        public static string GetPinyin(string firstName, PinyinFormat format = PinyinFormat.None)
        {
            if (!firstName.All(PinyinUtil.IsHanzi))
            {
                // Only Hanzi are supported.
                throw new UnsupportedUnicodeException("不支持的字符: 请输入汉字字符");
            }
            var pinyin = NameDB.Instance.GetPinyin(firstName);

            // An unknown surname yields null; return it as documented instead of
            // dereferencing it in the formatting step below.
            if (pinyin == null || format == PinyinFormat.None)
            {
                return pinyin;
            }

            // Format every syllable on its own and put the spaces back.
            return string.Join(" ", pinyin.Split(' ').Select(item => PinyinUtil.Format(item, format)));
        }
Exemplo n.º 9
0
        /// <summary>
        /// Converts every Hanzi in a string to pinyin (first reading for polyphonic characters),
        /// or to pinyin initials when <paramref name="firstLetterOnly"/> is set.
        /// </summary>
        /// <param name="text">The string to convert.</param>
        /// <param name="format">Output format of the pinyin; not applied to the syllables when <paramref name="firstLetterOnly"/> is true.</param>
        /// <param name="caseSpread">Whether the case setting of <paramref name="format"/> is also applied to non-pinyin characters; forwarded to <c>SpreadCase</c>, which ignores it when <paramref name="firstLetterOnly"/> is true.</param>
        /// <param name="firstLetterOnly">When true, only the first letter of the pinyin is emitted.</param>
        /// <param name="multiFirstLetter">Only used when <paramref name="firstLetterOnly"/> is true: emit the distinct first letters of all readings instead of just the first reading's.</param>
        /// <returns>
        /// An empty string for null or empty input. Otherwise the converted text: full syllables are
        /// followed by a space; a single initial is written as "[L] " and multiple initials as "[L,H] ".
        /// </returns>
        public static string GetPinyin(string text, PinyinFormat format, bool caseSpread, bool firstLetterOnly, bool multiFirstLetter)
        {
            if (string.IsNullOrEmpty(text))
            {
                return "";
            }

            var builder = new StringBuilder();

            foreach (var ch in text)
            {
                if (!PinyinUtil.IsHanzi(ch))
                {
                    // Non-Hanzi characters are copied through.
                    builder.Append(ch);
                }
                else if (!firstLetterOnly)
                {
                    // Full syllable of the first reading, followed by a separator space.
                    builder.Append(GetFirstPinyin(ch, format)).Append(" ");
                }
                else if (!multiFirstLetter)
                {
                    // Initial of the first reading only.
                    builder.AppendFormat("[{0}] ", GetFirstPinyin(ch)[0]);
                }
                else
                {
                    // Initials of all readings; readings of a polyphonic character may share
                    // an initial, so duplicates are collapsed.
                    var initials = GetPinyin(ch)
                                   .Select(reading => reading[0].ToString())
                                   .Distinct()
                                   .ToArray();

                    builder.AppendFormat("[{0}] ", string.Join(",", initials));
                }
            }

            return SpreadCase(format, caseSpread, firstLetterOnly, builder);
        }
Exemplo n.º 10
0
        public static string[] GetSingle(char ch, PinyinFormat format = PinyinFormat.Default, ChineseType chineseType = ChineseType.Simplified)
        {
            var lexicon = ChineseLexicon.Current ?? ChineseLexicon.Default;
            var word    = ch.ToString();

            var chineseWord = chineseType == ChineseType.Traditional
                ? lexicon.Words.First(x => x.Traditional == word)
                : lexicon.Words.First(x => x.Simplified == word);

            var pinyins = chineseWord.Pinyins.Select(pinyin =>
            {
                return(format switch
                {
                    PinyinFormat.Default => pinyin,
                    PinyinFormat.WithoutTone => GetPinyinWithoutTone(pinyin),
                    PinyinFormat.Phonetic => GetPhoneticSymbol(pinyin),
                    PinyinFormat.Code => pinyin.First().ToString(),
                    _ => throw new NotImplementedException(),
                });
            }).ToArray();
Exemplo n.º 11
0
        /// <summary>
        /// Gets the pinyin readings of every character in a string, one item per character.
        /// </summary>
        /// <param name="text">The string to convert.</param>
        /// <param name="format">Output format applied to the readings.</param>
        /// <returns>
        /// One <see cref="PinyinItem"/> per input character, in order. Items for Hanzi hold all of
        /// the character's readings; items for other characters hold none. Empty for null or empty input.
        /// </returns>
        /// <see cref="PinyinItem"/>
        public static List <PinyinItem> GetPinyinArray(string text, PinyinFormat format)
        {
            var items = new List<PinyinItem>();

            if (!string.IsNullOrEmpty(text))
            {
                foreach (var ch in text)
                {
                    var entry = new PinyinItem(ch);

                    // Only Hanzi have readings to attach.
                    if (entry.IsHanzi)
                    {
                        entry.AddRange(GetPinyin(ch, format));
                    }

                    items.Add(entry);
                }
            }

            return items;
        }
Exemplo n.º 12
0
        /// <summary>
        /// Produces the final result string, optionally spreading the case setting of the
        /// format over the whole text (pinyin and non-pinyin characters alike).
        /// </summary>
        /// <param name="format">Format whose case flag (capitalize first letter, lower case or upper case) is spread.</param>
        /// <param name="caseSpread">Whether the case setting should be spread at all.</param>
        /// <param name="firstLetterOnly">When true, no case spreading is performed regardless of <paramref name="caseSpread"/>.</param>
        /// <param name="pinyin">Buffer holding the converted text.</param>
        /// <returns>The buffer content with surrounding white space trimmed, with the case setting applied when requested.</returns>
        public static string SpreadCase(PinyinFormat format, bool caseSpread, bool firstLetterOnly, StringBuilder pinyin)
        {
            if (firstLetterOnly || !caseSpread)
            {
                return pinyin.ToString().Trim();
            }

            if (format.Contains(CAPITALIZE_FIRST_LETTER))
            {
                return CapitalizeFirstLetter(pinyin);
            }

            // Invariant casing: the result must not depend on the current thread culture
            // (culture-sensitive casing maps "i" to "İ" under e.g. a Turkish culture).
            if (format.Contains(LOWERCASE))
            {
                return pinyin.ToString().Trim().ToLowerInvariant();
            }
            if (format.Contains(UPPERCASE))
            {
                return pinyin.ToString().Trim().ToUpperInvariant();
            }

            // No case flag present: nothing to spread.
            return pinyin.ToString().Trim();
        }
Exemplo n.º 13
0
        /// <summary>
        /// Converts a string to pinyin character by character, using the first reading of each
        /// character and separating consecutive pinyin with a single space.
        /// </summary>
        /// <param name="chinese">Text to convert.</param>
        /// <param name="format">Output format: <c>Default</c> (lower-cased reading), <c>WithoutTone</c> (reading minus its last character) or <c>PhoneticSymbol</c>.</param>
        /// <returns>
        /// <paramref name="chinese"/> itself when it is null or white space; otherwise the converted
        /// text, in which characters that could not be converted are copied through unchanged.
        /// </returns>
        public static string GetString(string chinese, PinyinFormat format = PinyinFormat.Default)
        {
            if (chinese.IsNullOrWhiteSpace())
            {
                return chinese;
            }

            var output        = new StringBuilder();
            var needSeparator = false;

            foreach (var ch in chinese)
            {
                try
                {
                    // Any failure in here (presumably a non-Chinese character rejected by
                    // ChineseChar — verify) sends the character to the catch block below.
                    var reading = new ChineseChar(ch).Pinyins[0].ToString().ToLower();

                    // A space goes between two consecutive pinyin, not after passthrough text.
                    if (needSeparator)
                    {
                        output.Append(" ");
                    }

                    if (format == PinyinFormat.Default)
                    {
                        output.Append(reading);
                    }
                    else if (format == PinyinFormat.WithoutTone)
                    {
                        output.Append(reading.Slice(0, -1));
                    }
                    else if (format == PinyinFormat.PhoneticSymbol)
                    {
                        output.Append(GetPhoneticSymbol(reading));
                    }

                    needSeparator = true;
                }
                catch
                {
                    // Best effort: keep the original character and suppress the next separator.
                    output.Append(ch);
                    needSeparator = false;
                }
            }

            return output.ToString();
        }
Exemplo n.º 14
0
 /// <summary>
 /// Gets the pinyin of a Chinese string by delegating to <c>Pinyin.GetString</c>.
 /// </summary>
 /// <param name="Chinese">Text to convert.</param>
 /// <param name="format">Output format of the pinyin.</param>
 /// <param name="chineseType">Script of the input; defaults to Simplified.</param>
 /// <returns>The value returned by <c>Pinyin.GetString</c>.</returns>
 public static string CHNPinYin(string Chinese, PinyinFormat format, ChineseTypes chineseType = ChineseTypes.Simplified)
     => Pinyin.GetString(chineseType, Chinese, format);
Exemplo n.º 15
0
        /// <summary>
        /// Asserts that a surname is rendered as the expected pinyin under the given format.
        /// </summary>
        /// <param name="hanzi">Surname passed to <c>Pinyin4Name.GetPinyin</c>.</param>
        /// <param name="expected">Expected pinyin text.</param>
        /// <param name="format">Format passed to <c>Pinyin4Name.GetPinyin</c>.</param>
        private void PinyinFormatAssert(string hanzi, string expected, PinyinFormat format)
        {
            Assert.AreEqual(expected, Pinyin4Name.GetPinyin(hanzi, format));
        }
Exemplo n.º 16
0
 /// <summary>
 /// Converts every Hanzi in a string to formatted pinyin (first reading for polyphonic
 /// characters). The case setting of <paramref name="format"/> is not spread to non-pinyin characters.
 /// </summary>
 /// <param name="text">The string to convert.</param>
 /// <param name="format">Output format of the pinyin.</param>
 /// <returns>The formatted pinyin string.</returns>
 public static string GetPinyin(string text, PinyinFormat format)
 {
     // All three switches of the full overload are off for this overload.
     const bool caseSpread       = false;
     const bool firstLetterOnly  = false;
     const bool multiFirstLetter = false;

     return GetPinyin(text, format, caseSpread, firstLetterOnly, multiFirstLetter);
 }
Exemplo n.º 17
0
        /// <summary>
        /// Gets the pinyin of a string for the requested Chinese script(s).
        /// </summary>
        /// <param name="chineseTypes">Scripts to match against (<c>Simplified</c> and/or <c>Traditional</c>); Simplified is tried first.</param>
        /// <param name="chinese">Text to convert.</param>
        /// <param name="format">Output format of each pinyin.</param>
        /// <returns>
        /// <paramref name="chinese"/> itself when it is null or white space; otherwise the converted
        /// text. Segments without a match are copied through unchanged, and consecutive pinyin are
        /// separated by a space except in the <c>InitialConsonant</c> format.
        /// </returns>
        public static string GetString(ChineseTypes chineseTypes, string chinese, PinyinFormat format = PinyinFormat.Default)
        {
            // Decide this first so the tokenizer is never handed a null or blank string.
            if (chinese.IsNullOrWhiteSpace())
            {
                return chinese;
            }

            var scope   = LexiconScope.Current ?? LexiconScope.Default;
            var lexicon = scope.Lexicon;

            // Without a lexicon there is no word segmentation: advance one character at a time.
            IEnumerable<int> GetDefaultSteps()
            {
                foreach (var ch in chinese)
                {
                    yield return 1;
                }
            }

            // Lookup order: lexicon words (Simplified, then Traditional), then the built-in
            // single-character table (Simplified, then Traditional). The lexicon may be null,
            // so it is accessed null-conditionally; otherwise the built-in table would never
            // be reached when no lexicon is configured.
            string FindPinyin(string word)
            {
                string pinyin = null;

                if (chineseTypes.HasFlag(ChineseTypes.Simplified))
                {
                    pinyin = lexicon?.Words.FirstOrDefault(x => x.Simplified == word)?.SimplifiedPinyin;
                }
                if (pinyin is null && chineseTypes.HasFlag(ChineseTypes.Traditional))
                {
                    pinyin = lexicon?.Words.FirstOrDefault(x => x.Traditional == word)?.TraditionalPinyin;
                }
                if (pinyin is null && chineseTypes.HasFlag(ChineseTypes.Simplified))
                {
                    pinyin = Builtin.ChineseChars.FirstOrDefault(x => x.Char == word[0])?.SimplifiedPinyin;
                }
                if (pinyin is null && chineseTypes.HasFlag(ChineseTypes.Traditional))
                {
                    pinyin = Builtin.ChineseChars.FirstOrDefault(x => x.Char == word[0])?.TraditionalPinyin;
                }

                return pinyin;
            }

            var steps = lexicon is null
                ? GetDefaultSteps()
                : ChineseTokenizer.SplitWords(chineseTypes, chinese).Select(x => x.Length);

            var sb          = new StringBuilder();
            var insertSpace = false;
            var ptext       = 0;

            foreach (var step in steps)
            {
                var word = chinese.Substring(ptext, step);
                ptext += step;

                try
                {
                    var pinyin = FindPinyin(word);

                    if (pinyin is null)
                    {
                        // No match: keep the original text and suppress the next separator.
                        sb.Append(word);
                        insertSpace = false;
                        continue;
                    }

                    // Initials are written back to back; every other format is space-separated.
                    if (insertSpace && format != PinyinFormat.InitialConsonant)
                    {
                        sb.Append(" ");
                    }

                    switch (format)
                    {
                        case PinyinFormat.Default:
                            sb.Append(pinyin);
                            break;

                        case PinyinFormat.WithoutTone:
                            sb.Append(GetPinyinWithoutTone(pinyin));
                            break;

                        case PinyinFormat.Phonetic:
                            sb.Append(GetPhoneticSymbol(pinyin));
                            break;

                        case PinyinFormat.InitialConsonant:
                            sb.Append(pinyin.First());
                            break;
                    }
                    insertSpace = true;
                }
                catch
                {
                    // Deliberate best effort: if lookup or formatting fails for this segment,
                    // fall back to the original text rather than failing the whole conversion.
                    sb.Append(word);
                    insertSpace = false;
                }
            }

            return sb.ToString();
        }
Exemplo n.º 18
0
 /// <summary>
 /// Gets the pinyin of a Simplified Chinese string.
 /// </summary>
 /// <param name="chinese">Text to convert.</param>
 /// <param name="format">Output format of the pinyin.</param>
 /// <returns>The result of the <c>ChineseTypes</c>-taking overload called with <c>ChineseTypes.Simplified</c>.</returns>
 public static string GetString(string chinese, PinyinFormat format = PinyinFormat.Default)
 {
     return GetString(ChineseTypes.Simplified, chinese, format);
 }
Exemplo n.º 19
0
        /// <summary>
        /// Converts a string to pinyin, segmenting it into words when a lexicon is active and
        /// character by character otherwise.
        /// </summary>
        /// <param name="chinese">Text to convert.</param>
        /// <param name="format">Output format of each pinyin.</param>
        /// <param name="chineseType">Script used for tokenizing and for matching multi-character words in the lexicon.</param>
        /// <returns>
        /// <paramref name="chinese"/> itself when it is null or white space; otherwise the converted
        /// text. Segments that fail to convert are copied through unchanged, and consecutive pinyin
        /// are separated by a space except in the <c>Code</c> format.
        /// </returns>
        public static string GetString(string chinese, PinyinFormat format = PinyinFormat.Default, ChineseType chineseType = ChineseType.Simplified)
        {
            var lexicon = ChineseLexicon.Current;

            // Fallback segmentation: every character is its own segment.
            IEnumerable<int> OneCharacterAtATime()
            {
                foreach (var unused in chinese)
                {
                    yield return 1;
                }
            }

            IEnumerable<int> lengths;
            if (lexicon is null)
            {
                lengths = OneCharacterAtATime();
            }
            else
            {
                lengths = ChineseTokenizer.SplitWords(chinese, chineseType).Select(x => x.Length);
            }

            if (chinese.IsNullOrWhiteSpace())
            {
                return chinese;
            }

            var output        = new StringBuilder();
            var needSeparator = false;
            var offset        = 0;

            foreach (var length in lengths)
            {
                var segment = chinese.Substring(offset, length);
                try
                {
                    string pinyin;
                    if (segment.Length == 1)
                    {
                        // Single character: first reading, lower-cased.
                        pinyin = new ChineseChar(segment[0]).Pinyins[0].ToString().ToLower();
                    }
                    else
                    {
                        // Multi-character word: take the pinyin of the matching lexicon entry.
                        pinyin = (chineseType == ChineseType.Traditional
                            ? lexicon.Words.First(x => x.Traditional == segment)
                            : lexicon.Words.First(x => x.Simplified == segment)).Pinyin;
                    }

                    // Codes are written back to back; every other format is space-separated.
                    if (insertSeparatorNeeded(needSeparator, format))
                    {
                        output.Append(" ");
                    }

                    switch (format)
                    {
                        case PinyinFormat.Default:
                            output.Append(pinyin);
                            break;

                        case PinyinFormat.WithoutTone:
                            output.Append(GetPinyinWithoutTone(pinyin));
                            break;

                        case PinyinFormat.Phonetic:
                            output.Append(GetPhoneticSymbol(pinyin));
                            break;

                        case PinyinFormat.Code:
                            output.Append(pinyin.First());
                            break;
                    }
                    needSeparator = true;
                }
                catch
                {
                    // Best effort: keep the original segment and suppress the next separator.
                    output.Append(segment);
                    needSeparator = false;
                }

                offset += length;
            }

            return output.ToString();

            // True when a separating space must precede the next pinyin.
            bool insertSeparatorNeeded(bool pending, PinyinFormat requested)
            {
                return requested != PinyinFormat.Code && pending;
            }
        }
Exemplo n.º 20
0
        /// <summary>
        /// Asserts that the first pinyin of a Hanzi is rendered as expected under the given format.
        /// </summary>
        /// <param name="hanzi">Character passed to <c>Pinyin4Net.GetFirstPinyin</c>.</param>
        /// <param name="expected">Expected pinyin text.</param>
        /// <param name="format">Format passed to <c>Pinyin4Net.GetFirstPinyin</c>.</param>
        private void PinyinFormatAssert(char hanzi, string expected, PinyinFormat format)
        {
            Assert.AreEqual(expected, Pinyin4Net.GetFirstPinyin(hanzi, format));
        }
Exemplo n.º 21
0
 /// <summary>
 /// Extension on <see cref="PinyinFormat"/> that tests whether a value includes the given format flag(s).
 /// </summary>
 /// <param name="value">The format value being inspected.</param>
 /// <param name="expected">The flag or flag combination to look for.</param>
 /// <returns>True when every bit set in <paramref name="expected"/> is also set in <paramref name="value"/>.</returns>
 public static bool Contains(this PinyinFormat value, PinyinFormat expected)
 {
     var shared = value & expected;

     return shared == expected;
 }
Exemplo n.º 22
0
 string IPinyin.GetPinyin(string s, PinyinFormat format) => format switch
 {
Exemplo n.º 23
0
 /// <inheritdoc cref="IPinyin.GetPinyin(string, PinyinFormat)"/>
 public static string GetPinyin(string s, PinyinFormat format)
 {
     // Static convenience entry point: forwards to the shared IPinyin instance.
     return IPinyin.Instance.GetPinyin(s, format);
 }