Exemple #1
0
 /// <summary>
 /// Initializes a character entry from its character, type flags and pinyin readings.
 /// </summary>
 /// <param name="ch">The character this entry describes.</param>
 /// <param name="types">The type flags stored in <c>Types</c>.</param>
 /// <param name="pinyins">The readings stored in <c>Pinyins</c>; must not be null, because its length is read here.</param>
 public ChineseChar(char ch, ChineseTypes types, string[] pinyins)
 {
     this.Char    = ch;
     this.Pinyins = pinyins;

     // More than one reading marks the character as a polyphone.
     this.IsPolyphone = this.Pinyins.Length > 1;
     this.Types       = types;
 }
Exemple #2
0
        /// <summary>
        /// Looks up <paramref name="part"/> in the lexicon's word list.
        /// </summary>
        /// <param name="lexicon">The lexicon whose <c>Words</c> are searched.</param>
        /// <param name="chineseType">
        /// When exactly <c>ChineseTypes.Traditional</c>, the traditional form of each entry is compared;
        /// for any other value the simplified form is compared.
        /// </param>
        /// <param name="part">The candidate word.</param>
        /// <returns><paramref name="part"/> itself when an entry equals it; otherwise <c>null</c>.</returns>
        private static string Match(ChineseLexicon lexicon, ChineseTypes chineseType, string part)
        {
            bool found;
            if (chineseType == ChineseTypes.Traditional)
            {
                found = lexicon.Words.Any(entry => entry.Traditional == part);
            }
            else
            {
                found = lexicon.Words.Any(entry => entry.Simplified == part);
            }

            if (!found)
            {
                return null;
            }

            return part;
        }
Exemple #3
0
        /// <summary>
        /// Splits a string into words by matching against the current lexicon, scanning from the
        /// end of the text towards its start.
        /// </summary>
        /// <remarks>
        /// At each position the longest candidate ending there (at most the lexicon's
        /// <c>WordMaxLength</c> characters) is tried first, then progressively shorter ones.
        /// When no multi-character candidate is accepted by <c>Match</c>, the single character at
        /// that position becomes its own token. When the active scope has no lexicon, every
        /// character is returned as a separate token.
        /// </remarks>
        /// <param name="chineseType">Forwarded to <c>Match</c> to select which form of the lexicon entries is compared.</param>
        /// <param name="chinese">The text to split. A null or empty value yields an empty array.</param>
        /// <returns>The tokens in text order; concatenated they reproduce <paramref name="chinese"/>.</returns>
        public static string[] SplitWords(ChineseTypes chineseType, string chinese)
        {
            // Nothing to tokenize; previously a null input failed with a null dereference.
            if (string.IsNullOrEmpty(chinese))
            {
                return new string[0];
            }

            var scope   = LexiconScope.Current ?? LexiconScope.Default;
            var lexicon = scope.Lexicon;

            if (lexicon is null)
            {
                return chinese.Select(ch => ch.ToString()).ToArray();
            }

            // Clamp to at least one character: with a window of zero (or less) no candidate
            // would ever be examined and the whole text would silently be dropped.
            var window = Math.Max(1, lexicon.WordMaxLength);
            var tokens = new LinkedList<string>();

            // 'end' is the exclusive end index of the text that has not been tokenized yet.
            var end = chinese.Length;
            while (end > 0)
            {
                // Fallback token: the single character immediately before 'end'.
                var token = chinese.Substring(end - 1, 1);

                // Try the longest candidate first; single characters are never looked up.
                for (var len = Math.Min(window, end); len > 1; len--)
                {
                    var match = Match(lexicon, chineseType, chinese.Substring(end - len, len));
                    if (match != null)
                    {
                        token = match;
                        break;
                    }
                }

                // Scanning runs backwards, so prepend to keep the tokens in text order.
                tokens.AddFirst(token);
                end -= token.Length;
            }

            return tokens.ToArray();
        }
Exemple #4
0
 /// <summary>
 /// Gets the pinyin of a Chinese string by delegating to <c>Pinyin.GetString</c>.
 /// </summary>
 /// <param name="Chinese">The text to convert.</param>
 /// <param name="format">The pinyin output format.</param>
 /// <param name="chineseType">The Chinese type passed to the converter; defaults to <c>ChineseTypes.Simplified</c>.</param>
 /// <returns>Whatever <c>Pinyin.GetString</c> returns for the given arguments.</returns>
 public static string CHNPinYin(string Chinese, PinyinFormat format, ChineseTypes chineseType = ChineseTypes.Simplified)
     => Pinyin.GetString(chineseType, Chinese, format);
Exemple #5
0
        /// <summary>
        /// Gets the pinyin of a string for the given Chinese type(s).
        /// </summary>
        /// <remarks>
        /// The text is cut into words by <c>ChineseTokenizer.SplitWords</c> when the active scope has
        /// a lexicon, or into single characters otherwise. For each word the pinyin is looked up in
        /// this order: lexicon (simplified form), lexicon (traditional form), built-in character table
        /// (simplified reading), built-in character table (traditional reading); each step only runs
        /// when the corresponding flag is set in <paramref name="chineseTypes"/>. The built-in table
        /// is consulted using the first character of the word.
        /// Words with no pinyin are copied to the output unchanged. Converted words are separated by
        /// a single space, except in <c>PinyinFormat.InitialConsonant</c>, where only the first
        /// character of each pinyin is emitted and no separator is inserted. No space is inserted
        /// directly after an unconverted word.
        /// </remarks>
        /// <param name="chineseTypes">Flags selecting which forms/readings are looked up.</param>
        /// <param name="chinese">The text to convert. Null or white-space input is returned unchanged.</param>
        /// <param name="format">The pinyin output format.</param>
        /// <returns>The converted text.</returns>
        public static string GetString(ChineseTypes chineseTypes, string chinese, PinyinFormat format = PinyinFormat.Default)
        {
            // Guard before tokenizing: the tokenizer reads the text immediately, so a null
            // input must be filtered out here rather than after the split.
            if (chinese.IsNullOrWhiteSpace())
            {
                return chinese;
            }

            var scope   = LexiconScope.Current ?? LexiconScope.Default;
            var lexicon = scope.Lexicon;

            var useSimplified  = chineseTypes.HasFlag(ChineseTypes.Simplified);
            var useTraditional = chineseTypes.HasFlag(ChineseTypes.Traditional);

            // Looks up the pinyin of one word; returns null when nothing matches.
            string FindPinyin(string word)
            {
                string pinyin = null;

                // The lexicon is optional; without it only the built-in table is used.
                if (lexicon != null)
                {
                    if (useSimplified)
                    {
                        pinyin = lexicon.Words.FirstOrDefault(x => x.Simplified == word)?.SimplifiedPinyin;
                    }
                    if (pinyin is null && useTraditional)
                    {
                        pinyin = lexicon.Words.FirstOrDefault(x => x.Traditional == word)?.TraditionalPinyin;
                    }
                }

                if (pinyin is null && (useSimplified || useTraditional))
                {
                    // Both readings come from the same table entry, so scan the table once.
                    var first   = word[0];
                    var builtin = Builtin.ChineseChars.FirstOrDefault(x => x.Char == first);

                    if (useSimplified)
                    {
                        pinyin = builtin?.SimplifiedPinyin;
                    }
                    if (pinyin is null && useTraditional)
                    {
                        pinyin = builtin?.TraditionalPinyin;
                    }
                }

                return pinyin;
            }

            // Length of each piece to consume: one per token, or one per character without a lexicon.
            var steps = lexicon is null
                ? Enumerable.Repeat(1, chinese.Length)
                : ChineseTokenizer.SplitWords(chineseTypes, chinese).Select(x => x.Length);

            var sb          = new StringBuilder();
            var insertSpace = false;
            var ptext       = 0;

            foreach (var step in steps)
            {
                var word = chinese.Substring(ptext, step);
                ptext += step;

                // Remember where this word's output starts so a failure can be rolled back.
                var mark = sb.Length;
                try
                {
                    var pinyin = FindPinyin(word);
                    if (pinyin is null)
                    {
                        // No reading available: keep the original text and suppress the next separator.
                        sb.Append(word);
                        insertSpace = false;
                        continue;
                    }

                    if (insertSpace && format != PinyinFormat.InitialConsonant)
                    {
                        sb.Append(" ");
                    }

                    switch (format)
                    {
                        case PinyinFormat.Default:
                            sb.Append(pinyin);
                            break;

                        case PinyinFormat.WithoutTone:
                            sb.Append(GetPinyinWithoutTone(pinyin));
                            break;

                        case PinyinFormat.Phonetic:
                            sb.Append(GetPhoneticSymbol(pinyin));
                            break;

                        case PinyinFormat.InitialConsonant:
                            sb.Append(pinyin.First());
                            break;
                    }

                    insertSpace = true;
                }
                catch
                {
                    // Best-effort conversion: if lookup or formatting fails for this word,
                    // discard any partial output (e.g. a separator) and emit the word as-is.
                    sb.Length = mark;
                    sb.Append(word);
                    insertSpace = false;
                }
            }

            return sb.ToString();
        }