/// <summary>
/// Initializes a character entry from its code unit, script type(s) and pinyin readings.
/// </summary>
/// <param name="ch">The character this entry describes.</param>
/// <param name="types">The script type(s) stored on the entry.</param>
/// <param name="pinyins">The pinyin readings of the character; must not be null because its length is read here.</param>
public ChineseChar(char ch, ChineseTypes types, string[] pinyins)
{
    this.Char = ch;
    this.Types = types;
    this.Pinyins = pinyins;

    // More than one reading marks the character as a polyphone.
    this.IsPolyphone = this.Pinyins.Length > 1;
}
/// <summary>
/// Looks up <paramref name="part"/> among the lexicon's words.
/// </summary>
/// <param name="lexicon">Lexicon whose <c>Words</c> are searched.</param>
/// <param name="chineseType">
/// When exactly equal to <c>ChineseTypes.Traditional</c> the traditional form of each word is
/// compared; for every other value the simplified form is compared.
/// </param>
/// <param name="part">Candidate text to look up.</param>
/// <returns><paramref name="part"/> itself when a word matches; otherwise <c>null</c>.</returns>
private static string Match(ChineseLexicon lexicon, ChineseTypes chineseType, string part)
{
    bool found;

    if (chineseType == ChineseTypes.Traditional)
    {
        found = lexicon.Words.Any(entry => entry.Traditional == part);
    }
    else
    {
        found = lexicon.Words.Any(entry => entry.Simplified == part);
    }

    if (!found)
    {
        return null;
    }

    return part;
}
/// <summary>
/// Splits a Chinese string into words using the lexicon of the current scope.
/// </summary>
/// <remarks>
/// The text is scanned from the end towards the start. At each position the longest
/// candidate (bounded by the lexicon's <c>WordMaxLength</c>) is tried first and shortened
/// from the left until the lexicon contains it; a single character is always accepted.
/// When the scope has no lexicon, every character becomes its own segment.
/// </remarks>
/// <param name="chineseType">Script type passed to the lexicon lookup.</param>
/// <param name="chinese">Text to segment.</param>
/// <returns>
/// The segments in their original order; concatenated they reproduce <paramref name="chinese"/>.
/// An empty array is returned for null or empty input.
/// </returns>
public static string[] SplitWords(ChineseTypes chineseType, string chinese)
{
    if (string.IsNullOrEmpty(chinese))
    {
        return new string[0];
    }

    var scope = LexiconScope.Current ?? LexiconScope.Default;
    var lexicon = scope.Lexicon;
    if (lexicon is null)
    {
        return chinese.Select(ch => ch.ToString()).ToArray();
    }

    // Clamp the window to at least one character: a lexicon reporting a maximum word
    // length of zero (or less) must still yield single-character segments rather than
    // dropping the whole input.
    var maxWordLength = Math.Max(1, Math.Min(chinese.Length, lexicon.WordMaxLength));

    var words = new LinkedList<string>();

    // Exclusive end of the part of the text that has not been segmented yet.
    var end = chinese.Length;
    while (end > 0)
    {
        string token = null;

        // Longest candidate first; only candidates of two or more characters need a lookup.
        for (var start = Math.Max(0, end - maxWordLength); start < end - 1; start++)
        {
            token = Match(lexicon, chineseType, chinese.Substring(start, end - start));
            if (token != null)
            {
                break;
            }
        }

        if (token is null)
        {
            // Nothing longer matched: fall back to the single trailing character.
            token = chinese.Substring(end - 1, 1);
        }

        // Segments are discovered back to front, so prepend to keep reading order.
        words.AddFirst(token);
        end -= token.Length;
    }

    return words.ToArray();
}
/// <summary>
/// Gets the pinyin of a Chinese string by forwarding to <c>Pinyin.GetString</c>.
/// </summary>
/// <param name="Chinese">Text to convert.</param>
/// <param name="format">Pinyin output format.</param>
/// <param name="chineseType">Script type of the text; defaults to <c>ChineseTypes.Simplified</c>.</param>
/// <returns>The value returned by <c>Pinyin.GetString</c> for the given arguments.</returns>
public static string CHNPinYin(string Chinese, PinyinFormat format, ChineseTypes chineseType = ChineseTypes.Simplified)
    => Pinyin.GetString(chineseType, Chinese, format);
/// <summary>
/// Gets the pinyin of a string of the given script type(s).
/// </summary>
/// <remarks>
/// The text is segmented with <c>ChineseTokenizer.SplitWords</c> when the current scope has a
/// lexicon, otherwise one character at a time. Each segment is looked up in the lexicon first
/// (if any) and then in the builtin character table, trying the simplified reading before the
/// traditional one. Segments without a known reading are copied to the output unchanged.
/// </remarks>
/// <param name="chineseTypes">Script type(s) to look up, tested with <c>HasFlag</c>.</param>
/// <param name="chinese">Text to convert.</param>
/// <param name="format">Output format applied to each reading.</param>
/// <returns>
/// The converted text. Readings are separated by a single space except in
/// <c>PinyinFormat.InitialConsonant</c> format. Null, empty or whitespace-only input is
/// returned as-is.
/// </returns>
public static string GetString(ChineseTypes chineseTypes, string chinese, PinyinFormat format = PinyinFormat.Default)
{
    // Guard before touching the tokenizer, which dereferences the text.
    if (chinese.IsNullOrWhiteSpace())
    {
        return chinese;
    }

    var scope = LexiconScope.Current ?? LexiconScope.Default;
    var lexicon = scope.Lexicon;

    // Length of each segment to consume, in reading order.
    var steps = lexicon is null
        ? Enumerable.Repeat(1, chinese.Length)
        : ChineseTokenizer.SplitWords(chineseTypes, chinese).Select(x => x.Length);

    // Resolves the reading of one segment, or null when none is known.
    // The lexicon is optional, so it is accessed null-conditionally and the builtin
    // table remains reachable when no lexicon is configured.
    string FindPinyin(string word)
    {
        var useSimplified = chineseTypes.HasFlag(ChineseTypes.Simplified);
        var useTraditional = chineseTypes.HasFlag(ChineseTypes.Traditional);

        string pinyin = null;
        if (useSimplified)
        {
            pinyin = lexicon?.Words.FirstOrDefault(x => x.Simplified == word)?.SimplifiedPinyin;
        }

        if (pinyin is null && useTraditional)
        {
            pinyin = lexicon?.Words.FirstOrDefault(x => x.Traditional == word)?.TraditionalPinyin;
        }

        // The builtin table is keyed by single characters; only the first one is looked up.
        if (pinyin is null && useSimplified)
        {
            pinyin = Builtin.ChineseChars.FirstOrDefault(x => x.Char == word[0])?.SimplifiedPinyin;
        }

        if (pinyin is null && useTraditional)
        {
            pinyin = Builtin.ChineseChars.FirstOrDefault(x => x.Char == word[0])?.TraditionalPinyin;
        }

        return pinyin;
    }

    var sb = new StringBuilder();
    var insertSpace = false;
    var ptext = 0;
    foreach (var step in steps)
    {
        var word = chinese.Substring(ptext, step);
        ptext += step;

        // Remember where this segment starts so a failed conversion can be rolled back.
        var mark = sb.Length;
        try
        {
            var pinyin = FindPinyin(word);
            if (pinyin is null)
            {
                // Unknown segment: pass it through and do not put a space after it.
                sb.Append(word);
                insertSpace = false;
                continue;
            }

            if (insertSpace && format != PinyinFormat.InitialConsonant)
            {
                sb.Append(" ");
            }

            switch (format)
            {
                case PinyinFormat.Default:
                    sb.Append(pinyin);
                    break;

                case PinyinFormat.WithoutTone:
                    sb.Append(GetPinyinWithoutTone(pinyin));
                    break;

                case PinyinFormat.Phonetic:
                    sb.Append(GetPhoneticSymbol(pinyin));
                    break;

                case PinyinFormat.InitialConsonant:
                    sb.Append(pinyin.First());
                    break;
            }

            insertSpace = true;
        }
        catch
        {
            // Best effort: if lookup or formatting fails, discard any partial output
            // for this segment and emit the original text instead.
            sb.Length = mark;
            sb.Append(word);
            insertSpace = false;
        }
    }

    return sb.ToString();
}