/// <summary>
/// Converts the text in the <c>hanzi</c> request parameter to pinyin and writes the
/// result to the response as UTF-8 plain text.
/// </summary>
/// <remarks>
/// Request parameters read by this handler:
/// <c>hanzi</c> (text to convert; nothing is converted when it is null or empty),
/// <c>multi</c> (<c>first</c> = only the first reading of a character, any other value =
/// all readings; treated as <c>first</c> when the parameter is absent), and
/// <c>caseType</c>, <c>toneType</c>, <c>vType</c> (passed to the output format).
/// If any exception is thrown during conversion, its message is written instead.
/// </remarks>
/// <param name="context">The HTTP context supplying the request and the response.</param>
public void ProcessRequest(HttpContext context)
{
    HttpRequest req = context.Request;
    HttpResponse res = context.Response;

    // Force UTF-8 so the response text is not garbled.
    res.Charset = "UTF-8";
    res.ContentEncoding = System.Text.Encoding.UTF8;
    res.ContentType = "text/plain";

    string pinyin = string.Empty;
    try
    {
        string hanzi = req["hanzi"];

        // Controls how characters with several readings are returned:
        // "first" = first reading only, "all" = every reading. Defaults to "first".
        string multi = req["multi"];

        // A missing parameter used to raise a NullReferenceException on multi.Equals(...),
        // which replaced the whole response with the exception message.
        bool firstOnly = multi == null
            || multi.Equals("first", StringComparison.OrdinalIgnoreCase);

        // Only convert when there is something to convert.
        if (!string.IsNullOrEmpty(hanzi))
        {
            // Build the output format from the client-supplied settings.
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
            format.setCaseType(new HanyuPinyinCaseType(req["caseType"]));
            format.setToneType(new HanyuPinyinToneType(req["toneType"]));
            format.setVCharType(new HanyuPinyinVCharType(req["vType"]));

            System.Text.StringBuilder buffer = new System.Text.StringBuilder();
            foreach (char ch in hanzi)
            {
                if (!Util.IsHanzi(ch))
                {
                    // Not a Chinese character: copy it through unchanged.
                    buffer.Append(ch);
                    continue;
                }

                string[] py = PinyinHelper.toHanyuPinyinStringArray(ch, format);
                if (py == null || py.Length == 0)
                {
                    // No reading available for this character: keep the character itself
                    // rather than failing the whole request.
                    buffer.Append(ch);
                }
                else if (firstOnly || py.Length == 1)
                {
                    // A space separates syllables. For characters with several readings the
                    // first one is not necessarily the right one in context.
                    buffer.Append(py[0]).Append(' ');
                }
                else
                {
                    buffer.Append('(').Append(string.Join(",", py)).Append(") ");
                }
            }

            pinyin = buffer.ToString();
        }
    }
    catch (Exception ex)
    {
        pinyin = ex.Message;
    }

    res.Write(pinyin);
}
/// <summary>
/// Replaces Chinese characters with pinyin; non-Chinese characters are not modified.
/// Because there is no word dictionary, every possible pinyin combination is returned,
/// e.g. 音乐 returns yinyue and yinle.
/// </summary>
/// <param name="characters">A word or sentence rather than a single character, e.g. 微软.</param>
/// <returns>
/// The cached or newly computed combinations. Each inner array is produced by splitting one
/// aggregated combination on ';'. Returns the shared empty array when pinyin is disabled in
/// the settings or <paramref name="characters"/> is null or empty.
/// </returns>
public string[][] PinyinCombination(string characters)
{
    if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(characters))
    {
        return _empty2DStringArray;
    }

    // Single lookup instead of ContainsKey followed by the indexer.
    string[][] cached;
    if (_pinyinCache.TryGetValue(characters, out cached))
    {
        return cached;
    }

    var allPinyins = new List<string[]>();
    foreach (var c in characters)
    {
        var pinyins = PinyinHelper.toHanyuPinyinStringArray(c, _pinyinFormat);

        // A null result means there is no pinyin for this character: keep the character itself.
        allPinyins.Add(pinyins != null
            ? pinyins.Distinct().ToArray()
            : new[] { c.ToString() });
    }

    var combination = allPinyins
        .Aggregate(Combination)
        .Select(joined => joined.Split(';'))
        .ToArray();

    _pinyinCache[characters] = combination;
    return combination;
}
/// <summary>
/// Replaces Chinese characters with pinyin; non-Chinese characters are not modified.
/// Only the first reading of each character is used.
/// </summary>
/// <param name="word">A word or sentence rather than a single character, e.g. 微软.</param>
/// <returns>
/// One entry per character of <paramref name="word"/>; an empty array when
/// <paramref name="word"/> is null or empty.
/// </returns>
public static string[] Pinyin(string word)
{
    // Select would throw ArgumentNullException on a null source.
    if (string.IsNullOrEmpty(word))
    {
        return new string[0];
    }

    return word.Select(c =>
    {
        string[] pinyins = PinyinHelper.toHanyuPinyinStringArray(c);

        // Fall back to the character itself when there is no reading; the length check
        // prevents an IndexOutOfRangeException on an empty result.
        return pinyins == null || pinyins.Length == 0 ? c.ToString() : pinyins[0];
    }).ToArray();
}
/// <summary>
/// Replaces Chinese characters with pinyin; non-Chinese characters are not modified.
/// Because there is no word dictionary, every possible pinyin combination is returned,
/// e.g. 音乐 returns yinyue and yinle.
/// </summary>
/// <param name="word">A word or sentence rather than a single character, e.g. 微软.</param>
/// <returns>
/// The combinations, each obtained by splitting one aggregated combination on ';'.
/// An empty array when <paramref name="word"/> is null or empty.
/// </returns>
public static string[][] PinyinComination(string word)
{
    // Aggregate without a seed throws InvalidOperationException on an empty sequence,
    // and Select throws ArgumentNullException on null, so handle both up front.
    if (string.IsNullOrEmpty(word))
    {
        return new string[0][];
    }

    return word
        // A null result means there is no pinyin for the character: keep the character itself.
        .Select(c => PinyinHelper.toHanyuPinyinStringArray(c) ?? new[] { c.ToString() })
        .Aggregate(Combination)
        .Select(joined => joined.Split(';'))
        .ToArray();
}
/// <summary>
/// Configures the shared pinyin format to omit tones, loads the pinyin cache from the
/// "Pinyin" binary storage, and makes one conversion call so the pinyin library is
/// initialized up front. The load and the warm-up call are timed via Stopwatch.Normal.
/// </summary>
public static void Initialize()
{
    Format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    Stopwatch.Normal("|Wox.Infrastructure.Alphabet.Initialize|Preload pinyin cache", () =>
    {
        _pinyinStorage = new BinaryStorage<ConcurrentDictionary<string, string>>("Pinyin");

        // The empty dictionary is the default handed to TryLoad
        // (presumably returned when nothing can be loaded; confirm in BinaryStorage).
        PinyinCache = _pinyinStorage.TryLoad(new ConcurrentDictionary<string, string>());

        // force pinyin library static constructor initialize
        PinyinHelper.toHanyuPinyinStringArray('T', Format);
    });

    // The prefix previously read "Init ialize" (stray space); it now matches the
    // prefix used for the stopwatch message above.
    Log.Info($"|Wox.Infrastructure.Alphabet.Initialize|Number of preload pinyin combination<{PinyinCache.Count}>");
}
/// <summary>
/// Sets the pinyin format to omit tones, loads the cache from the "Pinyin" binary storage
/// into this instance, and makes one conversion call to initialize the pinyin library.
/// Logs the number of cached entries afterwards.
/// </summary>
private void InitializePinyinHelpers()
{
    _pinyinFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    Stopwatch.Normal("|Wox.Infrastructure.Alphabet.Initialize|Preload pinyin cache", () =>
    {
        var fallback = new Dictionary<string, string[][]>();
        _pinyinStorage = new BinaryStorage<Dictionary<string, string[][]>>("Pinyin");

        var persisted = _pinyinStorage.TryLoad(fallback);
        SetPinyinCacheAsDictionary(persisted);

        // Touch the library once so its static constructor runs inside the timed section.
        PinyinHelper.toHanyuPinyinStringArray('T', _pinyinFormat);
    });

    var preloadedCount = _pinyinCache.Count;
    Log.Info($"Number of preload pinyin combination<{preloadedCount}>", GetType());
}
/// <summary>
/// Sets the pinyin format to omit tones, loads the cache from the "Pinyin" binary storage,
/// and makes one conversion call to initialize the pinyin library.
/// Logs the number of cached entries afterwards.
/// </summary>
private void InitializePinyinHelpers()
{
    Format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    Logger.StopWatchNormal("Preload pinyin cache", () =>
    {
        var fallback = new ConcurrentDictionary<string, string[][]>();
        _pinyinStorage = new BinaryStorage<ConcurrentDictionary<string, string[][]>>("Pinyin");
        PinyinCache = _pinyinStorage.TryLoad(fallback);

        // Touch the library once so its static constructor runs inside the timed section.
        PinyinHelper.toHanyuPinyinStringArray('T', Format);
    });

    var preloadedCount = PinyinCache.Count;
    Logger.WoxInfo($"Number of preload pinyin combination<{preloadedCount}>");
}
/// <summary>
/// Replaces each Chinese character of <paramref name="word"/> with its first pinyin
/// reading; characters without a reading are kept unchanged.
/// </summary>
/// <param name="word">The text to convert.</param>
/// <returns>
/// One entry per character of <paramref name="word"/>. The shared empty array when pinyin
/// is disabled in the settings or <paramref name="word"/> is null or empty.
/// </returns>
public string[] Pinyin(string word)
{
    // Select would throw ArgumentNullException on a null source.
    if (!_settings.ShouldUsePinyin || string.IsNullOrEmpty(word))
    {
        return _emptyStringArray;
    }

    return word.Select(c =>
    {
        var pinyins = PinyinHelper.toHanyuPinyinStringArray(c);

        // The length check prevents an IndexOutOfRangeException on an empty result.
        return pinyins == null || pinyins.Length == 0 ? c.ToString() : pinyins[0];
    }).ToArray();
}
/// <summary>
/// Sets the pinyin format to omit tones, loads the cache from the "Pinyin" binary storage
/// while holding a lock on the storage object, copies it into a concurrent dictionary, and
/// makes one conversion call to initialize the pinyin library.
/// Logs the number of cached entries afterwards.
/// </summary>
private void InitializePinyinHelpers()
{
    Format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    Stopwatch.Normal("|Flow Launcher.Infrastructure.Alphabet.Initialize|Preload pinyin cache", () =>
    {
        var fallback = new Dictionary<string, string[][]>();
        _pinyinStorage = new BinaryStorage<Dictionary<string, string[][]>>("Pinyin");

        lock (_pinyinStorage)
        {
            // The storage holds a plain Dictionary; the cache is a concurrent copy of it.
            PinyinCache = new ConcurrentDictionary<string, string[][]>(_pinyinStorage.TryLoad(fallback));
        }

        // Touch the library once so its static constructor runs inside the timed section.
        PinyinHelper.toHanyuPinyinStringArray('T', Format);
    });

    var preloadedCount = PinyinCache.Count;
    Log.Info($"|Flow Launcher.Infrastructure.Alphabet.Initialize|Number of preload pinyin combination<{preloadedCount}>");
}
/// <summary>
/// Replaces each Chinese character with the title-cased first pinyin reading and
/// concatenates the results; characters without a reading are kept unchanged.
/// Results are cached per input string.
/// </summary>
/// <param name="characters">A word or sentence rather than a single character, e.g. 微软.</param>
/// <returns>
/// The concatenated pinyin string, or an empty string when
/// <paramref name="characters"/> is null or empty.
/// </returns>
public static string PinyinComination(string characters)
{
    if (string.IsNullOrEmpty(characters))
    {
        return "";
    }

    // Single lookup instead of ContainsKey followed by the indexer.
    string cached;
    if (PinyinCache.TryGetValue(characters, out cached))
    {
        return cached;
    }

    // The result is cached data, not UI text, so casing must not depend on the
    // culture of whichever thread happens to compute it.
    System.Globalization.TextInfo textInfo = System.Globalization.CultureInfo.InvariantCulture.TextInfo;

    List<string> allPinyins = new List<string>();
    foreach (char c in characters)
    {
        string[] pinyins = PinyinHelper.toHanyuPinyinStringArray(c, Format);
        if (pinyins != null && pinyins.Length > 0)
        {
            allPinyins.Add(textInfo.ToTitleCase(pinyins[0]));
        }
        else
        {
            // No reading available: keep the original character.
            allPinyins.Add(c.ToString());
        }
    }

    string combination = string.Join("", allPinyins);
    PinyinCache[characters] = combination;
    return combination;
}
/// <summary>
/// Sets the pinyin format to omit tones and then makes a single conversion call
/// whose result is discarded.
/// </summary>
private void InitializePinyinHelpers()
{
    // Result is ignored; presumably a warm-up call that forces the pinyin library to initialize.
    const char warmUpCharacter = 'T';

    Format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    PinyinHelper.toHanyuPinyinStringArray(warmUpCharacter, Format);
}
/// <summary>
/// Checks the first pinyin reading of '吕' for each permitted combination of case type,
/// ü representation and tone type.
/// </summary>
public void testOutputCombination()
{
    try
    {
        HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();

        // Fix the case type to lowercase first, then vary VChar and tone.
        outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);

        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_U_AND_COLON, HanyuPinyinToneType.WITH_TONE_NUMBER, "lu:3");
        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_V, HanyuPinyinToneType.WITH_TONE_NUMBER, "lv3");
        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITH_TONE_NUMBER, "lü3");

        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_U_AND_COLON, HanyuPinyinToneType.WITHOUT_TONE, "lu:");
        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_V, HanyuPinyinToneType.WITHOUT_TONE, "lv");
        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITHOUT_TONE, "lü");

        // WITH_U_AND_COLON and WITH_V are forbidden together with WITH_TONE_MARK,
        // so only WITH_U_UNICODE is exercised with tone marks.
        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITH_TONE_MARK, "lǚ");

        // Fix the case type to UPPERCASE, then vary VChar and tone.
        outputFormat.setCaseType(HanyuPinyinCaseType.UPPERCASE);

        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_U_AND_COLON, HanyuPinyinToneType.WITH_TONE_NUMBER, "LU:3");
        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_V, HanyuPinyinToneType.WITH_TONE_NUMBER, "LV3");
        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITH_TONE_NUMBER, "LÜ3");

        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_U_AND_COLON, HanyuPinyinToneType.WITHOUT_TONE, "LU:");
        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_V, HanyuPinyinToneType.WITHOUT_TONE, "LV");
        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITHOUT_TONE, "LÜ");

        // Same restriction as above: tone marks only with WITH_U_UNICODE.
        AssertLvPinyin(outputFormat, HanyuPinyinVCharType.WITH_U_UNICODE, HanyuPinyinToneType.WITH_TONE_MARK, "LǙ");
    }
    catch (BadHanyuPinyinOutputFormatCombination e)
    {
        Util.Log(e);

        // Rethrow: only logging here would let the test pass with the remaining
        // assertions silently skipped.
        throw;
    }
}

/// <summary>
/// Applies the given ü representation and tone type to <paramref name="format"/> and asserts
/// that the first pinyin reading of '吕' equals <paramref name="expected"/>.
/// </summary>
private static void AssertLvPinyin(
    HanyuPinyinOutputFormat format,
    HanyuPinyinVCharType vCharType,
    HanyuPinyinToneType toneType,
    string expected)
{
    format.setVCharType(vCharType);
    format.setToneType(toneType);
    Assert.AreEqual(expected, PinyinHelper.toHanyuPinyinStringArray('吕', format)[0]);
}
/// <summary>
/// Checks that non-Chinese characters yield null, and that Chinese characters with one or
/// several readings yield the expected pinyin arrays under various output formats.
/// </summary>
public void testToHanyuPinyinStringArray()
{
    try
    {
        // Any input of non-Chinese characters returns null.
        HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
        Assert.IsNull(PinyinHelper.toHanyuPinyinStringArray('A', defaultFormat));
        Assert.IsNull(PinyinHelper.toHanyuPinyinStringArray('z', defaultFormat));
        Assert.IsNull(PinyinHelper.toHanyuPinyinStringArray(',', defaultFormat));
        Assert.IsNull(PinyinHelper.toHanyuPinyinStringArray('。', defaultFormat));

        // Chinese characters, single pronunciation.
        AssertPinyinArray('李', new HanyuPinyinOutputFormat(), "li3");

        HanyuPinyinOutputFormat upperCaseFormat = new HanyuPinyinOutputFormat();
        upperCaseFormat.setCaseType(HanyuPinyinCaseType.UPPERCASE);
        AssertPinyinArray('李', upperCaseFormat, "LI3");

        AssertPinyinArray('吕', new HanyuPinyinOutputFormat(), "lu:3");

        HanyuPinyinOutputFormat vCharFormat = new HanyuPinyinOutputFormat();
        vCharFormat.setVCharType(HanyuPinyinVCharType.WITH_V);
        AssertPinyinArray('吕', vCharFormat, "lv3");

        // Chinese characters, multiple pronunciations.
        AssertPinyinArray('间', new HanyuPinyinOutputFormat(), "jian1", "jian4");
        AssertPinyinArray('好', new HanyuPinyinOutputFormat(), "hao3", "hao4");
    }
    catch (BadHanyuPinyinOutputFormatCombination e)
    {
        Util.Log(e);

        // Rethrow: only logging here would let the test pass with the remaining
        // assertions silently skipped.
        throw;
    }
}

/// <summary>
/// Converts <paramref name="ch"/> with <paramref name="format"/> and asserts that the result
/// has the same length and the same elements, in order, as <paramref name="expected"/>.
/// </summary>
private static void AssertPinyinArray(char ch, HanyuPinyinOutputFormat format, params string[] expected)
{
    string[] actual = PinyinHelper.toHanyuPinyinStringArray(ch, format);

    Assert.AreEqual(expected.Length, actual.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i]);
    }
}