Пример #1
0
        /// <summary>
        /// Builds a copy of <paramref name="str"/> in which every character that has a
        /// Hanyu Pinyin reading is replaced by its first (main) reading.
        /// </summary>
        /// <remarks>
        /// Deprecated according to the original documentation: the first reading may be the
        /// wrong pronunciation in a given sentence context, and the method is slated for
        /// removal. The original note also states that a result containing "none0" marks a
        /// character that is in the Unicode CJK table but has no Chinese pronunciation.
        /// </remarks>
        /// <param name="str">The text to convert.</param>
        /// <param name="outputFormat">The desired format of each returned pinyin string.</param>
        /// <param name="seperater">
        /// Text appended after every converted character unless that character sits at the
        /// last index of <paramref name="str"/>. It is never appended after a character
        /// that was copied through unchanged.
        /// </param>
        /// <returns>The converted text.</returns>
        public static String toHanyuPinyinString(String str,
                                                 HanyuPinyinOutputFormat outputFormat, String seperater)
        {
            StringBuilder buffer = new StringBuilder();
            int lastIndex = str.Length - 1;

            for (int pos = 0; pos <= lastIndex; pos++)
            {
                char current = str[pos];
                String firstReading = getFirstHanyuPinyinString(current, outputFormat);

                if (null == firstReading)
                {
                    // No reading available: keep the character as it is.
                    buffer.Append(current);
                    continue;
                }

                buffer.Append(firstReading);

                // Do not leave a dangling separator at the very end of the result.
                if (pos < lastIndex)
                {
                    buffer.Append(seperater);
                }
            }

            return buffer.ToString();
        }
Пример #2
0
 // Type initializer: builds the shared output format (no tone, 'v' for u-umlaut, lower case).
 static StringExtensions()
 {
     format = new HanyuPinyinOutputFormat
     {
         ToneType  = HanyuPinyinToneType.WITHOUT_TONE,
         VCharType = HanyuPinyinVCharType.WITH_V,
         CaseType  = HanyuPinyinCaseType.LOWERCASE
     };
 }
Пример #3
0
        /// <summary>
        /// Converts the "hanzi" request value to pinyin and writes the result to the
        /// response as UTF-8 plain text.
        /// </summary>
        /// <remarks>
        /// Request values read: "hanzi" (text to convert), "multi" ("first" or "all"; a
        /// missing or empty value is treated as "first"), and "caseType", "toneType" and
        /// "vType" (passed to the output-format type constructors). If anything throws,
        /// the exception message is written instead of the conversion result.
        /// </remarks>
        /// <param name="context">The current HTTP context.</param>
        public void ProcessRequest(HttpContext context)
        {
            HttpRequest  req = context.Request;
            HttpResponse res = context.Response;

            // Force UTF-8 so the output is not garbled on the client.
            res.Charset         = "UTF-8";
            res.ContentEncoding = System.Text.Encoding.UTF8;
            res.ContentType     = "text/plain";

            string output;

            try
            {
                System.Text.StringBuilder pinyin = new System.Text.StringBuilder();

                string hanzi = req["hanzi"];
                // Controls how characters with several readings are rendered:
                // "first" emits only the first reading, anything else emits all of them.
                string multi = req["multi"];

                // A missing "multi" value used to throw NullReferenceException on
                // multi.Equals(...); fall back to the intended default, "first".
                bool firstOnly = string.IsNullOrEmpty(multi) ||
                                 multi.Equals("first", StringComparison.OrdinalIgnoreCase);

                // Only do any work when there is something to convert.
                if (!string.IsNullOrEmpty(hanzi))
                {
                    // Output format requested by the client.
                    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

                    format.setCaseType(new HanyuPinyinCaseType(req["caseType"]));
                    format.setToneType(new HanyuPinyinToneType(req["toneType"]));
                    format.setVCharType(new HanyuPinyinVCharType(req["vType"]));

                    foreach (char ch in hanzi)
                    {
                        string[] py = Util.IsHanzi(ch)
                            ? PinyinHelper.toHanyuPinyinStringArray(ch, format)
                            : null;

                        if (py == null || py.Length == 0)
                        {
                            // Not a Chinese character, or no reading available: copy it through.
                            pinyin.Append(ch);
                        }
                        else if (firstOnly || py.Length == 1)
                        {
                            // Readings are separated by one space; for characters with several
                            // readings the first one may not be the right one in context.
                            pinyin.Append(py[0]).Append(' ');
                        }
                        else
                        {
                            pinyin.Append('(').Append(string.Join(",", py)).Append(") ");
                        }
                    }
                }

                output = pinyin.ToString();
            }
            catch (Exception ex)
            {
                output = ex.Message;
            }

            res.Write(output);
        }
Пример #4
0
        // Returns the first pinyin reading of ch when only the V-char type is customised.
        public string TestVCharType(char ch, HanyuPinyinVCharType vcharType)
        {
            var format = new HanyuPinyinOutputFormat { VCharType = vcharType };
            string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);

            return readings[0];
        }
Пример #5
0
        // Asserts that requesting tone marks together with the supplied V-char type makes
        // the conversion throw InvalidHanyuPinyinFormatException.
        public void TestToneMarkWithUAndColon(char ch, HanyuPinyinVCharType vcharType)
        {
            var format = new HanyuPinyinOutputFormat
            {
                ToneType  = HanyuPinyinToneType.WITH_TONE_MARK,
                VCharType = vcharType
            };

            Assert.Throws<InvalidHanyuPinyinFormatException>(
                () => PinyinHelper.ToHanyuPinyinStringArray(ch, format));
        }
Пример #6
0
        // Returns the first reading of ch rendered with tone marks and the Unicode u-umlaut.
        public string TestToneMark(char ch)
        {
            var format = new HanyuPinyinOutputFormat
            {
                ToneType  = HanyuPinyinToneType.WITH_TONE_MARK,
                VCharType = HanyuPinyinVCharType.WITH_U_UNICODE
            };
            string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);

            return readings[0];
        }
Пример #7
0
        // Runs the conversion with tone marks plus the supplied V-char type and discards
        // the result; the method itself asserts nothing.
        public void TestToneMarkWithUAndColon(char ch, HanyuPinyinVCharType vcharType)
        {
            var format = new HanyuPinyinOutputFormat
            {
                ToneType  = HanyuPinyinToneType.WITH_TONE_MARK,
                VCharType = vcharType
            };

            PinyinHelper.ToHanyuPinyinStringArray(ch, format);
        }
Пример #8
0
        // Asserts that requesting tone marks together with the supplied V-char type makes
        // the conversion throw InvalidHanyuPinyinFormatException.
        public void TestToneMarkWithUAndColon(char ch, HanyuPinyinVCharType vcharType)
        {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

            format.ToneType  = HanyuPinyinToneType.WithToneMark;
            format.VCharType = vcharType;

            Assert.Throws<InvalidHanyuPinyinFormatException>(
                () => PinyinHelper.ToHanyuPinyinStringArray(ch, format));
        }
Пример #9
0
        // Returns the first reading of ch rendered with tone marks and the Unicode u-umlaut.
        public string TestToneMark(char ch)
        {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

            format.ToneType  = HanyuPinyinToneType.WithToneMark;
            format.VCharType = HanyuPinyinVCharType.WithUUnicode;

            string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
            return readings[0];
        }
Пример #10
0
        // Asserts that the first reading of ch, with the given V-char type, equals result.
        public void TestVCharType(char ch, HanyuPinyinVCharType vcharType, string result)
        {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
            format.VCharType = vcharType;

            string actual = PinyinHelper.ToHanyuPinyinStringArray(ch, format)[0];

            Assert.Equal(result, actual);
        }
Пример #11
0
        // Returns the first reading of ch in upper case with the given V-char type.
        public string TestCaseType(char ch, HanyuPinyinVCharType vcharType)
        {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

            format.CaseType  = HanyuPinyinCaseType.Uppercase;
            format.VCharType = vcharType;

            string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
            return readings[0];
        }
Пример #12
0
        // Asserts that the first tone-marked reading of ch (Unicode u-umlaut) equals result.
        public void TestToneMark(char ch, string result)
        {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

            format.ToneType  = HanyuPinyinToneType.WITH_TONE_MARK;
            format.VCharType = HanyuPinyinVCharType.WITH_U_UNICODE;

            string actual = PinyinHelper.ToHanyuPinyinStringArray(ch, format)[0];

            Assert.Equal(result, actual);
        }
Пример #13
0
        // Returns the first reading of ch with tone numbers, using the given V-char and case types.
        public string TestWithToneNumber(
            char ch, HanyuPinyinVCharType vcharType, HanyuPinyinCaseType caseType)
        {
            var format = new HanyuPinyinOutputFormat
            {
                ToneType  = HanyuPinyinToneType.WITH_TONE_NUMBER,
                VCharType = vcharType,
                CaseType  = caseType
            };
            string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);

            return readings[0];
        }
Пример #14
0
        // Returns the first reading of ch with tone numbers, using the given V-char and case types.
        public string TestWithToneNumber(
            char ch, HanyuPinyinVCharType vcharType, HanyuPinyinCaseType caseType)
        {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

            format.ToneType  = HanyuPinyinToneType.WithToneNumber;
            format.VCharType = vcharType;
            format.CaseType  = caseType;

            string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
            return readings[0];
        }
Пример #15
0
        // Returns every reading produced for ch under the given tone, V-char and case types.
        public string[] TestCharWithMultiplePronouciations(
            char ch, HanyuPinyinToneType toneType,
            HanyuPinyinVCharType vcharType, HanyuPinyinCaseType caseType)
        {
            var format = new HanyuPinyinOutputFormat
            {
                ToneType  = toneType,
                VCharType = vcharType,
                CaseType  = caseType
            };

            string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
            return readings;
        }
Пример #16
0
        // Asserts that the first tone-numbered reading of ch, with the given V-char and
        // case types, equals result.
        public void TestWithToneNumber(char ch, HanyuPinyinVCharType vcharType, HanyuPinyinCaseType caseType, string result)
        {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

            format.ToneType  = HanyuPinyinToneType.WITH_TONE_NUMBER;
            format.VCharType = vcharType;
            format.CaseType  = caseType;

            string actual = PinyinHelper.ToHanyuPinyinStringArray(ch, format)[0];

            Assert.Equal(result, actual);
        }
Пример #17
0
        // Returns every reading produced for ch under the given tone, V-char and case types.
        public string[] TestCharWithMultiplePronunciations(
            char ch, HanyuPinyinToneType toneType,
            HanyuPinyinVCharType vcharType, HanyuPinyinCaseType caseType)
        {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

            format.ToneType  = toneType;
            format.VCharType = vcharType;
            format.CaseType  = caseType;

            string[] readings = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
            return readings;
        }
Пример #18
0
        // Asserts that the readings produced for ch under the given format settings match
        // result element by element, in order.
        public void TestCharWithMultiplePronouciations(
            char ch, HanyuPinyinToneType toneType, HanyuPinyinVCharType vcharType,
            HanyuPinyinCaseType caseType, string[] result)
        {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

            format.ToneType  = toneType;
            format.VCharType = vcharType;
            format.CaseType  = caseType;

            string[] actual = PinyinHelper.ToHanyuPinyinStringArray(ch, format);
            bool sameSequence = result.SequenceEqual(actual);

            Assert.True(sameSequence);
        }
Пример #19
0
        /// <summary>
        /// Looks up the unformatted readings of <paramref name="ch"/> and rewrites each
        /// entry of that array, in place, according to <paramref name="format"/>.
        /// </summary>
        /// <param name="ch">The character to look up.</param>
        /// <param name="format">The output format passed to the formatter.</param>
        /// <returns>
        /// The array obtained from the lookup with every element formatted, or null when
        /// the lookup returns null.
        /// </returns>
        private static string[] GetFomattedHanyuPinyinStringArray(
            char ch, HanyuPinyinOutputFormat format)
        {
            string[] readings = GetUnformattedHanyuPinyinStringArray(ch);

            if (readings == null)
            {
                return null;
            }

            // The array from the lookup is reused: each slot is overwritten with its formatted form.
            for (int index = 0; index < readings.Length; index++)
            {
                string raw = readings[index];
                readings[index] = PinyinFormatter.FormatHanyuPinyin(raw, format);
            }

            return readings;
        }
Пример #20
0
        /// <summary>
        /// Returns the first formatted Hanyu Pinyin reading of <paramref name="ch"/>.
        /// </summary>
        /// <remarks>
        /// Deprecated according to the original documentation: the first reading may be the
        /// wrong pronunciation in a given sentence context, and the function is slated for
        /// removal in a later release.
        /// </remarks>
        /// <param name="ch">The character to look up.</param>
        /// <param name="outputFormat">The desired format of the returned pinyin string.</param>
        /// <returns>
        /// The first reading, or null when the lookup yields null or an empty array
        /// (documented by the original as the non-Chinese-character case).
        /// </returns>
        private static String getFirstHanyuPinyinString(char ch,
                                                        HanyuPinyinOutputFormat outputFormat)
        {
            String[] readings = getFormattedHanyuPinyinStringArray(ch, outputFormat);
            bool hasReading = (readings != null) && (readings.Length > 0);

            return hasReading ? readings[0] : null;
        }
Пример #21
0
        /// <summary>
        /// Looks up the unformatted Hanyu Pinyin readings of <paramref name="ch"/> and
        /// formats each entry of that array, in place, according to
        /// <paramref name="outputFormat"/>.
        /// </summary>
        /// <param name="ch">The given Chinese character (Simplified or Traditional).</param>
        /// <param name="outputFormat">The desired format of the returned pinyin strings.</param>
        /// <returns>
        /// The formatted readings, or null when the lookup returns null (no record found,
        /// per the original documentation).
        /// </returns>
        private static String[] getFormattedHanyuPinyinStringArray(char ch,
                                                                   HanyuPinyinOutputFormat outputFormat)
        {
            String[] readings = getUnformattedHanyuPinyinStringArray(ch);

            if (null == readings)
            {
                return null;
            }

            // Overwrite each slot of the looked-up array with its formatted form.
            for (int index = 0; index < readings.Length; index++)
            {
                String raw = readings[index];
                readings[index] = PinyinFormatter.formatHanyuPinyin(raw, outputFormat);
            }

            return readings;
        }
Пример #22
0
        /// <summary>
        /// Converts a raw Hanyu pinyin string (tone given as a digit, u-umlaut written as
        /// "u:") to the requested output format.
        /// </summary>
        /// <param name="pinyin">The given Hanyu pinyin string.</param>
        /// <param name="outputFormat">The given format.</param>
        /// <returns>The Hanyu pinyin in the given format.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="outputFormat"/> is null.
        /// </exception>
        /// <exception cref="InvalidHanyuPinyinFormatException">
        /// Tone marks are requested together with the "v" or "u:" spelling of u-umlaut.
        /// </exception>
        internal static string FormatHanyuPinyin(
            string pinyin, HanyuPinyinOutputFormat outputFormat)
        {
            if (outputFormat == null)
            {
                // The single-string constructor takes the parameter NAME, not a message;
                // pass both so ParamName and Message are each meaningful.
                throw new ArgumentNullException(
                    "outputFormat", "The parameter outputFormat could not be null.");
            }

            if (outputFormat.ToneType == HanyuPinyinToneType.WITH_TONE_MARK &&
                (outputFormat.VCharType == HanyuPinyinVCharType.WITH_U_AND_COLON || outputFormat.VCharType == HanyuPinyinVCharType.WITH_V))
            {
                throw new InvalidHanyuPinyinFormatException("Tone marks cannot be added to v or u:");
            }

            // Invariant casing: culture-sensitive ToLower/ToUpper would change results
            // under cultures with special casing rules (e.g. the Turkish dotted/dotless i).
            string result = pinyin.ToLowerInvariant();

            if (outputFormat.ToneType == HanyuPinyinToneType.WITHOUT_TONE)
            {
                // Strip the tone digit from the lower-cased text. The previous code ran the
                // regex on the untouched input and so threw the lower-casing away.
                result = Regex.Replace(result, "[0-9]", "");
            }
            else if (outputFormat.ToneType == HanyuPinyinToneType.WITH_TONE_MARK)
            {
                // "u:" is rewritten to "v" before the tone number is converted to a mark.
                result = ConvertToneNumber2ToneMark(result.Replace("u:", "v"));
            }

            if (outputFormat.VCharType == HanyuPinyinVCharType.WITH_V)
            {
                result = result.Replace("u:", "v");
            }
            else if (outputFormat.VCharType == HanyuPinyinVCharType.WITH_U_UNICODE)
            {
                result = result.Replace("u:", "ü");
            }

            if (outputFormat.CaseType == HanyuPinyinCaseType.UPPERCASE)
            {
                result = result.ToUpperInvariant();
            }

            return result;
        }
Пример #23
0
        /// <summary>
        /// Converts the Chinese characters of a string to toneless pinyin.
        /// </summary>
        /// <remarks>
        /// For each character, all readings returned by the helper are joined with a single
        /// space and appended to the result. Characters for which the helper returns null
        /// contribute nothing to the result.
        /// </remarks>
        /// <param name="self">The text to convert.</param>
        /// <returns>
        /// The concatenated pinyin, or an empty string when <paramref name="self"/> is
        /// reported as null/empty by <c>NullEmpty()</c>.
        /// </returns>
        public static string ToPinYin(this string self)
        {
            if (self.NullEmpty())
            {
                return string.Empty;
            }

            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
            format.ToneType = HanyuPinyinToneType.WITHOUT_TONE;

            StringBuilder sb = new StringBuilder();

            foreach (char item in self)
            {
                string[] pinyinStr = PinyinHelper.ToHanyuPinyinStringArray(item, format);

                // The helper is documented to return null for non-Chinese characters;
                // skip those instead of handing a null array to JoinBy.
                if (pinyinStr == null)
                {
                    continue;
                }

                sb.Append(pinyinStr.JoinBy(" "));
            }

            return sb.ToString();
        }
Пример #24
0
        /// <summary>
        /// Replaces every character of <paramref name="chineseString"/> that has a pinyin
        /// reading with its first toneless reading; other characters are kept as they are.
        /// </summary>
        /// <param name="chineseString">The text to convert.</param>
        /// <returns>
        /// The converted text, or an empty string when the input is null, empty or
        /// consists only of white space.
        /// </returns>
        public static string GetPinYin(string chineseString)
        {
            if (string.IsNullOrWhiteSpace(chineseString))
            {
                return string.Empty;
            }

            // The format does not depend on the character, so build it once instead of
            // once per character.
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
            format.ToneType = HanyuPinyinToneType.WITHOUT_TONE;

            Func<char, string> getPinYin = c =>
            {
                string[] pinYin = PinyinHelper.ToHanyuPinyinStringArray(c, format);

                // No reading (null, or defensively an empty array): keep the character itself.
                if (pinYin == null || pinYin.Length == 0)
                {
                    return c.ToString();
                }

                return pinYin[0];
            };

            return string.Join("", chineseString.Select(getPinYin));
        }
Пример #25
0
        /// <summary>
        /// Returns the pinyin readings of <paramref name="chineseCharacter"/> in lower case
        /// with the Unicode u-umlaut, using the tone style selected by
        /// <paramref name="toneType"/>.
        /// </summary>
        /// <param name="chineseCharacter">The character to convert.</param>
        /// <param name="toneType">
        /// <c>Without</c> selects no tone, <c>Number</c> selects tone numbers, and any
        /// other value selects tone marks.
        /// </param>
        /// <returns>The array returned by the pinyin helper.</returns>
        public string[] Convert(char chineseCharacter, EToneType toneType)
        {
            // Resolve the tone style first; tone marks are the fallback.
            HanyuPinyinToneType resolvedTone;

            if (toneType == EToneType.Without)
            {
                resolvedTone = HanyuPinyinToneType.WithoutTone;
            }
            else if (toneType == EToneType.Number)
            {
                resolvedTone = HanyuPinyinToneType.WithToneNumber;
            }
            else
            {
                resolvedTone = HanyuPinyinToneType.WithToneMark;
            }

            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
            format.CaseType  = HanyuPinyinCaseType.Lowercase;
            format.ToneType  = resolvedTone;
            format.VCharType = HanyuPinyinVCharType.WithUUnicode;

            return PinyinHelper.ToHanyuPinyinStringArray(chineseCharacter, format);
        }
Пример #26
0
        /// <summary>
        /// Converts an unformatted Hanyu Pinyin string (tone given as a digit 1-5,
        /// u-umlaut written as "u:") to the requested output format.
        /// </summary>
        /// <param name="pinyinStr">The unformatted Hanyu Pinyin string.</param>
        /// <param name="outputFormat">The desired output format.</param>
        /// <returns>The formatted Hanyu Pinyin string.</returns>
        /// <exception cref="BadHanyuPinyinOutputFormatCombination">
        /// Tone marks are requested together with the "v" or "u:" spelling of u-umlaut.
        /// </exception>
        internal static String formatHanyuPinyin(String pinyinStr,
                                                 HanyuPinyinOutputFormat outputFormat)
        {
            if ((HanyuPinyinToneType.WITH_TONE_MARK.Equals(outputFormat.getToneType())) &&
                (
                    (HanyuPinyinVCharType.WITH_V.Equals(outputFormat.getVCharType())) ||
                    (HanyuPinyinVCharType.WITH_U_AND_COLON.Equals(outputFormat.getVCharType()))
                ))
            {
                throw new BadHanyuPinyinOutputFormatCombination("tone marks cannot be added to v or u:");
            }

            if (HanyuPinyinToneType.WITHOUT_TONE.Equals(outputFormat.getToneType()))
            {
                // Strip the tone digit. The static overload avoids building a new Regex
                // instance on every call.
                pinyinStr = Regex.Replace(pinyinStr, "[1-5]", "");
            }
            else if (HanyuPinyinToneType.WITH_TONE_MARK.Equals(outputFormat.getToneType()))
            {
                // "u:" is rewritten to "v" before the tone number is converted to a mark.
                pinyinStr = pinyinStr.Replace("u:", "v");
                pinyinStr = convertToneNumber2ToneMark(pinyinStr);
            }

            if (HanyuPinyinVCharType.WITH_V.Equals(outputFormat.getVCharType()))
            {
                pinyinStr = pinyinStr.Replace("u:", "v");
            }
            else if (HanyuPinyinVCharType.WITH_U_UNICODE.Equals(outputFormat.getVCharType()))
            {
                pinyinStr = pinyinStr.Replace("u:", "ü");
            }

            if (HanyuPinyinCaseType.UPPERCASE.Equals(outputFormat.getCaseType()))
            {
                // Invariant casing: culture-sensitive ToUpper() would change the result
                // under cultures with special casing rules (e.g. the Turkish dotted i).
                pinyinStr = pinyinStr.ToUpperInvariant();
            }

            return pinyinStr;
        }
Пример #27
0
        /// <summary>
        /// Converts the Chinese characters of a string to pinyin.
        /// </summary>
        /// <remarks>
        /// Characters rejected by <c>Util.IsHanzi</c> are skipped. A character with exactly
        /// one reading contributes that reading; any other reading count contributes
        /// "(r1,r2,...) " with a trailing space. If the converter returns null for any
        /// accepted character, the whole result is an empty string.
        /// </remarks>
        /// <param name="hanziStr">The Chinese text to convert.</param>
        /// <param name="hanyuPinyinOutputFormat">
        /// The output format; a default-constructed format is used when null.
        /// </param>
        /// <returns>
        /// The pinyin text, or an empty string when <paramref name="hanziStr"/> is null or
        /// empty.
        /// </returns>
        public static string ToHanyuPinyinString(string hanziStr, HanyuPinyinOutputFormat hanyuPinyinOutputFormat = null)
        {
            // A null input used to throw NullReferenceException in the loop below.
            if (string.IsNullOrEmpty(hanziStr))
            {
                return string.Empty;
            }

            if (hanyuPinyinOutputFormat == null)
            {
                hanyuPinyinOutputFormat = new HanyuPinyinOutputFormat();
            }

            var pinyin = new System.Text.StringBuilder();

            foreach (var hanzi in hanziStr)
            {
                if (!Util.IsHanzi(hanzi))
                {
                    continue;
                }

                var pinyinStrArray = ChineseToPinyinConvert.GetInstance().GetHanyuPinyinStringArray(hanzi);
                if (null == pinyinStrArray)
                {
                    return string.Empty;
                }

                if (pinyinStrArray.Length == 1)
                {
                    pinyin.Append(PinyinFormatter.FormatHanyuPinyin(pinyinStrArray[0], hanyuPinyinOutputFormat));
                    continue;
                }

                // Format into a fresh array instead of overwriting the one handed out by
                // the shared converter instance, in case that array is reused by it.
                var formatted = new string[pinyinStrArray.Length];
                for (var i = 0; i < pinyinStrArray.Length; i++)
                {
                    formatted[i] = PinyinFormatter.FormatHanyuPinyin(pinyinStrArray[i], hanyuPinyinOutputFormat);
                }

                pinyin.Append('(').Append(string.Join(",", formatted)).Append(") ");
            }

            return pinyin.ToString();
        }
Пример #28
0
        /// <summary>
        /// Converts an unformatted Hanyu Pinyin string (tone given as a digit 1-5,
        /// u-umlaut written as "u:") to the requested output format.
        /// </summary>
        /// <param name="pinyinStr">The unformatted Hanyu Pinyin string.</param>
        /// <param name="outputFormat">The desired output format.</param>
        /// <returns>The formatted Hanyu Pinyin string.</returns>
        /// <exception cref="BadHanyuPinyinOutputFormatCombination">
        /// Tone marks are requested together with the "v" or "u:" spelling of u-umlaut.
        /// </exception>
        public static string FormatHanyuPinyin(string pinyinStr, HanyuPinyinOutputFormat outputFormat)
        {
            if ((HanyuPinyinToneType.WITH_TONE_MARK == outputFormat.ToneType)
                &&
                ((HanyuPinyinVCharType.WITH_V == outputFormat.VCharType) ||
                 (HanyuPinyinVCharType.WITH_U_AND_COLON == outputFormat.VCharType)))
            {
                throw new BadHanyuPinyinOutputFormatCombination("tone marks cannot be added to v or u:");
            }

            if (HanyuPinyinToneType.WITHOUT_TONE == outputFormat.ToneType)
            {
                // Strip the tone digit.
                pinyinStr = Regex.Replace(pinyinStr, "[1-5]", "");
            }
            else if (HanyuPinyinToneType.WITH_TONE_MARK == outputFormat.ToneType)
            {
                // "u:" is rewritten to "v" before the tone number is converted to a mark.
                pinyinStr = pinyinStr.Replace("u:", "v");
                pinyinStr = ConvertToneNumber2ToneMark(pinyinStr);
            }

            if (HanyuPinyinVCharType.WITH_V == outputFormat.VCharType)
            {
                pinyinStr = pinyinStr.Replace("u:", "v");
            }
            else if (HanyuPinyinVCharType.WITH_U_UNICODE == outputFormat.VCharType)
            {
                pinyinStr = pinyinStr.Replace("u:", "ü");
            }

            if (HanyuPinyinCaseType.UPPERCASE == outputFormat.CaseType)
            {
                // Invariant casing: culture-sensitive ToUpper() would change the result
                // under cultures with special casing rules (e.g. the Turkish dotted i).
                pinyinStr = pinyinStr.ToUpperInvariant();
            }

            return pinyinStr;
        }
Пример #29
0
 /// <summary>
 /// Gets all Hanyu Pinyin readings of a single Chinese character (Simplified or
 /// Traditional), formatted according to <paramref name="outputFormat"/>.
 /// </summary>
 /// <remarks>
 /// Examples from the original documentation: '间' yields two readings, "jian1" and
 /// "jian4"; '李' yields the single reading "li3". The original note also states that
 /// a reading of "none0" marks a character that is in the Unicode CJK table but has
 /// no pronunciation in Chinese.
 /// </remarks>
 /// <param name="ch">The given Chinese character.</param>
 /// <param name="outputFormat">The desired format of the returned pinyin strings.</param>
 /// <returns>
 /// All readings of the character; null for a non-Chinese character, per the
 /// original documentation.
 /// </returns>
 /// <exception cref="BadHanyuPinyinOutputFormatCombination">
 /// Documented by the original as thrown for certain combinations of output format
 /// settings.
 /// </exception>
 public static String[] toHanyuPinyinStringArray(char ch,
                                                 HanyuPinyinOutputFormat outputFormat)
 {
     // Public entry point; the lookup and formatting live in the private helper.
     String[] readings = getFormattedHanyuPinyinStringArray(ch, outputFormat);

     return readings;
 }
Пример #30
0
 /// <summary>
 /// Gets every Hanyu pinyin reading of a single Chinese character (Simplified or
 /// Traditional Chinese), rendered in the given output format.
 /// </summary>
 /// <param name="ch">The given Chinese character.</param>
 /// <param name="format">The given output format.</param>
 /// <returns>
 /// A string array with all Hanyu pinyin readings; null for a non-Chinese character.
 /// </returns>
 public static string[] ToHanyuPinyinStringArray(
     char ch, HanyuPinyinOutputFormat format)
 {
     // Public entry point; the lookup and formatting live in the private helper.
     string[] readings = GetFomattedHanyuPinyinStringArray(ch, format);

     return readings;
 }