Пример #1
0
        /// <summary>
        /// Verifies that <c>NumeralUtil.ConvertChineseNumeral2Arabic</c> rewrites Chinese numerals
        /// embedded in running text as Arabic digits while leaving the surrounding text unchanged.
        /// Covers positional numbers (二百三十五), a negative number (负三万零五十) and
        /// digit-by-digit years (二零零五, 四五五).
        /// </summary>
        public void TestConvertChineseNumeral2Arabic()
        {
            // Assert.AreEqual takes (expected, actual); keeping that order makes the
            // failure message label the two values correctly.
            Assert.AreEqual(
                "你好,这里有235块钱,收好了,总共2252块。2减5等于-30050。",
                NumeralUtil.ConvertChineseNumeral2Arabic("你好,这里有二百三十五块钱,收好了,总共二千二百五十二块。二减五等于负三万零五十。")
                );

            Assert.AreEqual(
                "公元2005年4月",
                NumeralUtil.ConvertChineseNumeral2Arabic("公元二零零五年四月")
                );
            Assert.AreEqual(
                "公元前455年",
                NumeralUtil.ConvertChineseNumeral2Arabic("公元前四五五年")
                );
        }
Пример #2
0
        /// <summary>
        /// Scans <c>context.Text</c> from <paramref name="startIndex"/> and tries to recognise a
        /// Chinese address, filling the fields of a <c>ChineseAddress</c> as components are found.
        /// </summary>
        /// <remarks>
        /// At most <c>maxChineseAddressLength</c> characters are examined. Characters are collected
        /// in a pending buffer until a marker character (市, 区, 省, 县, 路, 号, ...) closes a
        /// component; the component is then stored on the address and appended to the recognised
        /// text. Scanning stops at the first space or Chinese punctuation mark.
        /// <c>GetMaximumMatch</c> is defined elsewhere; the code below treats a non-null result
        /// as a leading portion of the string passed to it.
        /// </remarks>
        /// <param name="startIndex">Index into <c>context.Text</c> at which scanning begins.</param>
        /// <returns>
        /// A collection holding a single <c>POSType.D_S</c> result when at least one component was
        /// recognised; otherwise an empty collection.
        /// </returns>
        public ParseResultCollection Parse(int startIndex)
        {
            string _text = context.Text;
            ParseResultCollection prc = new ParseResultCollection();
            string temp = _text.Substring(startIndex, Math.Min(maxChineseAddressLength, _text.Length - startIndex));

            char[]         chars    = temp.ToCharArray();
            StringBuilder  sb       = new StringBuilder();  // pending characters not yet assigned to a component
            StringBuilder  whole    = new StringBuilder();  // every recognised component, in order
            ChineseAddress ca       = new ChineseAddress();
            int            startpos = 0;

            // TODO: look up the country name in a dictionary
            if (temp.StartsWith("中国", StringComparison.Ordinal))
            {
                startpos   = 2;
                ca.country = "中国";
                whole.Append("中国");
            }
            for (int i = startpos; i < chars.Length; i++)
            {
                char ch = chars[i];
                if (ch == '市' || ch == '场')
                {
                    // A marker with nothing in front of it cannot close a component; buffer it.
                    if (sb.Length == 0)
                    {
                        sb.Append(ch);
                        continue;
                    }
                    sb.Append(ch);
                    string subStr = sb.ToString();
                    string city   = GetMaximumMatch(subStr, 0, 5);
                    if (city != null)
                    {
                        ca.city = city;
                        whole.Append(ca.city);
                        sb = new StringBuilder();
                    }
                }
                else if (ch == '区')
                {
                    if (sb.Length == 0)
                    {
                        sb.Append(ch);
                        continue;
                    }
                    sb.Append(ch);
                    string subStr = sb.ToString();

                    string district = GetMaximumMatch(subStr, 0, 5);
                    if (district != null)
                    {
                        if (!district.EndsWith("区", StringComparison.Ordinal))
                        {
                            // The match is not itself a district, so it is taken as the city
                            // and whatever follows it in the buffer becomes the district.
                            ca.city = district;
                            whole.Append(ca.city);
                            ca.district = subStr.Substring(ca.city.Length);
                            whole.Append(ca.district);
                        }
                        else
                        {
                            ca.district = district;
                            whole.Append(ca.district);
                        }
                    }
                    else
                    {
                        // No match: accept the whole buffer as the district.
                        ca.district = subStr;
                        whole.Append(ca.district);
                    }
                    sb = new StringBuilder();
                }
                else if (ch == '省')
                {
                    if (sb.Length == 0)
                    {
                        sb.Append(ch);
                        continue;
                    }
                    sb.Append(ch);
                    string subStr   = sb.ToString();
                    string province = GetMaximumMatch(subStr, 0, 5);    // province
                    if (province != null)
                    {
                        ca.province = province;
                        whole.Append(ca.province);
                        sb = new StringBuilder();
                    }
                }
                else if (ch == '乡' || ch == '村' || ch == '县' || ch == '镇')
                {
                    if (sb.Length == 0)
                    {
                        sb.Append(ch);
                        continue;
                    }
                    sb.Append(ch);
                    ca.county = sb.ToString();
                    whole.Append(ca.county);
                    sb = new StringBuilder();
                }
                else if (ch == '巷')
                {
                    // No dedicated field for 巷 yet. Keep the character in the pending buffer
                    // instead of discarding it, so the text that follows is not corrupted.
                    sb.Append(ch);
                }
                else if (ch == '楼' || ch == '弄' || ch == '号' || ch == '室')
                {
                    if (sb.Length == 0)
                    {
                        sb.Append(ch);
                        continue;
                    }
                    // The component is only accepted when the text before the marker is an
                    // integer once Chinese numerals have been converted.
                    string substr = NumeralUtil.ConvertChineseNumeral2Arabic(sb.ToString());
                    int    x;
                    sb.Append(ch);
                    if (Int32.TryParse(substr, out x))
                    {
                        if (ch == '楼')
                        {
                            ca.floor = sb.ToString();
                        }
                        else if (ch == '弄')
                        {
                            ca.lane = sb.ToString();
                        }
                        else if (ch == '号')
                        {
                            ca.no = sb.ToString();
                        }
                        else if (ch == '室')
                        {
                            ca.room = sb.ToString();
                        }
                        whole.Append(sb.ToString());
                        sb = new StringBuilder();
                    }
                }
                else if (ch == '道' || ch == '路' || ch == '街')
                {
                    if (sb.Length == 0)
                    {
                        sb.Append(ch);
                        continue;
                    }
                    sb.Append(ch);
                    ca.street = sb.ToString();
                    whole.Append(ca.street);
                    sb = new StringBuilder();
                }
                else if (ch == '(' || ch == '\uFF08')   // ASCII or full-width opening parenthesis
                {
                    // Start a fresh buffer for the parenthesised remark.
                    sb = new StringBuilder();
                    sb.Append(ch);
                }
                else if (ch == ')' || ch == '\uFF09')   // ASCII or full-width closing parenthesis
                {
                    sb.Append(ch);
                    string extra1 = sb.ToString();
                    whole.Append(extra1);
                    ca.extra = extra1;
                    sb       = new StringBuilder();
                }
                else if (CharacterUtil.IsChinesePunctuation(ch) || (ch == ' ' || ch == '\u3000'))
                {
                    // Punctuation or a space (ASCII or ideographic) ends the address.
                    break;
                }
                else if (ch == '大')
                {
                    if (sb.Length == 0)
                    {
                        sb.Append(ch);
                        continue;
                    }
                    bool matched = false;
                    if (i + 1 < chars.Length)
                    {
                        char nextchar = chars[i + 1];

                        if (nextchar == '桥' || nextchar == '厦')
                        {
                            string extra1 = sb.ToString() + "大" + nextchar;
                            whole.Append(extra1);
                            if (nextchar == '桥')
                            {
                                ca.extra += extra1;
                            }
                            else
                            {
                                ca.building = extra1;
                            }
                            i++;    // skip the character consumed after 大
                            sb      = new StringBuilder();
                            matched = true;
                        }
                        else if (i + 2 < chars.Length && nextchar == '酒')
                        {
                            char nextchar2 = chars[i + 2];

                            if (nextchar2 == '店')
                            {
                                string extra1 = sb.ToString() + "大" + nextchar + nextchar2;
                                string city   = GetMaximumMatch(extra1, 0, 5);  // city or province
                                if (city != null)
                                {
                                    ca.city = city;
                                    whole.Append(ca.city);

                                    extra1 = extra1.Substring(ca.city.Length);
                                }
                                whole.Append(extra1);
                                ca.building = extra1;
                                i          += 2;    // skip 酒店
                                sb          = new StringBuilder();
                                matched     = true;
                            }
                        }
                    }
                    if (!matched)
                    {
                        // An ordinary 大 (e.g. 人民大道): keep it in the buffer. Dropping it
                        // would turn the street into 人民道.
                        sb.Append(ch);
                    }
                }
                else if (ch == '餐')
                {
                    if (sb.Length == 0)
                    {
                        sb.Append(ch);
                        continue;
                    }
                    bool matched = false;
                    if (i + 1 < chars.Length)
                    {
                        char nextchar = chars[i + 1];
                        if (nextchar == '厅')
                        {
                            string extra1 = sb.ToString() + "餐" + nextchar;
                            whole.Append(extra1);
                            ca.extra += extra1;
                            i++;    // skip 厅
                            sb      = new StringBuilder();
                            matched = true;
                        }
                    }
                    if (!matched)
                    {
                        // Not 餐厅: keep the character rather than silently losing it.
                        sb.Append(ch);
                    }
                }
                else
                {
                    sb.Append(ch);
                    string extra = sb.ToString();
                    if (extra.EndsWith("中心", StringComparison.Ordinal) || extra.EndsWith("酒店", StringComparison.Ordinal))
                    {
                        string city = GetMaximumMatch(extra, 0, 5); // city
                        if (city != null)
                        {
                            ca.city = city;
                            // Record the city in the recognised text as well, as the 大酒店
                            // branch does, so the result text does not skip it.
                            whole.Append(city);
                            extra = extra.Substring(city.Length);
                        }
                        ca.building = extra;
                        whole.Append(extra);
                        if (i + 2 < chars.Length && chars[i + 1] == '大' && chars[i + 2] == '厦')  // handle "中心大厦"
                        {
                            ca.building += "大厦";
                            whole.Append("大厦");
                            i += 2;
                        }
                        sb = new StringBuilder();
                    }
                }
            }
            if (whole.Length > 0)
            {
                // Whatever is still pending could not be classified; keep it as extra text.
                if (sb.Length > 0)
                {
                    ca.extra = sb.ToString();
                }
                prc.Add(ParseResult.Create(whole.ToString(), startIndex, POSType.D_S, ca));
            }
            return(prc);
        }
Пример #3
0
        /// <summary>
        /// Tries to recognise a Chinese date/time expression in <c>context.Text</c> beginning at
        /// <paramref name="startIndex"/>.
        /// </summary>
        /// <remarks>
        /// Chinese numerals are converted to Arabic digits first. Two kinds of expression are
        /// handled: relative ones (上周, 今天, 昨天, 去年, ...), which are returned as text only, and
        /// numeric ones built from digit runs followed by 年/月/日/点/分/秒. For numeric ones a
        /// pattern string is assembled via <c>DateUtil.GeneratePatternText</c> and handed to
        /// <c>DateUtil.ParseDate</c> together with the date text. At most
        /// <c>maxDateTimeTextLength</c> converted characters are examined.
        /// </remarks>
        /// <param name="startIndex">Index into <c>context.Text</c> at which scanning begins.</param>
        /// <returns>
        /// A collection with one <c>POSType.D_T</c> result when an expression was recognised;
        /// otherwise an empty collection.
        /// </returns>
        public ParseResultCollection Parse(int startIndex)
        {
            ParseResultCollection prc = new ParseResultCollection();

            // Slice first, convert second. startIndex is an offset into the original text;
            // numeral conversion can change the length of everything before it (二百三十五 -> 235),
            // so applying the offset to the converted text would read the wrong window or throw.
            string text = NumeralUtil.ConvertChineseNumeral2Arabic(context.Text.Substring(startIndex));

            int           boundary      = Math.Min(maxDateTimeTextLength, text.Length);
            string        temp          = text.Substring(0, boundary);
            StringBuilder sbDateText    = new StringBuilder();  // digits and unit characters passed to ParseDate
            StringBuilder sbPatternText = new StringBuilder();  // pattern built alongside sbDateText
            StringBuilder sbText        = new StringBuilder();  // text reported in the result
            int           strLen        = 0;                    // length of the current digit run
            int           i;
            char          prevCh     = ' ';
            bool          nonNumeric = false;

            for (i = 0; i < boundary; i++)
            {
                char ch = temp[i];

                if (NumeralUtil.IsArabicNumeral(ch))
                {
                    sbDateText.Append(ch);
                    sbText.Append(ch);
                    strLen++;
                }
                else if (ch == '大' || ch == '前' || ch == '昨' || ch == '明' || ch == '今' || ch == '后' || ch == '去' || ch == '上')
                {
                    // Possible first character of a relative expression; it is only remembered
                    // in prevCh. '上' must be tolerated here, otherwise the 上周 case below can
                    // never be reached.
                }
                else if (ch == '周')
                {
                    if (prevCh == '上')
                    {
                        nonNumeric = true;
                        sbText.Append(prevCh);
                        sbText.Append(ch);
                        break;
                    }
                }
                else if (ch == '天')
                {
                    if (prevCh == '前' || prevCh == '昨' || prevCh == '明' || prevCh == '今' || prevCh == '后')
                    {
                        nonNumeric = true;
                        sbText.Append(prevCh);
                        sbText.Append(ch);
                        break;
                    }
                }
                else if (ch == '年')
                {
                    if (prevCh == '去' || prevCh == '前' || prevCh == '今' || prevCh == '后')
                    {
                        nonNumeric = true;
                        sbText.Append(prevCh);
                        sbText.Append(ch);
                        break;
                    }
                    // A unit character without digits in front of it is not a date.
                    if (strLen == 0)
                    {
                        return(prc);
                    }
                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('y', strLen));
                    sbPatternText.Append(ch);
                    strLen = 0;
                    sbText.Append(ch);
                }
                else if (ch == '日')
                {
                    if (strLen == 0)
                    {
                        return(prc);
                    }

                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('d', strLen));
                    sbPatternText.Append(ch);
                    strLen = 0;
                    sbText.Append(ch);
                }
                else if (ch == '月')
                {
                    if (strLen == 0)
                    {
                        return(prc);
                    }

                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('M', strLen));
                    sbPatternText.Append(ch);
                    sbText.Append(ch);
                    strLen = 0;
                }
                else if (ch == '分')
                {
                    if (strLen == 0)
                    {
                        return(prc);
                    }

                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('m', strLen));
                    sbPatternText.Append(ch);
                    sbText.Append(ch);
                    strLen = 0;
                }
                else if (ch == '秒')
                {
                    if (strLen == 0)
                    {
                        return(prc);
                    }

                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('s', strLen));
                    sbPatternText.Append(ch);
                    sbText.Append(ch);
                    strLen = 0;
                }
                else if (ch == '点')
                {
                    if (strLen == 0)
                    {
                        return(prc);
                    }

                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('h', strLen));
                    sbPatternText.Append(ch);
                    sbText.Append(ch);
                    strLen = 0;
                }
                else if (ch == ' ')
                {
                    // Spaces are kept in the reported text but do not update prevCh.
                    sbText.Append(ch);
                    continue;
                }
                else
                {
                    // Any other character ends the expression.
                    break;
                }
                prevCh = ch;
            }
            if (sbText.Length > 0 && nonNumeric)
            {
                // Relative expression: reported as text, without a parsed DateTime.
                prc.Add(ParseResult.Create(sbText.ToString(), startIndex, POSType.D_T));
                return(prc);
            }
            if (sbDateText.Length == 0 || sbPatternText.Length == 0)
            {
                return(prc);
            }
            DateTime? dt = DateUtil.ParseDate(sbDateText.ToString(), sbPatternText.ToString());

            if (dt != null)
            {
                string result = sbText.ToString();
                prc.Add(ParseResult.Create(result, startIndex, POSType.D_T, dt));
            }
            return(prc);
        }