/// <summary>
/// Checks that <c>NumeralUtil.ConvertChineseNumeral2Arabic</c> rewrites Chinese numerals
/// embedded in running text as Arabic digits. The cases cover place-value numerals
/// (二百三十五), a negative value (负三万零五十) and digit-by-digit years (二零零五, 四五五).
/// </summary>
public void TestConvertChineseNumeral2Arabic()
{
    // The expected value is passed first and the actual value second, so that a
    // failing assertion reports the two values under the correct labels.
    Assert.AreEqual(
        "你好,这里有235块钱,收好了,总共2252块。2减5等于-30050。",
        NumeralUtil.ConvertChineseNumeral2Arabic("你好,这里有二百三十五块钱,收好了,总共二千二百五十二块。二减五等于负三万零五十。"));

    Assert.AreEqual(
        "公元2005年4月",
        NumeralUtil.ConvertChineseNumeral2Arabic("公元二零零五年四月"));

    Assert.AreEqual(
        "公元前455年",
        NumeralUtil.ConvertChineseNumeral2Arabic("公元前四五五年"));
}
/// <summary>
/// Scans the context text from <paramref name="startIndex"/> and tries to split what
/// follows into the parts of a Chinese address (country, province, city, district,
/// county, street, lane, number, floor, room, building, extra).
/// </summary>
/// <remarks>
/// Characters are collected in a pending buffer until a marker character
/// (市, 区, 省, 路, 号, ...) closes a part. The closed part is stored on the
/// <c>ChineseAddress</c> and appended to the overall matched text. Scanning stops at
/// Chinese punctuation, at a space, or at the end of the examined window.
/// </remarks>
/// <param name="startIndex">Index into <c>context.Text</c> at which scanning starts.</param>
/// <returns>
/// A collection with a single <c>POSType.D_S</c> result when at least one part was
/// recognized; otherwise an empty collection.
/// </returns>
public ParseResultCollection Parse(int startIndex)
{
    string _text = context.Text;
    ParseResultCollection prc = new ParseResultCollection();

    // Only a bounded window after startIndex is examined.
    string temp = _text.Substring(startIndex, Math.Min(maxChineseAddressLength, _text.Length - startIndex));
    char[] chars = temp.ToCharArray();

    StringBuilder sb = new StringBuilder();     // characters read since the last closed part
    StringBuilder whole = new StringBuilder();  // all recognized parts, in order
    ChineseAddress ca = new ChineseAddress();
    int startpos = 0;

    //TODO: look the country name up in a dictionary
    if (temp.StartsWith("中国", StringComparison.Ordinal))
    {
        startpos = 2;
        ca.country = "中国";
        whole.Append("中国");
    }

    for (int i = startpos; i < chars.Length; i++)
    {
        char ch = chars[i];

        if (ch == '市' || ch == '场')
        {
            if (sb.Length == 0)
            {
                // A marker with nothing in front of it is ordinary text.
                sb.Append(ch);
                continue;
            }

            sb.Append(ch);
            string subStr = sb.ToString();
            // NOTE(review): GetMaximumMatch is presumably a place-name lookup; confirm.
            string city = GetMaximumMatch(subStr, 0, 5);
            if (city != null)
            {
                ca.city = city;
                whole.Append(ca.city);
                sb.Length = 0;
            }
        }
        else if (ch == '区')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }

            sb.Append(ch);
            string subStr = sb.ToString();
            string district = GetMaximumMatch(subStr, 0, 5);
            if (district == null)
            {
                // No match: the whole pending text is taken as the district.
                ca.district = subStr;
                whole.Append(ca.district);
            }
            else if (!district.EndsWith("区", StringComparison.Ordinal))
            {
                // The match does not end in 区, so it is stored as the city and the
                // remainder of the pending text becomes the district.
                ca.city = district;
                whole.Append(ca.city);
                ca.district = subStr.Substring(ca.city.Length);
                whole.Append(ca.district);
            }
            else
            {
                ca.district = district;
                whole.Append(ca.district);
            }

            sb.Length = 0;
        }
        else if (ch == '省')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }

            sb.Append(ch);
            string subStr = sb.ToString();
            string province = GetMaximumMatch(subStr, 0, 5); // province
            if (province != null)
            {
                ca.province = province;
                whole.Append(ca.province);
                sb.Length = 0;
            }
        }
        else if (ch == '乡' || ch == '村' || ch == '县' || ch == '镇')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }

            sb.Append(ch);
            ca.county = sb.ToString();
            whole.Append(ca.county);
            sb.Length = 0;
        }
        else if (ch == '巷')
        {
            // No dedicated handling exists for 巷 yet. Keep the character in the pending
            // text; the previous empty branch silently removed it from the address.
            sb.Append(ch);
        }
        else if (ch == '楼' || ch == '弄' || ch == '号' || ch == '室')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }

            // The text in front of the marker must be a number (after numeral conversion).
            string substr = NumeralUtil.ConvertChineseNumeral2Arabic(sb.ToString());
            int parsed;
            sb.Append(ch);
            if (Int32.TryParse(substr, out parsed))
            {
                string part = sb.ToString();
                switch (ch)
                {
                    case '楼':
                        ca.floor = part;
                        break;
                    case '弄':
                        ca.lane = part;
                        break;
                    case '号':
                        ca.no = part;
                        break;
                    case '室':
                        ca.room = part;
                        break;
                }

                whole.Append(part);
                sb.Length = 0;
            }
        }
        else if (ch == '道' || ch == '路' || ch == '街')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }

            sb.Append(ch);
            ca.street = sb.ToString();
            whole.Append(ca.street);
            sb.Length = 0;
        }
        // NOTE(review): both alternatives below are the same character as written; one of
        // them was presumably meant to be the full-width parenthesis - confirm.
        else if (ch == '(' || ch == '(')
        {
            // An opening parenthesis starts a fresh pending text (anything pending is dropped).
            sb = new StringBuilder();
            sb.Append(ch);
        }
        // NOTE(review): same duplicated-character concern as for the opening parenthesis.
        else if (ch == ')' || ch == ')')
        {
            sb.Append(ch);
            string extra1 = sb.ToString();
            whole.Append(extra1);
            ca.extra = extra1;
            sb.Length = 0;
        }
        // NOTE(review): the two space comparisons are identical as written; one was
        // presumably meant to be the full-width space - confirm.
        else if (CharacterUtil.IsChinesePunctuation(ch) || (ch == ' ' || ch == ' '))
        {
            break;
        }
        else if (ch == '大')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }

            bool consumed = false;
            if (i + 1 < chars.Length)
            {
                char nextchar = chars[i + 1];
                if (nextchar == '桥' || nextchar == '厦')
                {
                    string extra1 = sb.ToString() + "大" + nextchar;
                    whole.Append(extra1);
                    if (nextchar == '桥')
                    {
                        ca.extra += extra1;
                    }
                    else
                    {
                        ca.building = extra1;
                    }

                    i += 2 - 1; // skip the look-ahead character
                    sb.Length = 0;
                    consumed = true;
                }
                else if (i + 2 < chars.Length && nextchar == '酒')
                {
                    char nextchar2 = chars[i + 2];
                    if (nextchar2 == '店')
                    {
                        string extra1 = sb.ToString() + "大" + nextchar + nextchar2;
                        string city = GetMaximumMatch(extra1, 0, 5); // city or province
                        if (city != null)
                        {
                            ca.city = city;
                            whole.Append(ca.city);
                            extra1 = extra1.Substring(ca.city.Length);
                        }

                        whole.Append(extra1);
                        ca.building = extra1;
                        i += 3 - 1; // skip both look-ahead characters
                        sb.Length = 0;
                        consumed = true;
                    }
                }
            }

            if (!consumed)
            {
                // Not 大桥 / 大厦 / 大酒店: 大 is ordinary text (e.g. "...大道") and must stay
                // in the pending text. It used to be dropped here.
                sb.Append(ch);
            }
        }
        else if (ch == '餐')
        {
            if (sb.Length == 0)
            {
                sb.Append(ch);
                continue;
            }

            if (i + 1 < chars.Length && chars[i + 1] == '厅')
            {
                string extra1 = sb.ToString() + "餐" + chars[i + 1];
                whole.Append(extra1);
                ca.extra += extra1;
                i += 2 - 1; // skip the look-ahead character
                sb.Length = 0;
            }
            else
            {
                // Not 餐厅: keep 餐 as ordinary text instead of dropping it.
                sb.Append(ch);
            }
        }
        else
        {
            sb.Append(ch);
            string extra = sb.ToString();
            if (extra.EndsWith("中心", StringComparison.Ordinal) || extra.EndsWith("酒店", StringComparison.Ordinal))
            {
                string city = GetMaximumMatch(extra, 0, 5); // city
                if (city != null)
                {
                    ca.city = city;
                    // Record the city in the matched text as well, as the 大酒店 branch
                    // does; it used to be missing from the result text here.
                    whole.Append(city);
                    extra = extra.Substring(city.Length);
                }

                ca.building = extra;
                whole.Append(extra);

                // Handle "中心大厦": fold a directly following 大厦 into the building name.
                if (i + 2 < chars.Length && chars[i + 1] == '大' && chars[i + 2] == '厦')
                {
                    ca.building += "大厦";
                    whole.Append("大厦");
                    i += 2;
                }

                sb.Length = 0;
            }
        }
    }

    if (whole.Length > 0)
    {
        // Whatever is still pending is kept as the free-form extra part.
        if (sb.Length > 0)
        {
            ca.extra = sb.ToString();
        }

        prc.Add(ParseResult.Create(whole.ToString(), startIndex, POSType.D_S, ca));
    }

    return prc;
}
/// <summary>
/// Tries to read a date/time expression that starts at <paramref name="startIndex"/>
/// in the context text.
/// </summary>
/// <remarks>
/// Two forms are recognized. Relative terms (上周, or 前/昨/明/今/后 + 天, or
/// 去/前/今/后 + 年) produce a <c>POSType.D_T</c> result without a date value.
/// Numeric expressions made of digits followed by 年/月/日/点/分/秒 are turned into a
/// date text and a matching pattern and handed to <c>DateUtil.ParseDate</c>.
/// Chinese numerals are converted to Arabic digits before scanning, so the result text
/// is in converted form.
/// </remarks>
/// <param name="startIndex">Index into <c>context.Text</c> at which scanning starts.</param>
/// <returns>
/// A collection with one <c>POSType.D_T</c> result on success; otherwise an empty collection.
/// </returns>
public ParseResultCollection Parse(int startIndex)
{
    ParseResultCollection prc = new ParseResultCollection();

    // startIndex refers to the unconverted context text, so cut there first and convert
    // only the remainder. Converting the whole text first and then applying startIndex
    // shifts the window whenever an earlier numeral changes length, and could make
    // the remaining length negative.
    string converted = NumeralUtil.ConvertChineseNumeral2Arabic(context.Text.Substring(startIndex));
    int boundary = Math.Min(maxDateTimeTextLength, converted.Length);

    StringBuilder sbDateText = new StringBuilder();     // digits and unit characters to parse
    StringBuilder sbPatternText = new StringBuilder();  // pattern matching sbDateText
    StringBuilder sbText = new StringBuilder();         // text reported in the result
    int strLen = 0;          // number of digits read since the last unit character
    char prevCh = ' ';
    bool nonNumeric = false; // set when a relative term was recognized

    for (int i = 0; i < boundary; i++)
    {
        char ch = converted[i];

        if (NumeralUtil.IsArabicNumeral(ch))
        {
            sbDateText.Append(ch);
            sbText.Append(ch);
            strLen++;
        }
        else if (ch == '大' || ch == '前' || ch == '昨' || ch == '明' || ch == '今' || ch == '后' || ch == '去')
        {
            // Possible first character of a relative term; only remembered via prevCh.
        }
        else if (ch == '周')
        {
            if (prevCh == '上')
            {
                nonNumeric = true;
                sbText.Append(prevCh);
                sbText.Append(ch);
                break;
            }
        }
        else if (ch == '天')
        {
            if (prevCh == '前' || prevCh == '昨' || prevCh == '明' || prevCh == '今' || prevCh == '后')
            {
                nonNumeric = true;
                sbText.Append(prevCh);
                sbText.Append(ch);
                break;
            }
        }
        else if (ch == ' ')
        {
            // Spaces are kept in the result text but do not update prevCh.
            sbText.Append(ch);
            continue;
        }
        else
        {
            if (ch == '年' && (prevCh == '去' || prevCh == '前' || prevCh == '今' || prevCh == '后'))
            {
                nonNumeric = true;
                sbText.Append(prevCh);
                sbText.Append(ch);
                break;
            }

            // Map the unit character to the pattern letter passed to DateUtil.
            char patternLetter;
            switch (ch)
            {
                case '年':
                    patternLetter = 'y';
                    break;
                case '月':
                    patternLetter = 'M';
                    break;
                case '日':
                    patternLetter = 'd';
                    break;
                case '点':
                    // NOTE(review): 'h' is the 12-hour specifier in .NET custom format
                    // strings; if ParseDate relies on them, hours above 12 may not
                    // parse - confirm whether 'H' is intended.
                    patternLetter = 'h';
                    break;
                case '分':
                    patternLetter = 'm';
                    break;
                case '秒':
                    patternLetter = 's';
                    break;
                default:
                    patternLetter = '\0';
                    break;
            }

            if (patternLetter == '\0')
            {
                // Any other character ends the expression.
                break;
            }

            if (strLen == 0)
            {
                // A unit character without digits in front of it: not a date.
                return prc;
            }

            sbDateText.Append(ch);
            sbPatternText.Append(DateUtil.GeneratePatternText(patternLetter, strLen));
            sbPatternText.Append(ch);
            sbText.Append(ch);
            strLen = 0;
        }

        prevCh = ch;
    }

    if (nonNumeric && sbText.Length > 0)
    {
        prc.Add(ParseResult.Create(sbText.ToString(), startIndex, POSType.D_T));
        return prc;
    }

    if (sbDateText.Length == 0 || sbPatternText.Length == 0)
    {
        return prc;
    }

    DateTime? dt = DateUtil.ParseDate(sbDateText.ToString(), sbPatternText.ToString());
    if (dt != null)
    {
        prc.Add(ParseResult.Create(sbText.ToString(), startIndex, POSType.D_T, dt));
    }

    return prc;
}