/// <summary>
/// Tries to read a postal code made of consecutive digits beginning at
/// <paramref name="startIndex"/> in the context text.
/// </summary>
/// <param name="startIndex">Index into the context text at which scanning starts.</param>
/// <returns>
/// A collection holding a single result when the digit run has the length required by the
/// context pattern (6 digits for China, 5 for NorthAmerica, any non-zero length for other
/// patterns); otherwise an empty collection.
/// </returns>
public ParseResultCollection Parse(int startIndex)
{
    string text = context.Text;
    ParserPattern pattern = context.Pattern;
    ParseResultCollection prc = new ParseResultCollection();
    StringBuilder digits = new StringBuilder(6);

    // The bound is checked before every read, so a digit run that reaches the end of
    // the text (or a start index at the end of the text) terminates cleanly instead of
    // indexing past the last character.
    int i = startIndex;
    while (i < text.Length)
    {
        char ch = text[i];
        bool isDigit = NumeralUtil.IsArabicNumeral(ch) || (ch >= '0' && ch <= '9');
        if (!isDigit)
        {
            break;
        }

        digits.Append(ch);
        i++;
    }

    string source = digits.ToString();

    // No digits at all is never a postal code; without this guard an empty match
    // would be reported for patterns that have no fixed length requirement.
    if (source.Length == 0)
    {
        return prc;
    }

    if (pattern == ParserPattern.China && source.Length != 6)
    {
        return prc;
    }

    if (pattern == ParserPattern.NorthAmerica && source.Length != 5)
    {
        return prc;
    }

    prc.Add(ParseResult.Create(source, startIndex, POSType.A_M));
    return prc;
}
/// <summary>
/// Builds a <c>ParserContext</c> carrying the given text and pattern.
/// </summary>
/// <param name="text">Text to be parsed; stored in the context's <c>Text</c> property.</param>
/// <param name="pattern">Pattern stored in the context's <c>Pattern</c> property.</param>
/// <returns>The newly created context.</returns>
public static ParserContext CreateParserContext(string text, ParserPattern pattern)
{
    // Initializer order matches the original assignment order: Pattern first, then Text.
    return new ParserContext
    {
        Pattern = pattern,
        Text = text
    };
}
/// <summary>
/// Stores <paramref name="segment"/> as the main part of <paramref name="phone"/>, or appends
/// it to an existing main part that is still shorter than five characters.
/// </summary>
/// <param name="segment">Characters collected for the current number segment.</param>
/// <param name="phone">Phone number being assembled; its Main and IsMobile members are updated.</param>
void AssignPhoneMain(StringBuilder segment, PhoneNo phone)
{
    ParserPattern pattern = context.Pattern;

    // Segments are joined with '-' for NorthAmerica and with a space otherwise.
    char joiner = pattern == ParserPattern.NorthAmerica ? '-' : ' ';
    int joinerLength = 0;

    if (phone.Main == null)
    {
        phone.Main = segment.ToString();
    }
    else if (phone.Main.Length < 5)
    {
        phone.Main += joiner + segment.ToString();
        joinerLength = 1;
    }

    if (IsMobileNo(phone.Main, pattern))
    {
        phone.IsMobile = true;
        return;
    }

    // Not a mobile number: under the China pattern the main part may hold at most
    // 8 characters (plus the joiner, when one was inserted); anything longer is discarded.
    bool tooLongForChina = pattern == ParserPattern.China
        && phone.Main.Length > 8 + joinerLength;
    if (tooLongForChina)
    {
        phone.Main = null;
    }
}
/*
 * China Mobile:
 *   2G ranges: 134, 135, 136, 137, 138, 139, 150, 151, 152, 157, 158, 159
 *   3G ranges: 187, 188; 182 (TD-SCDMA)
 *
 * China Unicom:
 *   2G ranges: 130, 131, 132, 155, 156
 *   3G ranges: 185, 186
 *
 * China Telecom:
 *   2G ranges: 133, 153
 *   3G ranges: 180, 189
 */
/// <summary>
/// Checks whether the first three characters form an accepted mobile prefix.
/// For the China pattern the prefix must be 13x, 15x (except 154) or 18x
/// (except 181, 183 and 184). Every other pattern is accepted unconditionally.
/// </summary>
/// <param name="chars">Characters of the number; at least three are required.</param>
/// <param name="pattern">Pattern that selects the rule set.</param>
/// <returns><c>true</c> when the prefix is accepted; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="chars"/> holds fewer than three characters.
/// </exception>
internal static bool IsChineseMobilePrefix(char[] chars, ParserPattern pattern)
{
    if (chars.Length < 3)
    {
        throw new ArgumentException("To determine mobile prefix, the text length must be longer than 3");
    }

    // Only the China pattern has prefix rules; NorthAmerica and anything else pass.
    if (pattern != ParserPattern.China)
    {
        return true;
    }

    if (chars[0] != '1')
    {
        return false;
    }

    // The prefix must be 13, 15 or 18.
    switch (chars[1])
    {
        case '3':
            return true;

        case '5':
            // Unicom or Telecom ranges, with 154 excluded.
            return chars[2] != '4';

        case '8':
            // Netcom prefixes: 189, 188, 180, 185, 186, 187 (181, 183, 184 are rejected).
            return chars[2] != '1' && chars[2] != '3' && chars[2] != '4';

        default:
            return false;
    }
}
/// <summary>
/// Creates a parser context for one recognition run.
/// </summary>
/// <param name="text">Text assigned to the context's <c>Text</c> property.</param>
/// <param name="pattern">Pattern assigned to the context's <c>Pattern</c> property.</param>
/// <returns>A new <c>ParserContext</c> with both properties set.</returns>
public static ParserContext CreateParserContext(string text, ParserPattern pattern)
{
    // Pattern is assigned before Text, as in the statement-by-statement form.
    ParserContext created = new ParserContext { Pattern = pattern, Text = text };
    return created;
}
/// <summary>
/// Scans <paramref name="text"/> from left to right and collects every result produced
/// by the registered parsers.
/// </summary>
/// <remarks>
/// At each position the parsers in <c>parserConstructors</c> are tried in order and the
/// first one that returns a non-empty collection wins; the scan then continues after the
/// matched results. Chinese punctuation is skipped. Exceptions thrown by a parser's
/// <c>Parse</c> call are written to the console and the next parser is tried.
/// </remarks>
/// <param name="text">Text to scan.</param>
/// <param name="pattern">Pattern handed to the parsers through the context.</param>
/// <returns>All results found, in scan order.</returns>
public ParseResultCollection Recognize(string text, ParserPattern pattern)
{
    ParserContext context = new ParserContext();
    context.Pattern = pattern;
    context.Text = text;

    ParseResultCollection result = new ParseResultCollection();
    char[] chars = text.ToCharArray();
    int i = 0;
    while (i < chars.Length)
    {
        if (CharacterUtil.IsChinesePunctuation(chars[i]))
        {
            i++;
            continue;
        }

        int scanStart = i;
        bool isFound = false;

        // Every parser is offered the same position; the first match wins.
        foreach (ConstructorInfo ci in parserConstructors)
        {
            IParser parser = ci.Invoke(new object[] { context }) as IParser;
            try
            {
                ParseResultCollection prc = parser.Parse(i);
                if (prc.Count > 0)
                {
                    foreach (ParseResult pr in prc)
                    {
                        result.Add(pr);
                        i += pr.Length;
                    }

                    isFound = true;
                    break;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }
        }

        // Advance exactly one character when no parser matched. This step used to sit
        // inside the parser loop, which moved the index once per failing parser and
        // never moved it at all when no parsers were registered. The second condition
        // also guarantees progress if a match consumed no characters.
        if (!isFound || i <= scanStart)
        {
            i = scanStart + 1;
        }
    }

    return result;
}
/// <summary>
/// Tells whether <paramref name="text"/> looks like a mobile number under
/// <paramref name="pattern"/>.
/// </summary>
/// <remarks>
/// The text is trimmed first. For the China pattern it must then be exactly 11 characters
/// long. The prefix decision itself is delegated to <c>IsChineseMobilePrefix</c>.
/// </remarks>
/// <param name="text">Candidate number; null is treated as "not a mobile number".</param>
/// <param name="pattern">Pattern that selects the rule set.</param>
/// <returns><c>true</c> when the text is accepted as a mobile number; otherwise <c>false</c>.</returns>
public static bool IsMobileNo(string text, ParserPattern pattern)
{
    if (text == null)
    {
        return false;
    }

    char[] chars = text.Trim().ToCharArray();

    if (pattern == ParserPattern.China && chars.Length != 11)
    {
        return false;
    }

    // IsChineseMobilePrefix throws when given fewer than three characters; a yes/no
    // predicate should simply answer "no" for input that short.
    if (chars.Length < 3)
    {
        return false;
    }

    return IsChineseMobilePrefix(chars, pattern);
}
/// <summary>
/// Walks through <paramref name="text"/> and gathers the results of the registered parsers.
/// </summary>
/// <remarks>
/// Chinese punctuation is skipped. At every other position each constructor in
/// <c>parserConstructors</c> is used to create a parser, and the parsers are asked in order
/// to parse at that position; the first non-empty result collection is accepted and the
/// scan resumes after it. An exception thrown by <c>Parse</c> is printed to the console and
/// does not stop the scan.
/// </remarks>
/// <param name="text">Text to scan.</param>
/// <param name="pattern">Pattern placed in the context given to each parser.</param>
/// <returns>The accumulated results, in the order they were found.</returns>
public ParseResultCollection Recognize(string text, ParserPattern pattern)
{
    ParserContext context = new ParserContext();
    context.Pattern = pattern;
    context.Text = text;

    ParseResultCollection result = new ParseResultCollection();
    char[] chars = text.ToCharArray();
    int i = 0;
    while (i < chars.Length)
    {
        char c = chars[i];
        if (CharacterUtil.IsChinesePunctuation(c))
        {
            i++;
            continue;
        }

        int positionBefore = i;
        bool isFound = false;

        // All parsers see the same start position; the earliest one to match wins.
        foreach (ConstructorInfo ci in parserConstructors)
        {
            IParser parser = ci.Invoke(new object[] { context }) as IParser;
            try
            {
                ParseResultCollection prc = parser.Parse(i);
                if (prc.Count > 0)
                {
                    foreach (ParseResult pr in prc)
                    {
                        result.Add(pr);
                        i += pr.Length;
                    }

                    isFound = true;
                    break;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }
        }

        // Step forward by one character only after every parser has had its turn.
        // Doing this inside the parser loop skipped characters (one per failing parser)
        // and looped forever when the parser list was empty. A match that consumed
        // nothing is also stepped over so the scan always makes progress.
        if (!isFound || i <= positionBefore)
        {
            i = positionBefore + 1;
        }
    }

    return result;
}
/// <summary>
/// Decides whether <paramref name="text"/> is a mobile number for the given pattern.
/// </summary>
/// <remarks>
/// Leading and trailing white space is ignored. Under the China pattern only texts of
/// exactly 11 characters qualify. The remaining check is performed by
/// <c>IsChineseMobilePrefix</c>.
/// </remarks>
/// <param name="text">Candidate number; a null value yields <c>false</c>.</param>
/// <param name="pattern">Pattern that selects the rule set.</param>
/// <returns><c>true</c> for an accepted mobile number; otherwise <c>false</c>.</returns>
public static bool IsMobileNo(string text, ParserPattern pattern)
{
    if (text == null)
    {
        return false;
    }

    char[] chars = text.Trim().ToCharArray();

    if (pattern == ParserPattern.China)
    {
        if (chars.Length != 11)
        {
            return false;
        }
    }
    else if (chars.Length < 3)
    {
        // The prefix check throws on fewer than three characters; short input is
        // simply not a mobile number. (The China branch already requires 11.)
        return false;
    }

    return IsChineseMobilePrefix(chars, pattern);
}
/// <summary>
/// Tells whether <paramref name="ch"/> may appear inside a phone number written in the
/// given pattern.
/// </summary>
/// <param name="ch">Character to test.</param>
/// <param name="pattern">Pattern that selects the allowed character set.</param>
/// <returns>
/// <c>true</c> for digits and the punctuation accepted by the pattern; <c>false</c> for
/// everything else, including every character under an unlisted pattern.
/// </returns>
bool IsAllowedChar(char ch, ParserPattern pattern)
{
    // NOTE(review): only the ASCII forms of '-', '(', ')' and space are accepted here
    // (plus the em dash for China); confirm whether full-width variants were intended.
    if (pattern == ParserPattern.China)
    {
        if ((ch >= '0' && ch <= '9') || NumeralUtil.IsArabicNumeral(ch))
        {
            return true;
        }

        switch (ch)
        {
            case '-':
            case '—':
            case '(':
            case ')':
            case '+':
            case '#':
            case ' ':
                return true;
            default:
                return false;
        }
    }

    if (pattern == ParserPattern.NorthAmerica)
    {
        if (NumeralUtil.IsArabicNumeral(ch))
        {
            return true;
        }

        switch (ch)
        {
            case '-':
            case '(':
            case ')':
            case '+':
            case ' ':
            // Letters and the dot that spell the "ext." marker.
            case 'e':
            case 'x':
            case 't':
            case '.':
                return true;
            default:
                return false;
        }
    }

    return false;
}
/*
   China Mobile:
     2G ranges: 134, 135, 136, 137, 138, 139, 150, 151, 152, 157, 158, 159
     3G ranges: 187, 188; 182 (TD-SCDMA)
   China Unicom:
     2G ranges: 130, 131, 132, 155, 156
     3G ranges: 185, 186
   China Telecom:
     2G ranges: 133, 153
     3G ranges: 180, 189
*/
/// <summary>
/// Tests the three leading characters of a number against the mobile prefix rules.
/// Under the China pattern the number must start with 13, 15 or 18, and the prefixes
/// 154, 181, 183 and 184 are rejected. Other patterns always pass.
/// </summary>
/// <param name="chars">Characters of the number; must contain at least three.</param>
/// <param name="pattern">Pattern that selects the rule set.</param>
/// <returns><c>true</c> if the prefix passes; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="chars"/> contains fewer than three characters.
/// </exception>
internal static bool IsChineseMobilePrefix(char[] chars, ParserPattern pattern)
{
    if (chars.Length < 3)
    {
        throw new ArgumentException("To determine mobile prefix, the text length must be longer than 3");
    }

    // Patterns other than China (including NorthAmerica) have no prefix restriction.
    if (pattern != ParserPattern.China)
    {
        return true;
    }

    char first = chars[0];
    char second = chars[1];
    char third = chars[2];

    if (first != '1')
    {
        return false;
    }

    // The prefix must be 13, 15 or 18.
    if (second == '3')
    {
        return true;
    }

    if (second == '5')
    {
        // Unicom or Telecom ranges, with 154 excluded.
        return third != '4';
    }

    if (second == '8')
    {
        // Netcom prefixes: 189, 188, 180, 185, 186, 187.
        bool rejected = third == '1' || third == '3' || third == '4';
        return !rejected;
    }

    return false;
}
/// <summary>
/// Reports whether <paramref name="ch"/> is a character that a phone number of the given
/// pattern may contain.
/// </summary>
/// <param name="ch">Character to test.</param>
/// <param name="pattern">Pattern that selects the allowed character set.</param>
/// <returns>
/// <c>true</c> when the character is a digit or one of the pattern's accepted symbols;
/// <c>false</c> otherwise, and always <c>false</c> for patterns not handled here.
/// </returns>
bool IsAllowedChar(char ch, ParserPattern pattern)
{
    // NOTE(review): the symbol sets below contain ASCII punctuation only (plus the em
    // dash for China); confirm whether full-width variants should be accepted as well.
    if (pattern == ParserPattern.China)
    {
        bool isDigit = (ch >= '0' && ch <= '9') || NumeralUtil.IsArabicNumeral(ch);
        return isDigit || "-—()+# ".IndexOf(ch) >= 0;
    }

    if (pattern == ParserPattern.NorthAmerica)
    {
        // 'e', 'x', 't' and '.' are allowed so that an "ext." marker can be read.
        return NumeralUtil.IsArabicNumeral(ch) || "-()+ ext.".IndexOf(ch) >= 0;
    }

    return false;
}
/// <summary>
/// Tries to read a phone number from the context text beginning at
/// <paramref name="startIndex"/>.
/// </summary>
/// <remarks>
/// The method works in two stages. First it collects the longest run of characters that
/// <c>IsAllowedChar</c> accepts for the context pattern, dropping trailing white space and
/// anything from an unclosed '(' onwards. Then it walks that run and splits it into
/// country code, area code, main number and extension according to the pattern.
/// </remarks>
/// <param name="startIndex">Index into the context text at which scanning starts.</param>
/// <returns>
/// A collection with one result (carrying the assembled <c>PhoneNo</c>) when a main number
/// was found; otherwise an empty collection.
/// </returns>
/// <exception cref="NotImplementedException">
/// Thrown when the second stage is reached with a pattern other than China or NorthAmerica.
/// </exception>
public ParseResultCollection Parse(int startIndex)
{
    string _text = context.Text;
    ParserPattern _pattern = context.Pattern;
    ParseResultCollection prc = new ParseResultCollection();
    char ch;

    // A phone number never starts with a blank.
    if (_text[startIndex] == ' ')
    {
        return prc;
    }

    // ---- Stage 1: collect the run of allowed characters ----
    StringBuilder sb = new StringBuilder(10);
    int braceStartPos = -1; // index inside sb of a '(' that has not been closed yet
    int k = startIndex;
    while (k < _text.Length)
    {
        ch = _text[k];
        if (!IsAllowedChar(ch, _pattern))
        {
            break;
        }

        // NOTE(review): with the literals as they stand, the em dash is the only
        // character that needs folding to ASCII. If full-width digits, blanks or
        // parentheses are meant to be supported, they must be folded here as well.
        if (ch == '—')
        {
            ch = '-';
        }

        if (ch == '(')
        {
            // Remember the position relative to the collected run, not to the whole
            // text, because it is used below to cut the collected run.
            braceStartPos = sb.Length;
        }
        else if (ch == ')')
        {
            braceStartPos = -1;
        }

        sb.Append(ch);
        k++;
    }

    string allowedString = sb.ToString().TrimEnd();
    if (braceStartPos >= 0)
    {
        // Drop an unclosed '(' and everything after it.
        allowedString = allowedString.Substring(0, braceStartPos);
    }

    // Runs shorter than three characters, or exactly four long, are rejected.
    if (allowedString.Length < 3 || allowedString.Length == 4)
    {
        return prc;
    }

    // ---- Stage 2: split the run into its parts ----
    bool bNumberInBrace = false;
    bool bCountryCodeStarted = false;
    bool bAreaCodeStarted = false;
    bool bExtStarted = false;
    int i = 0;
    StringBuilder segment = new StringBuilder(); // digits collected since the last separator
    StringBuilder whole = new StringBuilder();   // text reported as the matched number
    PhoneNo phone = new PhoneNo();

    if (_pattern == ParserPattern.China)
    {
        while (i < allowedString.Length)
        {
            ch = allowedString[i];
            if (ch == '(')
            {
                bNumberInBrace = true;
                bCountryCodeStarted = false;
                whole.Append(ch);
            }
            else if (NumeralUtil.IsArabicNumeral(ch))
            {
                // The first digit group is treated as an area code candidate unless a
                // country code is in progress or an area code is already known.
                if (segment.Length == 0 && !bAreaCodeStarted && phone.AreaCode == null && !bCountryCodeStarted)
                {
                    bAreaCodeStarted = true;
                }

                segment.Append(ch);
                whole.Append(ch);
            }
            else if (ch == ')' && bNumberInBrace)
            {
                if (bCountryCodeStarted)
                {
                    if (segment.Length > 0)
                    {
                        phone.CountryCode = segment.ToString();
                    }

                    bCountryCodeStarted = false;
                }

                if (bAreaCodeStarted)
                {
                    // A city code starting with 0 has at most 4 digits; one that does
                    // not start with 0 has at most 3.
                    if (segment.Length > 0 && (segment[0] == '0' ? segment.Length <= 4 : segment.Length <= 3))
                    {
                        phone.AreaCode = segment.ToString();
                    }

                    bAreaCodeStarted = false;
                }

                whole.Append(ch);
                segment = new StringBuilder();
                bNumberInBrace = false;
            }
            else if (ch == ' ')
            {
                if (bCountryCodeStarted)
                {
                    if (segment.Length > 0)
                    {
                        phone.CountryCode = segment.ToString();
                    }

                    bCountryCodeStarted = false;
                }
                else if (bAreaCodeStarted)
                {
                    if (segment.Length > 0)
                    {
                        phone.AreaCode = segment.ToString();
                    }

                    bAreaCodeStarted = false;
                }
                else if (segment.Length > 0)
                {
                    AssignPhoneMain(segment, phone);
                }

                segment = new StringBuilder();
                bCountryCodeStarted = false;
                whole.Append(ch);
            }
            else if (ch == '-' || ch == '#')
            {
                // The segment may be empty here (for example right after ')' or after
                // another separator), so its length is checked before reading segment[0].
                if (segment.Length > 0 && segment[0] == '0' && (segment.Length == 3 || segment.Length == 4))
                {
                    phone.AreaCode = segment.ToString();
                }
                else if (segment.Length > 0)
                {
                    AssignPhoneMain(segment, phone);
                    bExtStarted = true;
                }

                segment = new StringBuilder();
                whole.Append(ch);
            }
            else if (ch == '+')
            {
                whole.Append(ch);
                bCountryCodeStarted = true;
            }

            i++;
        }

        // Flush the trailing digit group.
        if (segment.Length > 0)
        {
            AssignPhoneMain(segment, phone);
            if (bExtStarted)
            {
                phone.Extension = segment.ToString();
                bExtStarted = false;
            }
        }
    }
    else if (_pattern == ParserPattern.NorthAmerica)
    {
        while (i < allowedString.Length)
        {
            ch = allowedString[i];
            if (NumeralUtil.IsArabicNumeral(ch))
            {
                whole.Append(ch);
                segment.Append(ch);
            }
            else if (ch == ' ')
            {
                whole.Append(ch);
            }
            else if (ch == '(')
            {
                bAreaCodeStarted = true;
                whole.Append(ch);
            }
            else if (ch == ')')
            {
                if (bAreaCodeStarted)
                {
                    if (segment.Length > 0)
                    {
                        phone.AreaCode = segment.ToString();
                    }

                    bAreaCodeStarted = false;
                }

                segment = new StringBuilder();
                whole.Append(ch);
            }
            else if (ch == '-')
            {
                if (bCountryCodeStarted)
                {
                    if (segment.Length > 0)
                    {
                        phone.CountryCode = segment.ToString();
                    }

                    // After "+<country>-" the next group is the area code.
                    bCountryCodeStarted = false;
                    bAreaCodeStarted = true;
                }
                else if (bAreaCodeStarted)
                {
                    if (segment.Length > 0)
                    {
                        phone.AreaCode = segment.ToString();
                    }

                    bAreaCodeStarted = false;
                }
                else if (segment.Length > 0)
                {
                    AssignPhoneMain(segment, phone);
                }

                whole.Append(ch);
                segment = new StringBuilder();
            }
            else if (ch == '+')
            {
                bCountryCodeStarted = true;
                whole.Append(ch);
            }
            else if (ch == '.')
            {
                // A dot is only valid as the end of an "ext." marker.
                if (segment.ToString() != "ext")
                {
                    break;
                }

                whole.Append("ext.");
            }
            else if (ch == 'e' || ch == 'x' || ch == 't')
            {
                segment.Append(ch);
            }

            i++;
        }

        // NOTE(review): bExtStarted is never set in this branch, so Extension is never
        // filled in for North American numbers - confirm whether that is intended.
        if (segment.Length > 0)
        {
            AssignPhoneMain(segment, phone);
            if (bExtStarted)
            {
                phone.Extension = segment.ToString();
                bExtStarted = false;
            }
        }
    }
    else
    {
        throw new NotImplementedException("Phone No. in " + _pattern.ToString() + " is not implemented in the parser.");
    }

    if (whole.Length > 0 && phone.Main != null)
    {
        prc.Add(ParseResult.Create(whole.ToString(), startIndex, POSType.A_M, phone));
    }

    return prc;
}