Example #1
0
        /// <summary>
        /// Collects the run of digits that begins at <paramref name="startIndex"/> in the
        /// context text and reports it as a single result when its length fits the
        /// active pattern (6 digits for China, 5 for NorthAmerica).
        /// </summary>
        /// <param name="startIndex">Index in the context text at which scanning starts.</param>
        /// <returns>
        /// A collection holding one <c>POSType.A_M</c> result for the digit run, or an empty
        /// collection when there is no digit run or its length does not fit the pattern.
        /// </returns>
        public ParseResultCollection Parse(int startIndex)
        {
            string                _text   = context.Text;
            ParserPattern         _format = context.Pattern;
            ParseResultCollection prc     = new ParseResultCollection();

            // Nothing can be parsed outside the bounds of the text.
            if (string.IsNullOrEmpty(_text) || startIndex < 0 || startIndex >= _text.Length)
            {
                return(prc);
            }

            StringBuilder sb = new StringBuilder(6);
            int           i  = startIndex;

            // The bounds check must come before the character is read; otherwise a digit
            // run that reaches the end of the text indexes one past the last character.
            while (i < _text.Length)
            {
                char ch = _text[i];

                if (!NumeralUtil.IsArabicNumeral(ch) && !(ch >= '0' && ch <= '9'))
                {
                    break;
                }
                sb.Append(ch);
                i++;
            }
            string source = sb.ToString();

            // An empty match is never a result; emitting one would hand callers a
            // zero-length token.
            if (source.Length == 0)
            {
                return(prc);
            }
            if (_format == ParserPattern.China && source.Length != 6)
            {
                return(prc);
            }
            if (_format == ParserPattern.NorthAmerica && source.Length != 5)
            {
                return(prc);
            }

            prc.Add(ParseResult.Create(source, startIndex, POSType.A_M));
            return(prc);
        }
Example #2
0
 /// <summary>
 /// Builds a new <c>ParserContext</c> carrying the given pattern and text.
 /// </summary>
 /// <param name="text">Text to store in the context.</param>
 /// <param name="pattern">Pattern to store in the context.</param>
 /// <returns>The freshly created context.</returns>
 public static ParserContext CreateParserContext(string text, ParserPattern pattern)
 {
     // Pattern is assigned before Text, in the same order as the long-hand form.
     return new ParserContext { Pattern = pattern, Text = text };
 }
Example #3
0
        /// <summary>
        /// Stores <paramref name="segment"/> as the main number of <paramref name="phone"/>,
        /// or appends it to a main number that is still shorter than 5 characters, and then
        /// classifies the outcome as mobile or not.
        /// </summary>
        /// <param name="segment">Digits collected for the current segment.</param>
        /// <param name="phone">Phone record being filled in.</param>
        void AssignPhoneMain(StringBuilder segment, PhoneNo phone)
        {
            ParserPattern activePattern = context.Pattern;

            // Segments are joined with '-' for NorthAmerica and with a space otherwise.
            char joiner       = activePattern == ParserPattern.NorthAmerica ? '-' : ' ';
            int  joinerLength = 0;

            if (phone.Main == null)
            {
                phone.Main = segment.ToString();
            }
            else if (phone.Main.Length < 5)
            {
                phone.Main   = phone.Main + joiner + segment.ToString();
                joinerLength = 1;
            }

            if (IsMobileNo(phone.Main, activePattern))
            {
                phone.IsMobile = true;
                return;
            }

            // Not a mobile number: for China the main number may hold at most 8
            // characters, plus one when a joiner was inserted above.
            bool tooLongForChina = activePattern == ParserPattern.China &&
                                   phone.Main.Length > 8 + joinerLength;

            if (tooLongForChina)
            {
                phone.Main = null;
            }
        }
Example #4
0
 /*
  * China Mobile:
  * 2G prefixes: 134, 135, 136, 137, 138, 139, 150, 151, 152, 157, 158, 159;
  * 3G prefixes: 187, 188; 182 (TD-SCDMA)
  *
  * China Unicom:
  * 2G prefixes: 130, 131, 132, 155, 156;
  * 3G prefixes: 185, 186;
  *
  * China Telecom:
  * 2G prefixes: 133, 153;
  * 3G prefixes: 180, 189
  */
 /// <summary>
 /// Checks whether the first three characters form an accepted mobile prefix.
 /// Only the China pattern is actually filtered; every other pattern is accepted.
 /// </summary>
 /// <param name="chars">Characters of the number; at least three are required.</param>
 /// <param name="pattern">Pattern that selects the prefix rules.</param>
 /// <returns><c>true</c> when the prefix is accepted, otherwise <c>false</c>.</returns>
 /// <exception cref="ArgumentException">Thrown when fewer than three characters are supplied.</exception>
 internal static bool IsChineseMobilePrefix(char[] chars, ParserPattern pattern)
 {
     if (chars.Length < 3)
     {
         throw new ArgumentException("To determine mobile prefix, the text length must be longer than 3");
     }

     // No prefix rules exist for patterns other than China.
     if (pattern != ParserPattern.China)
     {
         return(true);
     }

     char second = chars[1];
     char third  = chars[2];

     if (chars[0] != '1')
     {
         return(false);
     }

     switch (second)
     {
         case '3':
             // Every 13x prefix is accepted.
             return(true);

         case '5':
             // 15x is accepted except 154.
             return(third != '4');

         case '8':
             // 18x is accepted except 181, 183 and 184.
             return(third != '1' && third != '3' && third != '4');

         default:
             // The prefix must start with 13, 15 or 18.
             return(false);
     }
 }
Example #5
0
        /// <summary>
        /// Creates a <c>ParserContext</c> initialised with the supplied pattern and text.
        /// </summary>
        /// <param name="text">Text the context should carry.</param>
        /// <param name="pattern">Pattern the context should carry.</param>
        /// <returns>The new context instance.</returns>
        public static ParserContext CreateParserContext(string text, ParserPattern pattern)
        {
            // Initialiser order (Pattern, then Text) mirrors the explicit assignments.
            ParserContext created = new ParserContext { Pattern = pattern, Text = text };

            return(created);
        }
Example #6
0
        /// <summary>
        /// Walks <paramref name="text"/> from left to right and, at every position, offers
        /// the position to each parser built from <c>parserConstructors</c> in turn. The first
        /// parser that yields results wins; its results are collected and the scan jumps
        /// past them. Positions no parser recognises are skipped one character at a time.
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <param name="pattern">Pattern handed to the parsers through the context.</param>
        /// <returns>All results produced by the parsers, in scan order.</returns>
        public ParseResultCollection Recognize(string text, ParserPattern pattern)
        {
            ParserContext context = new ParserContext();

            context.Pattern = pattern;
            context.Text    = text;

            ParseResultCollection result = new ParseResultCollection();

            char[] chars = text.ToCharArray();

            int i = 0;

            while (i < chars.Length)
            {
                char c = chars[i];

                if (CharacterUtil.IsChinesePunctuation(c))
                {
                    i++;
                    continue;
                }
                bool isFound = false;
                // Original note: place names are scanned before person names so that
                // incorrect person names can be ruled out; the actual order is whatever
                // parserConstructors enumerates.
                foreach (ConstructorInfo ci in parserConstructors)
                {
                    IParser parser = ci.Invoke(new object[] { context }) as IParser;

                    try
                    {
                        ParseResultCollection prc = parser.Parse(i);

                        if (prc.Count > 0)
                        {
                            int consumed = 0;

                            foreach (ParseResult pr in prc)
                            {
                                result.Add(pr);
                                consumed += pr.Length;
                            }
                            // Always make progress, even if a parser reports only
                            // zero-length results; otherwise the scan would never end.
                            i      += consumed > 0 ? consumed : 1;
                            isFound = true;
                            break;
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a failing parser is logged and the next one is tried.
                        Console.WriteLine(ex);
                    }
                }

                // Advance only after EVERY parser has had a chance at this position.
                // Advancing inside the foreach made later parsers run at shifted
                // positions and skipped characters.
                if (!isFound)
                {
                    i++;
                }
            }
            return(result);
        }
Example #7
0
 /// <summary>
 /// Decides whether <paramref name="text"/> is a mobile number under the given pattern.
 /// For China the trimmed text must be exactly 11 characters long and carry an accepted
 /// prefix; for other patterns only the prefix check is applied.
 /// </summary>
 /// <param name="text">Candidate number; surrounding whitespace is ignored.</param>
 /// <param name="pattern">Pattern that selects the rules.</param>
 /// <returns><c>true</c> when the text qualifies as a mobile number, otherwise <c>false</c>.</returns>
 public static bool IsMobileNo(string text, ParserPattern pattern)
 {
     if (text == null)
     {
         return false;
     }

     char[] chars = text.Trim().ToCharArray();

     if (pattern == ParserPattern.China && chars.Length != 11)
     {
         return false;
     }

     // IsChineseMobilePrefix throws for fewer than three characters. A fragment that
     // short cannot be a mobile number, so answer "no" instead of raising.
     if (chars.Length < 3)
     {
         return false;
     }

     return IsChineseMobilePrefix(chars, pattern);
 }
        /// <summary>
        /// Scans <paramref name="text"/> position by position. At each position the parsers
        /// created from <c>parserConstructors</c> are tried in order; the first one that
        /// returns results contributes them and the scan continues after the matched text.
        /// Unrecognised positions are skipped one character at a time.
        /// </summary>
        /// <param name="text">Text to scan.</param>
        /// <param name="pattern">Pattern passed to the parsers via the context.</param>
        /// <returns>The accumulated parse results in scan order.</returns>
        public ParseResultCollection Recognize(string text, ParserPattern pattern)
        {
            ParserContext context = new ParserContext();
            context.Pattern = pattern;
            context.Text = text;

            ParseResultCollection result = new ParseResultCollection();

            char[] chars = text.ToCharArray();

            int i = 0;

            while (i < chars.Length)
            {
                char c = chars[i];

                if (CharacterUtil.IsChinesePunctuation(c))
                {
                    i++;
                    continue;
                }
                bool isFound = false;
                // Original note: place names are scanned before person names so that
                // incorrect person names can be ruled out; the actual order is whatever
                // parserConstructors enumerates.
                foreach (ConstructorInfo ci in parserConstructors)
                {
                    IParser parser = ci.Invoke(new object[] { context }) as IParser;

                    try
                    {
                        ParseResultCollection prc = parser.Parse(i);

                        if (prc.Count > 0)
                        {
                            int consumed = 0;

                            foreach (ParseResult pr in prc)
                            {
                                result.Add(pr);
                                consumed += pr.Length;
                            }
                            // Guarantee forward progress even for zero-length results.
                            i += consumed > 0 ? consumed : 1;
                            isFound = true;
                            break;
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: log the failure and let the next parser try.
                        Console.WriteLine(ex);
                    }
                }

                // The index moves on only once all parsers have failed at this position.
                // Incrementing per failed parser (inside the foreach) shifted the
                // position seen by later parsers and skipped input.
                if (!isFound)
                {
                    i++;
                }
            }
            return result;
        }
Example #9
0
 /// <summary>
 /// Tells whether <paramref name="text"/> counts as a mobile number for the pattern.
 /// China requires exactly 11 characters (after trimming) plus an accepted prefix;
 /// other patterns only go through the prefix check.
 /// </summary>
 /// <param name="text">Candidate number; leading and trailing whitespace is ignored.</param>
 /// <param name="pattern">Pattern that selects the rules.</param>
 /// <returns><c>true</c> for a mobile number, otherwise <c>false</c>.</returns>
 public static bool IsMobileNo(string text, ParserPattern pattern)
 {
     if (text == null)
     {
         return(false);
     }

     char[] chars = text.Trim().ToCharArray();

     if (pattern == ParserPattern.China)
     {
         if (chars.Length != 11)
         {
             return(false);
         }
     }
     else if (chars.Length < 3)
     {
         // The prefix check throws on fewer than three characters; such a short
         // fragment is simply not a mobile number.
         return(false);
     }
     return(IsChineseMobilePrefix(chars, pattern));
 }
Example #10
0
 /// <summary>
 /// Tells whether <paramref name="ch"/> may appear inside a phone number for the
 /// given pattern. Patterns other than China and NorthAmerica allow nothing.
 /// </summary>
 /// <param name="ch">Character to test.</param>
 /// <param name="pattern">Pattern that selects the allowed character set.</param>
 /// <returns><c>true</c> when the character is allowed, otherwise <c>false</c>.</returns>
 bool IsAllowedChar(char ch, ParserPattern pattern)
 {
     if (pattern == ParserPattern.China)
     {
         // ASCII digits are accepted directly; anything else is referred to NumeralUtil.
         bool isDigit = (ch >= '0' && ch <= '9') || NumeralUtil.IsArabicNumeral(ch);

         // Dash, em dash, parentheses, plus, hash and space.
         return isDigit || "-—()+# ".IndexOf(ch) >= 0;
     }

     if (pattern == ParserPattern.NorthAmerica)
     {
         // Dash, parentheses, plus, space, and the letters/dot that spell "ext.".
         return NumeralUtil.IsArabicNumeral(ch) || "-()+ ext.".IndexOf(ch) >= 0;
     }

     return false;
 }
Example #11
0
        /*
         China Mobile:
         2G prefixes: 134, 135, 136, 137, 138, 139, 150, 151, 152, 157, 158, 159;
         3G prefixes: 187, 188; 182 (TD-SCDMA)

         China Unicom:
         2G prefixes: 130, 131, 132, 155, 156;
         3G prefixes: 185, 186;

         China Telecom:
         2G prefixes: 133, 153;
         3G prefixes: 180, 189
         */
        /// <summary>
        /// Tests the leading three characters against the accepted mobile prefixes.
        /// Filtering only happens for the China pattern; all other patterns pass.
        /// </summary>
        /// <param name="chars">Characters of the number; at least three are needed.</param>
        /// <param name="pattern">Pattern that selects the prefix rules.</param>
        /// <returns><c>true</c> if the prefix is accepted, otherwise <c>false</c>.</returns>
        /// <exception cref="ArgumentException">Thrown when fewer than three characters are given.</exception>
        internal static bool IsChineseMobilePrefix(char[] chars, ParserPattern pattern)
        {
            if (chars.Length < 3)
                throw new ArgumentException("To determine mobile prefix, the text length must be longer than 3");

            // Patterns other than China have no prefix restrictions here.
            if (pattern != ParserPattern.China)
                return true;

            if (chars[0] != '1')
                return false;

            char series = chars[1];
            char digit  = chars[2];

            // 13x: always accepted.
            if (series == '3')
                return true;

            // 15x: accepted unless it is 154.
            if (series == '5')
                return digit != '4';

            // 18x: accepted unless it is 181, 183 or 184.
            if (series == '8')
                return !(digit == '1' || digit == '3' || digit == '4');

            // Anything not starting with 13, 15 or 18 is rejected.
            return false;
        }
Example #12
0
 /// <summary>
 /// Reports whether <paramref name="ch"/> is permitted inside a phone number under
 /// the given pattern. Any pattern other than China or NorthAmerica permits nothing.
 /// </summary>
 /// <param name="ch">Character under test.</param>
 /// <param name="pattern">Pattern selecting the permitted set.</param>
 /// <returns><c>true</c> if permitted, otherwise <c>false</c>.</returns>
 bool IsAllowedChar(char ch, ParserPattern pattern)
 {
     if (pattern == ParserPattern.China)
     {
         // ASCII digits short-circuit; other characters are referred to NumeralUtil.
         if ((ch >= '0' && ch <= '9') || NumeralUtil.IsArabicNumeral(ch))
         {
             return(true);
         }

         switch (ch)
         {
             case '-':
             case '—':
             case '(':
             case ')':
             case '+':
             case '#':
             case ' ':
                 return(true);

             default:
                 return(false);
         }
     }

     if (pattern == ParserPattern.NorthAmerica)
     {
         if (NumeralUtil.IsArabicNumeral(ch))
         {
             return(true);
         }

         switch (ch)
         {
             case '-':
             case '(':
             case ')':
             case '+':
             case ' ':
             // Letters and dot that make up the "ext." marker.
             case 'e':
             case 'x':
             case 't':
             case '.':
                 return(true);

             default:
                 return(false);
         }
     }

     return(false);
 }
Example #13
0
        /// <summary>
        /// Tries to read a phone number from the context text starting at
        /// <paramref name="startIndex"/>. The longest run of allowed characters is taken,
        /// cut back at an unclosed opening parenthesis, and then split into country code,
        /// area code, main number and extension according to the active pattern.
        /// </summary>
        /// <param name="startIndex">Index in the context text at which scanning starts.</param>
        /// <returns>
        /// A collection with one <c>POSType.A_M</c> result carrying the recognised text and
        /// the filled <c>PhoneNo</c>, or an empty collection when no main number was found.
        /// </returns>
        /// <exception cref="NotImplementedException">
        /// Thrown when the pattern is neither China nor NorthAmerica and the candidate
        /// text passed the length check.
        /// </exception>
        public ParseResultCollection Parse(int startIndex)
        {
            string                _text    = context.Text;
            ParserPattern         _pattern = context.Pattern;
            int                   k        = startIndex;
            char                  ch;
            StringBuilder         sb  = new StringBuilder(10);
            ParseResultCollection prc = new ParseResultCollection();

            // Nothing to parse outside the text, and a number never starts with a space.
            if (string.IsNullOrEmpty(_text) || startIndex < 0 || startIndex >= _text.Length ||
                _text[startIndex] == ' ')
            {
                return(prc);
            }

            // Offset (within sb) of an opening parenthesis that has not been closed yet.
            int braceStartPos = -1;

            while (k < _text.Length)
            {
                ch = _text[k];
                if (!IsAllowedChar(ch, _pattern))
                {
                    break;
                }

                // NOTE(review): the original also "normalised" digits, space and
                // parentheses onto themselves, which had no effect as written (possibly
                // full-width variants lost in transcoding - confirm). Only the em dash
                // mapping changes anything.
                if (ch == '—')
                {
                    ch = '-';
                }

                if (ch == '(')
                {
                    // Stored relative to startIndex because it is used below as an offset
                    // into the collected string, not into the whole text.
                    braceStartPos = k - startIndex;
                }
                else if (ch == ')')
                {
                    braceStartPos = -1;
                }
                sb.Append(ch);
                k++;
            }
            string allowedString = sb.ToString().TrimEnd();

            // Drop everything from an unclosed "(" onwards.
            if (braceStartPos >= 0)
            {
                allowedString = allowedString.Substring(0, braceStartPos);
            }

            if (allowedString.Length < 3 || allowedString.Length == 4)
            {
                return(prc);
            }

            bool bNumberInBrace      = false;
            bool bCountryCodeStarted = false;
            bool bAreaCodeStarted    = false;
            bool bExtStarted         = false;
            int  i = 0;

            StringBuilder segment = new StringBuilder();
            StringBuilder whole   = new StringBuilder();

            PhoneNo phone = new PhoneNo();

            if (_pattern == ParserPattern.China)
            {
                while (i < allowedString.Length)
                {
                    ch = allowedString[i];
                    if (ch == '(')
                    {
                        bNumberInBrace      = true;
                        bCountryCodeStarted = false;
                        whole.Append(ch);
                    }
                    else if (NumeralUtil.IsArabicNumeral(ch))
                    {
                        // The first digits seen, when nothing else is in progress, are
                        // treated as the start of an area code.
                        if (segment.Length == 0 && !bAreaCodeStarted &&
                            phone.AreaCode == null && !bCountryCodeStarted)
                        {
                            bAreaCodeStarted = true;
                        }

                        segment.Append(ch);
                        whole.Append(ch);
                    }
                    else if (ch == ')' && bNumberInBrace)
                    {
                        if (bCountryCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.CountryCode = segment.ToString();
                            }
                            bCountryCodeStarted = false;
                        }
                        if (bAreaCodeStarted)
                        {
                            // Area codes starting with 0 have at most 4 digits; those not
                            // starting with 0 have at most 3.
                            if (segment.Length > 0 && (segment[0] == '0' ? segment.Length <= 4 : segment.Length <= 3))
                            {
                                phone.AreaCode = segment.ToString();
                            }
                            bAreaCodeStarted = false;
                        }
                        whole.Append(ch);
                        segment        = new StringBuilder();
                        bNumberInBrace = false;
                    }
                    else if (ch == ' ')
                    {
                        if (bCountryCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.CountryCode = segment.ToString();
                            }
                            bCountryCodeStarted = false;
                        }
                        else if (bAreaCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.AreaCode = segment.ToString();
                            }
                            bAreaCodeStarted = false;
                        }
                        else if (segment.Length > 0)
                        {
                            AssignPhoneMain(segment, phone);
                        }
                        segment             = new StringBuilder();
                        bCountryCodeStarted = false;
                        whole.Append(ch);
                    }
                    else if (ch == '-' || ch == '#')
                    {
                        // The length test comes first so segment[0] is never read on an
                        // empty segment (e.g. a separator right after a space).
                        if ((segment.Length == 3 || segment.Length == 4) && segment[0] == '0')
                        {
                            phone.AreaCode = segment.ToString();
                        }
                        else if (segment.Length > 0)
                        {
                            AssignPhoneMain(segment, phone);
                            bExtStarted = true;
                        }
                        segment = new StringBuilder();
                        whole.Append(ch);
                    }
                    else if (ch == '+')
                    {
                        whole.Append(ch);
                        bCountryCodeStarted = true;
                    }
                    i++;
                }
                // Flush the trailing segment; after a '-'/'#' it also becomes the extension.
                if (segment.Length > 0)
                {
                    AssignPhoneMain(segment, phone);
                    if (bExtStarted)
                    {
                        phone.Extension = segment.ToString();
                        bExtStarted     = false;
                    }
                }
            }
            else if (_pattern == ParserPattern.NorthAmerica)
            {
                while (i < allowedString.Length)
                {
                    ch = allowedString[i];

                    if (NumeralUtil.IsArabicNumeral(ch))
                    {
                        whole.Append(ch);
                        segment.Append(ch);
                    }
                    else if (ch == ' ')
                    {
                        whole.Append(ch);
                    }
                    else if (ch == '(')
                    {
                        bAreaCodeStarted = true;
                        whole.Append(ch);
                    }
                    else if (ch == ')')
                    {
                        if (bAreaCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.AreaCode = segment.ToString();
                            }
                            bAreaCodeStarted = false;
                        }
                        segment = new StringBuilder();
                        whole.Append(ch);
                    }
                    else if (ch == '-')
                    {
                        if (bCountryCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.CountryCode = segment.ToString();
                            }
                            // A country code is followed by the area code.
                            bCountryCodeStarted = false;
                            bAreaCodeStarted    = true;
                        }
                        else if (bAreaCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.AreaCode = segment.ToString();
                            }
                            bAreaCodeStarted = false;
                        }
                        else if (segment.Length > 0)
                        {
                            AssignPhoneMain(segment, phone);
                        }
                        whole.Append(ch);
                        segment = new StringBuilder();
                    }
                    else if (ch == '+')
                    {
                        bCountryCodeStarted = true;
                        whole.Append(ch);
                    }
                    else if (ch == '.')
                    {
                        // A dot is only accepted as the end of "ext."; anything else
                        // ends the scan.
                        if (segment.ToString() != "ext")
                        {
                            break;
                        }

                        whole.Append("ext.");
                    }
                    else if (ch == 'e' || ch == 'x' || ch == 't')
                    {
                        segment.Append(ch);
                    }
                    i++;
                }
                if (segment.Length > 0)
                {
                    AssignPhoneMain(segment, phone);
                    // NOTE(review): bExtStarted is never set in this branch, so the
                    // extension is not assigned here - confirm whether that is intended.
                    if (bExtStarted)
                    {
                        phone.Extension = segment.ToString();
                        bExtStarted     = false;
                    }
                }
            }
            else
            {
                throw new NotImplementedException("Phone No. in " + _pattern.ToString() + " is not implemented in the parser.");
            }
            if (whole.Length > 0 && phone.Main != null)
            {
                prc.Add(ParseResult.Create(whole.ToString(), startIndex, POSType.A_M, phone));
            }
            return(prc);
        }