Пример #1
0
        public ParseResultCollection Parse(int startIndex)
        {
            string _text = context.Text;
            ParseResultCollection prc = new ParseResultCollection();

            int  i  = startIndex;
            char ch = _text[i];

            while ((NumeralUtil.IsArabicNumeral(ch) || NumeralUtil.IsChineseNumeralChars(ch) || ch == '.') &&
                   i + 1 < _text.Length)
            {
                if (i == startIndex && (NumeralUtil.IsChineseGenDigit(ch) && ch != '十'))   //首字出现进位符
                {
                    return(prc);
                }
                ch = _text[++i];
            }
            if (i == startIndex)
            {
                return(prc);
            }
            int j = Math.Min(i, _text.Length);

            if (IsChineseQuantity(_text[j]))
            {
                prc.Add(ParseResult.Create(_text.Substring(startIndex, i - startIndex), startIndex, POSType.A_M));
                prc.Add(ParseResult.Create(_text[i].ToString(), i, POSType.A_Q));
            }
            return(prc);
        }
Пример #2
0
        public ParseResultCollection Parse(int startIndex)
        {
            string                _text   = context.Text;
            ParserPattern         _format = context.Pattern;
            char                  ch;
            int                   i   = startIndex;
            StringBuilder         sb  = new StringBuilder(6);
            ParseResultCollection prc = new ParseResultCollection();

            ch = _text[i];
            while (NumeralUtil.IsArabicNumeral(ch) || (ch >= '0' && ch <= '9') && i < _text.Length)
            {
                sb.Append(ch);
                ch = _text[++i];
            }
            string source = sb.ToString();

            if (_format == ParserPattern.China)
            {
                if (source.Length != 6)
                {
                    return(prc);
                }
            }
            else if (_format == ParserPattern.NorthAmerica)
            {
                if (source.Length != 5)
                {
                    return(prc);
                }
            }
            prc.Add(ParseResult.Create(source.ToString(), startIndex, POSType.A_M));

            return(prc);
        }
Пример #3
0
        public ParseResultCollection Parse(int startIndex)
        {
            string                _text    = context.Text;
            ParserPattern         _pattern = context.Pattern;
            int                   k        = startIndex;
            char                  ch;
            StringBuilder         sb  = new StringBuilder(10);
            ParseResultCollection prc = new ParseResultCollection();

            if (_text[startIndex] == ' ' || _text[startIndex] == ' ')
            {
                return(prc);
            }

            int braceStartPos = -1;

            while (k < _text.Length)
            {
                ch = _text[k];
                if (!IsAllowedChar(ch, _pattern))
                {
                    break;
                }
                if (ch >= '0' && ch <= '9')
                {
                    ch = (char)(ch - '0' + '0');
                }
                if (ch == ' ')
                {
                    ch = ' ';
                }
                else if (ch == '(')
                {
                    ch = '(';
                }
                else if (ch == ')')
                {
                    ch = ')';
                }
                else if (ch == '-' || ch == '—')
                {
                    ch = '-';
                }

                if (ch == '(')
                {
                    braceStartPos = k;
                }
                else if (ch == ')')
                {
                    braceStartPos = -1;
                }
                sb.Append(ch);
                k++;
            }
            string allowedString = sb.ToString().TrimEnd();

            if (braceStartPos >= 0)
            {
                allowedString = allowedString.Substring(0, braceStartPos);
            }

            if (allowedString.Length < 3 || allowedString.Length == 4)
            {
                return(prc);
            }

            bool bNumberInBrace      = false;
            bool bCountryCodeStarted = false;
            bool bAreaCodeStarted    = false;
            bool bExtStarted         = false;
            int  i = 0;

            StringBuilder segment = new StringBuilder();
            StringBuilder whole   = new StringBuilder();

            PhoneNo phone = new PhoneNo();

            if (_pattern == ParserPattern.China)
            {
                while (i < allowedString.Length)
                {
                    ch = allowedString[i];
                    if (ch == '(')
                    {
                        bNumberInBrace      = true;
                        bCountryCodeStarted = false;
                        whole.Append(ch);
                    }
                    else if (NumeralUtil.IsArabicNumeral(ch))
                    {
                        if (segment.Length == 0 && !bAreaCodeStarted &&
                            phone.AreaCode == null && !bCountryCodeStarted)
                        {
                            bAreaCodeStarted = true;
                        }

                        segment.Append(ch);
                        whole.Append(ch);
                    }
                    else if (ch == ')' && bNumberInBrace)
                    {
                        if (bCountryCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.CountryCode = segment.ToString();
                            }
                            bCountryCodeStarted = false;
                        }
                        if (bAreaCodeStarted)
                        {
                            if (segment.Length > 0 && (segment[0] == '0' ? segment.Length <= 4 : segment.Length <= 3))  //城市代码以0开头,最多4个数字;不以0开头,三个数字
                            {
                                phone.AreaCode = segment.ToString();
                            }
                            bAreaCodeStarted = false;
                        }
                        whole.Append(ch);
                        segment        = new StringBuilder();
                        bNumberInBrace = false;
                    }
                    else if (ch == ' ')
                    {
                        if (bCountryCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.CountryCode = segment.ToString();
                            }
                            bCountryCodeStarted = false;
                        }
                        else if (bAreaCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.AreaCode = segment.ToString();
                            }
                            bAreaCodeStarted = false;
                        }
                        else if (segment.Length > 0)
                        {
                            AssignPhoneMain(segment, phone);
                        }
                        segment             = new StringBuilder();
                        bCountryCodeStarted = false;
                        whole.Append(ch);
                    }
                    else if (ch == '-' || ch == '#')
                    {
                        if (segment[0] == '0' && (segment.Length == 3 || segment.Length == 4))
                        {
                            phone.AreaCode = segment.ToString();
                        }
                        else if (segment.Length > 0)
                        {
                            AssignPhoneMain(segment, phone);
                            bExtStarted = true;
                        }
                        segment = new StringBuilder();
                        whole.Append(ch);
                    }
                    else if (ch == '+')
                    {
                        whole.Append(ch);
                        bCountryCodeStarted = true;
                    }
                    i++;
                }
                if (segment.Length > 0)
                {
                    AssignPhoneMain(segment, phone);
                    if (bExtStarted)
                    {
                        phone.Extension = segment.ToString();
                        bExtStarted     = false;
                    }
                }
            }
            else if (_pattern == ParserPattern.NorthAmerica)
            {
                while (i < allowedString.Length)
                {
                    ch = allowedString[i];

                    if (NumeralUtil.IsArabicNumeral(ch))
                    {
                        whole.Append(ch);
                        segment.Append(ch);
                    }
                    else if (ch == ' ')
                    {
                        whole.Append(ch);
                    }
                    else if (ch == '(')
                    {
                        bAreaCodeStarted = true;
                        whole.Append(ch);
                    }
                    else if (ch == ')')
                    {
                        if (bAreaCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.AreaCode = segment.ToString();
                            }
                            bAreaCodeStarted = false;
                        }
                        segment = new StringBuilder();
                        whole.Append(ch);
                    }
                    else if (ch == '-')
                    {
                        if (bCountryCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.CountryCode = segment.ToString();
                            }
                            bCountryCodeStarted = false;
                            bAreaCodeStarted    = true;
                        }
                        else if (bAreaCodeStarted)
                        {
                            if (segment.Length > 0)
                            {
                                phone.AreaCode = segment.ToString();
                            }
                            bAreaCodeStarted = false;
                        }
                        else if (segment.Length > 0)
                        {
                            AssignPhoneMain(segment, phone);
                        }
                        whole.Append(ch);
                        segment = new StringBuilder();
                    }
                    else if (ch == '+')
                    {
                        bCountryCodeStarted = true;
                        whole.Append(ch);
                    }
                    else if (ch == '.')
                    {
                        if (segment.ToString() != "ext")
                        {
                            break;
                        }

                        whole.Append("ext.");
                    }
                    else if (ch == 'e' || ch == 'x' || ch == 't')
                    {
                        segment.Append(ch);
                    }
                    i++;
                }
                if (segment.Length > 0)
                {
                    AssignPhoneMain(segment, phone);
                    if (bExtStarted)
                    {
                        phone.Extension = segment.ToString();
                        bExtStarted     = false;
                    }
                }
            }
            else
            {
                throw new NotImplementedException("Phone No. in " + _pattern.ToString() + " is not implemented in the parser.");
            }
            if (whole.Length > 0 && phone.Main != null)
            {
                prc.Add(ParseResult.Create(whole.ToString(), startIndex, POSType.A_M, phone));
            }
            return(prc);
        }
Пример #4
0
 bool IsAllowedChar(char ch, ParserPattern pattern)
 {
     if (pattern == ParserPattern.China)
     {
         if (ch >= '0' && ch <= '9')
         {
             return(true);
         }
         if (NumeralUtil.IsArabicNumeral(ch))
         {
             return(true);
         }
         if (ch == '-' || ch == '-' || ch == '—')
         {
             return(true);
         }
         if (ch == '(' || ch == '(' || ch == ')' || ch == ')')
         {
             return(true);
         }
         if (ch == '+')
         {
             return(true);
         }
         if (ch == '#')
         {
             return(true);
         }
         if (ch == ' ' || ch == ' ')
         {
             return(true);
         }
     }
     else if (pattern == ParserPattern.NorthAmerica)
     {
         if (NumeralUtil.IsArabicNumeral(ch))
         {
             return(true);
         }
         if (ch == '-' || ch == '-')
         {
             return(true);
         }
         if (ch == '(' || ch == ')')
         {
             return(true);
         }
         if (ch == '+')
         {
             return(true);
         }
         if (ch == ' ')
         {
             return(true);
         }
         if (ch == 'e' || ch == 'x' || ch == 't' || ch == '.')
         {
             return(true);
         }
     }
     return(false);
 }
Пример #5
0
        public ParseResultCollection Parse(int startIndex)
        {
            string text = NumeralUtil.ConvertChineseNumeral2Arabic(context.Text);
            ParseResultCollection prc = new ParseResultCollection();

            int           boundary      = Math.Min(maxDateTimeTextLength, text.Length - startIndex);
            string        temp          = text.Substring(startIndex, boundary);
            StringBuilder sbDateText    = new StringBuilder();
            StringBuilder sbPatternText = new StringBuilder();
            StringBuilder sbText        = new StringBuilder();
            int           strLen        = 0;
            int           i;
            char          prevCh     = ' ';
            bool          nonNumeric = false;

            for (i = 0; i < boundary; i++)
            {
                char ch = temp[i];

                if (NumeralUtil.IsArabicNumeral(ch))
                {
                    sbDateText.Append(ch);
                    sbText.Append(ch);
                    strLen++;
                }
                else if (ch == '大' || ch == '前' || ch == '昨' || ch == '明' || ch == '今' || ch == '后' || ch == '去')
                {
                }
                else if (ch == '周')
                {
                    if (prevCh == '上')
                    {
                        nonNumeric = true;
                        sbText.Append(prevCh);
                        sbText.Append(ch);
                        break;
                    }
                }
                else if (ch == '天')
                {
                    if (prevCh == '前' || prevCh == '昨' || prevCh == '明' || prevCh == '今' || prevCh == '后')
                    {
                        nonNumeric = true;
                        sbText.Append(prevCh);
                        sbText.Append(ch);
                        break;
                    }
                }
                else if (ch == '年')
                {
                    if (prevCh == '去' || prevCh == '前' || prevCh == '今' || prevCh == '后')
                    {
                        nonNumeric = true;
                        sbText.Append(prevCh);
                        sbText.Append(ch);
                        break;
                    }
                    if (strLen == 0)
                    {
                        return(prc);
                    }
                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('y', strLen));
                    sbPatternText.Append(ch);
                    strLen = 0;
                    sbText.Append(ch);
                }
                else if (ch == '日')
                {
                    if (strLen == 0)
                    {
                        return(prc);
                    }

                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('d', strLen));
                    sbPatternText.Append(ch);
                    strLen = 0;
                    sbText.Append(ch);
                }
                else if (ch == '月')
                {
                    if (strLen == 0)
                    {
                        return(prc);
                    }

                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('M', strLen));
                    sbPatternText.Append(ch);
                    sbText.Append(ch);
                    strLen = 0;
                }
                else if (ch == '分')
                {
                    if (strLen == 0)
                    {
                        return(prc);
                    }

                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('m', strLen));
                    sbPatternText.Append(ch);
                    sbText.Append(ch);
                    strLen = 0;
                }
                else if (ch == '秒')
                {
                    if (strLen == 0)
                    {
                        return(prc);
                    }

                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('s', strLen));
                    sbPatternText.Append(ch);
                    sbText.Append(ch);
                    strLen = 0;
                }
                else if (ch == '点')
                {
                    if (strLen == 0)
                    {
                        return(prc);
                    }

                    sbDateText.Append(ch);
                    sbPatternText.Append(DateUtil.GeneratePatternText('h', strLen));
                    sbPatternText.Append(ch);
                    sbText.Append(ch);
                    strLen = 0;
                }
                else if (ch == ' ')
                {
                    sbText.Append(ch);
                    continue;
                }
                else
                {
                    break;
                }
                prevCh = ch;
            }
            if (sbText.Length > 0 && nonNumeric == true)
            {
                prc.Add(ParseResult.Create(sbText.ToString(), startIndex, POSType.D_T));
                return(prc);
            }
            if (sbDateText.Length == 0 || sbPatternText.Length == 0)
            {
                return(prc);
            }
            DateTime?dt = DateUtil.ParseDate(sbDateText.ToString(), sbPatternText.ToString());

            if (dt != null)
            {
                string result = sbText.ToString();
                prc.Add(ParseResult.Create(result, startIndex, POSType.D_T, dt));
            }
            return(prc);
        }