예제 #1
0
        /**
         * Tries to turn a {@code candidate} character sequence into a phone number match.
         *
         * The candidate is rejected outright when it matches the slash-separated-date pattern, or
         * when it matches the time-stamp pattern and the text directly after it starts with a
         * time-stamp suffix. Otherwise the whole candidate is passed to {@link #parseAndVerify},
         * and if that yields nothing, {@link #extractInnerMatch} is tried on the same candidate.
         *
         * @param candidate  the text that might contain a phone number
         * @param offset  the position of {@code candidate} within {@link #text}
         * @return  the match that was found, or null when there is none
         */
        private PhoneNumberMatch extractMatch(string candidate, int offset)
        {
            // A date is a more likely reading of this candidate than a phone number.
            bool looksLikeDate = SLASH_SEPARATED_DATES.Match(candidate).Success;
            if (looksLikeDate)
            {
                return null;
            }

            // A time-stamp only disqualifies the candidate when the text that follows it
            // begins with a time-stamp suffix.
            if (TIME_STAMPS.Match(candidate).Success)
            {
                int candidateEnd = offset + candidate.Length;
                if (TIME_STAMPS_SUFFIX.MatchBeginning(text.Substring(candidateEnd)).Success)
                {
                    return null;
                }
            }

            // First choice: the candidate taken as a whole. Fallback: an "inner match",
            // i.e. a phone number contained somewhere within the candidate.
            PhoneNumberMatch wholeMatch = parseAndVerify(candidate, offset);
            return wholeMatch != null
                ? wholeMatch
                : extractInnerMatch(candidate, offset);
        }
예제 #2
0
        /**
         * Searches {@link #text}, on or after {@code index}, for the next subsequence that
         * represents a phone number. Every rejected candidate consumes one of the remaining
         * {@code maxTries}; the search stops when none are left or the pattern no longer matches.
         *
         * @param index  the position at which to start searching
         * @return  the phone number match found, or null if none can be found
         */
        private PhoneNumberMatch find(int index)
        {
            while (maxTries > 0)
            {
                Match hit = PATTERN.Match(text, index);
                if (!hit.Success)
                {
                    break;
                }

                int candidateStart = hit.Index;

                // Drop any extra numbers at the end of the raw hit.
                // TODO: This is the place to start when trying to support extraction of multiple
                // phone numbers from split notations (+41 79 123 45 67 / 68).
                string candidate = trimAfterFirstMatch(
                    PhoneNumberUtil.SECOND_NUMBER_START_PATTERN,
                    text.Substring(candidateStart, hit.Length));

                PhoneNumberMatch found = extractMatch(candidate, candidateStart);
                if (found != null)
                {
                    return found;
                }

                // Resume the search immediately after the rejected candidate.
                index = candidateStart + candidate.Length;
                maxTries--;
            }

            return null;
        }
예제 #3
0
        /**
         * Returns the portion of {@code candidate} that precedes the first match of {@code regex}.
         * When {@code regex} does not match at all, {@code candidate} is returned unchanged.
         */
        private static string trimAfterFirstMatch(JavaRegex regex, string candidate)
        {
            var firstHit = regex.Match(candidate);

            // Everything from the start of the match onwards is discarded.
            return firstHit.Success
                ? candidate.Substring(0, firstHit.Index)
                : candidate;
        }
예제 #4
0
        /**
         * Parses {@code candidate} with {@link PhoneNumberUtil#parseAndKeepRawInput} and checks the
         * result against the requested {@link #leniency}. A {@link PhoneNumberMatch} is returned
         * only when every check passes; in all other cases, including a parse failure, the result
         * is null.
         *
         * @param candidate  the candidate match
         * @param offset  the position of {@code candidate} within {@link #text}
         * @return  the parsed and validated phone number match, or null
         */
        private PhoneNumberMatch parseAndVerify(String candidate, int offset)
        {
            try
            {
                // Reject candidates whose formatting indicates that they are not really
                // phone numbers.
                if (!MATCHING_BRACKETS.MatchWhole(candidate).Success)
                {
                    return null;
                }
                if (PUB_PAGES.Match(candidate).Success)
                {
                    return null;
                }

                // At leniency VALID or stricter, numbers surrounded by Latin alphabetic characters
                // are skipped as well, e.g. abc8005001234 or 8005001234def.
                bool checkSurroundings = leniency.CompareTo(PhoneNumberUtil.Leniency.VALID) >= 0;
                if (checkSurroundings)
                {
                    // The character before the candidate matters only when the candidate is not at
                    // the start of the text and does not itself begin with phone-number punctuation.
                    if (offset > 0 && !LEAD_CLASS.MatchBeginning(candidate).Success)
                    {
                        char before = text[offset - 1];
                        // A Latin letter or an invalid punctuation symbol rules the candidate out.
                        if (isInvalidPunctuationSymbol(before) || isLatinLetter(before))
                        {
                            return null;
                        }
                    }

                    // The same test applies to the character right after the candidate, if any.
                    int endIndex = offset + candidate.Length;
                    if (endIndex < text.Length)
                    {
                        char after = text[endIndex];
                        if (isInvalidPunctuationSymbol(after) || isLatinLetter(after))
                        {
                            return null;
                        }
                    }
                }

                PhoneNumber parsed = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion);

                // Israel * numbers are a special case: four-digit numbers that the library supports
                // but that can only be dialled with a leading *. Because the * is neither stored nor
                // detected by the phone number library, most four-digit numbers would otherwise be
                // reported as valid for Israel. Moving these numbers to ShortNumberInfo is under
                // consideration and would make this problem go away; until then, false matches are
                // limited by accepting such a number only when a star precedes it. This holds for
                // every leniency level, even though isPossibleNumber and isValidNumber technically
                // accept these numbers; accepting them without the * is considered a deficiency of
                // those methods.
                // TODO: Remove this or make it significantly less hacky once it has been decided how
                // to handle these short codes going forward in ShortNumberInfo. The formatting rules
                // could be used for instance, but that would be slower.
                var region = phoneUtil.getRegionCodeForCountryCode(parsed.getCountryCode());
                if (region.Equals("IL") &&
                    phoneUtil.getNationalSignificantNumber(parsed).Length == 4)
                {
                    if (offset == 0 || (offset > 0 && text[offset - 1] != '*'))
                    {
                        // No match.
                        return null;
                    }
                }

                if (!leniency.verify(parsed, candidate, phoneUtil))
                {
                    return null;
                }

                // The number came from parseAndKeepRawInput, but for now the extra parsed values are
                // not handed back to the caller. TODO: stop clearing all values here and switch all
                // users over to using rawInput() rather than the rawString() of PhoneNumberMatch.
                parsed.clearCountryCodeSource();
                parsed.clearRawInput();
                parsed.clearPreferredDomesticCarrierCode();
                return new PhoneNumberMatch(offset, candidate, parsed);
            }
            catch (NumberParseException)
            {
                // A candidate that cannot be parsed is simply not a match.
                return null;
            }
        }