/**
 * Tries to turn a single {@code candidate} string into a phone number match.
 *
 * Candidates that look like slash-separated dates, or like time-stamps that are immediately
 * followed by a time-stamp suffix in the searched text, are rejected. Otherwise the candidate
 * is first tried as a whole, and only if that fails is it searched for an inner match.
 *
 * @param candidate the candidate text that might contain a phone number
 * @param offset the offset of {@code candidate} within {@link #text}
 * @return the match found, null if none can be found
 */
private PhoneNumberMatch extractMatch(string candidate, int offset)
{
    // A candidate that reads like a date is more likely a date than a phone number.
    bool looksLikeDate = SLASH_SEPARATED_DATES.Match(candidate).Success;
    if (looksLikeDate)
    {
        return null;
    }

    // A candidate that reads like a time-stamp is only rejected when the text right after it
    // starts with a time-stamp suffix.
    bool looksLikeTimeStamp = TIME_STAMPS.Match(candidate).Success;
    if (looksLikeTimeStamp)
    {
        int candidateEnd = offset + candidate.Length;
        string trailingText = text.Substring(candidateEnd);
        if (TIME_STAMPS_SUFFIX.MatchBeginning(trailingText).Success)
        {
            return null;
        }
    }

    // Prefer a match covering the entire candidate; fall back to looking for a phone number
    // somewhere inside the candidate.
    PhoneNumberMatch wholeMatch = parseAndVerify(candidate, offset);
    return wholeMatch != null ? wholeMatch : extractInnerMatch(candidate, offset);
}
/**
 * Scans the searched text, starting at {@code index}, for the next subsequence that represents
 * a phone number. Each candidate that fails to produce a match consumes one of the remaining
 * {@code maxTries}; the scan stops once the tries are used up or the pattern no longer matches.
 *
 * @param index the search index to start searching at
 * @return the phone number match found, null if none can be found
 */
private PhoneNumberMatch find(int index)
{
    while (maxTries > 0)
    {
        Match hit = PATTERN.Match(text, index);
        if (!hit.Success)
        {
            break;
        }

        int candidateStart = hit.Index;
        string rawCandidate = text.Substring(candidateStart, hit.Length);

        // Cut off anything that looks like the start of a second number.
        // TODO: This is the place to start when trying to support extraction of multiple phone
        // numbers from split notations (+41 79 123 45 67 / 68).
        string candidate =
            trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, rawCandidate);

        PhoneNumberMatch found = extractMatch(candidate, candidateStart);
        if (found != null)
        {
            return found;
        }

        // Resume the search right after the rejected candidate and use up one try.
        index = candidateStart + candidate.Length;
        maxTries--;
    }

    return null;
}
/**
 * Returns {@code candidate} cut off at the position where {@code regex} first matches, so that
 * the match and everything after it are removed. When {@code regex} does not match, the
 * candidate is returned unchanged.
 */
private static string trimAfterFirstMatch(JavaRegex regex, string candidate)
{
    var firstHit = regex.Match(candidate);
    return firstHit.Success
        ? candidate.Substring(0, firstHit.Index)
        : candidate;
}
/**
 * Parses {@code candidate} into a phone number with {@link PhoneNumberUtil#parseAndKeepRawInput}
 * and checks it against the requested {@link #leniency}. A {@link PhoneNumberMatch} is returned
 * when both steps succeed; in every other case, including a parse failure, the result is null.
 *
 * @param candidate the candidate match
 * @param offset the offset of {@code candidate} within {@link #text}
 * @return the parsed and validated phone number match, or null
 */
private PhoneNumberMatch parseAndVerify(String candidate, int offset)
{
    try
    {
        // Formatting that indicates the candidate really isn't a phone number: brackets that
        // do not match up, or a publication-page pattern.
        if (!MATCHING_BRACKETS.MatchWhole(candidate).Success)
        {
            return null;
        }
        if (PUB_PAGES.Match(candidate).Success)
        {
            return null;
        }

        // At leniency VALID or stricter, numbers surrounded by Latin alphabetic characters are
        // skipped as well, to avoid cases like abc8005001234 or 8005001234def.
        if (leniency.CompareTo(PhoneNumberUtil.Leniency.VALID) >= 0)
        {
            // The preceding character only matters when the candidate is not at the start of
            // the text and does not itself begin with phone-number punctuation.
            bool hasPrecedingChar = offset > 0;
            if (hasPrecedingChar && !LEAD_CLASS.MatchBeginning(candidate).Success)
            {
                char before = text[offset - 1];
                // A Latin letter or an invalid punctuation symbol rules the candidate out.
                if (isInvalidPunctuationSymbol(before) || isLatinLetter(before))
                {
                    return null;
                }
            }

            int endIndex = offset + candidate.Length;
            if (endIndex < text.Length)
            {
                char after = text[endIndex];
                if (isInvalidPunctuationSymbol(after) || isLatinLetter(after))
                {
                    return null;
                }
            }
        }

        PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion);

        // Israel * numbers are a special case: they are four-digit numbers that the library
        // supports, but they can only be dialled with a leading *. Because the * is neither
        // stored nor detected by the phone number library, most four-digit numbers would in
        // practice be detected as valid for Israel. Moving these numbers to ShortNumberInfo is
        // being considered, which would make this problem go away; in the meantime false
        // matches are restricted by only allowing these numbers when they are preceded by a
        // star. This is enforced for all leniency levels, even though isPossibleNumber and
        // isValidNumber technically accept these numbers, since accepting them without the *
        // is considered a deficiency in those methods.
        // TODO: Remove this or make it significantly less hacky once we've decided how to
        // handle these short codes going forward in ShortNumberInfo. We could use the
        // formatting rules for instance, but that would be slower.
        bool isIsraeliRegion =
            phoneUtil.getRegionCodeForCountryCode(number.getCountryCode()).Equals("IL");
        if (isIsraeliRegion && phoneUtil.getNationalSignificantNumber(number).Length == 4)
        {
            // Nothing precedes the candidate, so there cannot be a leading star: no match.
            if (offset == 0)
            {
                return null;
            }
            // The character right before the candidate is not a star: no match.
            if (offset > 0 && text[offset - 1] != '*')
            {
                return null;
            }
        }

        if (!leniency.verify(number, candidate, phoneUtil))
        {
            return null;
        }

        // The number was created with parseAndKeepRawInput, but for now the extra values
        // parsed are not returned. TODO: stop clearing all values here and switch all users
        // over to using rawInput() rather than the rawString() of PhoneNumberMatch.
        number.clearCountryCodeSource();
        number.clearRawInput();
        number.clearPreferredDomesticCarrierCode();
        return new PhoneNumberMatch(offset, candidate, number);
    }
    catch (NumberParseException)
    {
        // A candidate that cannot be parsed is simply not a match.
        return null;
    }
}