/// <summary>
/// Tries to pull a phone number match out of a <c>candidate</c> character sequence.
/// The whole candidate is tried first; if that fails, the candidate is searched for
/// an inner match.
/// </summary>
///
/// <param name="candidate">the candidate text that might contain a phone number</param>
/// <param name="offset">the offset of <c>candidate</c> within <see cref="text" /></param>
/// <returns>the match found, null if none can be found</returns>
private PhoneNumberMatch ExtractMatch(string candidate, int offset)
{
    // A candidate that looks like a publication page reference is not a phone number.
    if (PubPages.IsMatch(candidate))
    {
        return null;
    }

    // Neither is one that looks like a slash-separated date.
    if (SlashSeparatedDates.IsMatch(candidate))
    {
        return null;
    }

    // A candidate resembling a time-stamp is rejected only when the text that
    // immediately follows it in the full input matches the time-stamp suffix pattern.
    if (TimeStamps.IsMatch(candidate))
    {
        var trailingText = text.Substring(offset + candidate.Length);
        if (TimeStampsSuffix.IsMatchBeginning(trailingText))
        {
            return null;
        }
    }

    // First attempt: treat the entire candidate as the phone number.
    var wholeCandidateMatch = ParseAndVerify(candidate, offset);

    // If that failed, try to find an "inner match" - there might be a phone number
    // within this candidate.
    return wholeCandidateMatch ?? ExtractInnerMatch(candidate, offset);
}
/// <summary>
/// Parses a phone number from the <c>candidate</c> using <c>ParseAndKeepRawInput</c> and
/// verifies it matches the requested <see cref="leniency" />. If parsing and verification
/// succeed, a corresponding <see cref="PhoneNumberMatch" /> is returned, otherwise this
/// method returns null.
/// </summary>
///
/// <param name="candidate">the candidate match</param>
/// <param name="offset">the offset of <c>candidate</c> within <see cref="text" /></param>
/// <returns>the parsed and validated phone number match, or null</returns>
private PhoneNumberMatch ParseAndVerify(string candidate, int offset)
{
    try
    {
        // Reject candidates whose formatting (brackets) indicates that they are not
        // really phone numbers.
        if (!MatchingBrackets.IsMatchAll(candidate))
        {
            return null;
        }

        // At VALID leniency or stricter, numbers that are surrounded by Latin alphabetic
        // characters are skipped too, e.g. abc8005001234 or 8005001234def.
        if (leniency >= PhoneNumberUtil.Leniency.VALID)
        {
            // Only inspect the preceding character when the candidate is not at the start
            // of the text and does not itself begin with phone-number punctuation.
            if (offset > 0 && !LeadClass.IsMatchBeginning(candidate))
            {
                var charBefore = text[offset - 1];
                // A Latin letter or an invalid punctuation symbol disqualifies the candidate.
                if (IsInvalidPunctuationSymbol(charBefore) || IsLatinLetter(charBefore))
                {
                    return null;
                }
            }

            // Apply the same rule to the character right after the candidate, if any.
            var indexAfterCandidate = offset + candidate.Length;
            if (indexAfterCandidate < text.Length)
            {
                var charAfter = text[indexAfterCandidate];
                if (IsInvalidPunctuationSymbol(charAfter) || IsLatinLetter(charAfter))
                {
                    return null;
                }
            }
        }

        var parsedNumber = phoneUtil.ParseAndKeepRawInput(candidate, preferredRegion);
        if (!leniency.Verify(parsedNumber, candidate, phoneUtil, this))
        {
            return null;
        }

        // ParseAndKeepRawInput was used to create this number, but for now the extra
        // values it parsed are not returned.
        // TODO: stop clearing all values here and switch all users over to using
        // rawInput() rather than the rawString() of PhoneNumberMatch.
        var strippedNumber = parsedNumber.ToBuilder();
        strippedNumber.ClearCountryCodeSource();
        strippedNumber.ClearRawInput();
        strippedNumber.ClearPreferredDomesticCarrierCode();
        return new PhoneNumberMatch(offset, candidate, strippedNumber.Build());
    }
    catch (NumberParseException)
    {
        // A candidate that cannot be parsed simply yields no match.
        return null;
    }
}