/// <summary>
/// Reads (in reverse) through a section of CFWS and fails if the beginning of the data string is
/// reached, because in that case not enough of the MailAddress components were found.
/// </summary>
/// <param name="data">The string being parsed.</param>
/// <param name="index">The position at which the reverse read starts.</param>
/// <param name="outIndex">
/// On success, the index reported by <c>WhitespaceReader.TryReadCfwsReverse</c>; otherwise <c>default</c>.
/// </param>
/// <param name="throwExceptionIfFail">
/// When <c>true</c>, an incomplete address raises a <see cref="FormatException"/> instead of returning <c>false</c>.
/// </param>
/// <returns><c>true</c> if the CFWS was read and the resulting index is still inside the data; otherwise <c>false</c>.</returns>
/// <exception cref="FormatException">
/// The resulting index is negative and <paramref name="throwExceptionIfFail"/> is <c>true</c>.
/// </exception>
private static bool TryReadCfwsAndThrowIfIncomplete(string data, int index, out int outIndex, bool throwExceptionIfFail)
{
    if (!WhitespaceReader.TryReadCfwsReverse(data, index, out int position, throwExceptionIfFail))
    {
        outIndex = default;
        return false;
    }

    if (position >= 0)
    {
        outIndex = position;
        return true;
    }

    // More components were expected: the address is incomplete, hence invalid.
    if (throwExceptionIfFail)
    {
        throw new FormatException("Invalid email address format.");
    }

    outIndex = default;
    return false;
}
/// <summary>
/// Attempts to read, in reverse, quoted-string formatted data whose bounding quotes were omitted.
/// This is common for e-mail display-names.
/// </summary>
/// <remarks>
/// <para>Precondition: <paramref name="index"/> must be within the bounds of <paramref name="data"/>.</para>
/// <para>
/// Result reported through <paramref name="outIndex"/>:
/// the index of the comma delimiter when one is expected and found,
/// e.g. in (bob@d.com, billy box bob@d.com), starting at index=19 (x) yields index=9 (,);
/// or -1 when the terminating character was not found,
/// e.g. in (my name username@domain), starting at index=6 (e) yields index=-1.
/// </para>
/// <para>
/// The read fails when a non-escaped character is encountered that is not valid in a quoted string
/// (this includes double quotes), or when a Unicode character is encountered and Unicode has not
/// been allowed.
/// </para>
/// </remarks>
/// <param name="data">The string being parsed.</param>
/// <param name="index">The position at which the reverse read starts.</param>
/// <param name="permitUnicode">if set to <c>true</c>, Unicode characters are accepted.</param>
/// <param name="expectCommaDelimiter">if set to <c>true</c>, a non-escaped comma terminates the read.</param>
/// <param name="outIndex">On success, the index at which reading stopped; otherwise <c>default</c>.</param>
/// <param name="throwExceptionIfFail">if set to <c>true</c>, failures throw instead of returning <c>false</c>.</param>
/// <returns><c>true</c> if the data was read successfully, <c>false</c> otherwise.</returns>
/// <exception cref="FormatException">
/// An invalid character was found and <paramref name="throwExceptionIfFail"/> is <c>true</c>.
/// </exception>
internal static bool TryReadReverseUnQuoted(string data, int index, bool permitUnicode, bool expectCommaDelimiter, out int outIndex, bool throwExceptionIfFail)
{
    Debug.Assert(index >= 0 && index < data.Length, "Index out of range: " + index + ", " + data.Length);

    int position = index;

    while (true)
    {
        // Check for valid whitespace
        if (!WhitespaceReader.TryReadFwsReverse(data, position, out position, throwExceptionIfFail))
        {
            outIndex = default;
            return false;
        }

        if (position < 0)
        {
            break;
        }

        // Check for escaped characters
        if (!QuotedPairReader.TryCountQuotedChars(data, position, permitUnicode, out int escapedLength, throwExceptionIfFail))
        {
            outIndex = default;
            return false;
        }

        if (escapedLength > 0)
        {
            // Step over the whole escaped run in one go
            position -= escapedLength;
        }
        else
        {
            char current = data[position];

            // The terminating char: stop here and report its index
            if (expectCommaDelimiter && current == ControlChars.Comma)
            {
                break;
            }

            if (!IsValidQtext(permitUnicode, current))
            {
                if (throwExceptionIfFail)
                {
                    throw new FormatException($"Invalid character: {current}.");
                }

                outIndex = default;
                return false;
            }

            // Valid char
            position--;
        }

        if (position < 0)
        {
            break;
        }
    }

    outIndex = position;
    return true;
}
/// <summary>
/// Attempts to read a standard quoted string in reverse. Departing from the RFC, Unicode is permitted
/// for display-name.
/// </summary>
/// <remarks>
/// <para>
/// Preconditions: <paramref name="index"/> must be within the bounds of <paramref name="data"/>, and the
/// char at that index is the initial quote (data[index] == Quote).
/// </para>
/// <para>
/// Result reported through <paramref name="outIndex"/>: the next index past the terminating quote
/// (data[outIndex + 1] == Quote).
/// e.g. in (bob "user name"@domain), starting at index=14 (") yields index=3 (space).
/// </para>
/// <para>
/// The read fails when a non-escaped character is encountered that is not valid in a quoted string,
/// when a Unicode character is encountered and Unicode has not been allowed, or when the final double
/// quote is not found.
/// </para>
/// </remarks>
/// <param name="data">The string being parsed.</param>
/// <param name="index">The position of the quote at which the reverse read starts.</param>
/// <param name="permitUnicode">if set to <c>true</c>, Unicode characters are accepted.</param>
/// <param name="outIndex">On success, the index just before the terminating quote; otherwise <c>default</c>.</param>
/// <param name="throwExceptionIfFail">if set to <c>true</c>, failures throw instead of returning <c>false</c>.</param>
/// <returns><c>true</c> if a complete quoted string was read, <c>false</c> otherwise.</returns>
/// <exception cref="FormatException">
/// An invalid character was found, or the terminating quote was missing, and
/// <paramref name="throwExceptionIfFail"/> is <c>true</c>.
/// </exception>
internal static bool TryReadReverseQuoted(string data, int index, bool permitUnicode, out int outIndex, bool throwExceptionIfFail)
{
    Debug.Assert(0 <= index && index < data.Length, "Index out of range: " + index + ", " + data.Length);

    // Check for the first bounding quote
    Debug.Assert(data[index] == ControlChars.Quote, "Initial char at index " + index + " was not a quote.");

    // Start just past the bounding quote
    int position = index - 1;

    while (true)
    {
        // Check for valid whitespace
        if (!WhitespaceReader.TryReadFwsReverse(data, position, out position, throwExceptionIfFail))
        {
            outIndex = default;
            return false;
        }

        if (position < 0)
        {
            break;
        }

        // Check for escaped characters
        if (!QuotedPairReader.TryCountQuotedChars(data, position, permitUnicode, out int escapedLength, throwExceptionIfFail))
        {
            outIndex = default;
            return false;
        }

        if (escapedLength > 0)
        {
            // Skip quoted pairs
            position -= escapedLength;
        }
        else
        {
            char current = data[position];

            if (current == ControlChars.Quote)
            {
                // Terminating quote found: report the index past it
                outIndex = position - 1;
                return true;
            }

            if (!IsValidQtext(permitUnicode, current))
            {
                if (throwExceptionIfFail)
                {
                    throw new FormatException($"Invalid character: {current}.");
                }

                outIndex = default;
                return false;
            }

            // Valid char
            position--;
        }

        if (position < 0)
        {
            break;
        }
    }

    // We started with a quote, but did not end with one
    if (throwExceptionIfFail)
    {
        throw new FormatException($"Invalid character: {ControlChars.Quote}.");
    }

    outIndex = default;
    return false;
}
/// <summary>
/// Attempts to read a domain literal in reverse.
/// </summary>
/// <remarks>
/// <para>
/// Preconditions: <paramref name="index"/> must be within the bounds of <paramref name="data"/>, and the
/// char at that index is the initial bracket (data[index] == EndSquareBracket).
/// </para>
/// <para>
/// Result reported through <paramref name="outIndex"/>: the next index past the terminating bracket
/// (data[outIndex + 1] == StartSquareBracket).
/// e.g. in (user@[domain]), starting at index=12 (]) yields index=4 (@).
/// </para>
/// <para>
/// The read fails when a non-escaped character is encountered that is not valid in a domain literal,
/// including Unicode, or when the final bracket is not found.
/// </para>
/// </remarks>
/// <param name="data">The string being parsed.</param>
/// <param name="index">The position of the end bracket at which the reverse read starts.</param>
/// <param name="outIndex">On success, the index just before the start bracket; otherwise <c>default</c>.</param>
/// <param name="throwExceptionIfFail">if set to <c>true</c>, failures throw instead of returning <c>false</c>.</param>
/// <returns><c>true</c> if a complete domain literal was read, <c>false</c> otherwise.</returns>
/// <exception cref="FormatException">
/// An invalid character was found, or no matching start bracket exists, and
/// <paramref name="throwExceptionIfFail"/> is <c>true</c>.
/// </exception>
internal static bool TryReadReverse(string data, int index, out int outIndex, bool throwExceptionIfFail)
{
    Debug.Assert(0 <= index && index < data.Length, "index was outside the bounds of the string: " + index);
    Debug.Assert(data[index] == dotNetTips.Utility.Standard.Common.ControlChars.EndSquareBracket, "data did not end with a square bracket");

    // Start just past the end bracket
    int position = index - 1;

    while (true)
    {
        // Check for valid whitespace
        if (!WhitespaceReader.TryReadFwsReverse(data, position, out position, throwExceptionIfFail))
        {
            outIndex = default;
            return false;
        }

        if (position < 0)
        {
            break;
        }

        // Check for escaped characters (Unicode is never permitted here)
        if (!QuotedPairReader.TryCountQuotedChars(data, position, false, out int escapedLength, throwExceptionIfFail))
        {
            outIndex = default;
            return false;
        }

        if (escapedLength > 0)
        {
            // Skip quoted pairs
            position -= escapedLength;
        }
        else
        {
            char current = data[position];

            if (current == dotNetTips.Utility.Standard.Common.ControlChars.StartSquareBracket)
            {
                // We're done parsing
                outIndex = position - 1;
                return true;
            }

            // Anything outside 7-bit ASCII, or not flagged in the Dtext table, is invalid
            if (current > MailBnfHelper.Ascii7bitMaxValue || !MailBnfHelper.Dtext[current])
            {
                if (throwExceptionIfFail)
                {
                    throw new FormatException($"Invalid character: {current}.");
                }

                outIndex = default;
                return false;
            }

            // Valid char
            position--;
        }

        if (position < 0)
        {
            break;
        }
    }

    // We didn't find a matching '['; the message names the end bracket that was left unmatched.
    if (throwExceptionIfFail)
    {
        throw new FormatException($"Invalid character: {dotNetTips.Utility.Standard.Common.ControlChars.EndSquareBracket}.");
    }

    outIndex = default;
    return false;
}
/// <summary>
/// Parses the local-part section of an address. The local-part may be in dot-atom format or
/// quoted-string format, e.g. (user.name@domain) or ("user name"@domain).
/// The obsolete formats user."name"@domain, "user".name@domain and "user"."name"@domain are not supported.
/// </summary>
/// <remarks>
/// <para>Precondition: data[index + 1] is the '@' symbol.</para>
/// <para>
/// Postconditions: data[index] refers to the start angle bracket, if any, otherwise to the next non-CFWS
/// char; index == -1 if the beginning of the data string has been reached.
/// </para>
/// <para>
/// NOTE(review): the original header stated that Unicode is rejected, but the quoted-string branch
/// passes <c>true</c> for the permit-Unicode argument; confirm which is intended.
/// </para>
/// </remarks>
/// <param name="data">The string being parsed.</param>
/// <param name="index">The current (reverse) parse position; updated as characters are consumed.</param>
/// <param name="expectAngleBracket">if set to <c>true</c>, a start angle bracket may precede the local-part.</param>
/// <param name="expectMultipleAddresses">if set to <c>true</c>, a comma may precede the local-part.</param>
/// <param name="localPart">
/// On success, the parsed local-part including any bounding quotes around quoted-strings, after
/// normalization; on every failure path, <c>default</c>.
/// </param>
/// <param name="throwExceptionIfFail">if set to <c>true</c>, failures throw instead of returning <c>false</c>.</param>
/// <returns><c>true</c> if the local-part was parsed, <c>false</c> otherwise.</returns>
/// <exception cref="FormatException">
/// The character preceding a dot-atom local-part is not a valid separator and
/// <paramref name="throwExceptionIfFail"/> is <c>true</c>.
/// </exception>
private static bool TryParseLocalPart(string data, ref int index, bool expectAngleBracket, bool expectMultipleAddresses, out string localPart, bool throwExceptionIfFail)
{
    // Skip comments and whitespace
    if (!TryReadCfwsAndThrowIfIncomplete(data, index, out index, throwExceptionIfFail))
    {
        localPart = default;
        return false;
    }

    // Mark the start of the local-part
    int startingIndex = index;

    // Is the local-part component in quoted-string format or dot-atom format?
    if (data[index] == dotNetTips.Utility.Standard.Common.ControlChars.Quote)
    {
        if (!QuotedStringFormatReader.TryReadReverseQuoted(data, index, true, out index, throwExceptionIfFail))
        {
            localPart = default;
            return false;
        }
    }
    else
    {
        if (!DotAtomReader.TryReadReverse(data, index, out index, throwExceptionIfFail))
        {
            localPart = default;
            return false;
        }

        // Check that the local-part is properly separated from the next component. It may be separated by a
        // comment, whitespace, an expected angle bracket, a quote for the display-name, or an expected comma
        // before the next address.
        if (index >= 0 &&
            !(
                MailBnfHelper.IsAllowedWhiteSpace(data[index]) // < local@domain >
                || data[index] == dotNetTips.Utility.Standard.Common.ControlChars.EndComment // <(comment)local@domain>
                || (expectAngleBracket && data[index] == dotNetTips.Utility.Standard.Common.ControlChars.StartAngleBracket) // <local@domain>
                || (expectMultipleAddresses && data[index] == dotNetTips.Utility.Standard.Common.ControlChars.Comma) // local@dom,local@dom
                // Note: The following condition is more lax than the RFC. This is done so we can support
                // a common invalid format as shown below.
                || data[index] == dotNetTips.Utility.Standard.Common.ControlChars.Quote // "display"local@domain
            ))
        {
            if (throwExceptionIfFail)
            {
                throw new FormatException($"Invalid character: {data[index]}.");
            }

            localPart = default;
            return false;
        }
    }

    // Parse into a local so the out parameter is only populated once everything has succeeded.
    string parsedLocalPart = data.Substring(index + 1, startingIndex - index);

    if (!WhitespaceReader.TryReadCfwsReverse(data, index, out index, throwExceptionIfFail))
    {
        localPart = default;
        return false;
    }

    if (!TryNormalizeOrThrow(parsedLocalPart, out parsedLocalPart, throwExceptionIfFail))
    {
        localPart = default;
        return false;
    }

    localPart = parsedLocalPart;
    return true;
}
/// <summary>
/// Parses a single e-mail address in reverse, starting at <paramref name="index"/>: domain, '@',
/// local-part, an optional pair of angle brackets, and finally an optional display name.
/// </summary>
/// <param name="data">The string being parsed; must not be null or empty.</param>
/// <param name="expectMultipleAddresses">if set to <c>true</c>, a comma may separate this address from a preceding one.</param>
/// <param name="index">The position at which the reverse parse starts; must be within <paramref name="data"/>.</param>
/// <param name="parseAddressInfo">
/// On success, the display name, local-part and domain that were parsed; on the failure paths in this method, <c>default</c>.
/// </param>
/// <param name="throwExceptionIfFail">if set to <c>true</c>, failures throw instead of returning <c>false</c>.</param>
/// <returns><c>true</c> if an address was parsed, <c>false</c> otherwise.</returns>
/// <exception cref="FormatException">
/// The domain is not preceded by '@', or the angle brackets are mismatched, and
/// <paramref name="throwExceptionIfFail"/> is <c>true</c>.
/// </exception>
private static bool TryParseEmailAddress(string data, bool expectMultipleAddresses, int index, out ParseAddressInfo parseAddressInfo, bool throwExceptionIfFail)
{
    Debug.Assert(!string.IsNullOrEmpty(data));
    Debug.Assert(index >= 0 && index < data.Length, "Index out of range: " + index + ", " + data.Length);

    // Skip comments and whitespace
    if (!TryReadCfwsAndThrowIfIncomplete(data, index, out index, throwExceptionIfFail))
    {
        parseAddressInfo = default;
        return false;
    }

    // Do we expect angle brackets around the address?
    // e.g. ("display name" <user@domain>)
    bool expectAngleBracket = data[index] == dotNetTips.Utility.Standard.Common.ControlChars.EndAngleBracket;
    if (expectAngleBracket)
    {
        index--;
    }

    if (!TryParseDomain(data, ref index, out string domain, throwExceptionIfFail))
    {
        parseAddressInfo = default;
        return false;
    }

    // The next character after the domain must be the '@' symbol
    if (data[index] != dotNetTips.Utility.Standard.Common.ControlChars.At)
    {
        if (throwExceptionIfFail)
        {
            throw new FormatException("Invalid email address.");
        }

        parseAddressInfo = default;
        return false;
    }

    // Skip the '@' symbol
    index--;

    if (!TryParseLocalPart(data, ref index, expectAngleBracket, expectMultipleAddresses, out string localPart, throwExceptionIfFail))
    {
        parseAddressInfo = default;
        return false;
    }

    // Check for a matching angle bracket around the address
    if (expectAngleBracket)
    {
        bool hasOpeningBracket = index >= 0 && data[index] == dotNetTips.Utility.Standard.Common.ControlChars.StartAngleBracket;

        if (!hasOpeningBracket)
        {
            // Mismatched angle brackets
            if (throwExceptionIfFail)
            {
                // Report the unexpected char, or the unmatched end bracket when the data ran out.
                var offending = index >= 0 ? data[index] : dotNetTips.Utility.Standard.Common.ControlChars.EndAngleBracket;
                throw new FormatException($"Invalid character: {offending}.");
            }

            parseAddressInfo = default;
            return false;
        }

        // Skip the angle bracket
        index--;

        // Skip whitespace, but leave comments, as they may be part of the display name.
        if (!WhitespaceReader.TryReadFwsReverse(data, index, out index, throwExceptionIfFail))
        {
            parseAddressInfo = default;
            return false;
        }
    }

    // Is there anything left to parse?
    // There could still be a display name or another address
    string displayName = string.Empty;
    bool hasDisplayName = index >= 0 && !(expectMultipleAddresses && data[index] == dotNetTips.Utility.Standard.Common.ControlChars.Comma);

    if (hasDisplayName && !TryParseDisplayName(data, ref index, expectMultipleAddresses, out displayName, throwExceptionIfFail))
    {
        parseAddressInfo = default;
        return false;
    }

    parseAddressInfo = new ParseAddressInfo(displayName, localPart, domain);
    return true;
}
/// <summary>
/// Parses the display-name section of an address. In departure from the RFC, data is read in the
/// quoted-string format even if the bounding quotes are omitted, and Unicode is permitted.
/// e.g. ("display name" &lt;user@domain&gt;) or (display name &lt;user@domain&gt;)
/// </summary>
/// <remarks>
/// <para>
/// Postconditions: data[index] refers to the comma ',' separator, if any; index == -1 if the beginning
/// of the data string has been reached.
/// </para>
/// <para>
/// Whatever is left over must be the display name. The display name should be a single word/atom or a
/// quoted string, but for robustness the quotes may be omitted, so long as the comma separator before
/// the next address can be found.
/// </para>
/// </remarks>
/// <param name="data">The string being parsed.</param>
/// <param name="index">The current (reverse) parse position; updated as characters are consumed.</param>
/// <param name="expectMultipleAddresses">if set to <c>true</c>, a comma may separate this address from a preceding one.</param>
/// <param name="displayName">
/// On success, the parsed display-name, excluding any bounding quotes around quoted-strings, after
/// normalization; on every failure path, <c>default</c>.
/// </param>
/// <param name="throwExceptionIfFail">if set to <c>true</c>, failures throw instead of returning <c>false</c>.</param>
/// <returns><c>true</c> if the display-name was parsed, <c>false</c> otherwise.</returns>
/// <exception cref="FormatException">
/// Data other than an expected comma remains before a quoted display-name and
/// <paramref name="throwExceptionIfFail"/> is <c>true</c>.
/// </exception>
private static bool TryParseDisplayName(string data, ref int index, bool expectMultipleAddresses, out string displayName, bool throwExceptionIfFail)
{
    // Read the comment (if any). If the display name is contained in quotes, the surrounding comments are
    // omitted. Otherwise, mark this end of the comment so we can include it as part of the display name.
    if (!WhitespaceReader.TryReadCfwsReverse(data, index, out int firstNonCommentIndex, throwExceptionIfFail))
    {
        displayName = default;
        return false;
    }

    // Parse into a local so the out parameter is only populated once everything has succeeded.
    string parsedName;

    // Check to see if there's a quoted-string display name
    if (firstNonCommentIndex >= 0 && data[firstNonCommentIndex] == dotNetTips.Utility.Standard.Common.ControlChars.Quote)
    {
        // The preceding comment was not part of the display name. Read just the quoted string.
        if (!QuotedStringFormatReader.TryReadReverseQuoted(data, firstNonCommentIndex, true, out index, throwExceptionIfFail))
        {
            displayName = default;
            return false;
        }

        Debug.Assert(data[index + 1] == dotNetTips.Utility.Standard.Common.ControlChars.Quote, "Mis-aligned index: " + index);

        // Do not include the bounding quotes on the display name
        int leftIndex = index + 2;
        parsedName = data.Substring(leftIndex, firstNonCommentIndex - leftIndex);

        // Skip any CFWS after the display name
        if (!WhitespaceReader.TryReadCfwsReverse(data, index, out index, throwExceptionIfFail))
        {
            displayName = default;
            return false;
        }

        // Check for completion. We are valid if we hit the end of the data string or if the rest of the data
        // belongs to another address.
        if (index >= 0 && !(expectMultipleAddresses && data[index] == dotNetTips.Utility.Standard.Common.ControlChars.Comma))
        {
            // If there was still data, only a comma could have been the next valid character
            if (throwExceptionIfFail)
            {
                throw new FormatException($"Invalid character: {data[index]}.");
            }

            displayName = default;
            return false;
        }
    }
    else
    {
        // The comment (if any) should be part of the display name.
        int startingIndex = index;

        // Read until the dividing comma or the end of the line.
        if (!QuotedStringFormatReader.TryReadReverseUnQuoted(data, index, true, expectMultipleAddresses, out index, throwExceptionIfFail))
        {
            displayName = default;
            return false;
        }

        Debug.Assert(index < 0 || data[index] == dotNetTips.Utility.Standard.Common.ControlChars.Comma, "Mis-aligned index: " + index);

        // Do not include the Comma (if any), and because there were no bounding quotes,
        // trim extra whitespace.
        parsedName = data.SubstringTrim(index + 1, startingIndex - index);
    }

    if (!TryNormalizeOrThrow(parsedName, out parsedName, throwExceptionIfFail))
    {
        displayName = default;
        return false;
    }

    displayName = parsedName;
    return true;
}