/// <summary> /// A copy of the logic in Rune.DecodeFromUtf16. /// </summary> public static OperationStatus DecodeScalarValueFromUtf16(ReadOnlySpan <char> source, out uint result, out int charsConsumed) { const char ReplacementChar = '\uFFFD'; if (!source.IsEmpty) { // First, check for the common case of a BMP scalar value. // If this is correct, return immediately. uint firstChar = source[0]; if (!UnicodeUtility.IsSurrogateCodePoint(firstChar)) { result = firstChar; charsConsumed = 1; return(OperationStatus.Done); } // First thing we saw was a UTF-16 surrogate code point. // Let's optimistically assume for now it's a high surrogate and hope // that combining it with the next char yields useful results. if (1 < (uint)source.Length) { uint secondChar = source[1]; if (UnicodeUtility.IsHighSurrogateCodePoint(firstChar) && UnicodeUtility.IsLowSurrogateCodePoint(secondChar)) { // Success! Formed a supplementary scalar value. result = UnicodeUtility.GetScalarFromUtf16SurrogatePair(firstChar, secondChar); charsConsumed = 2; return(OperationStatus.Done); } else { // Either the first character was a low surrogate, or the second // character was not a low surrogate. This is an error. goto InvalidData; } } else if (!UnicodeUtility.IsHighSurrogateCodePoint(firstChar)) { // Quick check to make sure we're not going to report NeedMoreData for // a single-element buffer where the data is a standalone low surrogate // character. Since no additional data will ever make this valid, we'll // report an error immediately. goto InvalidData; } } // If we got to this point, the input buffer was empty, or the buffer // was a single element in length and that element was a high surrogate char. charsConsumed = source.Length; result = ReplacementChar; return(OperationStatus.NeedMoreData); InvalidData: charsConsumed = 1; // maximal invalid subsequence for UTF-16 is always a single code unit in length result = ReplacementChar; return(OperationStatus.InvalidData); }