/// <summary>
/// Reads one scalar value starting at <paramref name="source"/>, combining a
/// high/low surrogate pair into a single value when both halves are present.
/// </summary>
/// <param name="source">Reference to the first UTF-16 code unit to read. The next code unit is read via <c>Unsafe.Add(ref source, 1)</c>.</param>
/// <param name="index">Position used for the bounds check; presumably the offset of <paramref name="source"/> within its buffer (confirm against callers).</param>
/// <param name="length">Length used for the bounds check; a second code unit is read only when <paramref name="index"/> is less than <c>length - 1</c>.</param>
/// <returns>
/// The combined scalar and <c>2</c> when a high surrogate is followed by a low surrogate;
/// otherwise the first code unit as-is (even if it is an unpaired surrogate) and <c>1</c>.
/// </returns>
private static (uint, int) GetScalar(ref char source, int index, int length)
{
    char lead = source;
    bool hasFollowingChar = index < length - 1;

    // Only peek at the next code unit when the bounds check says one exists.
    if (hasFollowingChar && char.IsHighSurrogate(lead))
    {
        char trail = Unsafe.Add(ref source, 1);
        if (char.IsLowSurrogate(trail))
        {
            return (UnicodeUtility.GetScalarFromUtf16SurrogatePair(lead, trail), 2);
        }
    }

    // Not a well-formed pair: hand back the single code unit unchanged.
    return ((uint)lead, 1);
}
/// <summary>
/// A copy of the logic in Rune.DecodeFromUtf16.
/// </summary>
/// <param name="source">The UTF-16 code units to decode from; only the first one or two are examined.</param>
/// <param name="result">The decoded scalar value on success; U+FFFD otherwise.</param>
/// <param name="charsConsumed">
/// 1 or 2 on success, 1 for invalid data, and the length of <paramref name="source"/>
/// (0 or 1) when more data is needed.
/// </param>
/// <returns>
/// <see cref="OperationStatus.Done"/> when a scalar value was decoded,
/// <see cref="OperationStatus.NeedMoreData"/> when the buffer is empty or holds only a high surrogate,
/// and <see cref="OperationStatus.InvalidData"/> for an unpaired or misordered surrogate.
/// </returns>
public static OperationStatus DecodeScalarValueFromUtf16(ReadOnlySpan<char> source, out uint result, out int charsConsumed)
{
    const char ReplacementChar = '\uFFFD';

    // Nothing to look at yet: the caller has to supply more input.
    if (source.IsEmpty)
    {
        result = ReplacementChar;
        charsConsumed = 0;
        return OperationStatus.NeedMoreData;
    }

    // Common case: a BMP scalar value that stands on its own.
    uint leading = source[0];
    if (!UnicodeUtility.IsSurrogateCodePoint(leading))
    {
        result = leading;
        charsConsumed = 1;
        return OperationStatus.Done;
    }

    bool leadingIsHigh = UnicodeUtility.IsHighSurrogateCodePoint(leading);

    if (source.Length > 1)
    {
        // A surrogate followed by more data: it only decodes if the two
        // code units form a high/low pair, in that order.
        uint trailing = source[1];
        if (leadingIsHigh && UnicodeUtility.IsLowSurrogateCodePoint(trailing))
        {
            result = UnicodeUtility.GetScalarFromUtf16SurrogatePair(leading, trailing);
            charsConsumed = 2;
            return OperationStatus.Done;
        }
    }
    else if (leadingIsHigh)
    {
        // A single high surrogate may still be completed by later input.
        result = ReplacementChar;
        charsConsumed = 1;
        return OperationStatus.NeedMoreData;
    }

    // Either a standalone low surrogate (which no further data can fix) or a
    // surrogate followed by a code unit that does not complete the pair.
    // The maximal invalid subsequence for UTF-16 is always one code unit long.
    result = ReplacementChar;
    charsConsumed = 1;
    return OperationStatus.InvalidData;
}
/// <summary>
/// Lower-cases a supplementary-plane character supplied as a UTF-16 surrogate pair
/// and returns the result as a surrogate pair.
/// </summary>
/// <param name="h">The high surrogate of the input character (asserted in debug builds).</param>
/// <param name="l">The low surrogate of the input character (asserted in debug builds).</param>
/// <param name="hr">Receives the high surrogate of the lower-cased character.</param>
/// <param name="lr">Receives the low surrogate of the lower-cased character.</param>
internal static void ToLower(char h, char l, out char hr, out char lr)
{
    Debug.Assert(char.IsHighSurrogate(h));
    Debug.Assert(char.IsLowSurrogate(l));

    // Combine the pair into a scalar, map it, then split the mapped scalar back into surrogates.
    var scalar = UnicodeUtility.GetScalarFromUtf16SurrogatePair(h, l);
    var lowered = CharUnicodeInfo.ToLower(scalar);
    UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar(lowered, out hr, out lr);
}