// Stays at U+0000 when the instance is default-initialized.
public readonly int Value;

/// <summary>
/// Creates a Unicode scalar from a single UTF-16 code unit.
/// The supplied value must not be a surrogate.
/// </summary>
/// <param name="char">The UTF-16 code unit to wrap.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// The stored value is reported as a surrogate by <c>Utf8Util.IsLowWordSurrogate</c>.
/// </exception>
public UnicodeScalar(char @char)
    : this((uint)@char)
{
    // Validation runs here, after the chained constructor: none of the APIs on this
    // type are guaranteed to produce correct results for input that was not validated
    // during construction.
    bool isSurrogate = Utf8Util.IsLowWordSurrogate((uint)Value);
    if (!isSurrogate)
    {
        return;
    }

    throw new ArgumentOutOfRangeException(
        nameof(@char),
        "Value must be between U+0000 and U+D7FF, inclusive; or value must be between U+E000 and U+FFFF, inclusive.");
}
/// <summary>
/// Calculates the number of bytes needed to hold the UTF-16 encoding of the specified UTF-8 sequence.
///
/// This method will consume as many of the input bytes as possible.
/// </summary>
/// <param name="source">A span containing a sequence of UTF-8 bytes.</param>
/// <param name="bytesNeeded">On exit, contains the number of bytes required for encoding from the <paramref name="source"/>.</param>
/// <returns>
/// An <see cref="OperationStatus"/> value representing the expected state of the conversion;
/// <see cref="OperationStatus.Done"/> when <paramref name="source"/> is well-formed UTF-8.
/// </returns>
public static OperationStatus ToUtf16Length(ReadOnlySpan <byte> source, out int bytesNeeded)
{
    // A negative index from the validator means no invalid sequence was found.
    if (Utf8Util.GetIndexOfFirstInvalidUtf8Sequence(source, out int scalarCount, out int surrogatePairCount) < 0)
    {
        // Well-formed UTF-8 string.

        // 'scalarCount + surrogatePairCount' is guaranteed not to overflow because
        // the UTF-16 representation of a string will never have a greater number
        // of code units than its UTF-8 representation.
        int numCodeUnits = scalarCount + surrogatePairCount;

        // UTF-16 code units are 2 bytes each. The multiplication is 'checked' so that an
        // overflow of the byte count throws instead of silently wrapping.
        bytesNeeded = checked (numCodeUnits * 2);
        return(OperationStatus.Done);
    }
/// <summary>
/// Determines whether <paramref name="value"/> passes the scalar-value check: it is either
/// below U+D800, or accepted by <c>Utf8Util.IsInRangeInclusive</c> for the bounds U+E000 and U+10FFFF.
/// </summary>
/// <param name="value">The candidate value to test.</param>
/// <returns><see langword="true"/> if the value passes the check; otherwise <see langword="false"/>.</returns>
private static bool IsValidScalar(uint value)
{
    // Anything below U+D800 is accepted outright, without consulting the range helper.
    if (value < 0xD800U)
    {
        return true;
    }

    // Otherwise the answer is whatever the shared range helper reports for U+E000..U+10FFFF.
    return Utf8Util.IsInRangeInclusive(value, 0xE000U, 0x10FFFFU);
}