/// <summary>
/// Adds up, for every code point in <paramref name="codePoints"/>, the byte count
/// reported by <c>Utf8Encoder.GetNumberOfEncodedBytes</c>.
/// </summary>
/// <param name="codePoints">Sequence of code points to measure; enumerated exactly once.</param>
/// <returns>The sum of the per-code-point byte counts (0 for an empty sequence).</returns>
private static int GetUtf8LengthInBytes(IEnumerable<UnicodeCodePoint> codePoints)
{
    int totalBytes = 0;

    // Walk the sequence with an explicit enumerator; disposed when the walk ends.
    using (IEnumerator<UnicodeCodePoint> cursor = codePoints.GetEnumerator())
    {
        while (cursor.MoveNext())
        {
            totalBytes += Utf8Encoder.GetNumberOfEncodedBytes(cursor.Current);
        }
    }

    return totalBytes;
}
// TODO: This should return Utf16CodeUnits which should wrap byte[]/Span<byte>, same for other encoders
/// <summary>
/// Builds a new byte array by decoding <paramref name="s"/> code point by code point with
/// <c>Utf16LittleEndianEncoder.TryDecodeCodePointFromString</c> and re-encoding each one with
/// <c>Utf8Encoder.TryEncodeCodePoint</c>.
/// </summary>
/// <remarks>
/// Runs in two passes over the string: the first validates the input and measures the exact
/// output size, the second writes the encoded bytes into an array of that size.
/// </remarks>
/// <param name="s">The string to convert. Must not be null (the method reads <c>s.Length</c>).</param>
/// <returns>A freshly allocated array holding the encoded bytes; empty when <paramref name="s"/> is empty.</returns>
/// <exception cref="ArgumentException">
/// A code point could not be decoded from <paramref name="s"/> (reported as an invalid surrogate pair).
/// </exception>
/// <exception cref="InvalidOperationException">
/// An internal invariant was violated: the decoder consumed no characters, the encoder rejected
/// a decoded code point, or the second pass disagreed with the first.
/// </exception>
private static byte[] GetUtf8BytesFromString(string s)
{
    // Pass 1: validate the input and compute the exact size of the output.
    int len = 0;
    for (int i = 0; i < s.Length; /* advanced by encodedChars inside the body */)
    {
        UnicodeCodePoint codePoint;
        int encodedChars;
        if (!Utf16LittleEndianEncoder.TryDecodeCodePointFromString(s, i, out codePoint, out encodedChars))
        {
            // ArgumentException takes (message, paramName), in that order.
            throw new ArgumentException("Invalid surrogate pair in the string.", "s");
        }

        if (encodedChars <= 0)
        {
            // A successful decode that consumes nothing would make this loop spin forever.
            throw new InvalidOperationException("internal error");
        }

        int encodedBytes = Utf8Encoder.GetNumberOfEncodedBytes(codePoint);
        if (encodedBytes == 0)
        {
            throw new InvalidOperationException("Internal error: Utf16Decoder somehow got CodePoint out of range");
        }

        len += encodedBytes;
        i += encodedChars;
    }

    // Pass 2: encode into a buffer of exactly the measured size.
    byte[] bytes = new byte[len];
    unsafe
    {
        fixed (byte* array_pinned = bytes)
        {
            // 'p' always denotes the not-yet-written tail of the output buffer.
            Span<byte> p = new Span<byte>(array_pinned, len);
            for (int i = 0; i < s.Length; /* advanced by encodedChars inside the body */)
            {
                UnicodeCodePoint codePoint;
                int encodedChars;
                if (!Utf16LittleEndianEncoder.TryDecodeCodePointFromString(s, i, out codePoint, out encodedChars))
                {
                    throw new InvalidOperationException("Internal error: we did pre-validation of the string, nothing should go wrong");
                }

                i += encodedChars;

                int encodedBytes;
                if (!Utf8Encoder.TryEncodeCodePoint(codePoint, p, out encodedBytes))
                {
                    throw new InvalidOperationException("Internal error: Utf16Decoder somehow got CodePoint out of range or the buffer is too small");
                }

                // Step past the bytes that were just written.
                p = p.Slice(encodedBytes);
            }
        }
    }

    return bytes;
}