/// <summary>
/// Console demo: reads the ox emoji as a single <see cref="Rune"/> and shows how many
/// UTF-16 code units and UTF-8 bytes its encodings occupy.
/// </summary>
/// <remarks>
/// The statements between the <c>&lt;Snippet…&gt;</c> marker comments are kept exactly as written,
/// since such markers presumably delimit regions quoted elsewhere (confirm before renaming).
/// </remarks>
public static void Run()
{
    const string OxEmoji = "🐂";

    Console.WriteLine("Converting ox emoji to UTF-16 and UTF-8");

    // The emoji is outside the BMP, so GetRuneAt combines its two UTF-16 code units into one Rune.
    Rune rune = Rune.GetRuneAt(OxEmoji, 0);
    Console.WriteLine($"Rune value: {rune.Value}");

    // <SnippetUtf16CharArray>
    char[] chars = new char[rune.Utf16SequenceLength];
    int numCharsWritten = rune.EncodeToUtf16(chars);
    // </SnippetUtf16CharArray>

    Console.WriteLine($"Number of chars: {numCharsWritten}");

    // <SnippetUtf16String>
    string theString = rune.ToString();
    // </SnippetUtf16String>

    // <SnippetUtf8ByteArray>
    byte[] bytes = new byte[rune.Utf8SequenceLength];
    int numBytesWritten = rune.EncodeToUtf8(bytes);
    // </SnippetUtf8ByteArray>

    Console.WriteLine($"Number of UTF-8 bytes: {numBytesWritten}");
}
// Checks Rune.TryEncodeToUtf8 and Rune.EncodeToUtf8 against the expected UTF-8 sequence carried
// by testData, using three destination sizes: one byte too short, exactly sized, and one byte
// too long.
public static void TryEncodeToUtf8(GeneralTestData testData)
{
    Rune rune = new Rune(testData.ScalarValue);
    int expectedLength = testData.Utf8Sequence.Length;

    Assert.Equal(expectedLength, actual: rune.Utf8SequenceLength);

    // Case 1: destination is one byte too short.
    // The Try* method reports failure and writes nothing; the non-Try method throws.
    Span<byte> destination = stackalloc byte[rune.Utf8SequenceLength - 1];
    bool encoded = rune.TryEncodeToUtf8(destination, out int written);

    Assert.False(encoded);
    Assert.Equal(0, written);
    Assert.Throws<ArgumentException>("destination", () => rune.EncodeToUtf8(new byte[rune.Utf8SequenceLength - 1]));

    // Case 2: destination is exactly the required size.
    destination = stackalloc byte[rune.Utf8SequenceLength];
    encoded = rune.TryEncodeToUtf8(destination, out written);

    Assert.True(encoded);
    Assert.Equal(expectedLength, written);
    Assert.True(destination.SequenceEqual(testData.Utf8Sequence));

    // Wipe the buffer so the non-Try overload has to produce the bytes itself.
    destination.Clear();
    Assert.Equal(expectedLength, rune.EncodeToUtf8(destination));
    Assert.True(destination.SequenceEqual(testData.Utf8Sequence));

    // Case 3: destination has one spare byte. Encoding still succeeds, and only the
    // leading expectedLength bytes are compared.
    destination = stackalloc byte[rune.Utf8SequenceLength + 1];
    encoded = rune.TryEncodeToUtf8(destination, out written);

    Assert.True(encoded);
    Assert.Equal(expectedLength, written);
    Assert.True(destination.Slice(0, expectedLength).SequenceEqual(testData.Utf8Sequence));

    destination.Clear();
    Assert.Equal(expectedLength, rune.EncodeToUtf8(destination));
    Assert.True(destination.Slice(0, expectedLength).SequenceEqual(testData.Utf8Sequence));
}
// Builds a Rune from the UTF-16 code unit(s) at pInput, encodes it as UTF-8, and passes each
// encoded byte (widened to char) to UriHelper.EscapeAsciiChar together with dest.
//
// dest:          builder handed to UriHelper.EscapeAsciiChar for every encoded byte.
// pInput:        points at the first code unit; pInput[1] is read only when surrogatePair is true.
// surrogatePair: true when pInput[0] and pInput[1] together form one scalar value.
private static unsafe void RuneCore(ref ValueStringBuilder dest, char* pInput, bool surrogatePair)
{
    Span<byte> utf8 = stackalloc byte[MaxNumberOfBytesEncoded];

    Rune rune;
    if (surrogatePair)
    {
        rune = new Rune(pInput[0], pInput[1]);
    }
    else
    {
        rune = new Rune(pInput[0]);
    }

    // Only the first utf8Length bytes of the scratch buffer hold encoded data.
    int utf8Length = rune.EncodeToUtf8(utf8);
    for (int i = 0; i < utf8Length; i++)
    {
        UriHelper.EscapeAsciiChar((char)utf8[i], ref dest);
    }
}
private static void TestUtf8String() { Rune rune = Rune.GetRuneAt("😃", 0); byte[] t = new byte[12]; t[0] = (byte)'a'; t[1] = (byte)'b'; t[2] = (byte)'c'; t[3] = (byte)'d'; rune.EncodeToUtf8(t.AsSpan(4)); t[8 + 0] = (byte)'e'; t[8 + 1] = (byte)'f'; t[8 + 2] = (byte)'g'; t[8 + 3] = (byte)'h'; var s = new Utf8Splitter(t, default, StringSplitOptions.None);
/// <summary>
/// Appends one UTF-16 code unit to the protected buffer, storing it as its UTF-8 encoding,
/// and increments <c>Length</c> by one.
/// </summary>
/// <param name="c">
/// The character to append. It must be a complete scalar value on its own; a lone surrogate
/// code unit cannot be represented as a <see cref="Rune"/>.
/// </param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// Thrown by the <see cref="Rune"/> constructor when <paramref name="c"/> is a surrogate code unit.
/// </exception>
public void Append(char c)
{
    Rune rune = new Rune(c);
    byte[] bytes = new byte[rune.Utf8SequenceLength];
    rune.EncodeToUtf8(bytes);

    // Grow (double) the backing memory when the encoded bytes would reach or exceed its size.
    if (protectedMemory.ContentLength + rune.Utf8SequenceLength >= protectedMemory.Size)
    {
        ProtectedMemory newProtectedMemory = ProtectedMemory.Allocate(2 * protectedMemory.Size);

        // Copy the number of stored BYTES (ContentLength), not the number of appended
        // characters (Length). Contents are UTF-8, so once any multi-byte character has been
        // appended Length < ContentLength, and copying only Length bytes would drop the tail.
        // NOTE(review): assumes CopyTo's last argument is a byte count — confirm against
        // ProtectedMemory.CopyTo.
        protectedMemory.CopyTo(0, newProtectedMemory, 0, protectedMemory.ContentLength);
        protectedMemory.Free();
        protectedMemory = newProtectedMemory;
    }

    // ContentLength is the write offset: the new bytes go directly after the existing content.
    protectedMemory.Write(bytes, protectedMemory.ContentLength);
    Length++;
}
// Copies source into destination while replacing JSON escape sequences with the bytes they
// represent.
//
// source:      UTF-8 input containing at least one escape sequence.
// destination: output buffer; asserted to be at least as long as source.
// idx:         index in source of the first backslash (asserted). Everything before it is
//              copied verbatim.
// written:     on return, the number of bytes written to destination.
//
// Handled escapes: \" \n \r \\ \/ \t \b \f and \uXXXX (including \uXXXX\uXXXX surrogate pairs,
// which are combined into one scalar before being encoded as UTF-8).
// ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16 is called when a \u escape is a
// lone low surrogate, or when a high surrogate is not followed by a \u low surrogate.
// If the byte after a backslash matches none of the handled escapes, nothing is written for
// that pair (presumably unreachable, since the comments below state the input is valid JSON).
internal static void Unescape(ReadOnlySpan <byte> source, Span <byte> destination, int idx, out int written)
{
    Debug.Assert(idx >= 0 && idx < source.Length);
    Debug.Assert(source[idx] == JsonConstants.BackSlash);
    Debug.Assert(destination.Length >= source.Length);

    // Everything before the first backslash needs no processing.
    source.Slice(0, idx).CopyTo(destination);
    written = idx;

    for (; idx < source.Length; idx++)
    {
        byte currentByte = source[idx];
        if (currentByte == JsonConstants.BackSlash)
        {
            // Step onto the byte that identifies which escape this is.
            idx++;
            currentByte = source[idx];
            if (currentByte == JsonConstants.Quote)
            {
                destination[written++] = JsonConstants.Quote;
            }
            else if (currentByte == 'n')
            {
                destination[written++] = JsonConstants.LineFeed;
            }
            else if (currentByte == 'r')
            {
                destination[written++] = JsonConstants.CarriageReturn;
            }
            else if (currentByte == JsonConstants.BackSlash)
            {
                destination[written++] = JsonConstants.BackSlash;
            }
            else if (currentByte == JsonConstants.Slash)
            {
                destination[written++] = JsonConstants.Slash;
            }
            else if (currentByte == 't')
            {
                destination[written++] = JsonConstants.Tab;
            }
            else if (currentByte == 'b')
            {
                destination[written++] = JsonConstants.BackSpace;
            }
            else if (currentByte == 'f')
            {
                destination[written++] = JsonConstants.FormFeed;
            }
            else if (currentByte == 'u')
            {
                // The source is known to be valid JSON, and hence if we see a \u, it is guaranteed to have 4 hex digits following it
                // Otherwise, the Utf8JsonReader would have already thrown an exception.
                Debug.Assert(source.Length >= idx + 5);

                // Parse the 4 hex digits that follow the 'u' (format 'x' = hexadecimal).
                bool result = Utf8Parser.TryParse(source.Slice(idx + 1, 4), out int scalar, out int bytesConsumed, 'x');
                Debug.Assert(result);
                Debug.Assert(bytesConsumed == 4);
                idx += bytesConsumed; // The loop iteration will increment idx past the last hex digit

                // Any surrogate (high or low) needs pair handling or rejection.
                if (JsonHelpers.IsInRangeInclusive((uint)scalar, JsonConstants.HighSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
                {
                    // The first hex value cannot be a low surrogate.
                    if (scalar >= JsonConstants.LowSurrogateStartValue)
                    {
                        ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16(scalar);
                    }

                    Debug.Assert(JsonHelpers.IsInRangeInclusive((uint)scalar, JsonConstants.HighSurrogateStartValue, JsonConstants.HighSurrogateEndValue));

                    // idx now points at the first hex digit of the expected second escape,
                    // so the '\' and 'u' that must precede it sit at idx - 2 and idx - 1.
                    idx += 3; // Skip the last hex digit and the next \u

                    // We must have a low surrogate following a high surrogate.
                    // The length check comes first so the two index reads below it are in range.
                    if (source.Length < idx + 4 || source[idx - 2] != '\\' || source[idx - 1] != 'u')
                    {
                        ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16();
                    }

                    // The source is known to be valid JSON, and hence if we see a \u, it is guaranteed to have 4 hex digits following it
                    // Otherwise, the Utf8JsonReader would have already thrown an exception.
                    result = Utf8Parser.TryParse(source.Slice(idx, 4), out int lowSurrogate, out bytesConsumed, 'x');
                    Debug.Assert(result);
                    Debug.Assert(bytesConsumed == 4);

                    // If the first hex value is a high surrogate, the next one must be a low surrogate.
                    if (!JsonHelpers.IsInRangeInclusive((uint)lowSurrogate, JsonConstants.LowSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
                    {
                        ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16(lowSurrogate);
                    }

                    idx += bytesConsumed - 1; // The loop iteration will increment idx past the last hex digit

                    // To find the unicode scalar:
                    // (0x400 * (High surrogate - 0xD800)) + Low surrogate - 0xDC00 + 0x10000
                    scalar = (JsonConstants.BitShiftBy10 * (scalar - JsonConstants.HighSurrogateStartValue)) + (lowSurrogate - JsonConstants.LowSurrogateStartValue) + JsonConstants.UnicodePlane01StartValue;
                }

                // Both build flavours encode the scalar as UTF-8 into destination starting at
                // 'written' and report the byte count in bytesWritten.
#if BUILDING_INBOX_LIBRARY
                var rune = new Rune(scalar);
                int bytesWritten = rune.EncodeToUtf8(destination.Slice(written));
#else
                EncodeToUtf8Bytes((uint)scalar, destination.Slice(written), out int bytesWritten);
#endif
                Debug.Assert(bytesWritten <= 4);
                written += bytesWritten;
            }
        }
        else
        {
            // Not part of an escape sequence: copy through unchanged.
            destination[written++] = currentByte;
        }
    }
}