Example #1
0
        /// <summary>
        /// Demonstrates encoding a single <see cref="Rune"/> (the ox emoji) to UTF-16
        /// chars, to a string, and to UTF-8 bytes, printing the sizes to the console.
        /// </summary>
        public static void Run()
        {
            Console.WriteLine("Converting ox emoji to UTF-16 and UTF-8");

            // The emoji is outside the BMP, so it occupies a surrogate pair in the string.
            var rune = Rune.GetRuneAt("🐂", 0);
            Console.WriteLine($"Rune value: {rune.Value}");

            // <SnippetUtf16CharArray>
            var utf16Buffer = new char[rune.Utf16SequenceLength];
            int utf16Count = rune.EncodeToUtf16(utf16Buffer);
            // </SnippetUtf16CharArray>

            Console.WriteLine($"Number of chars: {utf16Count}");

            // <SnippetUtf16String>
            string text = rune.ToString();
            // </SnippetUtf16String>

            // <SnippetUtf8ByteArray>
            var utf8Buffer = new byte[rune.Utf8SequenceLength];
            int utf8Count = rune.EncodeToUtf8(utf8Buffer);
            // </SnippetUtf8ByteArray>

            Console.WriteLine($"Number of UTF-8 bytes: {utf8Count}");
        }
Example #2
0
        /// <summary>
        /// Verifies <see cref="Rune.TryEncodeToUtf8"/> and <see cref="Rune.EncodeToUtf8"/>
        /// against destinations that are one byte too short, exactly sized, and one byte too long.
        /// </summary>
        /// <param name="testData">Supplies the scalar value and its expected UTF-8 sequence.</param>
        public static void TryEncodeToUtf8(GeneralTestData testData)
        {
            Rune rune = new Rune(testData.ScalarValue);

            var expectedUtf8 = testData.Utf8Sequence;
            int expectedLength = expectedUtf8.Length;

            Assert.Equal(expectedLength, rune.Utf8SequenceLength);

            // Case 1: destination is one byte too short -> Try* reports failure, non-Try throws.
            Span<byte> undersized = stackalloc byte[rune.Utf8SequenceLength - 1];
            bool encoded = rune.TryEncodeToUtf8(undersized, out int bytesWritten);

            Assert.False(encoded);
            Assert.Equal(0, bytesWritten);

            Assert.Throws<ArgumentException>("destination", () => rune.EncodeToUtf8(new byte[rune.Utf8SequenceLength - 1]));

            // Case 2: destination is exactly the required size.
            Span<byte> exact = stackalloc byte[rune.Utf8SequenceLength];
            encoded = rune.TryEncodeToUtf8(exact, out bytesWritten);

            Assert.True(encoded);
            Assert.Equal(expectedLength, bytesWritten);
            Assert.True(exact.SequenceEqual(expectedUtf8));

            // Wipe the buffer so the non-Try overload is checked against fresh output.
            exact.Clear();
            Assert.Equal(expectedLength, rune.EncodeToUtf8(exact));
            Assert.True(exact.SequenceEqual(expectedUtf8));

            // Case 3: destination has one spare byte -> still succeeds; only the prefix is compared.
            Span<byte> oversized = stackalloc byte[rune.Utf8SequenceLength + 1];
            encoded = rune.TryEncodeToUtf8(oversized, out bytesWritten);

            Assert.True(encoded);
            Assert.Equal(expectedLength, bytesWritten);
            Assert.True(oversized.Slice(0, expectedLength).SequenceEqual(expectedUtf8));

            oversized.Clear();
            Assert.Equal(expectedLength, rune.EncodeToUtf8(oversized));
            Assert.True(oversized.Slice(0, expectedLength).SequenceEqual(expectedUtf8));
        }
Example #3
0
        /// <summary>
        /// Encodes the scalar at <paramref name="pInput"/> as UTF-8 and passes each resulting
        /// byte to <c>UriHelper.EscapeAsciiChar</c>, which appends to <paramref name="dest"/>.
        /// </summary>
        /// <param name="dest">Builder handed to <c>UriHelper.EscapeAsciiChar</c> for every byte.</param>
        /// <param name="pInput">Pointer to the first UTF-16 code unit of the scalar.</param>
        /// <param name="surrogatePair">
        /// When true, the code units at <paramref name="pInput"/> and the following position are
        /// combined into one scalar; otherwise only the first code unit is used.
        /// </param>
        private static unsafe void RuneCore(ref ValueStringBuilder dest, char *pInput, bool surrogatePair)
        {
            Span<byte> utf8 = stackalloc byte[MaxNumberOfBytesEncoded];

            Rune scalar;
            if (surrogatePair)
            {
                scalar = new Rune(pInput[0], pInput[1]);
            }
            else
            {
                scalar = new Rune(pInput[0]);
            }

            int utf8Length = scalar.EncodeToUtf8(utf8);

            // Only the bytes actually produced by the encoder are escaped.
            for (int i = 0; i < utf8Length; i++)
            {
                UriHelper.EscapeAsciiChar((char)utf8[i], ref dest);
            }
        }
        private static void TestUtf8String()
        {
            Rune rune = Rune.GetRuneAt("😃", 0);

            byte[] t = new byte[12];
            t[0] = (byte)'a';
            t[1] = (byte)'b';
            t[2] = (byte)'c';
            t[3] = (byte)'d';
            rune.EncodeToUtf8(t.AsSpan(4));
            t[8 + 0] = (byte)'e';
            t[8 + 1] = (byte)'f';
            t[8 + 2] = (byte)'g';
            t[8 + 3] = (byte)'h';

            var s = new Utf8Splitter(t, default, StringSplitOptions.None);
Example #5
0
        /// <summary>
        /// Appends one character to the protected buffer, stored as its UTF-8 encoding.
        /// </summary>
        /// <param name="c">
        /// The character to append. It is converted with <c>new Rune(char)</c>, which throws
        /// <see cref="ArgumentOutOfRangeException"/> for a surrogate code unit.
        /// </param>
        /// <remarks>
        /// <c>Length</c> counts appended characters, while <c>protectedMemory.ContentLength</c> is
        /// used as the byte offset for writes. The two differ as soon as a multi-byte character
        /// has been appended.
        /// </remarks>
        public void Append(char c)
        {
            Rune rune = new Rune(c);

            byte[] bytes = new byte[rune.Utf8SequenceLength];
            rune.EncodeToUtf8(bytes);

            // Grow (double) the backing store when the new bytes would reach its capacity.
            if (protectedMemory.ContentLength + bytes.Length >= protectedMemory.Size)
            {
                ProtectedMemory newProtectedMemory = ProtectedMemory.Allocate(2 * protectedMemory.Size);

                // Copy the stored BYTES, not the character count: copying only `Length` bytes
                // would truncate the content once any multi-byte character has been appended.
                // NOTE(review): assumes the last CopyTo argument is a byte count - confirm.
                protectedMemory.CopyTo(0, newProtectedMemory, 0, protectedMemory.ContentLength);
                protectedMemory.Free();
                protectedMemory = newProtectedMemory;
            }

            protectedMemory.Write(bytes, protectedMemory.ContentLength);
            Length++;
        }
        /// <summary>
        /// Copies <paramref name="source"/> into <paramref name="destination"/>, replacing JSON
        /// backslash escape sequences (starting at <paramref name="idx"/>) with the bytes they denote.
        /// </summary>
        /// <param name="source">The escaped UTF-8 input.</param>
        /// <param name="destination">
        /// Receives the unescaped bytes. Asserted (debug only) to be at least as long as <paramref name="source"/>.
        /// </param>
        /// <param name="idx">
        /// Index of a backslash in <paramref name="source"/> (asserted in debug builds). Bytes before
        /// this index are copied unchanged; escape processing starts here.
        /// </param>
        /// <param name="written">On return, the number of bytes written to <paramref name="destination"/>.</param>
        /// <remarks>
        /// A <c>\uXXXX</c> escape in the high-surrogate range must be followed immediately by a
        /// <c>\uXXXX</c> low surrogate; otherwise a <c>ThrowHelper</c> method is invoked
        /// (presumably throwing InvalidOperationException, judging by the helper names).
        /// </remarks>
        internal static void Unescape(ReadOnlySpan <byte> source, Span <byte> destination, int idx, out int written)
        {
            Debug.Assert(idx >= 0 && idx < source.Length);
            Debug.Assert(source[idx] == JsonConstants.BackSlash);
            Debug.Assert(destination.Length >= source.Length);

            // Everything before idx needs no processing; copy it through as-is.
            source.Slice(0, idx).CopyTo(destination);
            written = idx;

            for (; idx < source.Length; idx++)
            {
                byte currentByte = source[idx];
                if (currentByte == JsonConstants.BackSlash)
                {
                    // Step onto the character that follows the backslash.
                    // NOTE(review): no bounds check here - relies on the input never ending in a
                    // lone backslash; confirm callers guarantee this.
                    idx++;
                    currentByte = source[idx];

                    // NOTE(review): an escape character matched by none of the branches below
                    // results in nothing being written for that escape sequence.
                    if (currentByte == JsonConstants.Quote)
                    {
                        destination[written++] = JsonConstants.Quote;
                    }
                    else if (currentByte == 'n')
                    {
                        destination[written++] = JsonConstants.LineFeed;
                    }
                    else if (currentByte == 'r')
                    {
                        destination[written++] = JsonConstants.CarriageReturn;
                    }
                    else if (currentByte == JsonConstants.BackSlash)
                    {
                        destination[written++] = JsonConstants.BackSlash;
                    }
                    else if (currentByte == JsonConstants.Slash)
                    {
                        destination[written++] = JsonConstants.Slash;
                    }
                    else if (currentByte == 't')
                    {
                        destination[written++] = JsonConstants.Tab;
                    }
                    else if (currentByte == 'b')
                    {
                        destination[written++] = JsonConstants.BackSpace;
                    }
                    else if (currentByte == 'f')
                    {
                        destination[written++] = JsonConstants.FormFeed;
                    }
                    else if (currentByte == 'u')
                    {
                        // The source is known to be valid JSON, and hence if we see a \u, it is guaranteed to have 4 hex digits following it
                        // Otherwise, the Utf8JsonReader would have already thrown an exception.
                        Debug.Assert(source.Length >= idx + 5);

                        // Parse the four hex digits after the 'u' (format 'x') into a code unit value.
                        bool result = Utf8Parser.TryParse(source.Slice(idx + 1, 4), out int scalar, out int bytesConsumed, 'x');
                        Debug.Assert(result);
                        Debug.Assert(bytesConsumed == 4);
                        idx += bytesConsumed;     // The loop iteration will increment idx past the last hex digit

                        // Value lies somewhere in the surrogate range (high-surrogate start .. low-surrogate end).
                        if (JsonHelpers.IsInRangeInclusive((uint)scalar, JsonConstants.HighSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
                        {
                            // The first hex value cannot be a low surrogate.
                            if (scalar >= JsonConstants.LowSurrogateStartValue)
                            {
                                ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16(scalar);
                            }

                            Debug.Assert(JsonHelpers.IsInRangeInclusive((uint)scalar, JsonConstants.HighSurrogateStartValue, JsonConstants.HighSurrogateEndValue));

                            // idx was on the last hex digit; after this it is on the first hex digit
                            // of the expected second escape, with '\' at idx - 2 and 'u' at idx - 1.
                            idx += 3;   // Skip the last hex digit and the next \u

                            // We must have a low surrogate following a high surrogate.
                            // The length test comes first so the indexers are only evaluated when
                            // at least four bytes remain from idx.
                            if (source.Length < idx + 4 || source[idx - 2] != '\\' || source[idx - 1] != 'u')
                            {
                                ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16();
                            }

                            // The source is known to be valid JSON, and hence if we see a \u, it is guaranteed to have 4 hex digits following it
                            // Otherwise, the Utf8JsonReader would have already thrown an exception.
                            result = Utf8Parser.TryParse(source.Slice(idx, 4), out int lowSurrogate, out bytesConsumed, 'x');
                            Debug.Assert(result);
                            Debug.Assert(bytesConsumed == 4);

                            // If the first hex value is a high surrogate, the next one must be a low surrogate.
                            if (!JsonHelpers.IsInRangeInclusive((uint)lowSurrogate, JsonConstants.LowSurrogateStartValue, JsonConstants.LowSurrogateEndValue))
                            {
                                ThrowHelper.ThrowInvalidOperationException_ReadInvalidUTF16(lowSurrogate);
                            }

                            idx += bytesConsumed - 1;  // The loop iteration will increment idx past the last hex digit

                            // To find the unicode scalar:
                            // (0x400 * (High surrogate - 0xD800)) + Low surrogate - 0xDC00 + 0x10000
                            scalar = (JsonConstants.BitShiftBy10 * (scalar - JsonConstants.HighSurrogateStartValue))
                                     + (lowSurrogate - JsonConstants.LowSurrogateStartValue)
                                     + JsonConstants.UnicodePlane01StartValue;
                        }

                        // Encode the scalar as UTF-8 directly into the unwritten tail of destination.
                        // Which encoder is used depends on the BUILDING_INBOX_LIBRARY build symbol.
#if BUILDING_INBOX_LIBRARY
                        var rune         = new Rune(scalar);
                        int bytesWritten = rune.EncodeToUtf8(destination.Slice(written));
#else
                        EncodeToUtf8Bytes((uint)scalar, destination.Slice(written), out int bytesWritten);
#endif
                        Debug.Assert(bytesWritten <= 4);
                        written += bytesWritten;
                    }
                }
                else
                {
                    // Not part of an escape sequence: copy the byte through unchanged.
                    destination[written++] = currentByte;
                }
            }
        }