// TODO: Validate constructors if we decide to keep this class
        /// <summary>
        /// Initializes the value with the UTF-8 encoding of a single, non-surrogate UTF-16 code unit.
        /// </summary>
        /// <param name="character">The character to encode. Must not be a surrogate.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="character"/> is a high or low surrogate.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The encoder rejected the code point; this indicates an internal error.
        /// </exception>
        public Utf8EncodedCodePoint(char character) : this()
        {
            // A lone surrogate is not a code point on its own, so it cannot be encoded.
            if (char.IsSurrogate(character))
            {
                throw new ArgumentOutOfRangeException("character", "Surrogate characters are not allowed");
            }

            UnicodeCodePoint codePoint = (UnicodeCodePoint)(uint)character;

            unsafe
            {
                // NOTE(review): the encoder writes through a 4-byte window that starts at _byte0.
                // This assumes _byte0 is followed by three more contiguous byte fields - confirm
                // against the type's field layout.
                fixed (byte* encodedData = &_byte0)
                {
                    Span<byte> buffer = new Span<byte>(encodedData, 4);

                    // The encoder reports the number of bytes written directly into _length.
                    if (!Utf8Encoder.TryEncodeCodePoint(codePoint, buffer, out _length))
                    {
                        throw new InvalidOperationException("Internal error: this should never happen as codePoint is within acceptable range and is not surrogate");
                    }
                }
            }
        }
Example #2
0
        // TODO: This should return Utf16CodeUnits which should wrap byte[]/Span<byte>, same for other encoders
        /// <summary>
        /// Transcodes a UTF-16 string into a newly allocated UTF-8 byte array.
        /// </summary>
        /// <remarks>
        /// Runs in two passes: the first decodes every code point to validate the string and
        /// to sum the encoded size, so the result array can be allocated exactly; the second
        /// decodes again and encodes each code point into that array.
        /// </remarks>
        /// <param name="s">The string to encode.</param>
        /// <returns>The UTF-8 bytes for <paramref name="s"/>; an empty array for an empty string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The decoder could not read a code point from <paramref name="s"/>.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The decoder or encoder violated an internal invariant.
        /// </exception>
        private static byte[] GetUtf8BytesFromString(string s)
        {
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }

            // Pass 1: validate the input and compute the exact size of the output.
            int len = 0;

            for (int i = 0; i < s.Length; /* advanced by the number of decoded chars */)
            {
                UnicodeCodePoint codePoint;
                int encodedChars;
                if (!Utf16LittleEndianEncoder.TryDecodeCodePointFromString(s, i, out codePoint, out encodedChars))
                {
                    // ArgumentException takes (message, paramName), in that order.
                    throw new ArgumentException("Invalid surrogate pair in the string.", "s");
                }

                // A successful decode that consumed nothing would make this loop spin forever.
                if (encodedChars <= 0)
                {
                    throw new InvalidOperationException("internal error");
                }

                int encodedBytes = Utf8Encoder.GetNumberOfEncodedBytes(codePoint);
                if (encodedBytes == 0)
                {
                    throw new InvalidOperationException("Internal error: Utf16Decoder somehow got CodePoint out of range");
                }

                len += encodedBytes;
                i += encodedChars;
            }

            byte[] bytes = new byte[len];
            if (len == 0)
            {
                // Nothing to encode. Returning here also avoids pinning an empty array,
                // for which the fixed statement produces a null pointer.
                return bytes;
            }

            // Pass 2: encode each code point into the pre-sized buffer.
            unsafe
            {
                fixed (byte* array_pinned = bytes)
                {
                    // Window over the not-yet-written tail of the array; shrinks after every write.
                    Span<byte> p = new Span<byte>(array_pinned, len);

                    for (int i = 0; i < s.Length; /* advanced by the number of decoded chars */)
                    {
                        UnicodeCodePoint codePoint;
                        int encodedChars;
                        if (!Utf16LittleEndianEncoder.TryDecodeCodePointFromString(s, i, out codePoint, out encodedChars))
                        {
                            throw new InvalidOperationException("Internal error: we did pre-validation of the string, nothing should go wrong");
                        }

                        i += encodedChars;

                        int encodedBytes;
                        if (!Utf8Encoder.TryEncodeCodePoint(codePoint, p, out encodedBytes))
                        {
                            throw new InvalidOperationException("Internal error: Utf16Decoder somehow got CodePoint out of range or the buffer is too small");
                        }

                        p = p.Slice(encodedBytes);
                    }
                }
            }

            return bytes;
        }