Ejemplo n.º 1
0
        /// <summary>
        /// Builds a <see cref="string"/> from this instance by encoding every element of
        /// <c>CodePoints</c> with <c>Utf16LittleEndianEncoder.TryEncodeCodePoint</c>.
        /// </summary>
        /// <returns>
        /// The resulting string, or <see cref="string.Empty"/> when <c>CodePoints</c> yields nothing.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The encoder reported failure for one of the code points.
        /// </exception>
        public override string ToString()
        {
            // Strings up to this many chars are assembled on the stack instead of the heap.
            const int MaxStackChars = 256;

            // First pass: count how many UTF-16 chars the result needs.
            // TODO: Optimize for characters of length 1 or 2 in UTF-8 representation (no need to read anything)
            // NOTE(review): two chars are reserved only when IsSurrogate(codePoint) is true;
            // presumably that identifies code points needing a surrogate pair - confirm
            // against UnicodeCodePoint.IsSurrogate.
            int len = 0;
            foreach (var codePoint in CodePoints)
            {
                len += UnicodeCodePoint.IsSurrogate(codePoint) ? 2 : 1;
            }

            // Nothing to encode. Returning here also avoids a zero-length stackalloc, whose
            // resulting pointer value is implementation-defined and would otherwise drive the
            // stack-vs-heap decision at the end of this method.
            if (len == 0)
            {
                return string.Empty;
            }

            unsafe
            {
                Span<byte> buffer;
                char* stackChars = null;
                char[] characters = null;

                if (len <= MaxStackChars)
                {
                    char* stackallocedChars = stackalloc char[len];
                    stackChars = stackallocedChars;
                    buffer = new Span<byte>(stackChars, len * 2);
                }
                else
                {
                    // HACK: Can System.Buffers be used here?
                    characters = new char[len];
                    buffer = characters.Slice().Cast<char, byte>();
                }

                // Second pass: encode each code point and advance the window past the bytes written.
                foreach (var codePoint in CodePoints)
                {
                    int bytesEncoded;
                    if (!Utf16LittleEndianEncoder.TryEncodeCodePoint(codePoint, buffer, out bytesEncoded))
                    {
                        throw new InvalidOperationException("invalid character");
                    }

                    buffer = buffer.Slice(bytesEncoded);
                }

                // TODO: We already have a char[] and this will copy, how to avoid that
                return stackChars != null
                    ? new string(stackChars, 0, len)
                    : new string(characters);
            }
        }
        /// <summary>
        /// Tries to decode a single code point from the start of a little-endian UTF-16 byte buffer.
        /// </summary>
        /// <param name="buffer">Bytes to decode; the first 2 bytes (4 for a surrogate pair) are read.</param>
        /// <param name="codePoint">
        /// On success, the decoded code point; on failure, <c>default(UnicodeCodePoint)</c>.
        /// </param>
        /// <param name="encodedBytes">
        /// On success, the number of bytes consumed (2 or 4); on failure, 0.
        /// </param>
        /// <returns>
        /// <c>false</c> when the buffer holds fewer than 2 bytes, when the first code unit is a
        /// surrogate that is not a high surrogate, when a high surrogate is followed by fewer than
        /// 2 more bytes, or when the second code unit is not a low surrogate; otherwise <c>true</c>.
        /// </returns>
        public static bool TryDecodeCodePoint(Span<byte> buffer, out UnicodeCodePoint codePoint, out int encodedBytes)
        {
            // Surrogate pairs encode (code point - 0x10000) in their 20 payload bits, so the
            // offset has to be added back after the two halves are combined.
            const uint SupplementaryPlanesOffset = 0x10000;

            // Failure defaults; overwritten only on the success paths below.
            codePoint = default(UnicodeCodePoint);
            encodedBytes = default(int);

            if (buffer.Length < 2)
            {
                // buffer too small
                return false;
            }

            uint codePointValue = (uint)buffer[0] | ((uint)buffer[1] << 8);

            if (!UnicodeCodePoint.IsSurrogate((UnicodeCodePoint)codePointValue))
            {
                // Single code unit: its value is the code point.
                codePoint = (UnicodeCodePoint)codePointValue;
                encodedBytes = 2;
                return true;
            }

            // TODO: Check if compiler optimized it so codePointValue low range is checked only once
            if (!UnicodeCodePoint.IsHighSurrogate((UnicodeCodePoint)codePointValue) || buffer.Length < 4)
            {
                // invalid high surrogate or buffer too small
                return false;
            }

            uint lowSurrogate = (uint)buffer[2] | ((uint)buffer[3] << 8);
            if (!UnicodeCodePoint.IsLowSurrogate((UnicodeCodePoint)lowSurrogate))
            {
                // invalid low surrogate character
                return false;
            }

            unchecked
            {
                codePointValue -= UnicodeConstants.Utf16HighSurrogateFirstCodePoint;
                lowSurrogate -= UnicodeConstants.Utf16LowSurrogateFirstCodePoint;
            }

            // The high surrogate carries the upper 10 payload bits, the low surrogate the lower 10.
            codePointValue = ((codePointValue << 10) | lowSurrogate) + SupplementaryPlanesOffset;

            codePoint = (UnicodeCodePoint)codePointValue;
            encodedBytes = 4;
            return true;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds a <see cref="string"/> from this instance by encoding every element of
        /// <c>CodePoints</c> with <c>Utf16LittleEndianEncoder.TryEncodeCodePoint</c> into a
        /// pinned <c>char[]</c>.
        /// </summary>
        /// <returns>
        /// The resulting string, or <see cref="string.Empty"/> when <c>CodePoints</c> yields nothing.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The encoder reported failure for one of the code points.
        /// </exception>
        public override string ToString()
        {
            // First pass: count how many UTF-16 chars the result needs.
            // TODO: Optimize for characters of length 1 or 2 in UTF-8 representation (no need to read anything)
            // NOTE(review): two chars are reserved only when IsSurrogate(codePoint) is true;
            // presumably that identifies code points needing a surrogate pair - confirm
            // against UnicodeCodePoint.IsSurrogate.
            int len = 0;
            foreach (var codePoint in CodePoints)
            {
                len += UnicodeCodePoint.IsSurrogate(codePoint) ? 2 : 1;
            }

            // Nothing to encode. Returning here also avoids pinning an empty array, for which
            // the fixed statement yields a null pointer.
            if (len == 0)
            {
                return string.Empty;
            }

            char[] characters = new char[len];
            unsafe
            {
                fixed (char* pinnedCharacters = characters)
                {
                    // View the char array as raw bytes so the encoder can write into it directly.
                    Span<byte> buffer = new Span<byte>((byte*)pinnedCharacters, len * 2);

                    // Second pass: encode each code point and advance the window past the bytes written.
                    foreach (var codePoint in CodePoints)
                    {
                        int bytesEncoded;
                        if (!Utf16LittleEndianEncoder.TryEncodeCodePoint(codePoint, buffer, out bytesEncoded))
                        {
                            throw new InvalidOperationException("invalid character");
                        }

                        buffer = buffer.Slice(bytesEncoded);
                    }
                }
            }

            // TODO: We already have a char[] and this will copy, how to avoid that
            return new string(characters);
        }