Пример #1
0
        /// <summary>
        /// Decodes the bytes held by this instance as UTF-8 and returns the
        /// decoded text as a <see cref="string"/>.
        /// </summary>
        /// <returns>The string produced by converting the stored bytes to UTF-16.</returns>
        public string Utf8ToString()
        {
            // Scratch buffer constructed from the byte count; the converter
            // fills it with the decoded UTF-16 chars.
            CharsRef decoded = new CharsRef(Length);
            UnicodeUtil.UTF8toUTF16(bytes, Offset, Length, decoded);

            string text = decoded.ToString();
            return text;
        }
Пример #2
0
        /// <summary>
        /// Decodes <paramref name="length"/> bytes of <paramref name="utf8"/>, starting at
        /// <paramref name="offset"/>, from UTF-8 into UTF-16 and stores the result in
        /// <paramref name="chars"/>. The backing array of <paramref name="chars"/> is grown so that it
        /// can hold the worst case of one UTF-16 code unit per input byte.
        /// <para/>
        /// NOTE: Multi-byte sequences are always read in full, even when that means reading beyond
        /// <paramref name="length"/>; malformed UTF-8 can therefore cause an
        /// <see cref="IndexOutOfRangeException"/>. The input is not validated.
        /// </summary>
        /// <param name="utf8">Source buffer containing UTF-8 encoded bytes.</param>
        /// <param name="offset">Index of the first byte to decode.</param>
        /// <param name="length">Number of bytes to decode.</param>
        /// <param name="chars">Destination; its offset is reset to 0 and its chars/length are overwritten.</param>
        // TODO: broken if chars.offset != 0
        public static void UTF8toUTF16(byte[] utf8, int offset, int length, CharsRef chars)
        {
            // The output always starts at the beginning of the destination buffer.
            chars.Offset = 0;
            int written = 0;

            // Make sure the destination can hold one char per input byte.
            char[] buffer = ArrayUtil.Grow(chars.Chars, length);
            chars.Chars = buffer;

            int pos = offset;
            int end = offset + length;

            while (pos < end)
            {
                int lead = utf8[pos++] & 0xff;

                // One-byte sequence (ASCII).
                if (lead < 0xc0)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(lead < 0x80);
                    }
                    buffer[written++] = (char)lead;
                    continue;
                }

                // Two-byte sequence: 5 payload bits in the lead byte + 6 in the continuation byte.
                if (lead < 0xe0)
                {
                    int tail = utf8[pos++] & 0x3f;
                    buffer[written++] = (char)(((lead & 0x1f) << 6) + tail);
                    continue;
                }

                // Three-byte sequence: 4 + 6 + 6 payload bits.
                if (lead < 0xf0)
                {
                    int mid = utf8[pos] & 0x3f;
                    int low = utf8[pos + 1] & 0x3f;
                    pos += 2;
                    buffer[written++] = (char)(((lead & 0xf) << 12) + (mid << 6) + low);
                    continue;
                }

                // Four-byte sequence: 3 + 6 + 6 + 6 payload bits.
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(lead < 0xf8, "b = 0x{0:x}", lead);
                }
                int codePoint = ((lead & 0x7) << 18)
                                + ((utf8[pos] & 0x3f) << 12)
                                + ((utf8[pos + 1] & 0x3f) << 6)
                                + (utf8[pos + 2] & 0x3f);
                pos += 3;

                if (codePoint < UNI_MAX_BMP)
                {
                    buffer[written++] = (char)codePoint;
                }
                else
                {
                    // Split into a surrogate pair: high 10 bits, then low bits masked by HALF_MASK.
                    int supplementary = codePoint - 0x0010000;
                    buffer[written++] = (char)((supplementary >> 10) + 0xD800);
                    buffer[written++] = (char)((supplementary & HALF_MASK) + 0xDC00);
                }
            }

            chars.Length = written - chars.Offset;
        }
Пример #3
0
 /// <summary>
 /// Convenience overload that forwards the <c>Bytes</c>, <c>Offset</c> and <c>Length</c> of
 /// <paramref name="bytesRef"/> to the <c>byte[]</c>-based <c>UTF8toUTF16</c> overload.
 /// </summary>
 /// <param name="bytesRef">Source of the bytes, start offset and byte count to convert.</param>
 /// <param name="chars">Destination passed through unchanged to the array-based overload.</param>
 public static void UTF8toUTF16(BytesRef bytesRef, CharsRef chars)
 {
     byte[] source = bytesRef.Bytes;
     int start = bytesRef.Offset;
     int count = bytesRef.Length;

     UTF8toUTF16(source, start, count, chars);
 }