示例#1
0
        /// <summary>
        /// Decodes a null-terminated sequence of UTF-8 encoded bytes
        /// into a managed string.
        /// </summary>
        /// <param name="buffer">A pointer to the first byte of the null-terminated UTF-8 data.</param>
        /// <returns>The decoded string.</returns>
        internal static unsafe string FromCString(byte *buffer)
        {
            int   byteCount = (int)CStringHelpers.StringLength(buffer);
            byte *end       = buffer + byteCount;

            // One char per input byte is allocated: a code point never needs
            // more UTF-16 code units than it needs UTF-8 bytes.
            var chars     = new char[byteCount];
            int charCount = 0;

            while (buffer != end)
            {
                // Reading advances `buffer` past the code point that was decoded.
                var codePoint = UnicodeConverter.ReadUtf8CodePoint(ref buffer, end);

                // Writing reports how many UTF-16 code units were emitted.
                charCount += UnicodeConverter.WriteUtf16CodePoint(codePoint, &chars[charCount]);
            }

            // Only the first `charCount` chars of the scratch array are meaningful.
            return new String(chars, 0, charCount);
        }
示例#2
0
        /// <summary>
        /// Decodes UTF-8 bytes from a slice of <paramref name="bytes"/> into
        /// UTF-16 chars, carrying incomplete trailing sequences over to the
        /// next call via the undecoded byte buffer.
        /// </summary>
        /// <param name="bytes">The array that holds the UTF-8 input.</param>
        /// <param name="byteIndex">The index in <paramref name="bytes"/> of the first byte to decode.</param>
        /// <param name="byteCount">The number of bytes to decode.</param>
        /// <param name="chars">The array that receives the decoded chars.</param>
        /// <param name="charIndex">The index in <paramref name="chars"/> at which writing starts.</param>
        /// <returns>
        /// The sum of the values returned by <c>WriteToCharBuffer</c> for every
        /// code point decoded during this call; zero if the input did not
        /// complete a pending code point.
        /// </returns>
        private int GetCharsImpl(
            byte[] bytes, int byteIndex, int byteCount,
            char[] chars, int charIndex)
        {
            // TODO: the logic in this algorithm is non-trivial. We should
            // test UTF8Decoder extensively.

            // TODO: check that we're not overflowing any of these buffers here.

            int   numCharsParsed        = 0;
            byte *curPtr                = &bytes[byteIndex];
            byte *endPtr                = &bytes[byteIndex + byteCount];
            byte *undecodedByteStartPtr = &undecodedBytes[0];
            byte *undecodedBytePtr;
            bool  eofReached;

            if (HasUndecodedBytes)
            {
                // Try to complete the pending code point by appending new input
                // bytes to the bytes left over from the previous call.

                int newUndecodedByteCount = Math.Min(undecodedByteCount + byteCount, undecodedBytes.Length);
                for (int i = undecodedByteCount; i < newUndecodedByteCount; i++)
                {
                    undecodedBytes[i] = curPtr[i - undecodedByteCount];
                }

                undecodedBytePtr = undecodedByteStartPtr;
                byte *undecodedByteEndPtr = undecodedBytePtr + newUndecodedByteCount;
                uint  codePoint           = UnicodeConverter.ReadUtf8CodePoint(
                    ref undecodedBytePtr, undecodedByteEndPtr, out eofReached);

                if (eofReached)
                {
                    // We don't have enough new bytes to decode a code point.
                    undecodedByteCount = newUndecodedByteCount;
                    return(0);
                }

                // `undecodedBytePtr` now points just past the code point that was
                // decoded, so `undecodedBytePtr - undecodedByteStartPtr` is the total
                // number of bytes consumed from the scratch buffer. Subtracting the
                // bytes that were already buffered before this call leaves the number
                // of *new* input bytes consumed. (Measuring from the end pointer
                // instead would yield the bytes left over, not the bytes consumed.)
                // NOTE(review): this assumes the reader consumes at least the
                // previously buffered bytes; confirm for malformed input.
                long numNewBytesDecoded = (long)undecodedBytePtr - (long)undecodedByteStartPtr - undecodedByteCount;
                curPtr            += numNewBytesDecoded;
                numCharsParsed    += WriteToCharBuffer(codePoint, chars, charIndex + numCharsParsed);
                undecodedByteCount = 0;
            }

            while (curPtr != endPtr)
            {
                // Remember where this code point starts so we can rewind if the
                // input ends in the middle of it.
                byte *oldCurPtr = curPtr;
                uint  codePoint = UnicodeConverter.ReadUtf8CodePoint(
                    ref curPtr, endPtr, out eofReached);

                if (eofReached)
                {
                    // Stop trying to parse code points; move the rest of the data into
                    // the undecoded bytes buffer.
                    curPtr = oldCurPtr;
                    break;
                }

                numCharsParsed += WriteToCharBuffer(codePoint, chars, charIndex + numCharsParsed);
            }

            // Copy undecoded bytes to the undecoded byte buffer. This is a no-op
            // (leaving the count at zero) when the loop above consumed all input.
            undecodedBytePtr   = undecodedByteStartPtr;
            undecodedByteCount = 0;
            while (curPtr != endPtr)
            {
                *undecodedBytePtr = *curPtr;
                undecodedBytePtr++;
                curPtr++;
                undecodedByteCount++;
            }

            return(numCharsParsed);
        }