/// <summary>
/// Builds a managed string out of a null-terminated, UTF-8 encoded
/// C string.
/// </summary>
/// <param name="buffer">A pointer to the first byte of the C string.</param>
/// <returns>A string that holds the decoded characters.</returns>
internal static unsafe string FromCString(byte *buffer)
{
    // The byte length is whatever CStringHelpers.StringLength reports for the buffer.
    int byteCount = (int)CStringHelpers.StringLength(buffer);

    // One UTF-16 slot per UTF-8 byte: well-formed UTF-8 never needs more
    // UTF-16 code units than it has bytes, so this is an upper bound.
    var chars = new char[byteCount];

    byte *cursor = buffer;
    byte *terminator = buffer + byteCount;
    int charCount = 0;

    while (cursor != terminator)
    {
        // Reading advances `cursor` past the bytes of the code point.
        var codePoint = UnicodeConverter.ReadUtf8CodePoint(ref cursor, terminator);

        // Writing reports how many UTF-16 code units were stored.
        charCount += UnicodeConverter.WriteUtf16CodePoint(codePoint, &chars[charCount]);
    }

    // Only the first `charCount` slots of the buffer were filled.
    return new String(chars, 0, charCount);
}
/// <summary>
/// Decodes a range of UTF-8 bytes into characters. Bytes that form an
/// incomplete code point at the end of the range are stashed in the
/// undecoded-bytes buffer and are combined with the input of the next call.
/// </summary>
/// <param name="bytes">The array that contains the bytes to decode.</param>
/// <param name="byteIndex">The index in <paramref name="bytes"/> of the first byte to decode.</param>
/// <param name="byteCount">The number of bytes to decode.</param>
/// <param name="chars">The array that decoded characters are written to.</param>
/// <param name="charIndex">The index in <paramref name="chars"/> at which writing starts.</param>
/// <returns>
/// The sum of the values returned by <c>WriteToCharBuffer</c> for every
/// code point decoded by this call; zero if no code point could be completed.
/// </returns>
private int GetCharsImpl(
    byte[] bytes, int byteIndex, int byteCount,
    char[] chars, int charIndex)
{
    // TODO: the logic in this algorithm is non-trivial. We should
    // test UTF8Decoder extensively.

    // TODO: check that we're not overflowing any of these buffers here.

    int numCharsParsed = 0;
    byte *curPtr = &bytes[byteIndex];
    byte *endPtr = &bytes[byteIndex + byteCount];
    byte *undecodedByteStartPtr = &undecodedBytes[0];
    byte *undecodedBytePtr;
    bool eofReached;

    if (HasUndecodedBytes)
    {
        // Try to complete the code point left over from the previous call by
        // appending as many new bytes as fit behind the stored bytes.
        int newUndecodedByteCount = Math.Min(
            undecodedByteCount + byteCount,
            undecodedBytes.Length);

        for (int i = undecodedByteCount; i < newUndecodedByteCount; i++)
        {
            undecodedBytes[i] = curPtr[i - undecodedByteCount];
        }

        undecodedBytePtr = undecodedByteStartPtr;
        byte *undecodedByteEndPtr = undecodedBytePtr + newUndecodedByteCount;
        uint codePoint = UnicodeConverter.ReadUtf8CodePoint(
            ref undecodedBytePtr, undecodedByteEndPtr, out eofReached);

        if (eofReached)
        {
            // We don't have enough new bytes to decode a code point. Keep
            // everything we have gathered so far for the next call.
            undecodedByteCount = newUndecodedByteCount;
            return 0;
        }

        // `undecodedBytePtr` has been advanced past the code point, so its
        // distance from the start of the scratch buffer is the total number
        // of bytes the code point consumed. Subtracting the bytes that were
        // already stored leaves the number of *new* input bytes consumed.
        // (Measuring from the end pointer instead would yield the number of
        // bytes left over, which can even be negative after the subtraction.)
        // NOTE(review): this assumes the reader consumes at least the stored
        // bytes; confirm how ReadUtf8CodePoint advances on malformed input.
        long numNewBytesDecoded =
            (long)undecodedBytePtr - (long)undecodedByteStartPtr - undecodedByteCount;

        curPtr += numNewBytesDecoded;
        numCharsParsed += WriteToCharBuffer(codePoint, chars, charIndex + numCharsParsed);
        undecodedByteCount = 0;
    }

    while (curPtr != endPtr)
    {
        // Remember where this code point starts so we can rewind if the
        // input ends in the middle of it.
        byte *oldCurPtr = curPtr;
        uint codePoint = UnicodeConverter.ReadUtf8CodePoint(
            ref curPtr, endPtr, out eofReached);

        if (eofReached)
        {
            // Stop trying to parse code points; move the rest of the data into
            // the undecoded bytes buffer.
            curPtr = oldCurPtr;
            break;
        }

        numCharsParsed += WriteToCharBuffer(codePoint, chars, charIndex + numCharsParsed);
    }

    // Copy undecoded bytes to the undecoded byte buffer. This is a no-op when
    // the loop above consumed the whole input.
    undecodedBytePtr = undecodedByteStartPtr;
    undecodedByteCount = 0;
    while (curPtr != endPtr)
    {
        *undecodedBytePtr = *curPtr;
        undecodedBytePtr++;
        curPtr++;
        undecodedByteCount++;
    }

    return numCharsParsed;
}