// Decodes UTF-8 bytes into UTF-16 chars, carrying the state of a partially
// read multi-byte sequence in and out through leftOverBits/leftOverCount.
//
//   bytes, byteCount  - input pointer and number of bytes to consume.
//   chars, charCount  - output pointer and its capacity in chars.
//   leftOverBits      - in/out: payload bits gathered so far for a sequence
//                       that is still in progress.
//   leftOverCount     - in/out: low nibble holds the number of bytes already
//                       consumed for that sequence, the next nibble holds the
//                       total length announced by its lead byte.
//   provider, fallbackBuffer, bufferArg
//                     - passed through untouched to Fallback.
//   flush             - when true, an incomplete trailing sequence is handed
//                       to Fallback and the pending state is cleared.
//
// Returns the number of chars written. Throws ArgumentException ("chars")
// when a directly decoded character does not fit in charCount chars.
//
// NOTE(review): when the rejected sequence started in a previous call, the
// computed start offset handed to Fallback is negative because those bytes
// are not part of this buffer. That pre-existing limitation is not addressed
// here, since changing the byte count would change the number of chars
// produced.
unsafe static int InternalGetChars (byte* bytes, int byteCount, char* chars, int charCount, ref uint leftOverBits, ref uint leftOverCount, object provider, ref DecoderFallbackBuffer fallbackBuffer, ref byte[] bufferArg, bool flush)
{
	int charIndex = 0, byteIndex = 0;
	int length = charCount;
	int posn = charIndex;

	// Fast path: with no pending sequence, copy the leading run of
	// single-byte characters straight across.
	if (leftOverCount == 0) {
		int end = byteIndex + byteCount;
		for (; byteIndex < end; posn++, byteIndex++, byteCount--) {
			if (bytes [byteIndex] < 0x80) {
				if (posn >= length)
					throw new ArgumentException ("Insufficient space available.", "chars");
				chars [posn] = (char) bytes [byteIndex];
			} else {
				break;
			}
		}
	}

	// Convert the bytes into the output buffer.
	uint leftSize = ((leftOverCount >> 4) & (uint) 0x0F);
	uint leftSoFar = (leftOverCount & (uint) 0x0F);
	uint leftBits = leftOverBits;
	uint ch;
	int byteEnd = byteIndex + byteCount;
	for (; byteIndex < byteEnd; byteIndex++) {
		// Fetch the next character from the byte buffer.
		ch = (uint) (bytes [byteIndex]);
		if (leftSize == 0) {
			// Process a UTF-8 start character.
			if (ch < (uint) 0x0080) {
				// Single-byte UTF-8 character.
				if (posn >= length)
					throw new ArgumentException ("Insufficient space available.", "chars");
				chars [posn++] = (char) ch;
			} else if ((ch & (uint) 0xE0) == (uint) 0xC0) {
				// Double-byte UTF-8 character.
				leftBits = (ch & (uint) 0x1F);
				leftSoFar = 1;
				leftSize = 2;
			} else if ((ch & (uint) 0xF0) == (uint) 0xE0) {
				// Three-byte UTF-8 character.
				leftBits = (ch & (uint) 0x0F);
				leftSoFar = 1;
				leftSize = 3;
			} else if ((ch & (uint) 0xF8) == (uint) 0xF0) {
				// Four-byte UTF-8 character.
				leftBits = (ch & (uint) 0x07);
				leftSoFar = 1;
				leftSize = 4;
			} else if ((ch & (uint) 0xFC) == (uint) 0xF8) {
				// Five-byte UTF-8 character.
				leftBits = (ch & (uint) 0x03);
				leftSoFar = 1;
				leftSize = 5;
			} else if ((ch & (uint) 0xFE) == (uint) 0xFC) {
				// Six-byte UTF-8 character.
				leftBits = (ch & (uint) 0x03);
				leftSoFar = 1;
				leftSize = 6;
			} else {
				// Invalid UTF-8 start character.
				Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex, 1, chars, ref posn);
			}
		} else {
			// Process an extra byte in a multi-byte sequence.
			if ((ch & (uint) 0xC0) == (uint) 0x80) {
				leftBits = ((leftBits << 6) | (ch & (uint) 0x3F));
				if (++leftSoFar >= leftSize) {
					// We have a complete character now.
					// byteIndex still addresses the final byte of the
					// sequence and leftSoFar already counts it, so the
					// first byte sits at byteIndex - leftSoFar + 1.
					long seqStart = byteIndex - leftSoFar + 1;
					if (leftBits < (uint) 0x10000) {
						// is it an overlong ?
						bool overlong = false;
						switch (leftSize) {
						case 2:
							overlong = (leftBits <= 0x7F);
							break;
						case 3:
							overlong = (leftBits <= 0x07FF);
							break;
						case 4:
							overlong = (leftBits <= 0xFFFF);
							break;
						case 5:
							overlong = (leftBits <= 0x1FFFFF);
							break;
						case 6:
							overlong = (leftBits <= 0x03FFFFFF);
							break;
						}
						if (overlong) {
							Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, seqStart, leftSoFar, chars, ref posn);
						} else if ((leftBits & 0xF800) == 0xD800) {
							// UTF-8 doesn't use surrogate characters
							Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, seqStart, leftSoFar, chars, ref posn);
						} else {
							if (posn >= length)
								throw new ArgumentException ("Insufficient space available.", "chars");
							chars [posn++] = (char) leftBits;
						}
					} else if (leftBits < (uint) 0x110000) {
						// Supplementary character: emit a surrogate pair.
						if ((posn + 2) > length)
							throw new ArgumentException ("Insufficient space available.", "chars");
						leftBits -= (uint) 0x10000;
						chars [posn++] = (char) ((leftBits >> 10) + (uint) 0xD800);
						chars [posn++] = (char) ((leftBits & (uint) 0x3FF) + (uint) 0xDC00);
					} else {
						// Beyond U+10FFFF.
						Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, seqStart, leftSoFar, chars, ref posn);
					}
					leftSize = 0;
				}
			} else {
				// Invalid UTF-8 sequence: clear and restart.
				// The current byte is not part of the pending sequence, so
				// the pending bytes are the leftSoFar bytes just before it.
				Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
				leftSize = 0;
				// Re-examine the current byte as a potential start byte.
				--byteIndex;
			}
		}
	}
	if (flush && leftSize != 0) {
		// We had left-over bytes that didn't make up
		// a complete UTF-8 character sequence.
		Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, byteIndex - leftSoFar, leftSoFar, chars, ref posn);
		// The pending bytes have been reported; drop them so that a later
		// call does not resume a sequence that was already flushed.
		leftSize = 0;
		leftSoFar = 0;
		leftBits = 0;
	}
	leftOverCount = (leftSoFar | (leftSize << 4));
	leftOverBits = leftBits;

	// Return the final length to the caller.
	return posn - charIndex;
}
// Decodes byteCount bytes starting at bytes[byteIndex] into chars starting at
// chars[charIndex]. Bytes below 0x80 are copied as-is; every other byte is
// passed, one at a time, to the decoder fallback buffer and whatever chars the
// buffer yields are written in its place.
//
//   buffer - fallback buffer, created on first use via
//            DecoderFallback.CreateFallbackBuffer () when null.
//            (DecoderFallback is resolved from the enclosing type, which is
//            not visible in this chunk.)
//
// Returns the number of chars actually written to the output array.
// Throws ArgumentNullException / ArgumentOutOfRangeException for invalid
// arguments, and ArgumentException when the output array is too small.
int GetChars (byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, ref DecoderFallbackBuffer buffer)
{
	if (bytes == null)
		throw new ArgumentNullException ("bytes");
	if (chars == null)
		throw new ArgumentNullException ("chars");
	if (byteIndex < 0 || byteIndex > bytes.Length)
		throw new ArgumentOutOfRangeException ("byteIndex");
	if (byteCount < 0 || byteCount > (bytes.Length - byteIndex))
		throw new ArgumentOutOfRangeException ("byteCount");
	if (charIndex < 0 || charIndex > chars.Length)
		throw new ArgumentOutOfRangeException ("charIndex");
	if ((chars.Length - charIndex) < byteCount)
		throw new ArgumentException ("Insufficient space available.");

	// Remember where output started: the fallback may emit zero or several
	// chars per byte, so the byte count is not a reliable char count.
	int firstChar = charIndex;
	int count = byteCount;
	while (count-- > 0) {
		char c = (char) bytes [byteIndex++];
		if (c < 0x80) {
			chars [charIndex++] = c;
		} else {
			if (buffer == null)
				buffer = DecoderFallback.CreateFallbackBuffer ();
			// byteIndex was already advanced, so the offending byte is
			// the one just before it.
			var thisByte = new [] { bytes [byteIndex-1] };
			buffer.Fallback (thisByte, 0);
			while (buffer.Remaining > 0) {
				if (charIndex < chars.Length) {
					chars [charIndex++] = buffer.GetNextChar ();
					continue;
				}
				throw new ArgumentException ("The output char buffer is too small to contain the decoded characters.");
			}
		}
	}
	return charIndex - firstChar;
}
// Array-based front end for the pointer overload of InternalGetChars:
// validates the array/index/count arguments, pins the arrays and forwards
// the decoder state and fallback arguments unchanged.
//
// Returns whatever the pointer overload returns, or 0 when there is neither
// input to read nor room to write.
unsafe static int InternalGetChars (byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, ref uint leftOverBits, ref uint leftOverCount, object provider, ref DecoderFallbackBuffer fallbackBuffer, ref byte[] bufferArg, bool flush)
{
	// Argument checks, in the same order as the exceptions are documented.
	if (bytes == null)
		throw new ArgumentNullException ("bytes");
	if (chars == null)
		throw new ArgumentNullException ("chars");

	bool byteIndexInRange = byteIndex >= 0 && byteIndex <= bytes.Length;
	if (!byteIndexInRange)
		throw new ArgumentOutOfRangeException ("byteIndex");

	bool byteCountInRange = byteCount >= 0 && byteCount <= (bytes.Length - byteIndex);
	if (!byteCountInRange)
		throw new ArgumentOutOfRangeException ("byteCount");

	bool charIndexInRange = charIndex >= 0 && charIndex <= chars.Length;
	if (!charIndexInRange)
		throw new ArgumentOutOfRangeException ("charIndex");

	// Space remaining in the output array from charIndex onwards.
	int room = chars.Length - charIndex;
	if (room == 0 && byteCount == 0)
		return 0;

	bool noInput = byteCount == 0 || byteIndex == bytes.Length;

	fixed (char* outBase = chars) {
		char* dest = outBase + charIndex;

		// With nothing to read, the input array is not pinned at all and
		// the pointer overload receives a null input pointer.
		if (noInput)
			return InternalGetChars (null, 0, dest, room, ref leftOverBits, ref leftOverCount, provider, ref fallbackBuffer, ref bufferArg, flush);

		fixed (byte* inBase = bytes) {
			return InternalGetChars (inBase + byteIndex, byteCount, dest, room, ref leftOverBits, ref leftOverCount, provider, ref fallbackBuffer, ref bufferArg, flush);
		}
	}
}
// for GetChars()
// Feeds `size` bytes, starting at bytes[byteIndex], one at a time to the
// decoder fallback buffer and appends every char the buffer yields to
// chars[charIndex...], advancing charIndex as it goes.
//
//   provider  - used only when `buffer` is null: a DecoderFallback supplies a
//               new buffer, anything else is cast to Decoder and its
//               FallbackBuffer is used.
//   bufferArg - one-byte scratch array, allocated on first use.
//
// No capacity check is made on `chars` here.
static unsafe void Fallback (object provider, ref DecoderFallbackBuffer buffer, ref byte[] bufferArg, byte* bytes, long byteIndex, uint size, char* chars, ref int charIndex)
{
	if (buffer == null) {
		DecoderFallback factory = provider as DecoderFallback;
		buffer = (factory != null)
			? factory.CreateFallbackBuffer ()
			: ((Decoder) provider).FallbackBuffer;
	}

	if (bufferArg == null)
		bufferArg = new byte [1];

	byte* cursor = bytes + byteIndex;
	for (uint pending = size; pending != 0; pending--) {
		// Each unknown byte is reported on its own.
		bufferArg [0] = *cursor;
		cursor++;
		buffer.Fallback (bufferArg, 0);

		// Drain the replacement chars into the output.
		while (buffer.Remaining > 0) {
			chars [charIndex] = buffer.GetNextChar ();
			charIndex++;
		}
		buffer.Reset ();
	}
}
// for GetCharCount()
// Feeds `size` bytes, starting at bytes[index], one at a time to the decoder
// fallback buffer and returns the total number of chars the buffer reports
// as remaining for them. Nothing is written anywhere; the buffer is reset
// after each byte.
//
//   provider  - used only when `buffer` is null: a DecoderFallback supplies a
//               new buffer, anything else is cast to Decoder and its
//               FallbackBuffer is used.
//   bufferArg - one-byte scratch array, allocated on first use.
static unsafe int Fallback (object provider, ref DecoderFallbackBuffer buffer, ref byte[] bufferArg, byte* bytes, long index, uint size)
{
	if (buffer == null) {
		DecoderFallback factory = provider as DecoderFallback;
		buffer = (factory != null)
			? factory.CreateFallbackBuffer ()
			: ((Decoder) provider).FallbackBuffer;
	}

	if (bufferArg == null)
		bufferArg = new byte [1];

	int first = (int) index;
	int total = 0;
	for (int step = 0; step < size; step++) {
		// Each unknown byte is reported on its own.
		bufferArg [0] = bytes [first + step];
		buffer.Fallback (bufferArg, 0);
		total += buffer.Remaining;
		buffer.Reset ();
	}
	return total;
}
// Counts the UTF-16 chars that decoding `count` UTF-8 bytes would produce,
// starting from the partial-sequence state given by leftOverBits and
// leftOverCount. The state is taken by value and is not written back.
//
//   leftOverBits   - payload bits gathered so far for a pending sequence.
//   leftOverCount  - low nibble: bytes already consumed for that sequence;
//                    next nibble: total length announced by its lead byte.
//   provider, fallbackBuffer, bufferArg
//                  - passed through untouched to Fallback.
//   flush          - when true, an incomplete trailing sequence is counted
//                    through Fallback as well.
//
// Returns the number of chars, including whatever Fallback reports for
// rejected bytes.
unsafe static int InternalGetCharCount (byte* bytes, int count, uint leftOverBits, uint leftOverCount, object provider, ref DecoderFallbackBuffer fallbackBuffer, ref byte[] bufferArg, bool flush)
{
	int length = 0;
	int index = 0;

	// Fast path: with no pending sequence, count the leading run of
	// single-byte characters directly.
	if (leftOverCount == 0) {
		int end = index + count;
		for (; index < end; index++, count--) {
			if (bytes [index] < 0x80)
				length++;
			else
				break;
		}
	}

	// Determine the number of characters that we have.
	uint leftSize = ((leftOverCount >> 4) & (uint) 0x0F);
	uint leftSoFar = (leftOverCount & (uint) 0x0F);
	uint leftBits = leftOverBits;
	uint ch;
	while (count > 0) {
		// Note: index is advanced past the byte as soon as it is read.
		ch = (uint) bytes [index++];
		--count;
		if (leftSize == 0) {
			// Process a UTF-8 start character.
			if (ch < (uint) 0x0080) {
				// Single-byte UTF-8 character.
				++length;
			} else if ((ch & (uint) 0xE0) == (uint) 0xC0) {
				// Double-byte UTF-8 character.
				leftBits = (ch & (uint) 0x1F);
				leftSoFar = 1;
				leftSize = 2;
			} else if ((ch & (uint) 0xF0) == (uint) 0xE0) {
				// Three-byte UTF-8 character.
				leftBits = (ch & (uint) 0x0F);
				leftSoFar = 1;
				leftSize = 3;
			} else if ((ch & (uint) 0xF8) == (uint) 0xF0) {
				// Four-byte UTF-8 character.
				leftBits = (ch & (uint) 0x07);
				leftSoFar = 1;
				leftSize = 4;
			} else if ((ch & (uint) 0xFC) == (uint) 0xF8) {
				// Five-byte UTF-8 character.
				leftBits = (ch & (uint) 0x03);
				leftSoFar = 1;
				leftSize = 5;
			} else if ((ch & (uint) 0xFE) == (uint) 0xFC) {
				// Six-byte UTF-8 character.
				leftBits = (ch & (uint) 0x03);
				leftSoFar = 1;
				leftSize = 6;
			} else {
				// Invalid UTF-8 start character.
				length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - 1, 1);
			}
		} else {
			// Process an extra byte in a multi-byte sequence.
			if ((ch & (uint) 0xC0) == (uint) 0x80) {
				leftBits = ((leftBits << 6) | (ch & (uint) 0x3F));
				if (++leftSoFar >= leftSize) {
					// We have a complete character now.
					// index is one past the final byte and leftSoFar counts
					// that byte, so the sequence starts at index - leftSoFar.
					if (leftBits < (uint) 0x10000) {
						// is it an overlong ?
						bool overlong = false;
						switch (leftSize) {
						case 2:
							overlong = (leftBits <= 0x7F);
							break;
						case 3:
							overlong = (leftBits <= 0x07FF);
							break;
						case 4:
							overlong = (leftBits <= 0xFFFF);
							break;
						case 5:
							overlong = (leftBits <= 0x1FFFFF);
							break;
						case 6:
							overlong = (leftBits <= 0x03FFFFFF);
							break;
						}
						if (overlong) {
							length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
						} else if ((leftBits & 0xF800) == 0xD800) {
							// UTF-8 doesn't use surrogate characters
							length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
						} else {
							++length;
						}
					} else if (leftBits < (uint) 0x110000) {
						// Supplementary character: counts as a surrogate pair.
						length += 2;
					} else {
						// Beyond U+10FFFF.
						length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
					}
					leftSize = 0;
				}
			} else {
				// Invalid UTF-8 sequence: clear and restart.
				// Step back onto the offending byte first: it is not part of
				// the pending sequence and must be re-examined as a start
				// byte. With index on that byte, the pending bytes are the
				// leftSoFar bytes immediately before it.
				--index;
				++count;
				length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
				leftSize = 0;
			}
		}
	}
	if (flush && leftSize != 0) {
		// We had left-over bytes that didn't make up
		// a complete UTF-8 character sequence.
		length += Fallback (provider, ref fallbackBuffer, ref bufferArg, bytes, index - leftSoFar, leftSoFar);
	}

	// Return the final length to the caller.
	return length;
}
// Array-based front end for the pointer overload of InternalGetCharCount,
// which accepts a rolling decoder state so it can be used across several
// calls. Validates the array/index/count arguments, pins the array and
// forwards everything else unchanged.
//
// Returns 0 straight away when count is 0; otherwise returns the result of
// the pointer overload.
unsafe static int InternalGetCharCount (byte[] bytes, int index, int count, uint leftOverBits, uint leftOverCount, object provider, ref DecoderFallbackBuffer fallbackBuffer, ref byte[] bufferArg, bool flush)
{
	// Argument checks.
	if (bytes == null)
		throw new ArgumentNullException ("bytes");

	bool indexInRange = index >= 0 && index <= bytes.Length;
	if (!indexInRange)
		throw new ArgumentOutOfRangeException ("index");

	bool countInRange = count >= 0 && count <= (bytes.Length - index);
	if (!countInRange)
		throw new ArgumentOutOfRangeException ("count");

	// Nothing to examine.
	if (count == 0)
		return 0;

	int result;
	fixed (byte* pinned = bytes) {
		byte* start = pinned + index;
		result = InternalGetCharCount (start, count, leftOverBits, leftOverCount, provider, ref fallbackBuffer, ref bufferArg, flush);
	}
	return result;
}