/// <summary>
/// Initializes the buffer over a char output range and a byte input range, and
/// binds the decoder fallback buffer that will be used while decoding.
/// </summary>
/// <param name="enc">Encoding that owns this buffer; supplies the fallback when no decoder is given.</param>
/// <param name="decoder">Optional decoder; when present its fallback buffer is reused.</param>
/// <param name="charStart">Start of the char output range.</param>
/// <param name="charCount">Number of chars in the output range.</param>
/// <param name="byteStart">Start of the byte input range.</param>
/// <param name="byteCount">Number of bytes in the input range.</param>
internal unsafe EncodingCharBuffer(EncodingNLS enc, DecoderNLS? decoder, char* charStart, int charCount, byte* byteStart, int byteCount)
{
    _enc = enc;
    _decoder = decoder;

    // Char output range: the current position starts at the beginning of the range.
    _charStart = charStart;
    _chars = charStart;
    _charEnd = charStart + charCount;

    // Byte input range: the current position starts at the beginning of the range.
    _byteStart = byteStart;
    _bytes = byteStart;
    _byteEnd = byteStart + byteCount;

    // Without a decoder we need a fresh fallback buffer from the encoding;
    // otherwise share the one owned by the decoder.
    _fallbackBuffer = _decoder is null
        ? enc.DecoderFallback.CreateFallbackBuffer()
        : _decoder.FallbackBuffer;

    // Whether decoding or only counting, fallback state is not expected to be
    // carried over from a previous call, so the buffer should be empty here.
    Debug.Assert(
        _fallbackBuffer.Remaining == 0,
        "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");

    _fallbackBufferHelper = new DecoderFallbackBufferHelper(_fallbackBuffer);
    _fallbackBufferHelper.InternalInitialize(_bytes, _charEnd);
}
/// <summary>
/// Computes how many chars decoding the given bytes would produce by running
/// <c>GetChars</c> with a null output pointer and zero output capacity.
/// </summary>
/// <param name="bytes">Pointer to the input bytes; must not be null.</param>
/// <param name="count">Number of input bytes; must be non-negative.</param>
/// <param name="baseDecoder">Optional decoder forwarded unchanged to <c>GetChars</c>.</param>
/// <returns>The value returned by <c>GetChars</c> in counting mode.</returns>
internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS? baseDecoder)
{
    Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
    Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");

    // Counting is delegated to GetChars: a null char* switches it to count-only.
    int charCount = GetChars(bytes, count, null, 0, baseDecoder);
    return charCount;
}
/// <summary>
/// Obtains a decoder fallback buffer (from <paramref name="decoder"/> when one is
/// supplied, otherwise freshly created from the encoding's fallback) and stamps
/// it with the encoding, decoder and original byte count.
/// </summary>
/// <param name="encoding">Encoding recorded on the buffer and used to create one when no decoder exists.</param>
/// <param name="decoder">Optional decoder whose fallback buffer is reused.</param>
/// <param name="originalByteCount">
/// Total input length. It is only used to derive the 'index' handed to the abstract
/// Fallback method: the index is this value minus the length of the remaining input.
/// </param>
/// <returns>The initialized fallback buffer.</returns>
internal static DecoderFallbackBuffer CreateAndInitialize(Encoding encoding, DecoderNLS? decoder, int originalByteCount)
{
    DecoderFallbackBuffer fallbackBuffer;
    if (decoder is null)
    {
        fallbackBuffer = encoding.DecoderFallback.CreateFallbackBuffer();
    }
    else
    {
        fallbackBuffer = decoder.FallbackBuffer;
    }

    fallbackBuffer._encoding = encoding;
    fallbackBuffer._decoder = decoder;
    fallbackBuffer._originalByteCount = originalByteCount;

    return fallbackBuffer;
}
/// <summary>
/// Reads bytes from the underlying stream and decodes them into
/// <paramref name="buffer"/> until the buffer is full or a read returns no bytes.
/// </summary>
/// <param name="buffer">Destination for the decoded chars.</param>
/// <returns>The number of chars written to <paramref name="buffer"/>.</returns>
private int InternalReadChars(Span<char> buffer)
{
    Debug.Assert(!_disposed);

    int index = 0;
    int charsRemaining = buffer.Length;

    _charBytes ??= new byte[MaxCharBytesSize];

    while (charsRemaining > 0)
    {
        // Start from the minimum number of bytes the requested chars could need.
        // Without this, UnicodeEncoding would take ~1+log(n) reads for n characters.
        int numBytes = charsRemaining;
        if (_2BytesPerChar)
        {
            numBytes <<= 1;
        }

        // Never read even one byte more than necessary: subtract bytes the decoder
        // may already be holding. This assumes each decoded char corresponds to one
        // or more bytes; custom encodings or custom replacement sequences may
        // violate that assumption.
        if (numBytes > 1)
        {
            // Internal decoders report pending state; custom decoders are assumed to have it.
            if (!(_decoder is DecoderNLS nlsDecoder) || nlsDecoder.HasState)
            {
                numBytes -= 1;

                // Worst case: charsRemaining = 2 with UTF32Decoder holding 3 pending
                // bytes, in which case just one byte must be read.
                if (_2BytesPerChar && numBytes > 2)
                {
                    numBytes -= 2;
                }
            }
        }

        if (numBytes > MaxCharBytesSize)
        {
            numBytes = MaxCharBytesSize;
        }

        int position = 0;
        byte[]? byteBuffer;

        if (_isMemoryStream)
        {
            // Decode straight out of the MemoryStream's backing array.
            Debug.Assert(_stream is MemoryStream);
            MemoryStream mStream = (MemoryStream)_stream;

            position = mStream.InternalGetPosition();
            numBytes = mStream.InternalEmulateRead(numBytes);
            byteBuffer = mStream.InternalGetBuffer();
        }
        else
        {
            numBytes = _stream.Read(_charBytes, 0, numBytes);
            byteBuffer = _charBytes;
        }

        if (numBytes == 0)
        {
            // Nothing more to read: report what has been decoded so far.
            return buffer.Length - charsRemaining;
        }

        Debug.Assert(byteBuffer != null, "expected byteBuffer to be non-null");

        int charsRead;
        checked
        {
            // Validate both windows before handing raw pointers to the decoder.
            if (position < 0 || numBytes < 0 || position > byteBuffer.Length - numBytes)
            {
                throw new ArgumentOutOfRangeException(nameof(numBytes));
            }

            if (index < 0 || charsRemaining < 0 || index > buffer.Length - charsRemaining)
            {
                throw new ArgumentOutOfRangeException(nameof(charsRemaining));
            }

            unsafe
            {
                fixed (byte* pBytes = byteBuffer)
                fixed (char* pChars = &MemoryMarshal.GetReference(buffer))
                {
                    charsRead = _decoder.GetChars(pBytes + position, numBytes, pChars + index, charsRemaining, flush: false);
                }
            }
        }

        charsRemaining -= charsRead;
        index += charsRead;
    }

    // This should never fail.
    Debug.Assert(charsRemaining >= 0, "We read too many characters.");

    // Fewer chars than requested may have been produced if the end of the stream was
    // reached, or if the encoding makes the char count too big for the buffer
    // (e.g. a fallback sequence).
    return buffer.Length - charsRemaining;
}
/// <summary>
/// Reads bytes from the underlying stream and decodes them into
/// <paramref name="buffer"/> until the buffer is full or a read yields no bytes.
/// </summary>
/// <param name="buffer">Destination for the decoded chars.</param>
/// <returns>The number of chars written to <paramref name="buffer"/>.</returns>
private int InternalReadChars(Span<char> buffer)
{
    Debug.Assert(!_disposed);

    int charsDecoded = 0;

    while (buffer.Length > 0)
    {
        // Start from the minimum number of bytes the remaining chars could need.
        // Without this, UnicodeEncoding would take ~1+log(n) reads for n characters.
        int numBytes = _2BytesPerChar ? buffer.Length << 1 : buffer.Length;

        // Never read even one byte more than necessary: subtract bytes the decoder
        // may already be holding. This assumes each decoded char corresponds to one
        // or more bytes; custom encodings or custom replacement sequences may
        // violate that assumption.
        if (numBytes > 1)
        {
            // Internal decoders report pending state; custom decoders are assumed to have it.
            if (!(_decoder is DecoderNLS nlsDecoder) || nlsDecoder.HasState)
            {
                numBytes--;

                // Worst case: two chars remaining with UTF32Decoder holding 3 pending
                // bytes, in which case just one byte must be read.
                if (_2BytesPerChar && numBytes > 2)
                {
                    numBytes -= 2;
                }
            }
        }

        ReadOnlySpan<byte> source;

        if (_isMemoryStream)
        {
            // Decode straight out of the MemoryStream's backing array.
            Debug.Assert(_stream is MemoryStream);
            MemoryStream mStream = (MemoryStream)_stream;

            int position = mStream.InternalGetPosition();
            numBytes = mStream.InternalEmulateRead(numBytes);
            source = new ReadOnlySpan<byte>(mStream.InternalGetBuffer(), position, numBytes);
        }
        else
        {
            // The scratch array is only allocated when a non-memory stream is read.
            _charBytes ??= new byte[MaxCharBytesSize];

            if (numBytes > MaxCharBytesSize)
            {
                numBytes = MaxCharBytesSize;
            }

            numBytes = _stream.Read(_charBytes, 0, numBytes);
            source = new ReadOnlySpan<byte>(_charBytes, 0, numBytes);
        }

        if (source.IsEmpty)
        {
            break;
        }

        int charsRead = _decoder.GetChars(source, buffer, flush: false);
        buffer = buffer.Slice(charsRead);
        charsDecoded += charsRead;
    }

    // Fewer chars than requested may have been produced if the end of the stream was
    // reached, or if the encoding makes the char count too big for the buffer
    // (e.g. a fallback sequence).
    return charsDecoded;
}
/// <summary>
/// Abstract pointer-based decode entry point that derived types must implement.
/// </summary>
/// <remarks>
/// NOTE(review): parameter meanings below are inferred from the names only;
/// confirm against the implementing encodings.
/// </remarks>
/// <param name="bytes">Presumably a pointer to the first input byte.</param>
/// <param name="byteCount">Presumably the number of bytes available at <paramref name="bytes"/>.</param>
/// <param name="chars">Presumably a pointer to the char output buffer.</param>
/// <param name="charCount">Presumably the capacity, in chars, of <paramref name="chars"/>.</param>
/// <param name="decoder">Optional decoder; may be null.</param>
/// <returns>An int; presumably the number of chars produced (TODO confirm).</returns>
public unsafe abstract int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS?decoder);
/// <summary>
/// Abstract pointer-based char-count entry point that derived types must implement.
/// </summary>
/// <remarks>
/// NOTE(review): parameter meanings below are inferred from the names only;
/// confirm against the implementing encodings.
/// </remarks>
/// <param name="bytes">Presumably a pointer to the first input byte.</param>
/// <param name="count">Presumably the number of bytes available at <paramref name="bytes"/>.</param>
/// <param name="decoder">Optional decoder; may be null.</param>
/// <returns>An int; presumably the number of chars decoding would produce (TODO confirm).</returns>
public unsafe abstract int GetCharCount(byte *bytes, int count, DecoderNLS?decoder);
/// <summary>
/// Decodes UTF-7 bytes into chars, or only counts them when <paramref name="chars"/> is null.
/// Plain bytes below 0x80 map directly to chars, '+' opens a modified base 64 run,
/// and bytes that cannot be decoded are routed through the fallback.
/// </summary>
/// <param name="bytes">Pointer to the input bytes; must not be null.</param>
/// <param name="byteCount">Number of input bytes; must be non-negative.</param>
/// <param name="chars">Pointer to the output chars, or null when only counting.</param>
/// <param name="charCount">Capacity of the output in chars; must be non-negative.</param>
/// <param name="baseDecoder">Optional UTF-7 decoder carrying bits/state between calls.</param>
/// <returns>The char count reported by the working buffer.</returns>
internal sealed override unsafe int GetChars(
    byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS? baseDecoder)
{
    Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
    Debug.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
    Debug.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");

    // A decoder may or may not have been supplied.
    UTF7Encoding.Decoder? decoder = (UTF7Encoding.Decoder?)baseDecoder;

    // Working view over the input bytes and output chars.
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    // Resume from the decoder's saved state when there is one.
    int bits = 0;
    int bitCount = -1;
    bool firstByte = false;
    if (decoder != null)
    {
        bits = decoder.bits;
        bitCount = decoder.bitCount;
        firstByte = decoder.firstByte;

        Debug.Assert(!firstByte || decoder.bitCount <= 0,
            "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
    }

    // The decoder may hold a full char that could not be output last time; emit it first.
    if (bitCount >= 16)
    {
        char carriedChar = (char)((bits >> (bitCount - 16)) & 0xFFFF);
        if (!buffer.AddChar(carriedChar))
        {
            // Always throw: at least one char of room is required, even in Convert.
            ThrowCharsOverflow(decoder, true);
        }

        // That char is used up; drop its bits.
        bitCount -= 16;
    }

    // Walk the input.
    while (buffer.MoreData)
    {
        byte currentByte = buffer.GetNextByte();
        int c;

        if (bitCount >= 0)
        {
            //
            // Inside a modified base 64 run.
            //
            sbyte v;
            if (currentByte < 0x80 && ((v = _base64Values[currentByte]) >= 0))
            {
                firstByte = false;
                bits = (bits << 6) | ((byte)v);
                bitCount += 6;

                // Not enough bits for a char yet: keep accumulating.
                if (bitCount < 16)
                {
                    continue;
                }

                c = (bits >> (bitCount - 16)) & 0xFFFF;
                bitCount -= 16;
            }
            else
            {
                // Any non-base-64 byte turns base 64 mode off.
                bitCount = -1;

                if (currentByte != '-')
                {
                    // Either >= 0x80 (excluded by the first test, which also keeps the
                    // _base64Values lookup within 0..0x7f) or not in the base 64 table.
                    // This is not a valid base 64 byte, so the shifted sequence ends.
                    // According to RFC 1642 and the UTF-7 example code in Unicode 2.0
                    // the invalid byte would simply be zero-extended; here it goes to
                    // the fallback. Chars are not updated unless the fallback succeeds.
                    if (!buffer.Fallback(currentByte))
                    {
                        break; // Stop here, didn't throw
                    }

                    // That byte is consumed.
                    continue;
                }

                // The byte is '-'. "+-" is the encoding of '+'; otherwise the '-'
                // only terminates the run and produces nothing.
                if (!firstByte)
                {
                    continue;
                }

                c = '+';
            }
            //
            // End of modified base 64 handling.
            //
        }
        else if (currentByte == '+')
        {
            //
            // Start of a modified base 64 run, or an encoded plus sign.
            //
            bitCount = 0;
            firstByte = true;
            continue;
        }
        else
        {
            // Outside base 64: bytes >= 0x80 need the fallback.
            if (currentByte >= 0x80)
            {
                if (!buffer.Fallback(currentByte))
                {
                    break; // Stop here, didn't throw
                }

                // Done falling back.
                continue;
            }

            // Ordinary character.
            c = currentByte;
        }

        if (c >= 0 && !buffer.AddChar((char)c))
        {
            // No room. A plain char will simply be retried later. A base 64 char is
            // still consumed and kept in the decoder even though it cannot be output.
            if (bitCount >= 0) // Can this byte (char) be remembered?
            {
                buffer.AdjustBytes(+1); // Re-add the byte AddChar subtracted when it failed
                bitCount += 16;         // The char held in bits is still needed
            }

            break; // didn't throw, stop
        }
    }

    // Persist state in the decoder when possible (chars == null means counting, so don't store).
    if (chars != null && decoder != null)
    {
        // MustFlush could have been cleared by ThrowCharsOverflow if this is Convert
        // and the end of the buffer was not reached.
        if (decoder.MustFlush)
        {
            // The RFC doesn't say what to do with non-zero leftover bits; they are dropped.
            decoder.bits = 0;
            decoder.bitCount = -1;
            decoder.firstByte = false;
        }
        else
        {
            decoder.bits = bits;
            decoder.bitCount = bitCount;
            decoder.firstByte = firstByte;
        }

        decoder._bytesUsed = buffer.BytesUsed;
    }
    // Otherwise any hanging bits are ignored.

    return buffer.Count;
}
/// <summary>
/// Decodes bytes that need fallback handling. Takes a fast path when the active
/// fallback is a <see cref="DecoderReplacementFallback"/> that emits a single char,
/// and defers to the base implementation for anything left over.
/// </summary>
/// <param name="bytes">Remaining input, beginning at the byte that needs fallback.</param>
/// <param name="originalBytesLength">Forwarded unchanged to the base implementation.</param>
/// <param name="chars">Remaining output space.</param>
/// <param name="originalCharsLength">Original output length, used to compute the total written.</param>
/// <param name="decoder">Optional decoder; its fallback takes precedence over the encoding's.</param>
/// <returns>The total number of chars written.</returns>
private protected sealed override unsafe int GetCharsWithFallback(ReadOnlySpan<byte> bytes, int originalBytesLength, Span<char> chars, int originalCharsLength, DecoderNLS? decoder)
{
    // DecoderReplacementFallback writing a single BMP char is special-cased: it is
    // believed to be relatively common and can be handled more efficiently here
    // than in the base implementation.
    if (((decoder is null) ? this.DecoderFallback : decoder.Fallback) is DecoderReplacementFallback replacementFallback
        && replacementFallback.MaxCharCount == 1)
    {
        char substitute = replacementFallback.DefaultString[0];

        // One output char per input byte, so the shorter span bounds the work.
        int limit = Math.Min(bytes.Length, chars.Length);
        int offset = 0;

        fixed (byte* pBytes = &MemoryMarshal.GetReference(bytes))
        fixed (char* pChars = &MemoryMarshal.GetReference(chars))
        {
            // Alternate between replacing one non-convertible byte and
            // bulk-converting as much as possible after it.
            while (offset < limit)
            {
                pChars[offset] = substitute;
                offset++;

                if (offset < limit)
                {
                    offset += (int)ASCIIUtility.WidenAsciiToUtf16(&pBytes[offset], &pChars[offset], (uint)(limit - offset));
                }

                Debug.Assert(offset <= limit, "Somehow went beyond bounds of source or destination buffer?");
            }
        }

        // Drop what was consumed / written.
        bytes = bytes.Slice(limit);
        chars = chars.Slice(limit);
    }

    // If the fast path was not taken, or input remains after it, the slow fallback
    // path in the base class handles the rest. Otherwise report the total written.
    return bytes.IsEmpty
        ? originalCharsLength - chars.Length
        : base.GetCharsWithFallback(bytes, originalBytesLength, chars, originalCharsLength, decoder);
}
/// <summary>
/// Decodes bytes that need fallback handling. Takes a fast path through
/// <c>Utf8.ToUtf16</c> when the active fallback is a
/// <see cref="DecoderReplacementFallback"/> emitting exactly one replacement char
/// equal to <c>UnicodeUtility.ReplacementChar</c>, and defers to the base
/// implementation for anything left over.
/// </summary>
/// <param name="bytes">Remaining input, beginning at the data that needs fallback.</param>
/// <param name="originalBytesLength">Forwarded unchanged to the base implementation.</param>
/// <param name="chars">Remaining output space.</param>
/// <param name="originalCharsLength">Original output length, used to compute the total written.</param>
/// <param name="decoder">Optional decoder; its fallback takes precedence over the encoding's.</param>
/// <returns>The total number of chars written.</returns>
private protected sealed override unsafe int GetCharsWithFallback(ReadOnlySpan<byte> bytes, int originalBytesLength, Span<char> chars, int originalCharsLength, DecoderNLS? decoder)
{
    // DecoderReplacementFallback writing a single U+FFFD char is special-cased: it is
    // believed to be relatively common and can be handled more efficiently here
    // than in the base implementation.
    if (((decoder is null) ? this.DecoderFallback : decoder.Fallback) is DecoderReplacementFallback replacementFallback
        && replacementFallback.MaxCharCount == 1
        && replacementFallback.DefaultString[0] == UnicodeUtility.ReplacementChar)
    {
        // With no decoder, or a decoder that must flush, this is the final block.
        bool isFinalBlock = decoder is null || decoder.MustFlush;

        // The exact OperationStatus is irrelevant; only the amount of payload
        // processed matters.
        Utf8.ToUtf16(bytes, chars, out int consumed, out int written, replaceInvalidSequences: true, isFinalBlock: isFinalBlock);

        // Drop what was consumed / written.
        bytes = bytes.Slice(consumed);
        chars = chars.Slice(written);
    }

    // If the fast path was not taken, or input remains after it, the slow fallback
    // path in the base class handles the rest. Otherwise report the total written.
    return bytes.IsEmpty
        ? originalCharsLength - chars.Length
        : base.GetCharsWithFallback(bytes, originalBytesLength, chars, originalCharsLength, decoder);
}