/// <summary>
/// Encoder-aware overload that checks its arguments through code contracts and then
/// forwards to the two-argument <c>GetByteCount(chars, count)</c> overload.
/// </summary>
/// <param name="chars">Pointer to the first character of the input.</param>
/// <param name="count">Number of characters at <paramref name="chars"/>; required to be non-negative.</param>
/// <param name="encoder">Part of the signature only; this implementation never reads it.</param>
/// <returns>The value returned by the two-argument overload for the same input.</returns>
internal unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    // Contract preconditions must stay at the top of the method.
    Contract.Requires(count >= 0);
    Contract.Requires(Contract.WritableBytes(chars) >= (uint)(sizeof(char) * count));

    int result = this.GetByteCount(chars, count);
    return result;
}
/// <summary>
/// Forwards an encoder-aware byte-count request to the overload that takes only the
/// character pointer and length. The encoder argument is ignored.
/// </summary>
/// <param name="chars">Start of the character data.</param>
/// <param name="count">Character count; the contract requires it to be at least zero.</param>
/// <param name="encoder">Unused by this implementation.</param>
/// <returns>The result of <c>GetByteCount(chars, count)</c>.</returns>
internal unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
{
    Contract.Requires(count >= 0);
    Contract.Requires(Contract.WritableBytes(chars) >= (uint)(sizeof(char) * count));

    return GetByteCount(chars, count);
}
/// <summary>
/// Counts the bytes produced for <paramref name="count"/> characters using the
/// <c>mapUnicodeToBytes</c> table: a table value below 0x100 counts as one byte, a value of
/// 0x100 or above counts as two. Characters whose table value is 0 (other than U+0000) are
/// routed through the encoder fallback instead of being counted directly.
/// </summary>
/// <param name="chars">Start of the character data.</param>
/// <param name="count">Number of characters to examine.</param>
/// <param name="encoder">Optional encoder supplying a left-over character and fallback buffer; may be null.</param>
/// <returns>The accumulated byte count.</returns>
/// <exception cref="ArgumentException">The encoder already has a fallback buffer with characters remaining.</exception>
internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
{
    base.CheckMemorySection();

    // Pick up the character carried over by the encoder, rejecting stale fallback data.
    char pending = '\0';
    if (encoder != null)
    {
        pending = encoder.charLeftOver;
        if (encoder.InternalHasFallbackBuffer && (encoder.FallbackBuffer.Remaining > 0))
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", new object[] { this.EncodingName, encoder.Fallback.GetType() }));
        }
    }

    char *limit = chars + count;
    int total = 0;
    EncoderFallbackBuffer fallback = null;

    // A left-over character is handed to the fallback before any input is read.
    if (pending > '\0')
    {
        fallback = encoder.FallbackBuffer;
        fallback.InternalInitialize(chars, limit, encoder, false);
        fallback.InternalFallback(pending, ref chars);
    }

    for (;;)
    {
        // Characters produced by the fallback are consumed before the input buffer.
        char current = '\0';
        if (fallback != null)
        {
            current = fallback.InternalGetNextChar();
        }

        if (current == '\0')
        {
            if (chars >= limit)
            {
                break;
            }

            current = chars[0];
            chars++;
        }

        ushort mapped = this.mapUnicodeToBytes[current];
        if ((mapped != 0) || (current == '\0'))
        {
            total++;
            if (mapped >= 0x100)
            {
                total++;
            }

            continue;
        }

        // Unmappable character: create the fallback buffer on first use.
        if (fallback == null)
        {
            fallback = (encoder == null)
                ? base.encoderFallback.CreateFallbackBuffer()
                : encoder.FallbackBuffer;

            // chars has advanced, so the original start is recomputed from the end.
            fallback.InternalInitialize(limit - count, limit, encoder, false);
        }

        fallback.InternalFallback(current, ref chars);
    }

    return total;
}
/// <summary>
/// Sets up the buffer over a character input range and a byte output range, selects the
/// fallback buffer (from the encoder when one is given, otherwise a new one from the
/// encoding's fallback), and initializes the fallback helper.
/// </summary>
/// <param name="inEncoding">Encoding that owns this buffer.</param>
/// <param name="inEncoder">Optional encoder; may be null.</param>
/// <param name="inByteStart">Start of the output bytes; a null pointer is passed on to the helper as "do not set encoder".</param>
/// <param name="inByteCount">Number of bytes available at <paramref name="inByteStart"/>.</param>
/// <param name="inCharStart">Start of the input characters.</param>
/// <param name="inCharCount">Number of characters at <paramref name="inCharStart"/>.</param>
/// <exception cref="ArgumentException">The encoder throws on overflow and its fallback buffer still holds characters.</exception>
internal unsafe EncodingByteBuffer(EncodingNLS inEncoding, EncoderNLS inEncoder, byte *inByteStart, int inByteCount, char *inCharStart, int inCharCount)
{
    _enc = inEncoding;
    _encoder = inEncoder;

    // Character window: start, cursor, end.
    _charStart = inCharStart;
    _chars = inCharStart;
    _charEnd = inCharStart + inCharCount;

    // Byte window: start, cursor, end.
    _byteStart = inByteStart;
    _bytes = inByteStart;
    _byteEnd = inByteStart + inByteCount;

    if (_encoder != null)
    {
        fallbackBuffer = _encoder.FallbackBuffer;

        // If we're not converting we must not have data in our fallback buffer
        bool staleFallbackData = _encoder.m_throwOnOverflow
            && _encoder.InternalHasFallbackBuffer
            && fallbackBuffer.Remaining > 0;
        if (staleFallbackData)
        {
            throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
        }
    }
    else
    {
        fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
    }

    fallbackBufferHelper = new EncoderFallbackBufferHelper(fallbackBuffer);
    fallbackBufferHelper.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
}
/// <summary>
/// Initializes the character and byte cursors, picks the fallback buffer, and prepares the
/// fallback helper for the supplied input range.
/// </summary>
/// <param name="inEncoding">Encoding used to create a fallback buffer when no encoder is supplied.</param>
/// <param name="inEncoder">Encoder whose fallback buffer is reused; may be null.</param>
/// <param name="inByteStart">First output byte (may be a null pointer).</param>
/// <param name="inByteCount">Length of the output range in bytes.</param>
/// <param name="inCharStart">First input character.</param>
/// <param name="inCharCount">Length of the input range in characters.</param>
/// <exception cref="ArgumentException">
/// The encoder has <c>m_throwOnOverflow</c> set and its fallback buffer is not empty.
/// </exception>
internal unsafe EncodingByteBuffer(EncodingNLS inEncoding, EncoderNLS inEncoder, byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
{
    _enc = inEncoding;
    _encoder = inEncoder;

    _chars = inCharStart;
    _charStart = inCharStart;
    _charEnd = inCharStart + inCharCount;

    _bytes = inByteStart;
    _byteStart = inByteStart;
    _byteEnd = inByteStart + inByteCount;

    // Without an encoder a new fallback buffer is created; otherwise the encoder's is shared.
    fallbackBuffer = (_encoder == null)
        ? _enc.EncoderFallback.CreateFallbackBuffer()
        : _encoder.FallbackBuffer;

    // If we're not converting we must not have data in our fallback buffer
    if (_encoder != null && _encoder.m_throwOnOverflow && _encoder.InternalHasFallbackBuffer && fallbackBuffer.Remaining > 0)
    {
        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
    }

    fallbackBufferHelper = new EncoderFallbackBufferHelper(fallbackBuffer);
    fallbackBufferHelper.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
}
//
// End of standard methods copied from EncodingNLS.cs
//

/// <summary>
/// Computes the byte count by running <c>GetBytes</c> with a null output pointer and a
/// zero output length, and returning its result.
/// </summary>
/// <param name="chars">Start of the character data; asserted non-null in debug builds.</param>
/// <param name="count">Number of characters; asserted non-negative in debug builds.</param>
/// <param name="baseEncoder">Encoder passed through unchanged to <c>GetBytes</c>.</param>
/// <returns>The value returned by <c>GetBytes</c>.</returns>
internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS baseEncoder)
{
    Debug.Assert(chars != null, "[UTF7Encoding.GetByteCount]chars!=null");
    Debug.Assert(count >= 0, "[UTF7Encoding.GetByteCount]count >=0");

    // Counting is the same call as encoding, just without an output buffer.
    int required = GetBytes(chars, count, null, 0, baseEncoder);
    return required;
}
/// <summary>
/// Counts the bytes needed for <paramref name="count"/> characters in a one-byte-per-character
/// encoding backed by the <c>mapUnicodeToBytes</c> table.
/// </summary>
/// <remarks>
/// When the active fallback is an <see cref="EncoderReplacementFallback"/> whose
/// <c>MaxCharCount</c> is 1, the result is simply <paramref name="count"/> (plus one when the
/// encoder holds a left-over character). Otherwise every character is inspected: a character
/// whose table entry is 0 (and which is not U+0000) is sent to the fallback buffer, and every
/// other character contributes one byte.
/// </remarks>
/// <param name="chars">Start of the character data.</param>
/// <param name="count">Number of characters to examine.</param>
/// <param name="encoder">Optional encoder supplying a left-over character and fallback; may be null.</param>
/// <returns>The number of bytes counted.</returns>
internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
{
    this.CheckMemorySection();

    char charLeftOver = char.MinValue;
    EncoderReplacementFallback replacementFallback;
    if (encoder != null)
    {
        charLeftOver = encoder.charLeftOver;
        replacementFallback = encoder.Fallback as EncoderReplacementFallback;
    }
    else
    {
        replacementFallback = this.EncoderFallback as EncoderReplacementFallback;
    }

    // Fast path: a one-character replacement keeps output length equal to input length.
    if (replacementFallback != null && replacementFallback.MaxCharCount == 1)
    {
        if (charLeftOver > 0)
        {
            ++count;
        }

        return count;
    }

    EncoderFallbackBuffer fallbackBuffer = null;
    int byteCount = 0;
    char *charEnd = chars + count;

    // A left-over character from the encoder is fallen back before reading new input.
    if (charLeftOver > 0)
    {
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
        fallbackBuffer.InternalFallback(charLeftOver, ref chars);
    }

    char ch;
    while ((ch = fallbackBuffer == null ? char.MinValue : fallbackBuffer.InternalGetNextChar()) != 0 || chars < charEnd)
    {
        if (ch == 0)
        {
            // No fallback output pending: take the next input character.
            // Pointer arithmetic on char* is scaled by sizeof(char), so the cursor must
            // advance by ONE element; advancing by 2 skipped every other character.
            ch = *chars;
            chars++;
        }

        if (this.mapUnicodeToBytes[ch] == 0 && ch != 0)
        {
            if (fallbackBuffer == null)
            {
                fallbackBuffer = encoder != null
                    ? encoder.FallbackBuffer
                    : this.encoderFallback.CreateFallbackBuffer();

                // chars has moved; recover the original start from the end pointer.
                fallbackBuffer.InternalInitialize(charEnd - count, charEnd, encoder, false);
            }

            fallbackBuffer.InternalFallback(ch, ref chars);
        }
        else
        {
            ++byteCount;
        }
    }

    return byteCount;
}
/// <summary>
/// Walks the input (and any fallback output) and totals the bytes implied by the
/// <c>mapUnicodeToBytes</c> table: one byte per mapped character, two when the table value is
/// 0x100 or larger. A non-NUL character with table value 0 is passed to the fallback buffer.
/// </summary>
/// <param name="chars">Start of the character data.</param>
/// <param name="count">Number of characters to examine.</param>
/// <param name="encoder">Optional encoder carrying a left-over character and fallback buffer; may be null.</param>
/// <returns>The total number of bytes counted.</returns>
/// <exception cref="ArgumentException">The encoder's existing fallback buffer still has characters remaining.</exception>
internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    base.CheckMemorySection();

    char carried = '\0';
    if (encoder != null)
    {
        carried = encoder.charLeftOver;

        bool fallbackNotEmpty = encoder.InternalHasFallbackBuffer && (encoder.FallbackBuffer.Remaining > 0);
        if (fallbackNotEmpty)
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", new object[] { this.EncodingName, encoder.Fallback.GetType() }));
        }
    }

    int bytesNeeded = 0;
    char* inputEnd = chars + count;
    EncoderFallbackBuffer buffer = null;

    if (carried > '\0')
    {
        // The carried-over character goes through the fallback first.
        buffer = encoder.FallbackBuffer;
        buffer.InternalInitialize(chars, inputEnd, encoder, false);
        buffer.InternalFallback(carried, ref chars);
    }

    // Priming read: fallback output (if any) is always polled before the input is consulted.
    char next = (buffer == null) ? '\0' : buffer.InternalGetNextChar();
    while ((next != '\0') || (chars < inputEnd))
    {
        if (next == '\0')
        {
            next = *chars;
            chars++;
        }

        ushort tableValue = this.mapUnicodeToBytes[next];
        bool unmappable = (tableValue == 0) && (next != '\0');
        if (unmappable)
        {
            if (buffer == null)
            {
                if (encoder != null)
                {
                    buffer = encoder.FallbackBuffer;
                }
                else
                {
                    buffer = base.encoderFallback.CreateFallbackBuffer();
                }

                // The start pointer is recomputed because chars has already advanced.
                buffer.InternalInitialize(inputEnd - count, inputEnd, encoder, false);
            }

            buffer.InternalFallback(next, ref chars);
        }
        else
        {
            bytesNeeded++;
            if (tableValue >= 0x100)
            {
                bytesNeeded++;
            }
        }

        next = (buffer == null) ? '\0' : buffer.InternalGetNextChar();
    }

    return bytesNeeded;
}
/// <summary>
/// Stores the supplied input range, encoder and flag on this instance and resets the
/// per-operation state (<c>bUsedEncoder</c>, <c>bFallingBack</c>, <c>iRecursionCount</c>).
/// </summary>
/// <param name="_charStart">Value stored in <c>charStart</c>.</param>
/// <param name="_charEnd">Value stored in <c>charEnd</c>.</param>
/// <param name="_encoder">Value stored in <c>encoder</c>.</param>
/// <param name="_setEncoder">Value stored in <c>setEncoder</c>.</param>
[System.Security.SecurityCritical]  // auto-generated
internal unsafe void InternalInitialize(char *_charStart, char *_charEnd, EncoderNLS _encoder, bool _setEncoder)
{
    // Caller-supplied context.
    this.charStart = _charStart;
    this.charEnd = _charEnd;
    this.encoder = _encoder;
    this.setEncoder = _setEncoder;

    // Fresh per-operation state.
    this.bUsedEncoder = false;
    this.bFallingBack = false;
    this.iRecursionCount = 0;
}
/// <summary>
/// Resets the fallback bookkeeping flags and recursion counter, then records the input
/// range, encoder and set-encoder flag for the operation about to run.
/// </summary>
/// <param name="_charStart">Start of the input range; stored in <c>charStart</c>.</param>
/// <param name="_charEnd">End of the input range; stored in <c>charEnd</c>.</param>
/// <param name="_encoder">Encoder reference; stored in <c>encoder</c>.</param>
/// <param name="_setEncoder">Flag stored in <c>setEncoder</c>.</param>
[System.Security.SecurityCritical]  // auto-generated
internal unsafe void InternalInitialize(char* _charStart, char* _charEnd, EncoderNLS _encoder, bool _setEncoder)
{
    // Clear state left from any previous use.
    iRecursionCount = 0;
    bFallingBack = false;
    bUsedEncoder = false;

    // Bind to the new operation.
    encoder = _encoder;
    setEncoder = _setEncoder;
    charStart = _charStart;
    charEnd = _charEnd;
}
// Assigns the values above.
// Kept out of the constructor because EncoderFallbacks would have to know how to implement these.

/// <summary>
/// Records the input range, encoder and set-encoder flag, and clears the used-encoder flag,
/// the falling-back flag and the recursion counter.
/// </summary>
/// <param name="charStart">Start of the input range.</param>
/// <param name="charEnd">End of the input range.</param>
/// <param name="encoder">Encoder associated with the operation.</param>
/// <param name="setEncoder">Flag stored in the <c>setEncoder</c> field.</param>
internal unsafe void InternalInitialize(char *charStart, char *charEnd, EncoderNLS encoder, bool setEncoder)
{
    // Reset state first.
    this.iRecursionCount = 0;
    this.bFallingBack = false;
    this.bUsedEncoder = false;

    // Then capture the arguments (parameters shadow the fields, hence "this.").
    this.setEncoder = setEncoder;
    this.encoder = encoder;
    this.charEnd = charEnd;
    this.charStart = charStart;
}
/// <summary>
/// Counts the bytes needed for <paramref name="count"/> characters using the
/// <c>mapUnicodeToBytes</c> table: one byte per mapped character, two when the table value is
/// 256 or greater. A non-NUL character whose table value is 0 is passed to the fallback buffer.
/// </summary>
/// <param name="chars">Start of the character data.</param>
/// <param name="count">Number of characters to examine.</param>
/// <param name="encoder">Optional encoder supplying a left-over character and fallback buffer; may be null.</param>
/// <returns>The number of bytes counted.</returns>
/// <exception cref="ArgumentException">The encoder's existing fallback buffer still has characters remaining.</exception>
internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
{
    this.CheckMemorySection();

    char charLeftOver = char.MinValue;
    if (encoder != null)
    {
        charLeftOver = encoder.charLeftOver;
        if (encoder.InternalHasFallbackBuffer && encoder.FallbackBuffer.Remaining > 0)
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", (object)this.EncodingName, (object)encoder.Fallback.GetType()));
        }
    }

    int byteCount = 0;
    char *charEnd = chars + count;
    EncoderFallbackBuffer fallbackBuffer = null;

    // A left-over character from the encoder is fallen back before reading new input.
    if (charLeftOver > 0)
    {
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
        fallbackBuffer.InternalFallback(charLeftOver, ref chars);
    }

    char ch;
    while ((ch = fallbackBuffer == null ? char.MinValue : fallbackBuffer.InternalGetNextChar()) != 0 || chars < charEnd)
    {
        if (ch == 0)
        {
            // No fallback output pending: take the next input character.
            // char* arithmetic is scaled by sizeof(char), so advance by ONE element;
            // the previous "+= 2" skipped every other character of the input.
            ch = *chars;
            chars++;
        }

        ushort mapped = this.mapUnicodeToBytes[ch];
        if (mapped == 0 && ch != 0)
        {
            if (fallbackBuffer == null)
            {
                fallbackBuffer = encoder != null
                    ? encoder.FallbackBuffer
                    : this.encoderFallback.CreateFallbackBuffer();

                // chars has moved; recover the original start from the end pointer.
                fallbackBuffer.InternalInitialize(charEnd - count, charEnd, encoder, false);
            }

            fallbackBuffer.InternalFallback(ch, ref chars);
        }
        else
        {
            ++byteCount;
            if (mapped >= 256)
            {
                // Table values of 256 and above need a second byte.
                ++byteCount;
            }
        }
    }

    return byteCount;
}
/// <summary>
/// Obtains a fallback buffer (the encoder's when an encoder is supplied, otherwise a new one
/// from the encoding's fallback) and stamps it with the encoding, encoder and original
/// character count.
/// </summary>
/// <param name="encoding">Encoding recorded on the buffer and used to create one when no encoder is given.</param>
/// <param name="encoder">Encoder whose buffer is reused; may be null.</param>
/// <param name="originalCharCount">
/// Used only to derive the 'index' passed to the abstract Fallback method: the index is this
/// value minus 'chars.Length', where chars is expected to be the entire remaining input.
/// </param>
/// <returns>The initialized fallback buffer.</returns>
internal static EncoderFallbackBuffer CreateAndInitialize(Encoding encoding, EncoderNLS encoder, int originalCharCount)
{
    EncoderFallbackBuffer buffer;
    if (encoder is null)
    {
        buffer = encoding.EncoderFallback.CreateFallbackBuffer();
    }
    else
    {
        buffer = encoder.FallbackBuffer;
    }

    buffer.originalCharCount = originalCharCount;
    buffer.encoder = encoder;
    buffer.encoding = encoding;

    return buffer;
}
/// <summary>
/// Creates an encoder from the stored encoding and, when a fallback was stored, applies that
/// fallback. The stored left-over character is restored only inside that same case, and only
/// when it is non-default and the new encoder is an <c>EncoderNLS</c>.
/// </summary>
/// <param name="context">Not read by this implementation.</param>
/// <returns>The newly created encoder.</returns>
public object GetRealObject(StreamingContext context)
{
    Encoder realEncoder = _encoding.GetEncoder();

    // Nothing else is restored unless a fallback was captured.
    if (_fallback == null)
    {
        return realEncoder;
    }

    realEncoder.Fallback = _fallback;

    if (_charLeftOver == default(char))
    {
        return realEncoder;
    }

    EncoderNLS nlsEncoder = realEncoder as EncoderNLS;
    if (nlsEncoder != null)
    {
        nlsEncoder.charLeftOver = _charLeftOver;
    }

    return realEncoder;
}
// End of overridden methods which use EncodingForwarder

/// <summary>
/// Counts the bytes needed for the input: four bytes for each non-surrogate character and
/// four bytes for each high/low surrogate pair. Unpaired surrogates are passed to the
/// fallback buffer, whose output is then counted the same way.
/// </summary>
/// <param name="chars">Start of the character data; asserted non-null in debug builds.</param>
/// <param name="count">Number of characters; asserted non-negative in debug builds.</param>
/// <param name="encoder">Optional encoder supplying a pending high surrogate and fallback buffer; may be null.</param>
/// <returns>The number of bytes counted.</returns>
/// <exception cref="ArgumentException">The encoder's fallback buffer still has characters remaining.</exception>
/// <exception cref="ArgumentOutOfRangeException">The running total became negative (overflow).</exception>
internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
{
    Debug.Assert(chars != null, "[UTF32Encoding.GetByteCount]chars!=null");
    Debug.Assert(count >= 0, "[UTF32Encoding.GetByteCount]count >=0");

    char *inputStart = chars;
    char *inputEnd = chars + count;
    int total = 0;
    char pendingHigh = '\0';

    // Temporary used so the fallback can move the cursor without taking "ref chars" directly.
    char *cursorForFallback;

    EncoderFallbackBuffer fallback;
    if (encoder == null)
    {
        fallback = this.encoderFallback.CreateFallbackBuffer();
    }
    else
    {
        pendingHigh = encoder.charLeftOver;
        fallback = encoder.FallbackBuffer;

        // Left-over fallback data is not allowed when counting.
        if (fallback.Remaining > 0)
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", this.EncodingName, encoder.Fallback.GetType()));
        }
    }

    fallback.InternalInitialize(inputStart, inputEnd, encoder, false);

    bool flushedTrailingSurrogate;
    do
    {
        flushedTrailingSurrogate = false;

        char current;
        while (((current = fallback.InternalGetNextChar()) != 0) || chars < inputEnd)
        {
            if (current == 0)
            {
                // Nothing from the fallback: read the next input character.
                current = *chars;
                chars++;
            }

            if (pendingHigh != '\0')
            {
                if (Char.IsLowSurrogate(current))
                {
                    // Completed pair: one UTF-32 code unit.
                    pendingHigh = '\0';
                    total += 4;
                }
                else
                {
                    // Missing low surrogate: put the current char back and fall back the high one.
                    Debug.Assert(chars > inputStart, "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
                    chars--;

                    cursorForFallback = chars;
                    fallback.InternalFallback(pendingHigh, ref cursorForFallback);
                    chars = cursorForFallback;

                    pendingHigh = '\0';
                }
            }
            else if (Char.IsHighSurrogate(current))
            {
                // Remember it; the next character decides what happens.
                pendingHigh = current;
            }
            else if (Char.IsLowSurrogate(current))
            {
                // Leading low surrogate is illegal: fall it back.
                cursorForFallback = chars;
                fallback.InternalFallback(current, ref cursorForFallback);
                chars = cursorForFallback;
            }
            else
            {
                total += 4;
            }
        }

        // A lonely trailing high surrogate is fallen back when flushing, then the loop reruns
        // to count whatever the fallback produced.
        if ((encoder == null || encoder.MustFlush) && pendingHigh > 0)
        {
            cursorForFallback = chars;
            fallback.InternalFallback(pendingHigh, ref cursorForFallback);
            chars = cursorForFallback;

            pendingHigh = (char)0;
            flushedTrailingSurrogate = true;
        }
    }
    while (flushedTrailingSurrogate);

    if (total < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
    }

    // Shouldn't have anything in fallback buffer for GetByteCount
    // (don't have to check m_throwOnOverflow for count)
    Debug.Assert(fallback.Remaining == 0, "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");

    return total;
}
/// <summary>
/// Abstract, encoder-aware byte-count operation over a raw character pointer. No
/// implementation is supplied here; derived types must override it.
/// </summary>
/// <param name="chars">Pointer to character data. NOTE(review): presumably the first character to measure - confirm against implementers.</param>
/// <param name="count">Integer accompanying <paramref name="chars"/>. NOTE(review): presumably the number of characters - confirm.</param>
/// <param name="encoder">Encoder state passed to the implementation. NOTE(review): whether null is allowed is not visible here.</param>
/// <returns>An <see cref="int"/>; by its name presumably the number of bytes required - confirm against implementers.</returns>
public unsafe abstract int GetByteCount(char *chars, int count, EncoderNLS encoder);
/// <summary>
/// Encodes characters to bytes where every character up to U+007F is written as the byte of
/// the same value and every other character is handled by the encoder fallback.
/// </summary>
/// <remarks>
/// Fast path: when the active fallback is an <see cref="EncoderReplacementFallback"/> with
/// <c>MaxCharCount == 1</c> whose replacement character is at most U+007F, each character of
/// U+0080 or above is replaced by that byte directly.
/// Slow path: characters above U+007F are passed to the fallback buffer and its output is
/// encoded in turn.
/// When an encoder is supplied, <c>m_charsUsed</c> is set to the number of input characters
/// consumed.
/// </remarks>
/// <param name="chars">Start of the input characters.</param>
/// <param name="charCount">Number of input characters.</param>
/// <param name="bytes">Start of the output buffer.</param>
/// <param name="byteCount">Capacity of the output buffer in bytes.</param>
/// <param name="encoder">Optional encoder supplying left-over state and fallback; may be null.</param>
/// <returns>The number of bytes written.</returns>
/// <exception cref="ArgumentException">
/// The encoder has <c>m_throwOnOverflow</c> set and its fallback buffer still has characters remaining.
/// </exception>
internal override unsafe int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS encoder)
{
    char charLeftOver = '\0';
    EncoderReplacementFallback replacementFallback = null;
    EncoderFallbackBuffer fallbackBuffer = null;

    char *charEnd = chars + charCount;
    byte *byteStart = bytes;
    char *charStart = chars;

    if (encoder != null)
    {
        charLeftOver = encoder.charLeftOver;
        replacementFallback = encoder.Fallback as EncoderReplacementFallback;

        if (encoder.InternalHasFallbackBuffer)
        {
            fallbackBuffer = encoder.FallbackBuffer;
            if ((fallbackBuffer.Remaining > 0) && encoder.m_throwOnOverflow)
            {
                throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", new object[] { this.EncodingName, encoder.Fallback.GetType() }));
            }

            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
        }
    }
    else
    {
        replacementFallback = base.EncoderFallback as EncoderReplacementFallback;
    }

    // Fast path: single-character replacement that itself fits in one byte (<= U+007F).
    if ((replacementFallback != null) && (replacementFallback.MaxCharCount == 1))
    {
        char replacement = replacementFallback.DefaultString[0];
        if (replacement <= '\x007f')
        {
            if (charLeftOver > '\0')
            {
                // A single byte is needed for the left-over character, so zero capacity
                // is always reported as an overflow.
                if (byteCount == 0)
                {
                    base.ThrowBytesOverflow(encoder, true);
                }

                // Write THEN advance: the previous "bytes++; bytes[0] = ..." left the first
                // output slot unwritten and stored one position too far.
                *bytes = (byte)replacement;
                bytes++;
                byteCount--;
            }

            if (byteCount < charCount)
            {
                // Report the overflow; if the call returns, only convert what fits.
                base.ThrowBytesOverflow(encoder, byteCount < 1);
                charEnd = chars + byteCount;
            }

            while (chars < charEnd)
            {
                // Read THEN advance: the previous "chars++; chars[0]" skipped the first
                // character and read one element past the end of the range.
                char ch = *chars;
                chars++;

                *bytes = (ch >= '\x0080') ? (byte)replacement : (byte)ch;
                bytes++;
            }

            if (encoder != null)
            {
                encoder.charLeftOver = '\0';

                // Pointer subtraction already yields a count of chars; dividing by 2
                // under-reported the characters consumed.
                encoder.m_charsUsed = (int)(chars - charStart);
            }

            return (int)(bytes - byteStart);
        }
    }

    // Slow path: full fallback handling.
    byte *byteEnd = bytes + byteCount;

    if (charLeftOver > '\0')
    {
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);
        fallbackBuffer.InternalFallback(charLeftOver, ref chars);
    }

    char current;
    while (((current = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != '\0') || (chars < charEnd))
    {
        if (current == '\0')
        {
            // No fallback output pending: take the next input character.
            current = *chars;
            chars++;
        }

        if (current > '\x007f')
        {
            if (fallbackBuffer == null)
            {
                if (encoder == null)
                {
                    fallbackBuffer = base.encoderFallback.CreateFallbackBuffer();
                }
                else
                {
                    fallbackBuffer = encoder.FallbackBuffer;
                }

                // chars has moved; recover the original start from the end pointer.
                fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
            }

            fallbackBuffer.InternalFallback(current, ref chars);
        }
        else
        {
            if (bytes >= byteEnd)
            {
                // Out of room: un-consume the character from wherever it came from.
                if ((fallbackBuffer == null) || !fallbackBuffer.bFallingBack)
                {
                    chars--;
                }
                else
                {
                    fallbackBuffer.MovePrevious();
                }

                base.ThrowBytesOverflow(encoder, bytes == byteStart);
                break;
            }

            *bytes = (byte)current;
            bytes++;
        }
    }

    if (encoder != null)
    {
        // Clear the left-over character unless the fallback stored one in the encoder.
        if ((fallbackBuffer != null) && !fallbackBuffer.bUsedEncoder)
        {
            encoder.charLeftOver = '\0';
        }

        // Element count, not bytes: no division needed.
        encoder.m_charsUsed = (int)(chars - charStart);
    }

    return (int)(bytes - byteStart);
}
/// <summary>
/// Captures the encoding, encoder, and the character/byte ranges, then selects and
/// initializes the fallback buffer used while encoding.
/// </summary>
/// <param name="inEncoding">Encoding used to create a fallback buffer when no encoder is supplied.</param>
/// <param name="inEncoder">Encoder whose fallback buffer is reused; may be null.</param>
/// <param name="inByteStart">Start of the output bytes; whether it is non-null is passed to the fallback buffer's initializer.</param>
/// <param name="inByteCount">Length of the output range in bytes.</param>
/// <param name="inCharStart">Start of the input characters.</param>
/// <param name="inCharCount">Length of the input range in characters.</param>
/// <exception cref="ArgumentException">
/// The encoder has <c>m_throwOnOverflow</c> set and its fallback buffer still has characters remaining.
/// </exception>
internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder, byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
{
    enc = inEncoding;
    encoder = inEncoder;

    // Input window.
    chars = inCharStart;
    charStart = inCharStart;
    charEnd = inCharStart + inCharCount;

    // Output window.
    bytes = inByteStart;
    byteStart = inByteStart;
    byteEnd = inByteStart + inByteCount;

    if (encoder != null)
    {
        fallbackBuffer = encoder.FallbackBuffer;

        bool staleFallbackData = encoder.m_throwOnOverflow
            && encoder.InternalHasFallbackBuffer
            && (fallbackBuffer.Remaining > 0);
        if (staleFallbackData)
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", new object[] { encoder.Encoding.EncodingName, encoder.Fallback.GetType() }));
        }
    }
    else
    {
        fallbackBuffer = enc.EncoderFallback.CreateFallbackBuffer();
    }

    fallbackBuffer.InternalInitialize(chars, charEnd, encoder, bytes != null);
}
/// <summary>
/// Default encoder-aware overload: ignores <paramref name="encoder"/> and forwards to the
/// four-argument <c>GetBytes</c> overload. Derived types may override it.
/// </summary>
/// <param name="chars">Start of the input characters.</param>
/// <param name="charCount">Number of input characters.</param>
/// <param name="bytes">Start of the output buffer.</param>
/// <param name="byteCount">Capacity of the output buffer in bytes.</param>
/// <param name="encoder">Not read by this implementation.</param>
/// <returns>The value returned by the four-argument overload.</returns>
internal virtual unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
{
    int written = this.GetBytes(chars, charCount, bytes, byteCount);
    return written;
}
/// <summary>
/// Counts four bytes for every non-surrogate character and four bytes for every
/// high/low surrogate pair. Unpaired surrogates are passed to the fallback buffer and the
/// characters it yields are counted by the same rules.
/// </summary>
/// <param name="chars">Start of the character data.</param>
/// <param name="count">Number of characters to examine.</param>
/// <param name="encoder">Optional encoder supplying a pending high surrogate and fallback buffer; may be null.</param>
/// <returns>The number of bytes counted.</returns>
/// <exception cref="ArgumentException">The encoder's fallback buffer still has characters remaining.</exception>
/// <exception cref="ArgumentOutOfRangeException">The running total became negative (overflow).</exception>
internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
{
    char *inputStart = chars;
    char *inputEnd = chars + count;
    int total = 0;
    char pendingHigh = '\0';
    EncoderFallbackBuffer fallback;

    if (encoder == null)
    {
        fallback = base.encoderFallback.CreateFallbackBuffer();
    }
    else
    {
        pendingHigh = encoder.charLeftOver;
        fallback = encoder.FallbackBuffer;
        if (fallback.Remaining > 0)
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", new object[] { this.EncodingName, encoder.Fallback.GetType() }));
        }
    }

    fallback.InternalInitialize(inputStart, inputEnd, encoder, false);

    for (;;)
    {
        char current;
        while (((current = fallback.InternalGetNextChar()) != '\0') || (chars < inputEnd))
        {
            if (current == '\0')
            {
                // Nothing pending from the fallback: consume one input character.
                current = *chars;
                chars++;
            }

            if (pendingHigh != '\0')
            {
                if (char.IsLowSurrogate(current))
                {
                    // Pair completed.
                    total += 4;
                    pendingHigh = '\0';
                    continue;
                }

                // No low surrogate followed: push the current char back, fall back the high one.
                chars--;
                fallback.InternalFallback(pendingHigh, ref chars);
                pendingHigh = '\0';
                continue;
            }

            if (char.IsHighSurrogate(current))
            {
                pendingHigh = current;
                continue;
            }

            if (char.IsLowSurrogate(current))
            {
                // Low surrogate with no preceding high surrogate.
                fallback.InternalFallback(current, ref chars);
                continue;
            }

            total += 4;
        }

        // When flushing, a trailing high surrogate is fallen back and the scan repeats
        // so that the fallback's output is counted.
        bool mustFlushPending = ((encoder == null) || encoder.MustFlush) && (pendingHigh > '\0');
        if (!mustFlushPending)
        {
            break;
        }

        fallback.InternalFallback(pendingHigh, ref chars);
        pendingHigh = '\0';
    }

    if (total < 0)
    {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
    }

    return total;
}
/// <summary>
/// Encodes characters through an <c>EncodingByteBuffer</c>:
/// characters up to U+007F become one byte; a high/low surrogate pair becomes four bytes
/// computed from the pair's offset; other characters are looked up in
/// <c>mapUnicodeToBytes</c> and written as four bytes when <c>Is4Byte</c> reports true,
/// otherwise as two bytes (high byte first). Unpaired surrogates go to the buffer's fallback.
/// </summary>
/// <param name="chars">Start of the input characters.</param>
/// <param name="charCount">Number of input characters.</param>
/// <param name="bytes">Start of the output buffer; when it is a null pointer the encoder's left-over character is not updated.</param>
/// <param name="byteCount">Capacity of the output buffer in bytes.</param>
/// <param name="encoder">Optional encoder supplying and receiving the pending high surrogate; may be null.</param>
/// <returns>The buffer's <c>Count</c> after processing.</returns>
internal override unsafe int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS encoder)
{
    char pendingHigh = char.MinValue;
    if (encoder != null)
    {
        pendingHigh = encoder.charLeftOver;
    }

    Encoding.EncodingByteBuffer output = new Encoding.EncodingByteBuffer((Encoding)this, encoder, bytes, byteCount, chars, charCount);

TryAgain:
    while (output.MoreData)
    {
        char current = output.GetNextChar();

        if (pendingHigh != 0)
        {
            if (char.IsLowSurrogate(current))
            {
                // Surrogate pair: split the pair offset into the four output bytes.
                int offset = ((pendingHigh - 0xD800) << 10) + (current - 0xDC00);
                byte fourth = (byte)(offset % 10 + 0x30);
                offset /= 10;
                byte third = (byte)(offset % 126 + 0x81);
                offset /= 126;
                byte second = (byte)(offset % 10 + 0x30);
                offset /= 10;

                pendingHigh = char.MinValue;
                if (!output.AddByte((byte)(offset + 0x90), second, third, fourth))
                {
                    // No room: step back before leaving the loop.
                    output.MovePrevious(false);
                    break;
                }
            }
            else
            {
                // Not a low surrogate: return the current char and fall back the high one.
                output.MovePrevious(false);
                bool fellBack = output.Fallback(pendingHigh);
                pendingHigh = char.MinValue;
                if (!fellBack)
                {
                    break;
                }
            }

            continue;
        }

        if (current <= 0x7F)
        {
            if (!output.AddByte((byte)current))
            {
                break;
            }

            continue;
        }

        if (char.IsHighSurrogate(current))
        {
            // Wait for the next character to decide.
            pendingHigh = current;
            continue;
        }

        if (char.IsLowSurrogate(current))
        {
            // Low surrogate without a preceding high surrogate.
            if (!output.Fallback(current))
            {
                break;
            }

            continue;
        }

        ushort mapped = this.mapUnicodeToBytes[current];
        if (this.Is4Byte(current))
        {
            // Four-byte form: the table value is an offset split the same way as above,
            // but with 0x81 as the base of the first byte.
            int offset = mapped;
            byte fourth = (byte)(offset % 10 + 0x30);
            offset /= 10;
            byte third = (byte)(offset % 126 + 0x81);
            offset /= 126;
            byte second = (byte)(offset % 10 + 0x30);
            offset /= 10;

            if (!output.AddByte((byte)(offset + 0x81), second, third, fourth))
            {
                break;
            }
        }
        else
        {
            // Two-byte form: high byte, then low byte.
            if (!output.AddByte((byte)(mapped >> 8), (byte)(mapped & 0xFF)))
            {
                break;
            }
        }
    }

    // When flushing, a pending high surrogate is fallen back and the loop is re-entered.
    if ((encoder == null || encoder.MustFlush) && pendingHigh > 0)
    {
        output.Fallback(pendingHigh);
        pendingHigh = char.MinValue;
        goto TryAgain;
    }

    if (encoder != null)
    {
        // The pending surrogate is stored only when bytes were actually being produced.
        if (bytes != null)
        {
            encoder.charLeftOver = pendingHigh;
        }

        encoder.m_charsUsed = output.CharsUsed;
    }

    return output.Count;
}
// GetByteCount
// Note: the output size is assumed to equal count to begin with; an encoder or a
//       non-trivial fallback may change that.

/// <summary>
/// Counts the bytes needed for <paramref name="count"/> characters in a single-byte code page
/// backed by the <c>_mapUnicodeToBytes</c> table.
/// </summary>
/// <remarks>
/// With an <see cref="EncoderReplacementFallback"/> whose <c>MaxCharCount</c> is 1 the answer
/// is <paramref name="count"/> (plus one if the encoder holds a left-over character).
/// Otherwise each character is examined: a non-NUL character whose table entry is 0 is handed
/// to the fallback, and every other character contributes one byte.
/// </remarks>
/// <param name="chars">Start of the character data; asserted non-null in debug builds.</param>
/// <param name="count">Number of characters; asserted non-negative in debug builds.</param>
/// <param name="encoder">Optional encoder supplying a left-over character and fallback; may be null.</param>
/// <returns>The number of bytes counted.</returns>
public override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
{
    // Callers have already validated the parameters, so these are assertions only.
    Debug.Assert(count >= 0, "[SBCSCodePageEncoding.GetByteCount]count is negative");
    Debug.Assert(chars != null, "[SBCSCodePageEncoding.GetByteCount]chars is null");
    Debug.Assert(EncoderFallback != null, "[SBCSCodePageEncoding.GetByteCount]Attempting to use null fallback");

    CheckMemorySection();

    char pending = (char)0;
    EncoderReplacementFallback replacement = null;

    if (encoder == null)
    {
        replacement = EncoderFallback as EncoderReplacementFallback;
    }
    else
    {
        pending = encoder.charLeftOver;
        Debug.Assert(pending == 0 || char.IsHighSurrogate(pending), "[SBCSCodePageEncoding.GetByteCount]leftover character should be high surrogate");

        replacement = encoder.Fallback as EncoderReplacementFallback;

        // For SBCS the fallback buffer is expected to be empty here, so this is only asserted.
        Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer || encoder.FallbackBuffer.Remaining == 0, "[SBCSCodePageEncoding.GetByteCount]Expected empty fallback buffer at start");
    }

    // Simple case: one replacement character per input character keeps the size unchanged,
    // apart from a possible character left over from the previous call.
    bool oneForOneReplacement = replacement != null && replacement.MaxCharCount == 1;
    if (oneForOneReplacement)
    {
        if (pending > 0)
        {
            count++;
        }

        return count;
    }

    // General case: walk the input, consulting the fallback as needed.
    EncoderFallbackBuffer buffer = null;
    int total = 0;
    char *inputEnd = chars + count;
    EncoderFallbackBufferHelper helper = new EncoderFallbackBufferHelper(buffer);

    if (pending > 0)
    {
        // The left-over character was a surrogate, so it always has to be fallen back.
        Debug.Assert(encoder != null, "[SBCSCodePageEncoding.GetByteCount]Expect to have encoder if we have a charLeftOver");

        buffer = encoder.FallbackBuffer;
        helper = new EncoderFallbackBufferHelper(buffer);
        helper.InternalInitialize(chars, inputEnd, encoder, false);

        // Falls back a pair when *chars is a low surrogate.
        helper.InternalFallback(pending, ref chars);
    }

    for (;;)
    {
        // Fallback output is drained before the input is read.
        char current = (buffer == null) ? '\0' : helper.InternalGetNextChar();
        if (current == 0)
        {
            if (chars >= inputEnd)
            {
                break;
            }

            current = *chars;
            chars++;
        }

        byte mapped = _mapUnicodeToBytes[current];
        bool unmappable = mapped == 0 && current != (char)0;
        if (!unmappable)
        {
            total++;
            continue;
        }

        if (buffer == null)
        {
            buffer = (encoder == null)
                ? EncoderFallback.CreateFallbackBuffer()
                : encoder.FallbackBuffer;
            helper = new EncoderFallbackBufferHelper(buffer);

            // chars has moved, so the start is recomputed to keep exception-fallback indexes correct.
            helper.InternalInitialize(inputEnd - count, inputEnd, encoder, false);
        }

        helper.InternalFallback(current, ref chars);
    }

    Debug.Assert(buffer == null || buffer.Remaining == 0, "[SBCSEncoding.GetByteCount]Expected Empty fallback buffer at end");

    return (int)total;
}
// GetByteCount (UTF-8): counts the bytes needed to encode `count` chars starting at `chars`.
//
// How the count is built (all visible in the body below):
//   - byteCount starts at `count`, i.e. one byte is pre-allocated per input char.
//   - A char above 0x7F adds one more byte; a char above 0x7FF adds a further byte.
//   - A high surrogate is not counted when first seen (its pre-allocated byte is removed); once its low
//     surrogate arrives, ch is replaced by 0xfffd so the ordinary "> 0x7FF" path supplies the extra bytes.
//   - Any char still in the surrogate range at EncodeChar is passed to the fallback buffer (created lazily
//     from the encoder, or from this.encoderFallback when baseEncoder is null) and its byte is removed.
//   - Chars produced by the fallback buffer each add one pre-allocated byte and are processed like input.
//
// Encoder interaction:
//   - When baseEncoder is non-null it is cast to UTF8Encoder and its surrogateChar seeds the pending char.
//   - Throws ArgumentException ("Argument_EncoderFallbackNotEmpty") if the encoder already has a fallback
//     buffer whose Remaining is greater than 0.
//   - At end of input, a pending surrogate is only processed when baseEncoder is null or MustFlush is true.
//
// Conditional sections:
//   - The FASTLOOP region is compiled only when FASTLOOP is defined.
//   - Under BIT64, a negative byteCount breaks out of the loop and then throws ArgumentException
//     ("Argument_ConversionOverflow").
//
// The final Contract.Assert expects the fallback buffer (if any) to be empty.
//
// NOTE(review): this method appears here with its original line breaks removed, so every `//` comment runs
// to the end of the physical line and the `#if`/`#endif` directives sit mid-line. The original line
// structure has to be restored before this text can compile; the code tokens are left untouched.
[System.Security.SecurityCritical] // auto-generated internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS baseEncoder) { // For fallback we may need a fallback buffer. // We wait to initialize it though in case we don't have any broken input unicode EncoderFallbackBuffer fallbackBuffer = null; char *pSrc = chars; char *pEnd = pSrc+count; // Start by assuming we have as many as count int byteCount = count; int ch = 0; if (baseEncoder != null) { UTF8Encoder encoder = (UTF8Encoder)baseEncoder; ch = encoder.surrogateChar; // We mustn't have left over fallback data when counting if (encoder.InternalHasFallbackBuffer) { fallbackBuffer = encoder.FallbackBuffer; if (fallbackBuffer.Remaining > 0) throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", this.EncodingName, encoder.Fallback.GetType())); // Set our internal fallback interesting things. fallbackBuffer.InternalInitialize(chars, pEnd, encoder, false); } } for (;;) { // SLOWLOOP: does all range checks, handles all special cases, but it is slow if (pSrc >= pEnd) { if (ch == 0) { // Unroll any fallback that happens at the end ch = fallbackBuffer != null ? fallbackBuffer.InternalGetNextChar() : 0; if (ch > 0) { byteCount++; goto ProcessChar; } } else { // Case of surrogates in the fallback. if (fallbackBuffer != null && fallbackBuffer.bFallingBack) { Contract.Assert(ch >= 0xD800 && ch <= 0xDBFF, "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture)); ch = fallbackBuffer.InternalGetNextChar(); byteCount++; if (InRange(ch, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) { ch = 0xfffd; byteCount++; goto EncodeChar; } else if (ch > 0){ goto ProcessChar; } else { byteCount--; // ignore last one. 
break; } } } if (ch <= 0) { break; } if (baseEncoder != null && !baseEncoder.MustFlush) { break; } // attempt to encode the partial surrogate (will fallback or ignore it), it'll also subtract 1. byteCount++; goto EncodeChar; } if (ch > 0) { Contract.Assert(ch >= 0xD800 && ch <= 0xDBFF, "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture)); // use separate helper variables for local contexts so that the jit optimizations // won't get confused about the variable lifetimes int cha = *pSrc; // count the pending surrogate byteCount++; // In previous byte, we encountered a high surrogate, so we are expecting a low surrogate here. // if (IsLowSurrogate(cha)) { if (InRange(cha, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) { // Don't need a real # because we're just counting, anything > 0x7ff ('cept surrogate) will do. ch = 0xfffd; // ch = cha + (ch << 10) + // (0x10000 // - CharUnicodeInfo.LOW_SURROGATE_START // - (CharUnicodeInfo.HIGH_SURROGATE_START << 10) ); // Use this next char pSrc++; } // else ch is still high surrogate and encoding will fail (so don't add count) // attempt to encode the surrogate or partial surrogate goto EncodeChar; } // If we've used a fallback, then we have to check for it if (fallbackBuffer != null) { ch = fallbackBuffer.InternalGetNextChar(); if (ch > 0) { // We have an extra byte we weren't expecting. byteCount++; goto ProcessChar; } } // read next char. 
The JIT optimization seems to be getting confused when // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead ch = *pSrc; pSrc++; ProcessChar: // if (IsHighSurrogate(ch)) { if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END)) { // we will count this surrogate next time around byteCount--; continue; } // either good char or partial surrogate EncodeChar: // throw exception on partial surrogate if necessary // if (IsLowSurrogate(ch) || IsHighSurrogate(ch)) if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) { // Lone surrogates aren't allowed // Have to make a fallback buffer if we don't have one if (fallbackBuffer == null) { // wait on fallbacks if we can // For fallback we may need a fallback buffer if (baseEncoder == null) fallbackBuffer = this.encoderFallback.CreateFallbackBuffer(); else fallbackBuffer = baseEncoder.FallbackBuffer; // Set our internal fallback interesting things. fallbackBuffer.InternalInitialize(chars, chars + count, baseEncoder, false); } // Do our fallback. Actually we already know its a mixed up surrogate, // so the ref pSrc isn't gonna do anything. fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrc); // Ignore it if we don't throw (we had preallocated this ch) byteCount--; ch = 0; continue; } // Count them if (ch > 0x7F) { if (ch > 0x7FF) { // the extra surrogate byte was compensated by the second surrogate character // (2 surrogates make 4 bytes. 
We've already counted 2 bytes, 1 per char) byteCount++; } byteCount++; } #if BIT64 // check for overflow if (byteCount < 0) { break; } #endif #if FASTLOOP // If still have fallback don't do fast loop if (fallbackBuffer != null && (ch = fallbackBuffer.InternalGetNextChar()) != 0) { // We're reserving 1 byte for each char by default byteCount++; goto ProcessChar; } int availableChars = PtrDiff(pEnd, pSrc); // don't fall into the fast decoding loop if we don't have enough characters if (availableChars <= 13) { // try to get over the remainder of the ascii characters fast though char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered while (pSrc < pLocalEnd) { ch = *pSrc; pSrc++; if (ch > 0x7F) goto ProcessChar; } // we are done break; } #if BIT64 // make sure that we won't get a silent overflow inside the fast loop // (Fall out to slow loop if we have this many characters) availableChars &= 0x0FFFFFFF; #endif // To compute the upper bound, assume that all characters are ASCII characters at this point, // the boundary will be decreased for every non-ASCII character we encounter // Also, we need 3 + 4 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates char *pStop = pSrc + availableChars - (3 + 4); while (pSrc < pStop) { ch = *pSrc; pSrc++; if (ch > 0x7F) // Not ASCII { if (ch > 0x7FF) // Not 2 Byte { if ((ch & 0xF800) == 0xD800) // See if its a Surrogate goto LongCode; byteCount++; } byteCount ++; } // get pSrc aligned if ((unchecked((int)pSrc) & 0x2) != 0) { ch = *pSrc; pSrc++; if (ch > 0x7F) // Not ASCII { if (ch > 0x7FF) // Not 2 Byte { if ((ch & 0xF800) == 0xD800) // See if its a Surrogate goto LongCode; byteCount++; } byteCount ++; } } // Run 2 * 4 characters at a time! 
while (pSrc < pStop) { ch = *(int*)pSrc; int chc = *(int*)(pSrc+2); if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0) // See if not ASCII { if (((ch | chc) & unchecked((int)0xF800F800)) != 0) // See if not 2 Byte { goto LongCodeWithMask; } if ((ch & unchecked((int)0xFF800000)) != 0) // Actually 0x07800780 is all we care about (4 bits) byteCount++; if ((ch & unchecked((int)0xFF80)) != 0) byteCount++; if ((chc & unchecked((int)0xFF800000)) != 0) byteCount++; if ((chc & unchecked((int)0xFF80)) != 0) byteCount++; } pSrc += 4; ch = *(int*)pSrc; chc = *(int*)(pSrc+2); if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0) // See if not ASCII { if (((ch | chc) & unchecked((int)0xF800F800)) != 0) // See if not 2 Byte { goto LongCodeWithMask; } if ((ch & unchecked((int)0xFF800000)) != 0) byteCount++; if ((ch & unchecked((int)0xFF80)) != 0) byteCount++; if ((chc & unchecked((int)0xFF800000)) != 0) byteCount++; if ((chc & unchecked((int)0xFF80)) != 0) byteCount++; } pSrc += 4; } break; LongCodeWithMask: #if BIGENDIAN // be careful about the sign extension ch = (int)(((uint)ch) >> 16); #else // BIGENDIAN ch = (char)ch; #endif // BIGENDIAN pSrc++; if (ch <= 0x7F) { continue; } LongCode: // use separate helper variables for slow and fast loop so that the jit optimizations // won't get confused about the variable lifetimes if (ch > 0x7FF) { // if (IsLowSurrogate(ch) || IsHighSurrogate(ch)) if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) { // 4 byte encoding - high surrogate + low surrogate int chd = *pSrc; if ( // !IsHighSurrogate(ch) // low without high -> bad ch > CharUnicodeInfo.HIGH_SURROGATE_END || // !IsLowSurrogate(chd) // high not followed by low -> bad !InRange(chd, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END) ) { // Back up and drop out to slow loop to figure out error pSrc--; break; } pSrc++; // byteCount - this byte is compensated by the second surrogate character } byteCount++; } byteCount++; // 
byteCount - the last byte is already included } #endif // FASTLOOP // no pending char at this point ch = 0; } #if BIT64 // check for overflow if (byteCount < 0) { throw new ArgumentException( Environment.GetResourceString("Argument_ConversionOverflow")); } #endif Contract.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0, "[UTF8Encoding.GetByteCount]Expected Empty fallback buffer"); return byteCount; }
// Encodes chars into bytes through an EncodingByteBuffer, emitting characters flagged in
// directEncode as-is and everything else as 6-bit groups looked up in base64Bytes,
// bracketed by '+' and '-'. Pending bits are carried in the encoder between calls;
// that state is written back only when bytes is non-null.
// Returns the buffer's byte count.
internal override unsafe int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS baseEncoder)
{
    Encoder utf7Encoder = (Encoder)baseEncoder;

    // pendingBitCount == -1 means no '+' run is currently open.
    int pendingBits = 0;
    int pendingBitCount = -1;

    Encoding.EncodingByteBuffer output = new Encoding.EncodingByteBuffer(this, utf7Encoder, bytes, byteCount, chars, charCount);

    if (utf7Encoder != null)
    {
        pendingBits = utf7Encoder.bits;
        pendingBitCount = utf7Encoder.bitCount;

        // Drain whole 6-bit groups left over from the previous call.
        while (pendingBitCount >= 6)
        {
            pendingBitCount -= 6;
            bool stored = output.AddByte(this.base64Bytes[(pendingBits >> pendingBitCount) & 0x3f]);
            if (!stored)
            {
                base.ThrowBytesOverflow(utf7Encoder, output.Count == 0);
            }
        }
    }

    while (output.MoreData)
    {
        char current = output.GetNextChar();

        if (current < 0x80 && this.directEncode[current])
        {
            // Directly encodable: close any open run first, then copy the char.
            if (pendingBitCount >= 0)
            {
                if (pendingBitCount > 0)
                {
                    // Pad the remaining bits out to one final 6-bit group.
                    if (!output.AddByte(this.base64Bytes[(pendingBits << (6 - pendingBitCount)) & 0x3f]))
                    {
                        break;
                    }
                    pendingBitCount = 0;
                }

                if (!output.AddByte((byte)'-'))
                {
                    break;
                }
                pendingBitCount = -1;
            }

            if (!output.AddByte((byte)current))
            {
                break;
            }
        }
        else if (pendingBitCount < 0 && current == '+')
        {
            // A '+' outside a run is written as the two bytes "+-".
            if (!output.AddByte((byte)'+', (byte)'-'))
            {
                break;
            }
        }
        else
        {
            if (pendingBitCount < 0)
            {
                // Open a new run.
                if (!output.AddByte((byte)'+'))
                {
                    break;
                }
                pendingBitCount = 0;
            }

            pendingBits = (pendingBits << 16) | current;
            pendingBitCount += 16;

            while (pendingBitCount >= 6)
            {
                pendingBitCount -= 6;
                if (!output.AddByte(this.base64Bytes[(pendingBits >> pendingBitCount) & 0x3f]))
                {
                    // Keep the unwritten group and re-advance past the current char
                    // (the returned char itself is not needed).
                    pendingBitCount += 6;
                    output.GetNextChar();
                    break;
                }
            }

            // A full group is still pending only when the inner loop ran out of room.
            if (pendingBitCount >= 6)
            {
                break;
            }
        }
    }

    // Close the run when there is no encoder or the encoder must flush.
    if (pendingBitCount >= 0 && (utf7Encoder == null || utf7Encoder.MustFlush))
    {
        if (pendingBitCount > 0)
        {
            if (output.AddByte(this.base64Bytes[(pendingBits << (6 - pendingBitCount)) & 0x3f]))
            {
                pendingBitCount = 0;
            }
        }

        if (output.AddByte((byte)'-'))
        {
            pendingBits = 0;
            pendingBitCount = -1;
        }
        else
        {
            output.GetNextChar();
        }
    }

    if (bytes != null && utf7Encoder != null)
    {
        utf7Encoder.bits = pendingBits;
        utf7Encoder.bitCount = pendingBitCount;
        utf7Encoder.m_charsUsed = output.CharsUsed;
    }

    return output.Count;
}
// Encodes chars into ISCII bytes through an EncodingByteBuffer.
// Characters below 0xA0 are copied as single bytes; characters in 0x0901..0x0D6F are
// looked up in UnicodeToIndicChar; anything else is sent to the fallback, except that
// U+200C / U+200D directly after a 0xE8 byte are written as 0xE8 / 0xE9.
// The active code page and the "last byte was 0xE8" flag are carried in the encoder and
// written back only when bytes is non-null. Returns the buffer's byte count.
internal override unsafe int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS baseEncoder)
{
    ISCIIEncoding.ISCIIEncoder isciiEncoder = (ISCIIEncoding.ISCIIEncoder)baseEncoder;
    Encoding.EncodingByteBuffer output = new Encoding.EncodingByteBuffer((Encoding)this, (EncoderNLS)isciiEncoder, bytes, byteCount, chars, charCount);

    int codePage = this.defaultCodePage;
    bool lastWasVirama = false;

    if (isciiEncoder != null)
    {
        codePage = isciiEncoder.currentCodePage;
        lastWasVirama = isciiEncoder.bLastVirama;

        // A char left over from the previous call goes straight to the fallback.
        if (isciiEncoder.charLeftOver > 0)
        {
            output.Fallback(isciiEncoder.charLeftOver);
            lastWasVirama = false;
        }
    }

    while (output.MoreData)
    {
        char current = output.GetNextChar();

        // Low range: one byte, unchanged.
        if (current < 0xA0)
        {
            if (!output.AddByte((byte)current))
            {
                break;
            }
            lastWasVirama = false;
            continue;
        }

        // Outside the mapped range 0x0901..0x0D6F.
        if (current < 0x0901 || current > 0x0D6F)
        {
            bool isZwnj = current == 0x200C;
            if (lastWasVirama && (isZwnj || current == 0x200D))
            {
                // U+200C -> 0xE8, U+200D -> 0xE9.
                if (!output.AddByte(isZwnj ? (byte)0xE8 : (byte)0xE9))
                {
                    break;
                }
            }
            else
            {
                output.Fallback(current);
            }
            lastWasVirama = false;
            continue;
        }

        // Table entry layout as used below: low byte = first output byte,
        // bits 8-11 = code page, bits 12-15 = index of an optional second byte.
        int mapping = ISCIIEncoding.UnicodeToIndicChar[current - 0x0901];
        if (mapping == 0)
        {
            output.Fallback(current);
            lastWasVirama = false;
            continue;
        }

        byte indicByte = (byte)mapping;
        int targetCodePage = (mapping >> 8) & 0x0F;
        int secondByteBits = mapping & 0xF000;

        if (targetCodePage != codePage)
        {
            // Emit 0xEF followed by (code page | 0x40) to switch code pages.
            if (!output.AddByte((byte)0xEF, (byte)(targetCodePage | 0x40)))
            {
                break;
            }
            codePage = targetCodePage;
        }

        if (!output.AddByte(indicByte, secondByteBits != 0 ? 1 : 0))
        {
            break;
        }

        lastWasVirama = indicByte == 0xE8;

        if (secondByteBits != 0 && !output.AddByte(ISCIIEncoding.SecondIndicByte[secondByteBits >> 12]))
        {
            break;
        }
    }

    // Switch back to the default code page when there is no encoder or it must flush.
    if (codePage != this.defaultCodePage && (isciiEncoder == null || isciiEncoder.MustFlush))
    {
        if (output.AddByte((byte)0xEF, (byte)(this.defaultCodePage | 0x40)))
        {
            codePage = this.defaultCodePage;
        }
        else
        {
            // Called for its effect on the buffer only; the returned char is not used.
            output.GetNextChar();
        }
        lastWasVirama = false;
    }

    if (isciiEncoder != null && bytes != null)
    {
        if (!output.fallbackBuffer.bUsedEncoder)
        {
            isciiEncoder.charLeftOver = char.MinValue;
        }
        isciiEncoder.currentCodePage = codePage;
        isciiEncoder.bLastVirama = lastWasVirama;
        isciiEncoder.m_charsUsed = output.CharsUsed;
    }

    return output.Count;
}
// Encodes chars as UTF-7 into bytes. When bytes is null the call only counts
// (GetByteCount calls it that way) and the encoder state is left untouched.
// Returns the number of bytes the buffer reports.
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder)
{
    Contract.Assert(byteCount >=0, "[UTF7Encoding.GetBytes]byteCount >=0");
    Contract.Assert(chars!=null, "[UTF7Encoding.GetBytes]chars!=null");
    Contract.Assert(charCount >=0, "[UTF7Encoding.GetBytes]charCount >=0");

    UTF7Encoding.Encoder state = (UTF7Encoding.Encoder)baseEncoder;

    // accumulator holds bits not yet written; unwritten is how many of them are valid.
    // unwritten == -1 means we are not inside a '+' ... '-' run.
    int accumulator = 0;
    int unwritten = -1;

    Encoding.EncodingByteBuffer sink = new Encoding.EncodingByteBuffer(
        this, state, bytes, byteCount, chars, charCount);

    if (state != null)
    {
        accumulator = state.bits;
        unwritten = state.bitCount;

        // The previous call may have left whole groups behind.
        while (unwritten >= 6)
        {
            unwritten -= 6;

            // If this fails we will never have enough room.
            if (!sink.AddByte(base64Bytes[(accumulator >> unwritten) & 0x3F]))
            {
                ThrowBytesOverflow(state, sink.Count == 0);
            }
        }
    }

    while (sink.MoreData)
    {
        char c = sink.GetNextChar();

        // Case 1: a character that may be written directly.
        if (c < 0x80 && directEncode[c])
        {
            if (unwritten >= 0)
            {
                if (unwritten > 0)
                {
                    // Flush the partial group, zero-padded on the right.
                    if (!sink.AddByte(base64Bytes[(accumulator << (6 - unwritten)) & 0x3F]))
                    {
                        break;                                  // Out of room, didn't throw
                    }
                    unwritten = 0;
                }

                // Close the run with '-'.
                if (!sink.AddByte((byte)'-'))
                {
                    break;                                      // Out of room, didn't throw
                }
                unwritten = -1;
            }

            if (!sink.AddByte((byte)c))
            {
                break;                                          // Out of room, didn't throw
            }
            continue;
        }

        // Case 2: a '+' outside a run is written as "+-".
        if (unwritten < 0 && c == '+')
        {
            if (!sink.AddByte((byte)'+', (byte)'-'))
            {
                break;                                          // Out of room, didn't throw
            }
            continue;
        }

        // Case 3: everything else goes into the base64 run.
        if (unwritten < 0)
        {
            // Open the run. Only 12 of this char's 16 bits are written now;
            // the remaining 4 wait for the next char or the flush.
            if (!sink.AddByte((byte)'+'))
            {
                break;                                          // Out of room, didn't throw
            }
            unwritten = 0;
        }

        accumulator = (accumulator << 16) | c;
        unwritten += 16;

        bool ranOutOfRoom = false;
        while (unwritten >= 6)
        {
            unwritten -= 6;
            if (!sink.AddByte(base64Bytes[(accumulator >> unwritten) & 0x3F]))
            {
                unwritten += 6;                                 // Those bits were not used
                sink.GetNextChar();                             // AddByte backed the char position up on
                                                                // failure; this char is still being processed
                ranOutOfRoom = true;
                break;
            }
        }

        if (ranOutOfRoom)
        {
            break;                                              // Not enough room for the bits
        }
    }

    // Encode any bits that are left over.
    // MustFlush may have been cleared by ThrowBytesOverflow earlier if converting.
    if (unwritten >= 0 && (state == null || state.MustFlush))
    {
        if (unwritten > 0 && sink.AddByte(base64Bytes[(accumulator << (6 - unwritten)) & 0x3F]))
        {
            // Spare bits written, nothing pending.
            unwritten = 0;
        }

        // If converting and the write above failed, this one fails too.
        if (sink.AddByte((byte)'-'))
        {
            // Run closed.
            accumulator = 0;
            unwritten = -1;
        }
        else
        {
            // Convert keeps the state for next time. AddByte decremented the char
            // count on failure, so advance it again to leave it unchanged.
            sink.GetNextChar();
        }
    }

    // bytes == null means counting, so leave the encoder alone in that case.
    if (bytes != null && state != null)
    {
        state.bits = accumulator;
        state.bitCount = unwritten;
        state.m_charsUsed = sink.CharsUsed;
    }

    return sink.Count;
}
// Counts the bytes needed to encode count chars by running GetBytes with a null
// output buffer of size 0.
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
{
    Contract.Assert(chars!=null, "[UTF7Encoding.GetByteCount]chars!=null");
    Contract.Assert(count >=0, "[UTF7Encoding.GetByteCount]count >=0");

    // A null destination puts GetBytes into counting mode.
    int requiredBytes = GetBytes(chars, count, null, 0, baseEncoder);
    return requiredBytes;
}
// Dispatches to the code-page-specific ISO-2022 encoder routine selected by CodePage.
// Returns that routine's result, or 0 for a code page not handled here.
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder)
{
    // Parameters were already validated by the internal caller, so only assert.
    Contract.Assert(chars != null, "[ISO2022Encoding.GetBytes]chars is null");
    Contract.Assert(byteCount >= 0, "[ISO2022Encoding.GetBytes]byteCount is negative");
    Contract.Assert(charCount >= 0, "[ISO2022Encoding.GetBytes]charCount is negative");

    // A null encoder fallback should be impossible here.
    Contract.Assert(encoderFallback != null, "[ISO2022Encoding.GetBytes]Attempting to use null encoder fallback");

    // The cast happens before dispatch, so it runs for every code page.
    ISO2022Encoder isoEncoder = (ISO2022Encoder)baseEncoder;

    switch (CodePage)
    {
        case 50220:
        case 50221:
        case 50222:
            return GetBytesCP5022xJP(chars, charCount, bytes, byteCount, isoEncoder);

        case 50225:
            return GetBytesCP50225KR(chars, charCount, bytes, byteCount, isoEncoder);

        // 50227 is intentionally not dispatched here (Everett had 50227 the same as 936).

        case 52936:
            return GetBytesCP52936(chars, charCount, bytes, byteCount, isoEncoder);

        default:
            return 0;
    }
}
//
// End of standard methods copied from EncodingNLS.cs
//

// GetByteCount
// Counts the bytes needed to encode charCount chars. Chars up to 0x7F cost one byte each;
// anything above goes through the encoder fallback and whatever it produces is counted.
// With a replacement fallback whose MaxCharCount is 1 the answer is simply the input
// length (plus one for a char left over in the encoder).
internal override unsafe int GetByteCount(char *chars, int charCount, EncoderNLS encoder)
{
    char *charStart = chars;
    char *charEnd = chars + charCount;
    char pendingChar = '\0';

    EncoderReplacementFallback replacement;
    EncoderFallbackBuffer fallbackBuffer = null;

    if (encoder == null)
    {
        replacement = this.EncoderFallback as EncoderReplacementFallback;
    }
    else
    {
        pendingChar = encoder.charLeftOver;
        replacement = encoder.Fallback as EncoderReplacementFallback;

        if (encoder.InternalHasFallbackBuffer)
        {
            // The buffer must not still hold fallback data when counting.
            fallbackBuffer = encoder.FallbackBuffer;
            if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
            {
                throw new ArgumentException("EncoderFallbackNotEmpty");
            }

            fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
        }
    }

    // Fast path: a one-char replacement maps every input char to exactly one byte,
    // so only a left-over char from the previous call can add to the count.
    if (replacement != null && replacement.MaxCharCount == 1)
    {
        return pendingChar > 0 ? charCount + 1 : charCount;
    }

    int byteCount = 0;

    // A left-over char is always run through the fallback first.
    if (pendingChar > 0)
    {
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);

        // May consume *chars as well if it completes a pair.
        fallbackBuffer.InternalFallback(pendingChar, ref chars);
    }

    for (;;)
    {
        // Drain pending fallback output before reading more input.
        char ch = (fallbackBuffer != null) ? fallbackBuffer.InternalGetNextChar() : '\0';
        if (ch == 0)
        {
            if (chars >= charEnd)
            {
                break;
            }

            ch = *chars;
            chars++;
        }

        if (ch <= 0x7f)
        {
            byteCount++;
            continue;
        }

        // Not encodable (this also catches surrogates): hand it to the fallback,
        // creating the buffer on first use.
        if (fallbackBuffer == null)
        {
            fallbackBuffer = (encoder == null)
                ? this.encoderFallback.CreateFallbackBuffer()
                : encoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
        }

        fallbackBuffer.InternalFallback(ch, ref chars);
    }

    return byteCount;
}
// Counts the bytes needed for count chars by calling GetBytes with a null output
// buffer of size 0.
internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS baseEncoder)
{
    int requiredBytes = this.GetBytes(chars, count, null, 0, baseEncoder);
    return requiredBytes;
}
//
// End of standard methods copied from EncodingNLS.cs
//

// GetByteCount
// Counts the bytes needed to encode charCount chars: one byte per char up to 0x7F, plus
// whatever the encoder fallback produces for every other char. When the fallback is a
// replacement fallback with MaxCharCount == 1 the result is the input length (plus one
// if the encoder holds a left-over char).
internal override unsafe int GetByteCount(char *chars, int charCount, EncoderNLS encoder)
{
    // The internal caller has already validated the arguments, so only assert.
    Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetByteCount]count is negative");
    Debug.Assert(chars != null, "[ASCIIEncoding.GetByteCount]chars is null");

    // There should always be an encoder fallback.
    Debug.Assert(encoderFallback != null, "[ASCIIEncoding.GetByteCount]Attempting to use null fallback encoder");

    char *charStart = chars;
    char *charEnd = chars + charCount;
    char pendingChar = (char)0;

    EncoderReplacementFallback replacement;
    EncoderFallbackBuffer fallbackBuffer = null;

    if (encoder == null)
    {
        replacement = this.EncoderFallback as EncoderReplacementFallback;
    }
    else
    {
        pendingChar = encoder.charLeftOver;
        Debug.Assert(pendingChar == 0 || Char.IsHighSurrogate(pendingChar), "[ASCIIEncoding.GetByteCount]leftover character should be high surrogate");

        replacement = encoder.Fallback as EncoderReplacementFallback;

        if (encoder.InternalHasFallbackBuffer)
        {
            // No fallback data may be left over when counting.
            fallbackBuffer = encoder.FallbackBuffer;
            if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
            {
                throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
            }

            fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
        }

        Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer || encoder.FallbackBuffer.Remaining == 0, "[ASCIICodePageEncoding.GetByteCount]Expected empty fallback buffer");
    }

    // Fast path: a one-char replacement turns every input char into exactly one byte.
    if (replacement != null && replacement.MaxCharCount == 1)
    {
        // A char left over from the previous call adds one more.
        return pendingChar > 0 ? charCount + 1 : charCount;
    }

    int byteCount = 0;

    if (pendingChar > 0)
    {
        Debug.Assert(Char.IsHighSurrogate(pendingChar), "[ASCIIEncoding.GetByteCount]leftover character should be high surrogate");
        Debug.Assert(encoder != null, "[ASCIIEncoding.GetByteCount]Expected encoder");

        // The left-over char is a surrogate, so it always goes through the fallback.
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);

        // Pass a copy by reference so chars itself can stay enregistered.
        // This falls back a pair if *chars is a low surrogate.
        char *cursor = chars;
        fallbackBuffer.InternalFallback(pendingChar, ref cursor);
        chars = cursor;
    }

    while (true)
    {
        // Fallback output is consumed before any further input.
        char ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar();
        if (ch == 0)
        {
            if (chars >= charEnd)
            {
                break;
            }

            ch = *chars;
            chars++;
        }

        if (ch <= 0x7f)
        {
            // Encodable as a single byte.
            byteCount++;
            continue;
        }

        // Anything above 0x7F (surrogates included) has to be fallen back.
        if (fallbackBuffer == null)
        {
            fallbackBuffer = (encoder == null)
                ? this.encoderFallback.CreateFallbackBuffer()
                : encoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
        }

        // Same by-reference copy as above to keep chars enregistered.
        char *fallbackCursor = chars;
        fallbackBuffer.InternalFallback(ch, ref fallbackCursor);
        chars = fallbackCursor;
    }

    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0, "[ASCIIEncoding.GetByteCount]Expected Empty fallback buffer");

    return byteCount;
}
// Encodes chars as UTF-32 into bytes, four bytes per code point, in the byte order
// selected by bigEndian.
//
//   chars/charCount  input characters
//   bytes/byteCount  output buffer and its capacity
//   encoder          optional; supplies a left-over high surrogate and the fallback buffer,
//                    and receives the new left-over surrogate and the chars-used count
//
// A high surrogate is held until the next char: a following low surrogate is combined with
// it via GetSurrogate, anything else sends the high surrogate to the fallback. Lone low
// surrogates are sent to the fallback as well. When fewer than four output bytes remain,
// the current char (or pair) is un-read and ThrowBytesOverflow is called, which is passed
// whether nothing has been written yet; if it does not throw, encoding stops.
// Returns the number of bytes written.
internal override unsafe int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS encoder)
{
    Debug.Assert(chars != null, "[UTF32Encoding.GetBytes]chars!=null");
    Debug.Assert(bytes != null, "[UTF32Encoding.GetBytes]bytes!=null");
    Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetBytes]byteCount >=0");
    Debug.Assert(charCount >= 0, "[UTF32Encoding.GetBytes]charCount >=0");

    char *charStart = chars;
    char *charEnd = chars + charCount;

    byte *byteStart = bytes;
    byte *byteEnd = bytes + byteCount;

    char highSurrogate = '\0';

    // For fallback we may need a fallback buffer
    EncoderFallbackBuffer fallbackBuffer = null;
    char *charsForFallback;

    if (encoder != null)
    {
        highSurrogate = encoder.charLeftOver;
        fallbackBuffer = encoder.FallbackBuffer;

        // We mustn't have left over fallback data when not converting
        if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0)
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", this.EncodingName, encoder.Fallback.GetType()));
        }
    }
    else
    {
        fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
    }

    // Set our internal fallback interesting things.
    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);

    char ch;
TryAgain:

    while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
    {
        // First unwind any fallback
        if (ch == 0)
        {
            // No fallback, just get next char
            ch = *chars;
            chars++;
        }

        // Do we need a low surrogate?
        if (highSurrogate != '\0')
        {
            //
            // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
            //
            if (Char.IsLowSurrogate(ch))
            {
                // Is it a legal one?
                uint iTemp = GetSurrogate(highSurrogate, ch);
                highSurrogate = '\0';

                //
                // One surrogate pair will be translated into 4 bytes UTF32.
                //
                if (bytes + 3 >= byteEnd)
                {
                    // Don't have 4 bytes
                    if (fallbackBuffer.bFallingBack)
                    {
                        fallbackBuffer.MovePrevious();                     // Aren't using these 2 fallback chars
                        fallbackBuffer.MovePrevious();
                    }
                    else
                    {
                        // If we don't have enough room, then either we should've advanced a while
                        // or we should have bytes==byteStart and throw below
                        Debug.Assert(chars > charStart + 1 || bytes == byteStart, "[UTF32Encoding.GetBytes]Expected chars to have advanced when no room to add surrogate pair");
                        chars -= 2;                                        // Aren't using those 2 chars
                    }
                    ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
                    highSurrogate = (char)0;                            // Nothing left over (we backed up to start of pair if supplementary)
                    break;
                }

                if (bigEndian)
                {
                    *(bytes++) = (byte)(0x00);
                    *(bytes++) = (byte)(iTemp >> 16);       // Implies & 0xFF, which isn't needed cause high are all 0
                    *(bytes++) = (byte)(iTemp >> 8);        // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp);             // Implies & 0xFF
                }
                else
                {
                    *(bytes++) = (byte)(iTemp);             // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp >> 8);        // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp >> 16);       // Implies & 0xFF, which isn't needed cause high are all 0
                    *(bytes++) = (byte)(0x00);
                }
                continue;
            }

            // We are missing our low surrogate, decrement chars and fallback the high surrogate
            // The high surrogate may have come from the encoder, but nothing else did.
            Debug.Assert(chars > charStart, "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
            chars--;

            // Do the fallback (through a copy so chars is not itself passed by reference)
            charsForFallback = chars;
            fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
            chars = charsForFallback;

            // We're going to fallback the old high surrogate.
            highSurrogate = '\0';
            continue;
        }

        // Do we have another high surrogate?, if so remember it
        if (Char.IsHighSurrogate(ch))
        {
            //
            // We'll have a high surrogate to check next time.
            //
            highSurrogate = ch;
            continue;
        }

        // Check for illegal characters (low surrogate)
        if (Char.IsLowSurrogate(ch))
        {
            // We have a leading low surrogate, do the fallback
            charsForFallback = chars;
            fallbackBuffer.InternalFallback(ch, ref charsForFallback);
            chars = charsForFallback;

            // Try again with fallback buffer
            continue;
        }

        // We get to add the character, yippee.
        if (bytes + 3 >= byteEnd)
        {
            // Don't have 4 bytes
            if (fallbackBuffer.bFallingBack)
            {
                fallbackBuffer.MovePrevious();                      // Aren't using this fallback char
            }
            else
            {
                // Must've advanced already
                Debug.Assert(chars > charStart, "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
                chars--;                                            // Aren't using this char
            }
            ThrowBytesOverflow(encoder, bytes == byteStart);        // Throw maybe (if no bytes written)
            break;                                                  // Didn't throw, stop
        }

        if (bigEndian)
        {
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)((uint)ch >> 8);     // Implies & 0xFF
            *(bytes++) = (byte)(ch);                // Implies & 0xFF
        }
        else
        {
            *(bytes++) = (byte)(ch);                // Implies & 0xFF
            *(bytes++) = (byte)((uint)ch >> 8);     // Implies & 0xFF
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)(0x00);
        }
    }

    // May have to do our last surrogate
    if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
    {
        // We have to do the fallback for the lonely high surrogate,
        // then loop again to encode whatever the fallback produced.
        charsForFallback = chars;
        fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
        chars = charsForFallback;

        highSurrogate = (char)0;
        goto TryAgain;
    }

    // Fix our encoder if we have one
    Debug.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush), "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");

    if (encoder != null)
    {
        // Remember our left over surrogate (or 0 if flushing)
        encoder.charLeftOver = highSurrogate;

        // Need # chars used
        encoder.m_charsUsed = (int)(chars - charStart);
    }

    // return the new length
    return (int)(bytes - byteStart);
}
//
// End of standard methods copied from EncodingNLS.cs
//

// Counts the bytes needed to encode count chars as UTF-16.
//
// The count starts at two bytes per input char (plus two for a high surrogate left over
// in the encoder) and is then corrected: two bytes are taken back for every surrogate
// that ends up going through the fallback or staying pending, and two bytes are added
// for every char the fallback buffer produces.
//
// Throws ArgumentOutOfRangeException when count << 1 is negative, and ArgumentException
// when the encoder's fallback buffer is not empty on entry or when a flushed left-over
// high surrogate is still left over after its fallback has been processed once.
internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    // Start by assuming each char gets 2 bytes
    int byteCount = count << 1;

    // Check for overflow in byteCount
    // (If they were all invalid chars, this would actually be wrong,
    // but that's a ridiculously large # so we're not concerned about that case)
    if (byteCount < 0)
        throw new ArgumentOutOfRangeException("count", "GetByteCountOverflow");

    char* charStart = chars;
    char* charEnd = chars + count;
    char charLeftOver = (char)0;

    // Set once the trailing high surrogate has been handed to the fallback,
    // so a second pass through the flush code throws instead of looping.
    bool wasHereBefore = false;

    // Need -1 to check 2 at a time. If we have an even #, longChars will go
    // from longEnd - 1/2 long to longEnd + 1/2 long. If we're odd, longChars
    // will go from longEnd - 1 long to longEnd. (Might not get to use this)
    ulong* longEnd = (ulong*)(charEnd - 3);

    // For fallback we may need a fallback buffer
    EncoderFallbackBuffer fallbackBuffer = null;

    if (encoder != null)
    {
        charLeftOver = encoder.charLeftOver;

        // Assume extra bytes to encode charLeftOver if it existed
        if (charLeftOver > 0)
            byteCount += 2;

        // We mustn't have left over fallback data when counting
        if (encoder.InternalHasFallbackBuffer)
        {
            fallbackBuffer = encoder.FallbackBuffer;
            if (fallbackBuffer.Remaining > 0)
                throw new ArgumentException("EncoderFallbackNotEmpty");

            // Set our internal fallback interesting things.
            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
        }
    }

    char ch;
TryAgain:

    // Fallback output (non-zero ch) is consumed before further input chars.
    while (((ch = (fallbackBuffer == null) ? (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
    {
        // First unwind any fallback
        if (ch == 0)
        {
            // No fallback, maybe we can do it fast
#if !NO_FAST_UNICODE_LOOP
#if BIGENDIAN       // If endianness is backwards then each pair of bytes would be backwards.
            if ( bigEndian &&
#else
            if (!bigEndian &&
#endif // BIGENDIAN

#if WIN64           // 64 bit CPU needs to be long aligned for this to work.
                charLeftOver == 0 && (unchecked((long)chars) & 7) == 0)
#else
                charLeftOver == 0 && (unchecked((int)chars) & 3) == 0)
#endif
            {
                // Need new char* so we can check 4 at a time
                ulong* longChars = (ulong*)chars;

                while (longChars < longEnd)
                {
                    // See if we potentially have surrogates (0x8000 bit set)
                    // (We're either big endian on a big endian machine or little endian on
                    // a little endian machine so this'll work)
                    if ((0x8000800080008000 & *longChars) != 0)
                    {
                        // See if any of these are high or low surrogates (0xd800 - 0xdfff). If the high
                        // 5 bits looks like 11011, then its a high or low surrogate.
                        // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
                        // Note that we expect BMP characters to be more common than surrogates
                        // & each char with 11111... then ^ with 11011. Zeroes then indicate surrogates
                        ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;

                        // Check each of the 4 chars. 0 for those 16 bits means it was a surrogate
                        // but no clue if they're high or low.
                        // If each of the 4 characters are non-zero, then none are surrogates.
                        if ((uTemp & 0xFFFF000000000000) == 0 ||
                            (uTemp & 0x0000FFFF00000000) == 0 ||
                            (uTemp & 0x00000000FFFF0000) == 0 ||
                            (uTemp & 0x000000000000FFFF) == 0)
                        {
                            // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
                            // or if there's 1 or 4 surrogates

                            // If they happen to be high/low/high/low, we may as well continue. Check the next
                            // bit to see if its set (low) or not (high) in the right pattern
#if BIGENDIAN
                            if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
#else
                            if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
#endif
                            {
                                // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
                                // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.

                                // Drop out to the slow loop to resolve the surrogates
                                break;
                            }
                            // else they are all surrogates in High/Low/High/Low order, so we can use them.
                        }
                        // else none are surrogates, so we can use them.
                    }
                    // else all < 0x8000 so we can use them

                    // We already counted these four chars, go to next long.
                    longChars++;
                }

                chars = (char*)longChars;

                // The fast loop may have consumed all remaining input.
                if (chars >= charEnd)
                    break;
            }
#endif // !NO_FAST_UNICODE_LOOP

            // No fallback, just get next char
            ch = *chars;
            chars++;
        }
        else
        {
            // We weren't preallocating fallback space.
            byteCount += 2;
        }

        // Check for high or low surrogates
        if (ch >= 0xd800 && ch <= 0xdfff)
        {
            // Was it a high surrogate?
            if (ch <= 0xdbff)
            {
                // Its a high surrogate, if we already had a high surrogate do its fallback
                if (charLeftOver > 0)
                {
                    // Unwind the current character, this should be safe because we
                    // don't have leftover data in the fallback, so chars must have
                    // advanced already.
                    chars--;

                    // If previous high surrogate deallocate 2 bytes
                    byteCount -= 2;

                    // Fallback the previous surrogate
                    // Need to initialize fallback buffer?
                    if (fallbackBuffer == null)
                    {
                        if (encoder == null)
                            fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = encoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                    }

                    fallbackBuffer.InternalFallback(charLeftOver, ref chars);

                    // Now no high surrogate left over
                    charLeftOver = (char)0;
                    continue;
                }

                // Remember this high surrogate
                charLeftOver = ch;
                continue;
            }

            // Its a low surrogate
            if (charLeftOver == 0)
            {
                // Expected a previous high surrogate.
                // Don't count this one (we'll count its fallback if necessary)
                byteCount -= 2;

                // fallback this one
                // Need to initialize fallback buffer?
                if (fallbackBuffer == null)
                {
                    if (encoder == null)
                        fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                    else
                        fallbackBuffer = encoder.FallbackBuffer;

                    // Set our internal fallback interesting things.
                    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                }
                fallbackBuffer.InternalFallback(ch, ref chars);
                continue;
            }

            // Valid surrogate pair, add our charLeftOver
            // (both halves are already included in byteCount)
            charLeftOver = (char)0;
            continue;
        }
        else if (charLeftOver > 0)
        {
            // Expected a low surrogate, but this char is normal

            // Rewind the current character, fallback previous character.
            // this should be safe because we don't have leftover data in the
            // fallback, so chars must have advanced already.
            chars--;

            // fallback previous chars
            // Need to initialize fallback buffer?
            if (fallbackBuffer == null)
            {
                if (encoder == null)
                    fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                else
                    fallbackBuffer = encoder.FallbackBuffer;

                // Set our internal fallback interesting things.
                fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
            }
            fallbackBuffer.InternalFallback(charLeftOver, ref chars);

            // Ignore charLeftOver or throw
            byteCount -= 2;
            charLeftOver = (char)0;

            continue;
        }

        // Ok we had something to add (already counted)
    }

    // Don't allocate space for left over char
    if (charLeftOver > 0)
    {
        byteCount -= 2;

        // If we have to flush, stick it in fallback and try again
        if (encoder == null || encoder.MustFlush)
        {
            if (wasHereBefore)
            {
                // Throw it, using our complete character
                throw new ArgumentException("RecursiveFallback");
            }
            else
            {
                // Need to initialize fallback buffer?
                if (fallbackBuffer == null)
                {
                    if (encoder == null)
                        fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                    else
                        fallbackBuffer = encoder.FallbackBuffer;

                    // Set our internal fallback interesting things.
                    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                }
                fallbackBuffer.InternalFallback(charLeftOver, ref chars);
                charLeftOver = (char)0;
                wasHereBefore = true;

                // Loop again to count whatever the fallback produced.
                goto TryAgain;
            }
        }
    }

    // Don't remember fallbackBuffer.encoder for counting
    return byteCount;
}
//
// End of standard methods copied from EncodingNLS.cs
//

/// <summary>
/// Counts the bytes produced for <paramref name="count"/> UTF-16 code units:
/// four bytes per ordinary character and four bytes per well-formed surrogate
/// pair. Unpaired surrogates are handed to the encoder fallback, and whatever
/// the fallback yields is counted in their place.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="count">Number of input characters.</param>
/// <param name="encoder">Optional stateful encoder; supplies a pending high surrogate and the fallback buffer.</param>
/// <returns>The computed byte count.</returns>
/// <exception cref="ArgumentException">The encoder's fallback buffer still holds data.</exception>
/// <exception cref="ArgumentOutOfRangeException">The running total wrapped to a negative value.</exception>
internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
{
    BCLDebug.Assert(chars!=null, "[UTF32Encoding.GetByteCount]chars!=null");
    BCLDebug.Assert(count >=0, "[UTF32Encoding.GetByteCount]count >=0");

    char* inputStart = chars;
    char* inputEnd = chars + count;
    int total = 0;
    char pendingHigh = '\0';

    // A fallback buffer is always needed: borrowed from the encoder or freshly created.
    EncoderFallbackBuffer fallback;
    if (encoder == null)
    {
        fallback = this.encoderFallback.CreateFallbackBuffer();
    }
    else
    {
        pendingHigh = encoder.charLeftOver;
        fallback = encoder.FallbackBuffer;

        // Counting must not begin with unread fallback data.
        if (fallback.Remaining > 0)
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
                this.EncodingName, encoder.Fallback.GetType()));
        }
    }

    fallback.InternalInitialize(inputStart, inputEnd, encoder, false);

    for (;;)
    {
        char current;
        while (((current = fallback.InternalGetNextChar()) != 0) || chars < inputEnd)
        {
            // Fallback output takes priority; otherwise consume the next input char.
            if (current == 0)
            {
                current = *chars;
                chars++;
            }

            if (pendingHigh != '\0')
            {
                // The previous char was a high surrogate, so a low surrogate is expected now.
                if (Char.IsLowSurrogate(current))
                {
                    // A complete pair becomes one 4-byte UTF-32 unit.
                    total += 4;
                }
                else
                {
                    // No partner: step back so this char is re-read, and fall back the lone high surrogate.
                    BCLDebug.Assert(chars > inputStart,
                        "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
                    chars--;
                    fallback.InternalFallback(pendingHigh, ref chars);
                }

                pendingHigh = '\0';
            }
            else if (Char.IsHighSurrogate(current))
            {
                // Remember it and decide on the next iteration.
                pendingHigh = current;
            }
            else if (Char.IsLowSurrogate(current))
            {
                // Leading low surrogate is illegal; let the fallback deal with it.
                fallback.InternalFallback(current, ref chars);
            }
            else
            {
                // Ordinary character: 4 bytes.
                total += 4;
            }
        }

        // When flushing, a dangling high surrogate has to be fallen back and its output counted.
        if (!((encoder == null || encoder.MustFlush) && pendingHigh > 0))
        {
            break;
        }

        fallback.InternalFallback(pendingHigh, ref chars);
        pendingHigh = (char)0;
    }

    // A negative total means the additions overflowed.
    if (total < 0)
    {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString(
            "ArgumentOutOfRange_GetByteCountOverflow"));
    }

    BCLDebug.Assert(fallback.Remaining == 0,
        "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");

    return total;
}
/// <summary>
/// Forwards to <c>GetBytes</c> with a null output pointer and a zero byte capacity,
/// returning whatever that call returns.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="count">Number of input characters.</param>
/// <param name="encoder">Optional encoder passed through unchanged.</param>
internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
{
    // NOTE(review): presumably GetBytes treats a null output as "count only" - confirm in the override.
    byte* noOutput = null;
    return GetBytes(chars, count, noOutput, 0, encoder);
}
/// <summary>
/// Encodes UTF-16 input as UTF-32, writing four bytes per character or per
/// well-formed surrogate pair, in big- or little-endian order according to
/// <c>bigEndian</c>. Unpaired surrogates are routed through the encoder fallback.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="charCount">Number of input characters.</param>
/// <param name="bytes">Pointer to the output buffer.</param>
/// <param name="byteCount">Capacity of the output buffer in bytes.</param>
/// <param name="encoder">
/// Optional stateful encoder. When present it supplies a pending high surrogate and the
/// fallback buffer, and receives the leftover surrogate and the number of chars consumed.
/// </param>
/// <returns>The number of bytes written to <paramref name="bytes"/>.</returns>
/// <exception cref="ArgumentException">
/// The encoder throws on overflow and its fallback buffer still holds data.
/// </exception>
internal override unsafe int GetBytes(char *chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
{
    BCLDebug.Assert(chars!=null, "[UTF32Encoding.GetBytes]chars!=null");
    BCLDebug.Assert(bytes!=null, "[UTF32Encoding.GetBytes]bytes!=null");
    BCLDebug.Assert(byteCount >=0, "[UTF32Encoding.GetBytes]byteCount >=0");
    BCLDebug.Assert(charCount >=0, "[UTF32Encoding.GetBytes]charCount >=0");

    char* charStart = chars;
    char* charEnd = chars + charCount;

    byte* byteStart = bytes;
    byte* byteEnd = bytes + byteCount;

    char highSurrogate = '\0';

    // For fallback we may need a fallback buffer
    EncoderFallbackBuffer fallbackBuffer = null;
    if (encoder != null)
    {
        highSurrogate = encoder.charLeftOver;
        fallbackBuffer = encoder.FallbackBuffer;

        // We mustn't have left over fallback data when not converting
        if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0)
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
                this.EncodingName, encoder.Fallback.GetType()));
    }
    else
    {
        fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
    }

    // Tell the fallback buffer about the input range and that it may update the encoder.
    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);

    char ch;
TryAgain:

    while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
    {
        // Fallback output is consumed first; otherwise take the next input char.
        if (ch == 0)
        {
            ch = *chars;
            chars++;
        }

        // Do we need a low surrogate?
        if (highSurrogate != '\0')
        {
            // The previous char was a high surrogate, so a low surrogate is expected here.
            if (Char.IsLowSurrogate(ch))
            {
                // Combine the pair into a single value.
                uint iTemp = GetSurrogate(highSurrogate, ch);
                highSurrogate = '\0';

                // One surrogate pair becomes one 4-byte UTF-32 unit.
                if (bytes+3 >= byteEnd)
                {
                    // Not enough room for 4 bytes: un-consume both halves of the pair.
                    if (fallbackBuffer.bFallingBack)
                    {
                        fallbackBuffer.MovePrevious();  // Aren't using these 2 fallback chars
                        fallbackBuffer.MovePrevious();
                    }
                    else
                    {
                        // Either the input pointer advanced past both chars, or nothing has been
                        // written yet and the overflow helper below will throw.
                        BCLDebug.Assert(chars > charStart + 1 || bytes == byteStart,
                            "[UTF32Encoding.GetBytes]Expected chars to have advanced when no room to add surrogate pair");
                        chars-=2;                       // Aren't using those 2 chars
                    }
                    ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
                    highSurrogate = (char)0;            // Nothing left over (we backed up to start of pair if supplementary)
                    break;
                }

                if (bigEndian)
                {
                    *(bytes++) = (byte)(0x00);
                    *(bytes++) = (byte)(iTemp >> 16);   // Implies & 0xFF, which isn't needed cause high are all 0
                    *(bytes++) = (byte)(iTemp >> 8);    // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp);         // Implies & 0xFF
                }
                else
                {
                    *(bytes++) = (byte)(iTemp);         // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp >> 8);    // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp >> 16);   // Implies & 0xFF, which isn't needed cause high are all 0
                    *(bytes++) = (byte)(0x00);
                }
                continue;
            }

            // Missing low surrogate: step back so this char is re-read, and fall back the high surrogate.
            // The high surrogate may have come from the encoder, but nothing else did.
            BCLDebug.Assert(chars > charStart,
                "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
            chars--;

            // Do the fallback
            fallbackBuffer.InternalFallback(highSurrogate, ref chars);

            // The old high surrogate has been handed to the fallback.
            highSurrogate = '\0';
            continue;
        }

        // Do we have another high surrogate? If so remember it for the next iteration.
        if (Char.IsHighSurrogate(ch))
        {
            highSurrogate = ch;
            continue;
        }

        // Check for illegal characters (leading low surrogate)
        if (Char.IsLowSurrogate(ch))
        {
            fallbackBuffer.InternalFallback(ch, ref chars);

            // Try again with fallback buffer
            continue;
        }

        // Ordinary character: needs 4 bytes.
        if (bytes+3 >= byteEnd)
        {
            // Not enough room: un-consume this char.
            if (fallbackBuffer.bFallingBack)
                fallbackBuffer.MovePrevious();          // Aren't using this fallback char
            else
            {
                // Must've advanced already
                BCLDebug.Assert(chars > charStart,
                    "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
                chars--;                                // Aren't using this char
            }
            ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
            break;                                      // Didn't throw, stop
        }

        if (bigEndian)
        {
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
            *(bytes++) = (byte)(ch);            // Implies & 0xFF
        }
        else
        {
            *(bytes++) = (byte)(ch);            // Implies & 0xFF
            *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)(0x00);
        }
    }

    // When flushing, a dangling high surrogate must be fallen back and its output encoded.
    if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
    {
        fallbackBuffer.InternalFallback(highSurrogate, ref chars);
        highSurrogate = (char)0;
        goto TryAgain;
    }

    // Fix our encoder if we have one
    BCLDebug.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
        "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");

    if (encoder != null)
    {
        // Remember our left over surrogate (or 0 if flushing)
        encoder.charLeftOver = highSurrogate;

        // Need # chars used
        encoder.m_charsUsed = (int)(chars-charStart);
    }

    // Number of bytes written
    return (int)(bytes - byteStart);
}
/// <summary>
/// Counts the bytes produced for the input: one byte per character up to U+007F,
/// with every other character handed to the encoder fallback. When the active
/// fallback is a replacement fallback with <c>MaxCharCount == 1</c>, the input
/// length (plus one for a pending leftover char) is returned without scanning.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="charCount">Number of input characters.</param>
/// <param name="encoder">Optional stateful encoder supplying a leftover char and fallback buffer.</param>
/// <returns>The computed byte count.</returns>
/// <exception cref="ArgumentException">
/// The encoder throws on overflow and its fallback buffer still holds data.
/// </exception>
internal override unsafe int GetByteCount(char *chars, int charCount, EncoderNLS encoder)
{
    char* inputStart = chars;
    char* inputEnd = chars + charCount;
    char pending = '\0';
    EncoderReplacementFallback replacement;
    EncoderFallbackBuffer buffer = null;

    if (encoder == null)
    {
        replacement = base.EncoderFallback as EncoderReplacementFallback;
    }
    else
    {
        pending = encoder.charLeftOver;
        replacement = encoder.Fallback as EncoderReplacementFallback;

        if (encoder.InternalHasFallbackBuffer)
        {
            buffer = encoder.FallbackBuffer;
            if ((buffer.Remaining > 0) && encoder.m_throwOnOverflow)
            {
                throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", new object[] { this.EncodingName, encoder.Fallback.GetType() }));
            }

            buffer.InternalInitialize(chars, inputEnd, encoder, false);
        }
    }

    // Shortcut: a single-char replacement means the count follows directly from the input length.
    if ((replacement != null) && (replacement.MaxCharCount == 1))
    {
        return (pending > '\0') ? charCount + 1 : charCount;
    }

    int total = 0;

    // A char left over from the previous call goes to the fallback first.
    if (pending > '\0')
    {
        buffer = encoder.FallbackBuffer;
        buffer.InternalInitialize(chars, inputEnd, encoder, false);
        buffer.InternalFallback(pending, ref chars);
    }

    char current;
    while (((current = (buffer == null) ? '\0' : buffer.InternalGetNextChar()) != '\0') || (chars < inputEnd))
    {
        // Nothing from the fallback: read the next input char.
        if (current == '\0')
        {
            current = *chars;
            chars++;
        }

        if (current <= '\x007f')
        {
            total++;
            continue;
        }

        // Outside the single-byte range: create the fallback buffer lazily, then fall back.
        if (buffer == null)
        {
            buffer = (encoder == null)
                ? base.encoderFallback.CreateFallbackBuffer()
                : encoder.FallbackBuffer;
            buffer.InternalInitialize(inputStart, inputEnd, encoder, false);
        }

        buffer.InternalFallback(current, ref chars);
    }

    return total;
}
// Set the above values
// This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.

/// <summary>
/// Records the input range, encoder and encoder-update flag for the coming
/// operation and resets the buffer's per-operation state.
/// </summary>
/// <param name="charStart">Stored as the start of the input range.</param>
/// <param name="charEnd">Stored as the end of the input range.</param>
/// <param name="encoder">Encoder associated with this operation; may be null at call sites.</param>
/// <param name="setEncoder">Stored flag; callers pass false when counting and true when converting.</param>
internal unsafe void InternalInitialize(char* charStart, char* charEnd, EncoderNLS encoder, bool setEncoder)
{
    // Per-operation bookkeeping starts from a clean slate.
    this.iRecursionCount = 0;
    this.bFallingBack = false;
    this.bUsedEncoder = false;

    // Caller-supplied context.
    this.setEncoder = setEncoder;
    this.encoder = encoder;
    this.charEnd = charEnd;
    this.charStart = charStart;
}
/// <summary>
/// Default encoder-aware overload: ignores <paramref name="encoder"/> and forwards
/// to the two-argument <c>GetByteCount</c>.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="count">Number of input characters.</param>
/// <param name="encoder">Unused by this implementation.</param>
/// <returns>The value returned by the two-argument overload.</returns>
internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    int result = GetByteCount(chars, count);
    return result;
}
/// <summary>
/// Forwards to <c>GetBytes</c> with a null output buffer and zero capacity and
/// returns its result.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="count">Number of input characters.</param>
/// <param name="encoder">Optional encoder passed through unchanged.</param>
[System.Security.SecurityCritical]  // auto-generated
public override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    // A null byte pointer is the "just count" request to GetBytes.
    int needed = GetBytes(chars, count, null, 0, encoder);
    return needed;
}
/// <summary>
/// Handles running out of output space. Calls the parameterless
/// <c>ThrowBytesOverflow</c> (after resetting the encoder's fallback buffer, if it has
/// one) when there is no encoder, the encoder throws on overflow, or nothing was
/// encoded. Then clears the encoder's must-flush state.
/// </summary>
/// <param name="encoder">Encoder in use, or null.</param>
/// <param name="nothingEncoded">True when no output was produced before space ran out.</param>
internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
{
    bool mustThrow = (encoder == null) || encoder.m_throwOnOverflow || nothingEncoded;
    if (mustThrow)
    {
        // Drop any partially consumed fallback data before raising the error.
        if ((encoder != null) && encoder.InternalHasFallbackBuffer)
        {
            encoder.FallbackBuffer.InternalReset();
        }

        // NOTE(review): presumably this overload always throws; otherwise a null encoder
        // would be dereferenced below - confirm.
        this.ThrowBytesOverflow();
    }

    encoder.ClearMustFlush();
}
/// <summary>
/// Encodes characters as GB18030. ASCII is emitted as one byte, surrogate pairs
/// and characters for which <c>Is4Byte</c> is true as four bytes, and all other
/// characters as the two bytes found in <c>mapUnicodeToBytes</c>. Unpaired
/// surrogates go through the buffer's fallback.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="charCount">Number of input characters.</param>
/// <param name="bytes">Output pointer; may be null, in which case the encoder's leftover char is not updated.</param>
/// <param name="byteCount">Capacity of the output buffer in bytes.</param>
/// <param name="encoder">Optional stateful encoder supplying and receiving the leftover high surrogate.</param>
/// <returns>The byte buffer's <c>Count</c> after processing.</returns>
[System.Security.SecurityCritical]  // auto-generated
public override unsafe int GetBytes(char* chars, int charCount,
                                    byte* bytes, int byteCount, EncoderNLS encoder)
{
    // Parameters were validated by the caller; bytes is deliberately allowed to be null.
    Debug.Assert(byteCount >= 0, "[GB18030Encoding.GetBytes]byteCount is negative");
    Debug.Assert(chars != null, "[GB18030Encoding.GetBytes]chars is null");
    Debug.Assert(charCount >= 0, "[GB18030Encoding.GetBytes]charCount is negative");

    // The encoder fallback must exist.
    Debug.Assert(EncoderFallback != null, "[GB18030Encoding.GetBytes]Attempting to use null encoder fallback");

    // Pick up a high surrogate left over from the previous call, if any.
    char pendingHigh = (char)0;
    if (encoder != null)
    {
        pendingHigh = encoder.charLeftOver;
    }

    EncodingByteBuffer buffer = new EncodingByteBuffer(this, encoder, bytes, byteCount, chars, charCount);

    // The scan is repeated whenever a flush falls back a trailing high surrogate.
    bool rescan;
    do
    {
        rescan = false;

        while (buffer.MoreData)
        {
            char ch = buffer.GetNextChar();

            if (pendingHigh != 0)
            {
                Debug.Assert(Char.IsHighSurrogate(pendingHigh),
                    "[GB18030Encoding.GetBytes] leftover character should be high surrogate, not 0x" + ((int)pendingHigh).ToString("X4", CultureInfo.InvariantCulture));

                if (!Char.IsLowSurrogate(ch))
                {
                    // No partner: put this char back, fall back the high surrogate, then retry this char.
                    buffer.MovePrevious(false);                  // (Ignoring this character, don't throw)
                    bool fellBack = buffer.Fallback(pendingHigh);
                    pendingHigh = (char)0;
                    if (!fellBack)
                    {
                        break;
                    }
                    continue;
                }

                // Complete pair: derive the four bytes from the combined offset, low byte first.
                int offset = ((pendingHigh - 0xd800) << 10) + (ch - 0xdc00);

                byte fourth = (byte)((offset % 0x0a) + 0x30);
                offset /= 0x0a;
                byte third = (byte)((offset % 0x7e) + 0x81);
                offset /= 0x7e;
                byte second = (byte)((offset % 0x0a) + 0x30);
                offset /= 0x0a;
                Debug.Assert(offset < 0x6f,
                    "[GB18030Encoding.GetBytes](1) Expected offset < 0x6f, not 0x" + offset.ToString("X2", CultureInfo.InvariantCulture));

                pendingHigh = (char)0;
                if (!buffer.AddByte((byte)(offset + 0x90), second, third, fourth))
                {
                    // AddByte already backed up one char; back up the other half of the pair too.
                    buffer.MovePrevious(false);                  // (don't throw)
                    break;
                }
            }
            else if (ch <= 0x7f)
            {
                // ASCII: a single byte.
                if (!buffer.AddByte((byte)ch))
                {
                    break;
                }
            }
            else if (Char.IsHighSurrogate(ch))
            {
                // Hold it until the next char is seen.
                pendingHigh = ch;
            }
            else if (Char.IsLowSurrogate(ch))
            {
                // A low surrogate with no preceding high surrogate.
                if (!buffer.Fallback(ch))
                {
                    break;
                }
            }
            else
            {
                // Neither surrogate nor ASCII: consult the mapping table.
                ushort iBytes = mapUnicodeToBytes[ch];

                if (Is4Byte(ch))
                {
                    // Four-byte form, derived from the table value low byte first.
                    byte fourth = (byte)((iBytes % 0x0a) + 0x30);
                    iBytes /= 0x0a;
                    byte third = (byte)((iBytes % 0x7e) + 0x81);
                    iBytes /= 0x7e;
                    byte second = (byte)((iBytes % 0x0a) + 0x30);
                    iBytes /= 0x0a;
                    Debug.Assert(iBytes < 0x7e,
                        "[GB18030Encoding.GetBytes]Expected iBytes < 0x7e, not 0x" + iBytes.ToString("X2", CultureInfo.InvariantCulture));
                    if (!buffer.AddByte((byte)(iBytes + 0x81), second, third, fourth))
                    {
                        break;
                    }
                }
                else
                {
                    // Two-byte form: high byte then low byte of the table value.
                    if (!buffer.AddByte(unchecked((byte)(iBytes >> 8)), unchecked((byte)(iBytes & 0xff))))
                    {
                        break;
                    }
                }
            }
        }

        // When flushing, a dangling high surrogate is fallen back and the scan repeated.
        if ((encoder == null || encoder.MustFlush) && (pendingHigh > 0))
        {
            buffer.Fallback(pendingHigh);
            pendingHigh = (char)0;
            rescan = true;
        }
    }
    while (rescan);

    if (encoder != null)
    {
        // Only persist the leftover when actually converting (bytes != null).
        if (bytes != null)
        {
            encoder.charLeftOver = pendingHigh;
        }

        encoder.m_charsUsed = buffer.CharsUsed;
    }

    return buffer.Count;
}
/// <summary>
/// Pointer-based encoding entry point that each concrete encoding must implement.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="charCount">Number of input characters.</param>
/// <param name="bytes">Pointer to the output byte buffer.</param>
/// <param name="byteCount">Capacity of the output buffer in bytes.</param>
/// <param name="encoder">Encoder state associated with the call.</param>
/// <returns>An <see cref="int"/> result defined by the implementation.</returns>
// NOTE(review): sibling implementations return the number of bytes produced, accept a
// null encoder, and some treat a null bytes pointer as "count only" - confirm that this
// is the contract intended for this declaration.
public unsafe abstract int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS encoder);
/// <summary>
/// Counts the bytes produced for the input using <c>mapUnicodeToBytes</c>: one
/// byte for table values below 0x100, two bytes otherwise. Characters with no
/// table entry are handed to the encoder fallback.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="count">Number of input characters.</param>
/// <param name="encoder">Optional stateful encoder supplying a leftover char and fallback buffer.</param>
/// <returns>The computed byte count.</returns>
/// <exception cref="ArgumentException">The encoder's fallback buffer still holds data.</exception>
[System.Security.SecurityCritical]  // auto-generated
public override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    // Parameters were validated by the caller, so only assert here.
    Debug.Assert(count >= 0, "[DBCSCodePageEncoding.GetByteCount]count is negative");
    Debug.Assert(chars != null, "[DBCSCodePageEncoding.GetByteCount]chars is null");

    // The encoder fallback must exist.
    Debug.Assert(EncoderFallback != null, "[DBCSCodePageEncoding.GetByteCount]Attempting to use null fallback");

    CheckMemorySection();

    char pendingHigh = (char)0;
    if (encoder != null)
    {
        pendingHigh = encoder.charLeftOver;

        // Unread fallback data is rejected regardless of the encoder's overflow mode.
        if (encoder.InternalHasFallbackBuffer && encoder.FallbackBuffer.Remaining > 0)
        {
            throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, EncodingName, encoder.Fallback.GetType()));
        }
    }

    char* inputStart = chars;
    char* inputEnd = chars + count;
    int total = 0;

    // The fallback buffer is created lazily; the helper wraps whatever buffer is current.
    EncoderFallbackBuffer fallbackBuffer = null;
    EncoderFallbackBufferHelper helper = new EncoderFallbackBufferHelper(fallbackBuffer);

    if (pendingHigh > 0)
    {
        Debug.Assert(Char.IsHighSurrogate(pendingHigh), "[DBCSCodePageEncoding.GetByteCount]leftover character should be high surrogate");
        Debug.Assert(encoder != null, "[DBCSCodePageEncoding.GetByteCount]Expect to have encoder if we have a charLeftOver");

        // A leftover surrogate can only be fallen back.
        fallbackBuffer = encoder.FallbackBuffer;
        helper = new EncoderFallbackBufferHelper(fallbackBuffer);
        helper.InternalInitialize(chars, inputEnd, encoder, false);

        // This will fallback a pair if *chars is a low surrogate
        helper.InternalFallback(pendingHigh, ref chars);
    }

    char current;
    while ((current = (fallbackBuffer == null) ? '\0' : helper.InternalGetNextChar()) != 0 ||
           chars < inputEnd)
    {
        // Nothing from the fallback: read the next input char.
        if (current == 0)
        {
            current = *chars;
            chars++;
        }

        ushort mapped = mapUnicodeToBytes[current];

        if (mapped != 0 || current == (char)0)
        {
            // Mapped (or NUL): one byte, or two when the table value needs a lead byte.
            total += (mapped >= 0x100) ? 2 : 1;
            continue;
        }

        // Unmapped (this also catches surrogates): set up the fallback on first use.
        if (fallbackBuffer == null)
        {
            fallbackBuffer = (encoder == null)
                ? EncoderFallback.CreateFallbackBuffer()
                : encoder.FallbackBuffer;

            helper = new EncoderFallbackBufferHelper(fallbackBuffer);
            helper.InternalInitialize(inputStart, inputEnd, encoder, false);
        }

        helper.InternalFallback(current, ref chars);
    }

    return total;
}
// Our workhorse version

/// <summary>
/// Forwards to <c>GetBytes</c> with a null output pointer and zero capacity and
/// returns its result.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="count">Number of input characters.</param>
/// <param name="baseEncoder">Optional encoder passed through unchanged.</param>
internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
{
    // The null pointer asks GetBytes for a count instead of output.
    int needed = GetBytes(chars, count, null, 0, baseEncoder);
    return needed;
}
/// <summary>
/// Encodes characters through <c>mapUnicodeToBytes</c>: table values below 0x100
/// produce one byte, larger values produce a lead byte followed by a trail byte.
/// Characters with no table entry are handed to the encoder fallback.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="charCount">Number of input characters.</param>
/// <param name="bytes">Pointer to the output buffer.</param>
/// <param name="byteCount">Capacity of the output buffer in bytes.</param>
/// <param name="encoder">
/// Optional stateful encoder. Supplies a leftover char and the fallback buffer, and
/// receives the number of chars consumed.
/// </param>
/// <returns>The number of bytes written.</returns>
/// <exception cref="ArgumentException">
/// The encoder throws on overflow and its fallback buffer still holds data.
/// </exception>
internal override unsafe int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS encoder)
{
    base.CheckMemorySection();

    EncoderFallbackBuffer fallbackBuffer = null;
    char* charEnd = chars + charCount;
    char* charStart = chars;
    byte* byteStart = bytes;
    byte* byteEnd = bytes + byteCount;
    char charLeftOver = '\0';

    if (encoder != null)
    {
        charLeftOver = encoder.charLeftOver;
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);

        if (encoder.m_throwOnOverflow && (fallbackBuffer.Remaining > 0))
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", new object[] { this.EncodingName, encoder.Fallback.GetType() }));
        }

        // A char left over from the previous call goes to the fallback first.
        if (charLeftOver > '\0')
        {
            fallbackBuffer.InternalFallback(charLeftOver, ref chars);
        }
    }

    char ch;
    while (((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != '\0') || (chars < charEnd))
    {
        // Nothing from the fallback: read the next input char.
        if (ch == '\0')
        {
            ch = chars[0];
            chars++;
        }

        ushort mapped = this.mapUnicodeToBytes[ch];

        if ((mapped == 0) && (ch != '\0'))
        {
            // Unmapped: create the fallback buffer on first use (only reachable without an encoder).
            if (fallbackBuffer == null)
            {
                fallbackBuffer = base.encoderFallback.CreateFallbackBuffer();
                fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
            }

            fallbackBuffer.InternalFallback(ch, ref chars);
            continue;
        }

        if (mapped >= 0x100)
        {
            // Two bytes needed; make sure both fit before writing the lead byte.
            if ((bytes + 1) >= byteEnd)
            {
                // Un-consume the char (from the input or from the fallback) before reporting overflow.
                if ((fallbackBuffer == null) || !fallbackBuffer.bFallingBack)
                {
                    chars--;
                }
                else
                {
                    fallbackBuffer.MovePrevious();
                }

                base.ThrowBytesOverflow(encoder, chars == charStart);
                break;
            }

            bytes[0] = (byte)(mapped >> 8);
            bytes++;
        }
        else if (bytes >= byteEnd)
        {
            // No room for the single byte.
            if ((fallbackBuffer == null) || !fallbackBuffer.bFallingBack)
            {
                chars--;
            }
            else
            {
                fallbackBuffer.MovePrevious();
            }

            base.ThrowBytesOverflow(encoder, chars == charStart);
            break;
        }

        // Trail byte (or the only byte for single-byte values).
        bytes[0] = (byte)(mapped & 0xff);
        bytes++;
    }

    if (encoder != null)
    {
        // Clear the leftover unless the fallback buffer reports it used the encoder.
        if ((fallbackBuffer != null) && !fallbackBuffer.bUsedEncoder)
        {
            encoder.charLeftOver = '\0';
        }

        // char* subtraction already yields a count of chars; dividing by sizeof(char)
        // again (as the decompiled code did) reported only half the chars consumed.
        encoder.m_charsUsed = (int)(chars - charStart);
    }

    return (int)(bytes - byteStart);
}
/// <summary>
/// Encodes characters as UTF-7. Characters flagged in <c>_directEncode</c> are
/// written as-is, '+' outside a base64 run is written as "+-", and everything
/// else is accumulated 16 bits at a time and emitted six bits per byte through
/// <c>_base64Bytes</c>, with '+' opening and '-' closing each run.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="charCount">Number of input characters.</param>
/// <param name="bytes">Output pointer; when null the encoder state is left untouched.</param>
/// <param name="byteCount">Capacity of the output buffer in bytes.</param>
/// <param name="baseEncoder">Optional encoder, cast to <c>UTF7Encoding.Encoder</c>, carrying pending bits between calls.</param>
/// <returns>The byte buffer's <c>Count</c> after processing.</returns>
internal override unsafe int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS baseEncoder)
{
    Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetBytes]byteCount >=0");
    Debug.Assert(chars != null, "[UTF7Encoding.GetBytes]chars!=null");
    Debug.Assert(charCount >= 0, "[UTF7Encoding.GetBytes]charCount >=0");

    UTF7Encoding.Encoder encoder = (UTF7Encoding.Encoder)baseEncoder;

    // Bit accumulator; a count of -1 means "not inside a base64 run".
    int pendingBits = 0;
    int pendingCount = -1;

    Encoding.EncodingByteBuffer output = new Encoding.EncodingByteBuffer(
        this, encoder, bytes, byteCount, chars, charCount);

    if (encoder != null)
    {
        pendingBits = encoder.bits;
        pendingCount = encoder.bitCount;

        // Drain whole sextets carried over from the previous call.
        while (pendingCount >= 6)
        {
            pendingCount -= 6;

            // If this fails there will never be enough room.
            if (!output.AddByte(_base64Bytes[(pendingBits >> pendingCount) & 0x3F]))
            {
                ThrowBytesOverflow(encoder, output.Count == 0);
            }
        }
    }

    while (output.MoreData)
    {
        char ch = output.GetNextChar();

        if (ch < 0x80 && _directEncode[ch])
        {
            if (pendingCount >= 0)
            {
                // Leaving a base64 run: flush the partial sextet, then terminate with '-'.
                if (pendingCount > 0)
                {
                    if (!output.AddByte(_base64Bytes[(pendingBits << (6 - pendingCount)) & 0x3F]))
                    {
                        break;                                  // Stop here, didn't throw
                    }

                    pendingCount = 0;
                }

                if (!output.AddByte((byte)'-'))
                {
                    break;                                      // Stop here, didn't throw
                }

                pendingCount = -1;
            }

            // The character itself.
            if (!output.AddByte((byte)ch))
            {
                break;                                          // Stop here, didn't throw
            }
        }
        else if (pendingCount < 0 && ch == '+')
        {
            // A literal plus outside a run is written as "+-".
            if (!output.AddByte((byte)'+', (byte)'-'))
            {
                break;                                          // Stop here, didn't throw
            }
        }
        else
        {
            if (pendingCount < 0)
            {
                // Open a base64 run.
                if (!output.AddByte((byte)'+'))
                {
                    break;                                      // Stop here, didn't throw
                }

                // In bit mode now, with nothing stored yet.
                pendingCount = 0;
            }

            // Append this char's 16 bits and emit as many sextets as are available.
            pendingBits = (pendingBits << 16) | ch;
            pendingCount += 16;

            while (pendingCount >= 6)
            {
                pendingCount -= 6;
                if (!output.AddByte(_base64Bytes[(pendingBits >> pendingCount) & 0x3F]))
                {
                    pendingCount += 6;                          // These bits were not used
                    ch = output.GetNextChar();                  // Still processing this char, but AddByte
                                                                // stepped back when it ran out of space
                    break;                                      // Not enough room for bytes
                }
            }

            if (pendingCount >= 6)
            {
                break;                                          // Didn't have room to encode enough bits
            }
        }
    }

    // Flush leftover bits when there is no encoder or it must flush.
    // MustFlush may have been cleared by ThrowBytesOverflow earlier if converting.
    if (pendingCount >= 0 && (encoder == null || encoder.MustFlush))
    {
        // Emit the partial sextet, if any.
        if (pendingCount > 0 && output.AddByte(_base64Bytes[(pendingBits << (6 - pendingCount)) & 0x3F]))
        {
            pendingCount = 0;
        }

        // If the byte above failed, this one fails as well.
        if (output.AddByte((byte)'-'))
        {
            // Bit mode is now off.
            pendingBits = 0;
            pendingCount = -1;
        }
        else
        {
            // AddByte stepped the char position back; restore it so state carries to the next call.
            output.GetNextChar();
        }
    }

    // Persist state only when converting (bytes != null) and an encoder exists.
    if (bytes != null && encoder != null)
    {
        encoder.bits = pendingBits;
        encoder.bitCount = pendingCount;
        encoder._charsUsed = output.CharsUsed;
    }

    return output.Count;
}
// GetByteCount
// Note: We start by assuming that the output will be the same as count. Having
// an encoder or fallback may change that assumption

/// <summary>
/// Counts the bytes produced for the input: one byte per character up to U+00FF,
/// with every other character handed to the encoder fallback. When the active
/// fallback is a replacement fallback with <c>MaxCharCount == 1</c>, the input
/// length (plus one for a pending leftover char) is returned without scanning.
/// </summary>
/// <param name="chars">Pointer to the first input character.</param>
/// <param name="charCount">Number of input characters.</param>
/// <param name="encoder">Optional stateful encoder supplying a leftover char and fallback buffer.</param>
/// <returns>The computed byte count.</returns>
internal override unsafe int GetByteCount(char* chars, int charCount, EncoderNLS encoder)
{
    // Parameters were validated by the caller, so only assert here.
    Debug.Assert(charCount >= 0, "[Latin1Encoding.GetByteCount]count is negative");
    Debug.Assert(chars != null, "[Latin1Encoding.GetByteCount]chars is null");

    // The encoder fallback must exist.
    Debug.Assert(encoderFallback != null, "[Latin1Encoding.GetByteCount]Attempting to use null fallback encoder");

    char pendingHigh = (char)0;
    EncoderReplacementFallback replacement;

    if (encoder == null)
    {
        replacement = this.EncoderFallback as EncoderReplacementFallback;
    }
    else
    {
        pendingHigh = encoder.charLeftOver;
        Debug.Assert(pendingHigh == 0 || Char.IsHighSurrogate(pendingHigh),
            "[Latin1Encoding.GetByteCount]leftover character should be high surrogate");

        replacement = encoder.Fallback as EncoderReplacementFallback;

        // The fallback buffer is expected to be empty here, so this is only asserted.
        Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
            encoder.FallbackBuffer.Remaining == 0,
            "[Latin1CodePageEncoding.GetByteCount]Expected empty fallback buffer");
    }

    // Single-char replacement: output length equals input length, plus one byte
    // for a char left over from a previous call.
    if (replacement != null && replacement.MaxCharCount == 1)
    {
        return (pendingHigh > 0) ? charCount + 1 : charCount;
    }

    // Otherwise every char has to be examined.
    char* inputStart = chars;
    char* inputEnd = chars + charCount;
    int total = 0;
    EncoderFallbackBuffer fallbackBuffer = null;

    if (pendingHigh > 0)
    {
        Debug.Assert(encoder != null, "[Latin1Encoding.GetByteCount]Expected encoder if we have charLeftOver");

        // A leftover surrogate can only be fallen back.
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackBuffer.InternalInitialize(chars, inputEnd, encoder, false);

        // This will fallback a pair if *chars is a low surrogate
        fallbackBuffer.InternalFallback(pendingHigh, ref chars);
    }

    char current;
    while ((current = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
           chars < inputEnd)
    {
        // Nothing from the fallback: read the next input char.
        if (current == 0)
        {
            current = *chars;
            chars++;
        }

        if (current <= 0xff)
        {
            total++;
            continue;
        }

        // Above U+00FF (surrogates included): set up the fallback on first use.
        if (fallbackBuffer == null)
        {
            fallbackBuffer = (encoder == null)
                ? this.encoderFallback.CreateFallbackBuffer()
                : encoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(inputStart, inputEnd, encoder, false);
        }

        fallbackBuffer.InternalFallback(current, ref chars);
    }

    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
        "[Latin1Encoding.GetByteCount]Expected Empty fallback buffer");

    return total;
}
// Encodes charCount chars starting at chars into bytes, producing one byte per encoded char
// by looking each char up in _mapUnicodeToBytes. A table entry of 0 for any char other than
// U+0000 is treated as "unmappable" and the char is handed to the encoder fallback.
//
//   chars, charCount - input characters (asserted non-null / non-negative)
//   bytes, byteCount - output buffer and its capacity (asserted non-null / non-negative)
//   encoder          - may be null; when non-null it supplies charLeftOver and the fallback,
//                      and on exit receives charLeftOver (cleared) and m_charsUsed
//
// Returns the number of bytes written (bytes - byteStart).
//
// Two paths:
//   * Fast path: the fallback is an EncoderReplacementFallback with MaxCharCount == 1 whose
//     replacement char maps to a non-zero byte; unmappable chars are replaced inline.
//   * Slow path: a fallback buffer (wrapped in EncoderFallbackBufferHelper) is created on demand
//     and its chars are drained through the same table lookup.
// When the output buffer is too small ThrowBytesOverflow(encoder, nothingEncoded) is called; the
// statements following each call handle the case where that call returns instead of throwing.
public override unsafe int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS encoder)
{
    // Just need to ASSERT, this is called by something else internal that checked parameters already
    Debug.Assert(bytes != null, "[SBCSCodePageEncoding.GetBytes]bytes is null");
    Debug.Assert(byteCount >= 0, "[SBCSCodePageEncoding.GetBytes]byteCount is negative");
    Debug.Assert(chars != null, "[SBCSCodePageEncoding.GetBytes]chars is null");
    Debug.Assert(charCount >= 0, "[SBCSCodePageEncoding.GetBytes]charCount is negative");

    // Assert because we shouldn't be able to have a null encoder fallback.
    Debug.Assert(EncoderFallback != null, "[SBCSCodePageEncoding.GetBytes]Attempting to use null encoder fallback");

    CheckMemorySection();

    // Need to test fallback
    EncoderReplacementFallback fallback = null;

    // Get any left over characters
    char charLeftOver = (char)0;
    if (encoder != null)
    {
        charLeftOver = encoder.charLeftOver;
        Debug.Assert(charLeftOver == 0 || char.IsHighSurrogate(charLeftOver), "[SBCSCodePageEncoding.GetBytes]leftover character should be high surrogate");
        fallback = encoder.Fallback as EncoderReplacementFallback;

        // Verify that we have no fallback buffer; for SBCS it's always empty, so just assert
        Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer || encoder.FallbackBuffer.Remaining == 0, "[SBCSCodePageEncoding.GetBytes]Expected empty fallback buffer at start");

        // if (encoder.m_throwOnOverflow && encoder.InternalHasFallbackBuffer &&
        //     encoder.FallbackBuffer.Remaining > 0)
        //     throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
        //         EncodingName, encoder.Fallback.GetType()));
    }
    else
    {
        // If we aren't using default fallback then we may have a complicated count.
        fallback = EncoderFallback as EncoderReplacementFallback;
    }

    // prepare our end
    char *charEnd = chars + charCount;
    byte *byteStart = bytes;
    char *charStart = chars;

    // See if we do the fast default or slightly slower fallback
    if (fallback != null && fallback.MaxCharCount == 1)
    {
        // Make sure our fallback character is valid first
        byte bReplacement = _mapUnicodeToBytes[fallback.DefaultString[0]];

        // Check for replacements in range, otherwise fall back to slow version.
        if (bReplacement != 0)
        {
            // We should have exactly as many output bytes as input bytes, unless there's a leftover
            // character, in which case we may need one more.

            // If we had a leftover character we will have to add a ? (This happens if they had a funky
            // fallback last time, but not this time. We can't spit any out though,
            // because with fallback encoder each surrogate is treated as a separate code point)
            if (charLeftOver > 0)
            {
                // Have to have room
                // Throw even if doing no throw version because this is just 1 char,
                // so buffer will never be big enough
                if (byteCount == 0)
                {
                    ThrowBytesOverflow(encoder, true);
                }

                // This'll make sure we still have more room and also make sure our return value is correct.
                *(bytes++) = bReplacement;
                byteCount--; // We used one of the ones we were counting.
            }

            // This keeps us from overrunning our output buffer
            if (byteCount < charCount)
            {
                // Throw or make buffer smaller?
                ThrowBytesOverflow(encoder, byteCount < 1);

                // Just use what we can
                charEnd = chars + byteCount;
            }

            // Simple way
            while (chars < charEnd)
            {
                char ch2 = *chars;
                chars++;

                byte bTemp = _mapUnicodeToBytes[ch2];

                // Check for fallback: a 0 entry for a non-NUL char means "no mapping"
                if (bTemp == 0 && ch2 != (char)0)
                {
                    *bytes = bReplacement;
                }
                else
                {
                    *bytes = bTemp;
                }

                bytes++;
            }

            // Clear encoder
            if (encoder != null)
            {
                encoder.charLeftOver = (char)0;
                encoder.m_charsUsed = (int)(chars - charStart);
            }

            return((int)(bytes - byteStart));
        }
    }

    // Slower version, have to do real fallback.

    // For fallback we may need a fallback buffer, we know we aren't default fallback
    EncoderFallbackBuffer fallbackBuffer = null;

    // prepare our end
    byte *byteEnd = bytes + byteCount;
    EncoderFallbackBufferHelper fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);

    // We may have a left over character from last time, try and process it.
    if (charLeftOver > 0)
    {
        // Since left over char was a surrogate, it'll have to be fallen back.
        // Get Fallback
        Debug.Assert(encoder != null, "[SBCSCodePageEncoding.GetBytes]Expect to have encoder if we have a charLeftOver");
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);
        fallbackHelper.InternalInitialize(chars, charEnd, encoder, true);

        // This will fallback a pair if *chars is a low surrogate
        fallbackHelper.InternalFallback(charLeftOver, ref chars);
        if (fallbackBuffer.Remaining > byteEnd - bytes)
        {
            // Throw it, if we don't have enough for this we never will
            ThrowBytesOverflow(encoder, true);
        }
    }

    // Now we may have fallback char[] already from the encoder fallback above

    // Go ahead and do it, including the fallback.
    // Each iteration takes the next pending fallback char if there is one, else the next input char.
    char ch;
    while ((ch = (fallbackBuffer == null) ? '\0' : fallbackHelper.InternalGetNextChar()) != 0 || chars < charEnd)
    {
        // First unwind any fallback
        if (ch == 0)
        {
            // No fallback, just get next char
            ch = *chars;
            chars++;
        }

        // get byte for this char
        byte bTemp = _mapUnicodeToBytes[ch];

        // Check for fallback, this'll catch surrogate pairs too.
        if (bTemp == 0 && ch != (char)0)
        {
            // Get Fallback
            if (fallbackBuffer == null)
            {
                // Create & init fallback buffer
                if (encoder == null)
                {
                    fallbackBuffer = EncoderFallback.CreateFallbackBuffer();
                }
                else
                {
                    fallbackBuffer = encoder.FallbackBuffer;
                }

                fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);

                // chars has moved, so recompute the original start (charEnd - charCount) so that the
                // Exception fallback index will be correct
                fallbackHelper.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
            }

            // Make sure we have enough room.  Each fallback char will be 1 output char
            // (or recursion exception will be thrown)
            fallbackHelper.InternalFallback(ch, ref chars);
            if (fallbackBuffer.Remaining > byteEnd - bytes)
            {
                // Didn't use this char, reset it
                Debug.Assert(chars > charStart, "[SBCSCodePageEncoding.GetBytes]Expected chars to have advanced (fallback)");
                chars--;
                fallbackHelper.InternalReset();

                // Throw it & drop this data
                ThrowBytesOverflow(encoder, chars == charStart);
                break;
            }

            continue;
        }

        // We'll use this one
        // Bounds check
        if (bytes >= byteEnd)
        {
            // didn't use this char, we'll throw or use buffer
            Debug.Assert(fallbackBuffer == null || fallbackHelper.bFallingBack == false, "[SBCSCodePageEncoding.GetBytes]Expected to NOT be falling back");
            if (fallbackBuffer == null || fallbackHelper.bFallingBack == false)
            {
                Debug.Assert(chars > charStart, "[SBCSCodePageEncoding.GetBytes]Expected chars to have advanced (normal)");
                chars--; // don't use last char
            }
            ThrowBytesOverflow(encoder, chars == charStart); // throw ?
            break; // don't throw, stop
        }

        // Go ahead and add it
        *bytes = bTemp;
        bytes++;
    }

    // encoder stuff if we have one
    if (encoder != null)
    {
        // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
        if (fallbackBuffer != null && !fallbackHelper.bUsedEncoder)
        {
            // Clear it in case of MustFlush
            encoder.charLeftOver = (char)0;
        }

        // Set our chars used count
        encoder.m_charsUsed = (int)(chars - charStart);
    }

    // Expect Empty fallback buffer for SBCS
    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0, "[SBCSEncoding.GetBytes]Expected Empty fallback buffer at end");
    return((int)(bytes - byteStart));
}
// Worker overload for the NLS encodings. The encoder argument may be null.
// Callers are expected to validate their arguments before reaching this internal version,
// which only states its preconditions as contracts.
internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    Contract.Requires(chars != null);
    Contract.Requires(count >= 0);

    // This base implementation does not consult the encoder; it defers to the two-argument overload.
    int byteCount = GetByteCount(chars, count);
    return byteCount;
}
// Encodes charCount chars starting at chars into ASCII bytes written at bytes.
// Chars <= 0x7f are copied as a single byte; every char above 0x7f goes through the encoder fallback.
//
//   chars, charCount - input characters (asserted non-null / non-negative)
//   bytes, byteCount - output buffer and its capacity (asserted non-null / non-negative)
//   encoder          - may be null; when non-null it supplies charLeftOver, the fallback and any
//                      existing fallback buffer, and on exit receives charLeftOver / m_charsUsed
//
// Returns the number of bytes written (bytes - byteStart).
// Throws ArgumentException when the encoder's fallback buffer still holds data and
// encoder.m_throwOnOverflow is set.
//
// Two paths:
//   * Fast path: the fallback is an EncoderReplacementFallback with MaxCharCount == 1 and a
//     replacement char <= 0x7f; non-ASCII chars are replaced inline, one byte per input char.
//   * Slow path: chars are pulled alternately from the fallback buffer and the input, and
//     non-ASCII chars are pushed into the fallback buffer.
// When the output buffer is too small ThrowBytesOverflow(encoder, nothingEncoded) is called; the
// statements following each call handle the case where that call returns instead of throwing.
internal override unsafe int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS encoder)
{
    // Just need to ASSERT, this is called by something else internal that checked parameters already
    Debug.Assert(bytes != null, "[ASCIIEncoding.GetBytes]bytes is null");
    Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetBytes]byteCount is negative");
    Debug.Assert(chars != null, "[ASCIIEncoding.GetBytes]chars is null");
    Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetBytes]charCount is negative");

    // Assert because we shouldn't be able to have a null encoder fallback.
    Debug.Assert(encoderFallback != null, "[ASCIIEncoding.GetBytes]Attempting to use null encoder fallback");

    // Get any left over characters
    char charLeftOver = (char)0;
    EncoderReplacementFallback fallback = null;

    // For fallback we may need a fallback buffer, we know we aren't default fallback.
    EncoderFallbackBuffer fallbackBuffer = null;
    char *charsForFallback;

    // prepare our end
    char *charEnd = chars + charCount;
    byte *byteStart = bytes;
    char *charStart = chars;

    if (encoder != null)
    {
        charLeftOver = encoder.charLeftOver;
        fallback = encoder.Fallback as EncoderReplacementFallback;

        // We mustn't have left over fallback data when counting
        if (encoder.InternalHasFallbackBuffer)
        {
            // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
            fallbackBuffer = encoder.FallbackBuffer;
            if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
            {
                throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
            }

            // Set our internal fallback interesting things.
            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
        }

        Debug.Assert(charLeftOver == 0 || Char.IsHighSurrogate(charLeftOver), "[ASCIIEncoding.GetBytes]leftover character should be high surrogate");

        // Verify that we have no fallback buffer; for ASCII it's always empty, so just assert
        Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer || encoder.FallbackBuffer.Remaining == 0, "[ASCIICodePageEncoding.GetBytes]Expected empty fallback buffer");
    }
    else
    {
        fallback = this.EncoderFallback as EncoderReplacementFallback;
    }

    // See if we do the fast default or slightly slower fallback
    if (fallback != null && fallback.MaxCharCount == 1)
    {
        // Fast version
        char cReplacement = fallback.DefaultString[0];

        // Check for replacements in range, otherwise fall back to slow version.
        if (cReplacement <= (char)0x7f)
        {
            // We should have exactly as many output bytes as input bytes, unless there's a left
            // over character, in which case we may need one more.
            // If we had a left over character will have to add a ? (This happens if they had a funky
            // fallback last time, but not this time.) (We can't spit any out though
            // because with fallback encoder each surrogate is treated as a separate code point)
            if (charLeftOver > 0)
            {
                // Have to have room
                // Throw even if doing no throw version because this is just 1 char,
                // so buffer will never be big enough
                if (byteCount == 0)
                {
                    ThrowBytesOverflow(encoder, true);
                }

                // This'll make sure we still have more room and also make sure our return value is correct.
                *(bytes++) = (byte)cReplacement;
                byteCount--; // We used one of the ones we were counting.
            }

            // This keeps us from overrunning our output buffer
            if (byteCount < charCount)
            {
                // Throw or make buffer smaller?
                ThrowBytesOverflow(encoder, byteCount < 1);

                // Just use what we can
                charEnd = chars + byteCount;
            }

            // We just do a quick copy
            while (chars < charEnd)
            {
                char ch2 = *(chars++);
                if (ch2 >= 0x0080)
                {
                    *(bytes++) = (byte)cReplacement;
                }
                else
                {
                    *(bytes++) = unchecked ((byte)(ch2));
                }
            }

            // Clear encoder
            if (encoder != null)
            {
                encoder.charLeftOver = (char)0;
                encoder.m_charsUsed = (int)(chars - charStart);
            }

            return((int)(bytes - byteStart));
        }
    }

    // Slower version, have to do real fallback.

    // prepare our end
    byte *byteEnd = bytes + byteCount;

    // We may have a left over character from last time, try and process it.
    if (charLeftOver > 0)
    {
        // Initialize the buffer
        Debug.Assert(encoder != null, "[ASCIIEncoding.GetBytes]Expected non null encoder if we have surrogate left over");
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);

        // Since left over char was a surrogate, it'll have to be fallen back.
        // Get Fallback
        // This will fallback a pair if *chars is a low surrogate
        charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
        fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
        chars = charsForFallback;
    }

    // Now we may have fallback char[] already from the encoder

    // Go ahead and do it, including the fallback.
    // Each iteration takes the next pending fallback char if there is one, else the next input char.
    char ch;
    while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 || chars < charEnd)
    {
        // First unwind any fallback
        if (ch == 0)
        {
            // No fallback, just get next char
            ch = *chars;
            chars++;
        }

        // Check for fallback, this'll catch surrogate pairs too.
        // All characters >= 0x80 must fall back.
        if (ch > 0x7f)
        {
            // Initialize the buffer
            if (fallbackBuffer == null)
            {
                if (encoder == null)
                {
                    fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                }
                else
                {
                    fallbackBuffer = encoder.FallbackBuffer;
                }
                // charEnd - charCount is the original start of the input
                fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
            }

            // Get Fallback
            charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
            fallbackBuffer.InternalFallback(ch, ref charsForFallback);
            chars = charsForFallback;

            // Go ahead & continue (& do the fallback)
            continue;
        }

        // We'll use this one
        // Bounds check
        if (bytes >= byteEnd)
        {
            // didn't use this char, we'll throw or use buffer
            if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
            {
                Debug.Assert(chars > charStart || bytes == byteStart, "[ASCIIEncoding.GetBytes]Expected chars to have advanced already.");
                chars--; // don't use last char
            }
            else
            {
                fallbackBuffer.MovePrevious();
            }

            // Are we throwing or using buffer?
            ThrowBytesOverflow(encoder, bytes == byteStart); // throw?
            break; // don't throw, stop
        }

        // Go ahead and add it
        *bytes = unchecked ((byte)ch);
        bytes++;
    }

    // Need to do encoder stuff
    if (encoder != null)
    {
        // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
        if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder)
        {
            // Clear it in case of MustFlush
            encoder.charLeftOver = (char)0;
        }

        // Set our chars used count
        encoder.m_charsUsed = (int)(chars - charStart);
    }

    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 || (encoder != null && !encoder.m_throwOnOverflow), "[ASCIIEncoding.GetBytes]Expected Empty fallback buffer at end");

    return((int)(bytes - byteStart));
}
// Reports that the output byte buffer is too small.
//
//   encoder        - the active encoder, or null when encoding without one
//   nothingEncoded - true when no output was produced before running out of room
//
// The parameterless ThrowBytesOverflow() is invoked when there is no encoder, when the encoder
// has m_throwOnOverflow set, or when nothing was encoded; the encoder's fallback buffer (if it
// has one) is reset first. Otherwise the call returns after clearing the encoder's must-flush state.
internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
{
    bool mustThrow = encoder == null || encoder.m_throwOnOverflow || nothingEncoded;

    if (mustThrow)
    {
        // Drop any pending fallback data before raising the error.
        if (encoder != null && encoder.InternalHasFallbackBuffer)
        {
            encoder.FallbackBuffer.InternalReset();
        }

        // The overflow may stem from a user-written encoder fallback whose GetMaxCharCount
        // under-reports; the parameterless overload produces the error for that case.
        // NOTE(review): this relies on ThrowBytesOverflow() never returning; with a null
        // encoder the line after this block would otherwise dereference null. Confirm.
        ThrowBytesOverflow();
    }

    // Not throwing means we are in a convert operation and must remember our flushing state.
    encoder.ClearMustFlush();
}
// Encodes charCount UTF-16 chars starting at chars into UTF-8 bytes written at bytes.
//
//   chars, charCount  - input characters (asserted non-null / non-negative)
//   bytes, byteCount  - output buffer and its capacity (asserted non-null / non-negative)
//   baseEncoder       - may be null; when non-null it is cast to UTF8Encoder, supplies a pending
//                       high surrogate (surrogateChar) and any existing fallback buffer, and on
//                       exit receives surrogateChar and m_charsUsed
//
// Returns the number of bytes written (pTarget - bytes).
// Throws ArgumentException when the encoder's fallback buffer still holds data and
// encoder.m_throwOnOverflow is set.
//
// Structure: an outer "slow loop" handles one char (or surrogate pair) per iteration with full
// range checks, using the labels ProcessChar / EncodeChar as entry points. ch holds either 0,
// a pending high surrogate, or the code point being encoded. Lone surrogates are passed to the
// encoder fallback. When compiled with FASTLOOP, an inner loop copies ASCII runs and encodes
// well-formed multi-byte sequences with reduced checking, dropping back to the slow loop for
// anything unusual.
// When the output buffer is too small ThrowBytesOverflow(encoder, nothingEncoded) is called; the
// statements following the call handle the case where that call returns instead of throwing.
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder)
{
    Contract.Assert(chars!=null, "[UTF8Encoding.GetBytes]chars!=null");
    Contract.Assert(byteCount >=0, "[UTF8Encoding.GetBytes]byteCount >=0");
    Contract.Assert(charCount >=0, "[UTF8Encoding.GetBytes]charCount >=0");
    Contract.Assert(bytes!=null, "[UTF8Encoding.GetBytes]bytes!=null");

    UTF8Encoder encoder = null;

    // For fallback we may need a fallback buffer.
    // We wait to initialize it though in case we don't have any broken input unicode
    EncoderFallbackBuffer fallbackBuffer = null;
    char *pSrc = chars;
    byte *pTarget = bytes;

    char *pEnd = pSrc+charCount;
    byte *pAllocatedBufferEnd = pTarget+byteCount;

    int ch = 0;

    // assume that JIT will enregister pSrc, pTarget and ch

    if (baseEncoder != null) {
        encoder = (UTF8Encoder)baseEncoder;
        ch = encoder.surrogateChar;

        // We mustn't have left over fallback data when counting
        if (encoder.InternalHasFallbackBuffer)
        {
            // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
            fallbackBuffer = encoder.FallbackBuffer;
            if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
                throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", this.EncodingName, encoder.Fallback.GetType()));

            // Set our internal fallback interesting things.
            fallbackBuffer.InternalInitialize(chars, pEnd, encoder, true);
        }
    }

    for (;;) {
        // SLOWLOOP: does all range checks, handles all special cases, but it is slow

        if (pSrc >= pEnd) {

            if (ch == 0) {
                // Check if there's anything left to get out of the fallback buffer
                ch = fallbackBuffer != null ? fallbackBuffer.InternalGetNextChar() : 0;
                if (ch > 0) {
                    goto ProcessChar;
                }
            } else {
                // Case of leftover surrogates in the fallback buffer
                if (fallbackBuffer != null && fallbackBuffer.bFallingBack) {
                    Contract.Assert(ch >= 0xD800 && ch <= 0xDBFF, "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));

                    int cha = ch;

                    ch = fallbackBuffer.InternalGetNextChar();

                    if (InRange(ch, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
                        // Combine the high (cha) and low (ch) surrogate into one code point
                        ch = ch + (cha << 10) + (0x10000 - CharUnicodeInfo.LOW_SURROGATE_START - (CharUnicodeInfo.HIGH_SURROGATE_START << 10));
                        goto EncodeChar;
                    } else if (ch > 0){
                        goto ProcessChar;
                    } else {
                        break;
                    }
                }
            }

            // attempt to encode the partial surrogate (will fail or ignore)
            if (ch > 0 && (encoder == null || encoder.MustFlush))
                goto EncodeChar;

            // We're done
            break;
        }

        if (ch > 0) {
            // We have a high surrogate left over from a previous loop.
            Contract.Assert(ch >= 0xD800 && ch <= 0xDBFF, "[UTF8Encoding.GetBytes]expected high surrogate, not 0x" + ((int)ch).ToString("X4", CultureInfo.InvariantCulture));

            // use separate helper variables for local contexts so that the jit optimizations
            // won't get confused about the variable lifetimes
            int cha = *pSrc;

            // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
            // if (IsLowSurrogate(cha)) {
            if (InRange(cha, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
                ch = cha + (ch << 10) +
                    (0x10000
                    - CharUnicodeInfo.LOW_SURROGATE_START
                    - (CharUnicodeInfo.HIGH_SURROGATE_START << 10) );

                pSrc++;
            }
            // else ch is still high surrogate and encoding will fail

            // attempt to encode the surrogate or partial surrogate
            goto EncodeChar;
        }

        // If we've used a fallback, then we have to check for it
        if (fallbackBuffer != null)
        {
            ch = fallbackBuffer.InternalGetNextChar();
            if (ch > 0) goto ProcessChar;
        }

        // read next char. The JIT optimization seems to be getting confused when
        // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
        ch = *pSrc;
        pSrc++;

    ProcessChar:
        // A high surrogate is kept in ch and paired with the next char on the next iteration.
        // if (IsHighSurrogate(ch)) {
        if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END)) {
            continue;
        }
        // either good char or partial surrogate

    EncodeChar:
        // throw exception on partial surrogate if necessary
        // if (IsLowSurrogate(ch) || IsHighSurrogate(ch))
        if (InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END))
        {
            // Lone surrogates aren't allowed, we have to do fallback for them
            // Have to make a fallback buffer if we don't have one
            if (fallbackBuffer == null)
            {
                // wait on fallbacks if we can
                // For fallback we may need a fallback buffer
                if (baseEncoder == null)
                    fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                else
                    fallbackBuffer = baseEncoder.FallbackBuffer;

                // Set our internal fallback interesting things.
                fallbackBuffer.InternalInitialize(chars, pEnd, baseEncoder, true);
            }

            // Do our fallback.  Actually we already know it's a mixed up surrogate,
            // so the ref pSrc isn't gonna do anything.
            fallbackBuffer.InternalFallback(unchecked((char)ch), ref pSrc);

            // Ignore it if we don't throw
            ch = 0;
            continue;
        }

        // Count bytes needed
        int bytesNeeded = 1;
        if (ch > 0x7F) {
            if (ch > 0x7FF) {
                if (ch > 0xFFFF) {
                    bytesNeeded++;  // 4 bytes (surrogate pair)
                }
                bytesNeeded++;      // 3 bytes (800-FFFF)
            }
            bytesNeeded++;          // 2 bytes (80-7FF)
        }

        if (pTarget > pAllocatedBufferEnd - bytesNeeded) {
            // Left over surrogate from last time will cause pSrc == chars, so we'll throw
            if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
            {
                fallbackBuffer.MovePrevious();              // Didn't use this fallback char
                if (ch > 0xFFFF)
                    fallbackBuffer.MovePrevious();          // Was surrogate, didn't use 2nd part either
            }
            else
            {
                pSrc--;                                     // Didn't use this char
                if (ch > 0xFFFF)
                    pSrc--;                                 // Was surrogate, didn't use 2nd part either
            }
            Contract.Assert(pSrc >= chars || pTarget == bytes, "[UTF8Encoding.GetBytes]Expected pSrc to be within buffer or to throw with insufficient room.");
            ThrowBytesOverflow(encoder, pTarget == bytes);  // Throw if we must
            ch = 0;                                         // Nothing left over (we backed up to start of pair if supplementary)
            break;
        }

        if (ch <= 0x7F) {
            *pTarget = (byte)ch;
        }
        else {
            // use separate helper variables for local contexts so that the jit optimizations
            // won't get confused about the variable lifetimes
            int chb;
            if (ch <= 0x7FF) {
                // 2 byte encoding
                chb = (byte)(unchecked((sbyte)0xC0) | (ch >> 6));
            }
            else
            {
                if (ch <= 0xFFFF) {
                    chb = (byte)(unchecked((sbyte)0xE0) | (ch >> 12));
                }
                else
                {
                    *pTarget = (byte)(unchecked((sbyte)0xF0) | (ch >> 18));
                    pTarget++;

                    chb = unchecked((sbyte)0x80) | (ch >> 12) & 0x3F;
                }
                *pTarget = (byte)chb;
                pTarget++;

                chb = unchecked((sbyte)0x80) | (ch >> 6) & 0x3F;
            }
            *pTarget = (byte)chb;
            pTarget++;

            *pTarget = (byte)(unchecked((sbyte)0x80) | ch & 0x3F);
        }
        pTarget++;

#if FASTLOOP
        // If still have fallback don't do fast loop
        if (fallbackBuffer != null && (ch = fallbackBuffer.InternalGetNextChar()) != 0)
            goto ProcessChar;

        int availableChars = PtrDiff(pEnd, pSrc);
        int availableBytes = PtrDiff(pAllocatedBufferEnd, pTarget);

        // don't fall into the fast decoding loop if we don't have enough characters
        // Note that if we don't have enough bytes, pStop will prevent us from entering the fast loop.
        if (availableChars <= 13) {
            // we are hoping for 1 byte per char
            if (availableBytes < availableChars) {
                // not enough output room.  no pending bits at this point
                ch = 0;
                continue;
            }

            // try to get over the remainder of the ascii characters fast though
            char* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
            while (pSrc < pLocalEnd) {
                ch = *pSrc;
                pSrc++;

                // Not ASCII, need more than 1 byte per char
                if (ch > 0x7F)
                    goto ProcessChar;

                *pTarget = (byte)ch;
                pTarget++;
            }
            // we are done, let ch be 0 to clear encoder
            ch = 0;
            break;
        }

        // we need at least 1 byte per character, but Convert might allow us to convert
        // only part of the input, so try as much as we can.  Reduce charCount if necessary
        if (availableBytes < availableChars)
        {
            availableChars = availableBytes;
        }

        // FASTLOOP:
        // - optimistic range checks
        // - fallbacks to the slow loop for all special cases, exception throwing, etc.

        // To compute the upper bound, assume that all characters are ASCII characters at this point,
        //  the boundary will be decreased for every non-ASCII character we encounter
        // Also, we need 5 chars reserve for the unrolled ansi decoding loop and for decoding of surrogates
        // If there aren't enough bytes for the output, then pStop will be <= pSrc and will bypass the loop.
        char *pStop = pSrc + availableChars - 5;

        while (pSrc < pStop) {
            ch = *pSrc;
            pSrc++;

            if (ch > 0x7F) {
                goto LongCode;
            }
            *pTarget = (byte)ch;
            pTarget++;

            // get pSrc aligned
            if ((unchecked((int)pSrc) & 0x2) != 0) {
                ch = *pSrc;
                pSrc++;
                if (ch > 0x7F) {
                    goto LongCode;
                }
                *pTarget = (byte)ch;
                pTarget++;
            }

            // Run 4 characters at a time!
            while (pSrc < pStop) {
                // Read two chars per int; the 0xFF80FF80 mask is non-zero if any of the four is > 0x7F
                ch = *(int*)pSrc;
                int chc = *(int*)(pSrc+2);
                if (((ch | chc) & unchecked((int)0xFF80FF80)) != 0) {
                    goto LongCodeWithMask;
                }

                // Unfortunately, this is endianness sensitive
#if BIGENDIAN
                *pTarget = (byte)(ch>>16);
                *(pTarget+1) = (byte)ch;
                pSrc += 4;
                *(pTarget+2) = (byte)(chc>>16);
                *(pTarget+3) = (byte)chc;
                pTarget += 4;
#else // BIGENDIAN
                *pTarget = (byte)ch;
                *(pTarget+1) = (byte)(ch>>16);
                pSrc += 4;
                *(pTarget+2) = (byte)chc;
                *(pTarget+3) = (byte)(chc>>16);
                pTarget += 4;
#endif // BIGENDIAN
            }
            continue;

        LongCodeWithMask:
            // Extract the first char of the pair that was read as an int
#if BIGENDIAN
            // be careful about the sign extension
            ch = (int)(((uint)ch) >> 16);
#else // BIGENDIAN
            ch = (char)ch;
#endif // BIGENDIAN
            pSrc++;

            if (ch > 0x7F) {
                goto LongCode;
            }
            *pTarget = (byte)ch;
            pTarget++;
            continue;

        LongCode:
            // use separate helper variables for slow and fast loop so that the jit optimizations
            // won't get confused about the variable lifetimes
            int chd;
            if (ch <= 0x7FF) {
                // 2 byte encoding
                chd = unchecked((sbyte)0xC0) | (ch >> 6);
            }
            else {
                // if (!IsLowSurrogate(ch) && !IsHighSurrogate(ch))
                if (!InRange(ch, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
                    // 3 byte encoding
                    chd = unchecked((sbyte)0xE0) | (ch >> 12);
                }
                else
                {
                    // 4 byte encoding - high surrogate + low surrogate
                    // if (!IsHighSurrogate(ch))
                    if (ch > CharUnicodeInfo.HIGH_SURROGATE_END) {
                        // low without high -> bad, try again in slow loop
                        pSrc -= 1;
                        break;
                    }

                    chd = *pSrc;
                    pSrc++;

                    // if (!IsLowSurrogate(chd)) {
                    if (!InRange(chd, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END)) {
                        // high not followed by low -> bad, try again in slow loop
                        pSrc -= 2;
                        break;
                    }

                    ch = chd + (ch << 10) +
                        (0x10000
                        - CharUnicodeInfo.LOW_SURROGATE_START
                        - (CharUnicodeInfo.HIGH_SURROGATE_START << 10) );

                    *pTarget = (byte)(unchecked((sbyte)0xF0) | (ch >> 18));
                    // pStop - this byte is compensated by the second surrogate character
                    // 2 input chars require 4 output bytes.  2 have been anticipated already
                    // and 2 more will be accounted for by the 2 pStop-- calls below.
                    pTarget++;

                    chd = unchecked((sbyte)0x80) | (ch >> 12) & 0x3F;
                }
                *pTarget = (byte)chd;
                pStop--;                    // 3 byte sequence for 1 char, so need pStop-- and the one below too.
                pTarget++;

                chd = unchecked((sbyte)0x80) | (ch >> 6) & 0x3F;
            }
            *pTarget = (byte)chd;
            pStop--;                        // 2 byte sequence for 1 char so need pStop--.
            pTarget++;

            *pTarget = (byte)(unchecked((sbyte)0x80) | ch & 0x3F);
            // pStop - this byte is already included
            pTarget++;
        }

        Contract.Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetBytes]pTarget <= pAllocatedBufferEnd");

#endif // FASTLOOP

        // no pending char at this point
        ch = 0;
    }

    // Do we have to set the encoder bytes?
    if (encoder != null)
    {
        Contract.Assert(!encoder.MustFlush || ch == 0, "[UTF8Encoding.GetBytes] Expected no mustflush or 0 leftover ch " + ch.ToString("X2", CultureInfo.InvariantCulture));

        encoder.surrogateChar = ch;
        encoder.m_charsUsed = (int)(pSrc - chars);
    }

    Contract.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 || baseEncoder == null || !baseEncoder.m_throwOnOverflow, "[UTF8Encoding.GetBytes]Expected empty fallback buffer if not converting");

    return (int)(pTarget - bytes);
}
// Worker overload for the NLS encodings. The encoder argument may be null.
// Arguments must already have been validated by the caller: this internal version
// only asserts on them.
internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    Debug.Assert(chars != null);
    Debug.Assert(count >= 0);

    // This base implementation does not consult the encoder; it defers to the two-argument overload.
    int byteCount = GetByteCount(chars, count);
    return byteCount;
}
// Encodes charCount UTF-16 chars starting at chars into bytes, two bytes per char, in the byte
// order selected by bigEndian. Unpaired surrogates are passed to the encoder fallback.
//
//   chars, charCount - input characters
//   bytes, byteCount - output buffer and its capacity
//   encoder          - may be null; when non-null it supplies a pending high surrogate
//                      (charLeftOver) and any existing fallback buffer, and on exit receives
//                      charLeftOver and m_charsUsed
//
// Returns the number of bytes written (bytes - byteStart).
// Throws ArgumentException("EncoderFallbackNotEmpty") when the encoder's fallback buffer still
// holds data and encoder.m_throwOnOverflow is set, and ArgumentException("RecursiveFallback")
// when a flushed leftover surrogate is still left over after one retry through the fallback.
//
// Unless NO_FAST_UNICODE_LOOP is defined, two bulk-copy loops are tried first when the requested
// byte order matches the compile-time (BIGENDIAN) byte order and no high surrogate is pending;
// both break out to the per-char loop when they meet surrogates they cannot prove are paired.
// When the output buffer is too small ThrowBytesOverflow(encoder, nothingEncoded) is called; the
// statements following each call handle the case where that call returns instead of throwing.
internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
{
    char charLeftOver = (char)0;
    char ch;
    bool wasHereBefore = false;

    byte* byteEnd = bytes + byteCount;
    char* charEnd = chars + charCount;
    byte* byteStart = bytes;
    char* charStart = chars;

    // For fallback we may need a fallback buffer
    EncoderFallbackBuffer fallbackBuffer = null;

    // Get our encoder, but don't clear it yet.
    if (encoder != null)
    {
        charLeftOver = encoder.charLeftOver;

        // We mustn't have left over fallback data when counting
        if (encoder.InternalHasFallbackBuffer)
        {
            // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
            fallbackBuffer = encoder.FallbackBuffer;
            if (fallbackBuffer.Remaining > 0 && encoder.m_throwOnOverflow)
                throw new ArgumentException("EncoderFallbackNotEmpty");

            // Set our internal fallback interesting things.
            // NOTE(review): this call passes false as the last argument while every later
            // InternalInitialize call in this method passes true; confirm that is intended.
            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
        }
    }

TryAgain:
    // Each iteration takes the next pending fallback char if there is one, else the next input char.
    while (((ch = (fallbackBuffer == null) ? (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
    {
        // First unwind any fallback
        if (ch == 0)
        {
            // No fallback, maybe we can do it fast
#if !NO_FAST_UNICODE_LOOP
#if BIGENDIAN       // If endianness is backwards then each pair of bytes would be backwards.
            if ( bigEndian &&
#else
            if (!bigEndian &&
#endif // BIGENDIAN
#if WIN64           // 64 bit CPU needs to be long aligned for this to work, 32 bit CPU needs to be 32 bit aligned
                (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
#else
                (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
#endif // WIN64
                charLeftOver == 0)
            {
                // Need -1 to check 2 at a time.  If we have an even #, longChars will go
                // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
                // will go from longEnd - 1 long to longEnd. (Might not get to use this)
                // We can only go iCount units (limited by shorter of char or byte buffers.
                ulong* longEnd = (ulong*)(chars - 3 +
                    (((byteEnd - bytes) >> 1 < charEnd - chars) ?
                        (byteEnd - bytes) >> 1 : charEnd - chars));

                // Need new char* so we can check 4 at a time
                ulong* longChars = (ulong*)chars;
                ulong* longBytes = (ulong*)bytes;

                while (longChars < longEnd)
                {
                    // See if we potentially have surrogates (0x8000 bit set)
                    // (We're either big endian on a big endian machine or little endian on
                    // a little endian machine so this'll work)
                    if ((0x8000800080008000 & *longChars) != 0)
                    {
                        // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
                        // 5 bits looks like 11011, then its a high or low surrogate.
                        // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
                        // Note that we expect BMP characters to be more common than surrogates
                        // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
                        ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;

                        // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
                        // but no clue if they're high or low.
                        // If each of the 4 characters are non-zero, then none are surrogates.
                        if ((uTemp & 0xFFFF000000000000) == 0 ||
                            (uTemp & 0x0000FFFF00000000) == 0 ||
                            (uTemp & 0x00000000FFFF0000) == 0 ||
                            (uTemp & 0x000000000000FFFF) == 0)
                        {
                            // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
                            // or if there's 1 or 4 surrogates

                            // If they happen to be high/low/high/low, we may as well continue.  Check the next
                            // bit to see if its set (low) or not (high) in the right pattern
#if BIGENDIAN
                            if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
#else
                            if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
#endif
                            {
                                // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
                                // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.

                                // Drop out to the slow loop to resolve the surrogates
                                break;
                            }
                            // else they are all surrogates in High/Low/High/Low order, so we can use them.
                        }
                        // else none are surrogates, so we can use them.
                    }
                    // else all < 0x8000 so we can use them

                    // We can use these 4 chars.
                    *longBytes = *longChars;
                    longChars++;
                    longBytes++;
                }

                chars = (char*)longChars;
                bytes = (byte*)longBytes;

                if (chars >= charEnd)
                    break;
            }
            // Not aligned, but maybe we can still be somewhat faster
            // Also somehow this optimizes the above loop?  It seems to cause something above
            // to get enregistered, but I haven't figured out how to make that happen without this loop.
            else if ((charLeftOver == 0) &&
#if BIGENDIAN
                bigEndian &&
#else
                !bigEndian &&
#endif // BIGENDIAN

#if WIN64
                (unchecked((long)chars) & 7) != (unchecked((long)bytes) & 7) &&  // Only do this if chars & bytes are out of line, otherwise faster loop'll be faster next time
#else
                (unchecked((int)chars) & 3) != (unchecked((int)bytes) & 3) &&  // Only do this if chars & bytes are out of line, otherwise faster loop'll be faster next time
#endif // WIN64
                (unchecked((int)(bytes)) & 1) == 0)
            {
                // # to use
                long iCount = ((byteEnd - bytes) >> 1 < charEnd - chars) ?
                               (byteEnd - bytes) >> 1 : charEnd - chars;

                // Need new char*
                char* charOut = ((char*)bytes);     // a char* for our output
                char* tempEnd = chars + iCount - 1; // Our end pointer

                while (chars < tempEnd)
                {
                    if (*chars >= (char)0xd800 && *chars <= (char)0xdfff)
                    {
                        // break for fallback for low surrogate
                        if (*chars >= 0xdc00)
                            break;

                        // break if next one's not a low surrogate (will do fallback)
                        if (*(chars + 1) < 0xdc00 || *(chars + 1) > 0xdfff)
                            break;

                        // They both exist, use them
                    }
                    // If 2nd char is surrogate & this one isn't then only add one
                    else if (*(chars + 1) >= (char)0xd800 && *(chars + 1) <= 0xdfff)
                    {
                        *charOut = *chars;
                        charOut++;
                        chars++;
                        continue;
                    }

                    *charOut = *chars;
                    *(charOut + 1) = *(chars + 1);
                    charOut += 2;
                    chars += 2;
                }

                bytes = (byte*)charOut;

                if (chars >= charEnd)
                    break;
            }
#endif // !NO_FAST_UNICODE_LOOP

            // No fallback, just get next char
            ch = *chars;
            chars++;
        }

        // Check for high or low surrogates
        if (ch >= 0xd800 && ch <= 0xdfff)
        {
            // Was it a high surrogate?
            if (ch <= 0xdbff)
            {
                // It's a high surrogate, see if we already had a high surrogate
                if (charLeftOver > 0)
                {
                    // Unwind the current character, this should be safe because we
                    // don't have leftover data in the fallback, so chars must have
                    // advanced already.
                    chars--;

                    // Fallback the previous surrogate
                    // Might need to create our fallback buffer
                    if (fallbackBuffer == null)
                    {
                        if (encoder == null)
                            fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = encoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
                    }

                    fallbackBuffer.InternalFallback(charLeftOver, ref chars);

                    charLeftOver = (char)0;
                    continue;
                }

                // Remember this high surrogate
                charLeftOver = ch;
                continue;
            }

            // It's a low surrogate
            if (charLeftOver == 0)
            {
                // We'll fall back this one
                // Might need to create our fallback buffer
                if (fallbackBuffer == null)
                {
                    if (encoder == null)
                        fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                    else
                        fallbackBuffer = encoder.FallbackBuffer;

                    // Set our internal fallback interesting things.
                    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
                }

                fallbackBuffer.InternalFallback(ch, ref chars);
                continue;
            }

            // Valid surrogate pair, add our charLeftOver
            // (needs 4 bytes in total: 2 written here, 2 for ch by the common code below)
            if (bytes + 3 >= byteEnd)
            {
                // Not enough room to add this surrogate pair
                if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
                {
                    // These must have both been from the fallbacks.
                    // Both of these MUST have been from a fallback because if the 1st wasn't
                    // from a fallback, then a high surrogate followed by an illegal char
                    // would've caused the high surrogate to fall back.  If a high surrogate
                    // fell back, then it was consumed and both chars came from the fallback.
                    fallbackBuffer.MovePrevious();                     // Didn't use either fallback surrogate
                    fallbackBuffer.MovePrevious();
                }
                else
                {
                    // If we don't have enough room, then either we should've advanced a while
                    // or we should have bytes==byteStart and throw below
                    chars -= 2;                                        // Didn't use either surrogate
                }
                ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
                charLeftOver = (char)0;                             // we'll retry it later
                break;                                               // Didn't throw, but stop 'til next time.
            }

            if (bigEndian)
            {
                *(bytes++) = (byte)(charLeftOver >> 8);
                *(bytes++) = (byte)charLeftOver;
            }
            else
            {
                *(bytes++) = (byte)charLeftOver;
                *(bytes++) = (byte)(charLeftOver >> 8);
            }

            charLeftOver = (char)0;
        }
        else if (charLeftOver > 0)
        {
            // Expected a low surrogate, but this char is normal

            // Rewind the current character, fallback previous character.
            // this should be safe because we don't have leftover data in the
            // fallback, so chars must have advanced already.
            chars--;

            // fallback previous chars
            // Might need to create our fallback buffer
            if (fallbackBuffer == null)
            {
                if (encoder == null)
                    fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                else
                    fallbackBuffer = encoder.FallbackBuffer;

                // Set our internal fallback interesting things.
                fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
            }

            fallbackBuffer.InternalFallback(charLeftOver, ref chars);

            // Ignore charLeftOver or throw
            charLeftOver = (char)0;
            continue;
        }

        // Ok, we have a char to add
        if (bytes + 1 >= byteEnd)
        {
            // Couldn't add this char
            if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
                fallbackBuffer.MovePrevious();                         // Not using this fallback char
            else
            {
                // Lonely charLeftOver (from previous call) would've been caught up above,
                // so this must be a case where we've already read an input char.
                chars--;                                         // Not using this char
            }
            ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
            break;                                               // didn't throw, just stop
        }

        if (bigEndian)
        {
            *(bytes++) = (byte)(ch >> 8);
            *(bytes++) = (byte)ch;
        }
        else
        {
            *(bytes++) = (byte)ch;
            *(bytes++) = (byte)(ch >> 8);
        }
    }

    // Don't allocate space for left over char
    if (charLeftOver > 0)
    {
        // If we aren't flushing we need to fall this back
        if (encoder == null || encoder.MustFlush)
        {
            if (wasHereBefore)
            {
                // Throw it, using our complete character
                throw new ArgumentException("RecursiveFallback");
            }
            else
            {
                // If we have to flush, stick it in fallback and try again
                // Might need to create our fallback buffer
                if (fallbackBuffer == null)
                {
                    if (encoder == null)
                        fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                    else
                        fallbackBuffer = encoder.FallbackBuffer;

                    // Set our internal fallback interesting things.
                    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
                }

                // If we're not flushing, this'll remember the left over character.
                fallbackBuffer.InternalFallback(charLeftOver, ref chars);

                charLeftOver = (char)0;
                wasHereBefore = true;
                goto TryAgain;
            }
        }
    }

    // Not flushing, remember it in the encoder
    if (encoder != null)
    {
        encoder.charLeftOver = charLeftOver;
        encoder.m_charsUsed = (int)(chars - charStart);
    }

    // We used to copy it fast, but this doesn't check for surrogates
    // System.IO.__UnmanagedMemoryStream.memcpyimpl(bytes, (byte*)chars, usedByteCount);

    return (int)(bytes - byteStart);
}
// Encodes UTF-16 input as 4-byte units, one unit per code point, in the byte
// order selected by this.bigEndian.
//
//   chars / charCount : input characters.
//   bytes / byteCount : output buffer and its capacity in bytes.
//   encoder           : optional stateful encoder. When present, its charLeftOver
//                       is used as the initial pending high surrogate, and on
//                       exit its charLeftOver / m_charsUsed are updated.
//
// Returns the number of bytes written to the output buffer.
// Throws ArgumentException when encoder.m_throwOnOverflow is set and the
// encoder's fallback buffer still has characters remaining.
//
// A high surrogate followed by a low surrogate is combined through
// GetSurrogate(); any unpaired surrogate is handed to the fallback buffer.
internal override unsafe int GetBytes(char *chars, int charCount, byte *bytes, int byteCount, EncoderNLS encoder)
{
    char ch;
    char *charStart = chars;
    char *charEnd = chars + charCount;
    byte *byteStart = bytes;
    byte *byteEnd = bytes + byteCount;
    char highSurrogate = '\0';
    EncoderFallbackBuffer fallbackBuffer = null;

    if (encoder != null)
    {
        highSurrogate = encoder.charLeftOver;
        fallbackBuffer = encoder.FallbackBuffer;

        // Leftover fallback data is only rejected when the encoder is in throw-on-overflow mode.
        if (encoder.m_throwOnOverflow && (fallbackBuffer.Remaining > 0))
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", new object[] { this.EncodingName, encoder.Fallback.GetType() }));
        }
    }
    else
    {
        fallbackBuffer = base.encoderFallback.CreateFallbackBuffer();
    }

    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);

TryAgain:
    // Characters produced by the fallback buffer take priority over the input.
    while (((ch = fallbackBuffer.InternalGetNextChar()) != '\0') || (chars < charEnd))
    {
        if (ch == '\0')
        {
            // Nothing pending in the fallback buffer: consume the next input char.
            ch = chars[0];
            chars++;
        }

        if (highSurrogate != '\0')
        {
            if (char.IsLowSurrogate(ch))
            {
                // Complete pair: combine into one value.
                uint surrogate = this.GetSurrogate(highSurrogate, ch);
                highSurrogate = '\0';

                if ((bytes + 3) >= byteEnd)
                {
                    // No room for 4 bytes: un-consume both halves of the pair.
                    if (fallbackBuffer.bFallingBack)
                    {
                        fallbackBuffer.MovePrevious();
                        fallbackBuffer.MovePrevious();
                    }
                    else
                    {
                        chars -= 2;
                    }

                    // The call may return instead of throwing; in that case stop here.
                    base.ThrowBytesOverflow(encoder, bytes == byteStart);
                    highSurrogate = '\0';
                    break;
                }

                // Store at the current position, then advance (store-then-increment),
                // so the first byte lands at offset 0 and nothing is written past
                // the 4 bytes that were just bounds-checked.
                if (this.bigEndian)
                {
                    *bytes++ = 0;
                    *bytes++ = (byte)(surrogate >> 0x10);
                    *bytes++ = (byte)(surrogate >> 8);
                    *bytes++ = (byte)surrogate;
                }
                else
                {
                    *bytes++ = (byte)surrogate;
                    *bytes++ = (byte)(surrogate >> 8);
                    *bytes++ = (byte)(surrogate >> 0x10);
                    *bytes++ = 0;
                }
            }
            else
            {
                // High surrogate without a partner: step back so the current char is
                // seen again, and fall back the pending high surrogate.
                chars--;
                fallbackBuffer.InternalFallback(highSurrogate, ref chars);
                highSurrogate = '\0';
            }
        }
        else if (char.IsHighSurrogate(ch))
        {
            // Hold it until we see what follows.
            highSurrogate = ch;
        }
        else
        {
            if (char.IsLowSurrogate(ch))
            {
                // Low surrogate with no preceding high surrogate.
                fallbackBuffer.InternalFallback(ch, ref chars);
                continue;
            }

            if ((bytes + 3) >= byteEnd)
            {
                // No room for 4 bytes: un-consume this char.
                if (fallbackBuffer.bFallingBack)
                {
                    fallbackBuffer.MovePrevious();
                }
                else
                {
                    chars--;
                }

                base.ThrowBytesOverflow(encoder, bytes == byteStart);
                break;
            }

            // Same store-then-increment discipline as for surrogate pairs.
            if (this.bigEndian)
            {
                *bytes++ = 0;
                *bytes++ = 0;
                *bytes++ = (byte)(ch >> 8);
                *bytes++ = (byte)ch;
            }
            else
            {
                *bytes++ = (byte)ch;
                *bytes++ = (byte)(ch >> 8);
                *bytes++ = 0;
                *bytes++ = 0;
            }
        }
    }

    // When flushing (or when there is no encoder to carry state), a dangling high
    // surrogate is fallen back and the loop is re-entered to drain the fallback.
    if (((encoder == null) || encoder.MustFlush) && (highSurrogate > '\0'))
    {
        fallbackBuffer.InternalFallback(highSurrogate, ref chars);
        highSurrogate = '\0';
        goto TryAgain;
    }

    if (encoder != null)
    {
        encoder.charLeftOver = highSurrogate;

        // Subtracting two char* values already yields a count of chars, so no
        // further division by sizeof(char) is applied.
        encoder.m_charsUsed = (int)(chars - charStart);
    }

    return (int)(bytes - byteStart);
}
[System.Security.SecurityCritical] // auto-generated
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
{
    // Arguments were validated by the internal caller; these are sanity checks only.
    Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetBytes]bytes is null");
    Debug.Assert(byteCount >= 0, "[DBCSCodePageEncoding.GetBytes]byteCount is negative");
    Debug.Assert(chars != null, "[DBCSCodePageEncoding.GetBytes]chars is null");
    Debug.Assert(charCount >= 0, "[DBCSCodePageEncoding.GetBytes]charCount is negative");

    // The encoding itself must always carry a fallback.
    Debug.Assert(EncoderFallback != null, "[DBCSCodePageEncoding.GetBytes]Attempting to use null encoder fallback");

    CheckMemorySection();

    // Remember both ends of the input and output ranges.
    char* charStart = chars;
    char* charEnd = chars + charCount;
    byte* byteStart = bytes;
    byte* byteEnd = bytes + byteCount;

    // The fallback buffer is created lazily when there is no encoder.
    EncoderFallbackBuffer fallbackBuffer = null;
    EncoderFallbackBufferHelper fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);

    if (encoder != null)
    {
        char pendingChar = encoder.charLeftOver;
        Debug.Assert(pendingChar == 0 || Char.IsHighSurrogate(pendingChar), "[DBCSCodePageEncoding.GetBytes]leftover character should be high surrogate");

        // With an encoder we always take its buffer up front.
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);
        fallbackHelper.InternalInitialize(chars, charEnd, encoder, true);

        // Pending fallback data is only rejected in throw-on-overflow mode.
        if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0)
        {
            throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, EncodingName, encoder.Fallback.GetType()));
        }

        if (pendingChar > 0)
        {
            Debug.Assert(encoder != null, "[DBCSCodePageEncoding.GetBytes]Expect to have encoder if we have a charLeftOver");

            // The leftover char is (asserted to be) a high surrogate; it is sent to the fallback.
            fallbackHelper.InternalFallback(pendingChar, ref chars);
        }
    }

    while (true)
    {
        // Fallback output, if any, is consumed before the input.
        char current = (fallbackBuffer == null) ? '\0' : fallbackHelper.InternalGetNextChar();
        if (current == 0)
        {
            if (chars >= charEnd)
            {
                break; // neither fallback chars nor input left
            }

            current = *chars;
            chars++;
        }

        // Table lookup; a zero entry for a non-zero char means "not mapped".
        ushort mapped = mapUnicodeToBytes[current];
        if (mapped == 0 && current != (char)0)
        {
            if (fallbackBuffer == null)
            {
                Debug.Assert(encoder == null, "[DBCSCodePageEncoding.GetBytes]Expected delayed create fallback only if no encoder.");
                fallbackBuffer = EncoderFallback.CreateFallbackBuffer();
                fallbackHelper = new EncoderFallbackBufferHelper(fallbackBuffer);
                fallbackHelper.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
            }

            fallbackHelper.InternalFallback(current, ref chars);
            continue;
        }

        // Values of 0x100 and above are emitted as two bytes (high byte first).
        bool isDoubleByte = mapped >= 0x100;
        bool outOfRoom = isDoubleByte ? (bytes + 1 >= byteEnd) : (bytes >= byteEnd);

        if (outOfRoom)
        {
            // Un-consume whichever source supplied this char.
            if (fallbackBuffer == null || fallbackHelper.bFallingBack == false)
            {
                Debug.Assert(chars > charStart, isDoubleByte
                    ? "[DBCSCodePageEncoding.GetBytes]Expected chars to have advanced (double byte case)"
                    : "[DBCSCodePageEncoding.GetBytes]Expected chars to have advanced (single byte case)");
                chars--;
            }
            else
            {
                fallbackBuffer.MovePrevious();
            }

            // May return rather than throw; if it returns, stop converting.
            ThrowBytesOverflow(encoder, chars == charStart);
            break;
        }

        if (isDoubleByte)
        {
            *bytes = unchecked((byte)(mapped >> 8));
            bytes++;
        }

        *bytes = unchecked((byte)(mapped & 0xff));
        bytes++;
    }

    if (encoder != null)
    {
        // If the fallback did not use the encoder, clear the leftover char.
        if (fallbackBuffer != null && !fallbackHelper.bUsedEncoder)
        {
            encoder.charLeftOver = (char)0;
        }

        encoder.m_charsUsed = (int)(chars - charStart);
    }

    return (int)(bytes - byteStart);
}
[System.Security.SecurityCritical] // auto-generated
internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
                                   byte* inByteStart, int inByteCount,
                                   char* inCharStart, int inCharCount)
{
    // Owning encoding and optional stateful encoder.
    enc = inEncoding;
    encoder = inEncoder;

    // Input range: the cursor starts at the beginning.
    charStart = inCharStart;
    chars = inCharStart;
    charEnd = inCharStart + inCharCount;

    // Output range: the cursor starts at the beginning.
    bytes = inByteStart;
    byteStart = inByteStart;
    byteEnd = inByteStart + inByteCount;

    if (encoder != null)
    {
        fallbackBuffer = encoder.FallbackBuffer;

        // Leftover fallback data is rejected only in throw-on-overflow mode, and
        // only when the encoder reports that it already has a fallback buffer.
        bool staleFallbackData = encoder.m_throwOnOverflow
                                 && encoder.InternalHasFallbackBuffer
                                 && fallbackBuffer.Remaining > 0;
        if (staleFallbackData)
        {
            throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty", encoder.Encoding.EncodingName, encoder.Fallback.GetType()));
        }
    }
    else
    {
        // No encoder: take a fresh buffer from the encoding's fallback.
        fallbackBuffer = enc.EncoderFallback.CreateFallbackBuffer();
    }

    // The last argument is true only when an output buffer was supplied.
    fallbackBuffer.InternalInitialize(chars, charEnd, encoder, bytes != null);
}
// Workhorse: converts chars to ISCII bytes. A null bytes pointer is accepted;
// in that case the encoder state is left untouched at the end.
internal override unsafe int GetBytes(char *chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder)
{
    // bytes is deliberately not asserted: null is allowed for counting.
    BCLDebug.Assert(chars != null, "[ISCIIEncoding.GetBytes]chars!=null");
    BCLDebug.Assert(charCount >=0, "[ISCIIEncoding.GetBytes]charCount >=0");
    BCLDebug.Assert(byteCount >=0, "[ISCIIEncoding.GetBytes]byteCount >=0");

    // We need the ISCII-specific encoder state.
    ISCIIEncoder encoder = (ISCIIEncoder) baseEncoder;

    // Helper that tracks the input/output cursors and the fallback.
    Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
        this, encoder, bytes, byteCount, chars, charCount);

    int activeCodePage = this.defaultCodePage;
    bool prevWasVirama = false;

    if (encoder != null)
    {
        // Resume from the state saved by the previous call.
        activeCodePage = encoder.currentCodePage;
        prevWasVirama = encoder.bLastVirama;

        // A leftover char from last time goes straight to the fallback.
        if (encoder.charLeftOver > 0)
        {
            buffer.Fallback(encoder.charLeftOver);
            prevWasVirama = false;
        }
    }

    while (buffer.MoreData)
    {
        char ch = buffer.GetNextChar();

        // Chars below MultiByteBegin are emitted as a single byte, unchanged.
        if (ch < MultiByteBegin)
        {
            if (!buffer.AddByte((byte)ch))
            {
                break;
            }

            prevWasVirama = false;
            continue;
        }

        bool inIndicRange = (ch >= IndicBegin) && (ch <= IndicEnd);
        if (!inIndicRange)
        {
            // ZWNJ / ZWJ directly after a Virama have a byte representation.
            if (prevWasVirama && (ch == ZWNJ || ch == ZWJ))
            {
                if (ch == ZWNJ)
                {
                    if (!buffer.AddByte(Virama))
                    {
                        break;
                    }
                }
                else
                {
                    // ZWJ
                    if (!buffer.AddByte(Nukta))
                    {
                        break;
                    }
                }

                prevWasVirama = false;
                continue;
            }

            // Anything else outside the range is not representable.
            buffer.Fallback(ch);
            prevWasVirama = false;
            continue;
        }

        // Look up the packed table entry for this Indic-range char.
        int tableEntry = UnicodeToIndicChar[ch - IndicBegin];
        if (tableEntry == 0)
        {
            // No ISCII mapping for this char.
            buffer.Fallback(ch);
            prevWasVirama = false;
            continue;
        }

        // Unpack: low byte = ISCII byte, bits 8-11 = script, bits 12-15 = second-byte selector.
        byte indicByte = (byte)tableEntry;
        int script = (0x000f & (tableEntry >> 8));
        int secondByteBits = (0xf000 & tableEntry);
        bool needsSecondByte = secondByteBits != 0;

        BCLDebug.Assert(script!=0, "[ISCIIEncoding.GetBytes]expected an indic script value");

        if (script != activeCodePage)
        {
            // Emit the ATR sequence that switches code page; this adds no character.
            if (!buffer.AddByte(ControlATR, (byte)(script | ControlCodePageStart)))
            {
                break;
            }

            // Only remember the new page once the bytes are actually in.
            activeCodePage = script;

            BCLDebug.Assert(activeCodePage >= CodeDevanagari && activeCodePage <= CodePunjabi,
                "[ISCIIEncoding.GetBytes]Code page (" + activeCodePage + " shouldn't appear in ISCII from Unicode table!");
        }

        // Second argument is 1 when another byte will follow for this char.
        if (!buffer.AddByte(indicByte, needsSecondByte ? 1:0))
        {
            break;
        }

        prevWasVirama = (indicByte == Virama);

        if (needsSecondByte)
        {
            int secondIndex = secondByteBits >> 12;
            BCLDebug.Assert(secondIndex > 0 && secondIndex <= 3,
                "[ISCIIEncoding.GetBytes]Expected indicTwoBytes from 1-3, not " + secondIndex);

            if (!buffer.AddByte(SecondIndicByte[secondIndex]))
            {
                break;
            }
        }
    }

    // On flush (or without an encoder), switch back to the default code page.
    if (activeCodePage != defaultCodePage)
    {
        if (encoder == null || encoder.MustFlush)
        {
            if (!buffer.AddByte(ControlATR, (byte)(defaultCodePage | ControlCodePageStart)))
            {
                // The failed AddByte stepped the char position back; undo that.
                buffer.GetNextChar();
            }
            else
            {
                activeCodePage = defaultCodePage;
            }

            prevWasVirama = false;
        }
    }

    // Persist state only when really converting (not when just counting).
    if (encoder != null && bytes != null)
    {
        if (!buffer.fallbackBuffer.bUsedEncoder)
        {
            encoder.charLeftOver = (char)0;
        }

        encoder.currentCodePage = activeCodePage;
        encoder.bLastVirama = prevWasVirama;
        encoder.m_charsUsed = buffer.CharsUsed;
    }

    return buffer.Count;
}
// Converts chars to Latin-1 bytes. Chars up to 0xFF are copied as-is; anything
// above goes through the encoder fallback. A replacement fallback with a single
// replacement char that itself fits in one byte takes a fast path.
// Returns the number of bytes written.
internal override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
{
    // Arguments were validated by the internal caller; these are sanity checks only.
    Debug.Assert(bytes != null, "[Latin1Encoding.GetBytes]bytes is null");
    Debug.Assert(byteCount >= 0, "[Latin1Encoding.GetBytes]byteCount is negative");
    Debug.Assert(chars != null, "[Latin1Encoding.GetBytes]chars is null");
    Debug.Assert(charCount >= 0, "[Latin1Encoding.GetBytes]charCount is negative");
    Debug.Assert(encoderFallback != null, "[Latin1Encoding.GetBytes]Attempting to use null encoder fallback");

    // Pick up leftover state and find out whether the fallback is a replacement fallback.
    char pendingChar = (char)0;
    EncoderReplacementFallback replacement = null;
    if (encoder == null)
    {
        replacement = this.EncoderFallback as EncoderReplacementFallback;
    }
    else
    {
        pendingChar = encoder.charLeftOver;
        replacement = encoder.Fallback as EncoderReplacementFallback;
        Debug.Assert(pendingChar == 0 || Char.IsHighSurrogate(pendingChar), "[Latin1Encoding.GetBytes]leftover character should be high surrogate");

        // The fallback buffer is expected to be empty here, so this is only asserted.
        Debug.Assert(!encoder.m_throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
            encoder.FallbackBuffer.Remaining == 0, "[Latin1CodePageEncoding.GetBytes]Expected empty fallback buffer");
    }

    char* charStart = chars;
    char* charEnd = chars + charCount;
    byte* byteStart = bytes;

    // ---- Fast path: one replacement char that fits in a byte ----
    if (replacement != null && replacement.MaxCharCount == 1)
    {
        char replacementChar = replacement.DefaultString[0];
        if (replacementChar <= (char)0xff)
        {
            // A leftover char becomes one replacement byte of its own.
            if (pendingChar > 0)
            {
                // Always throw here: with zero capacity even this one byte cannot fit.
                if (byteCount == 0)
                {
                    ThrowBytesOverflow(encoder, true);
                }

                *bytes = (byte)replacementChar;
                bytes++;
                byteCount--; // one slot of the budget is now spent
            }

            // Output is one byte per input char, so clamp the input to the space left.
            if (byteCount < charCount)
            {
                ThrowBytesOverflow(encoder, byteCount < 1);
                charEnd = chars + byteCount;
            }

            // Straight copy, substituting the replacement for anything above 0xFF.
            for (; chars < charEnd; chars++, bytes++)
            {
                char c = *chars;
                *bytes = (c > 0x00ff) ? (byte)replacementChar : (byte)c;
            }

            if (encoder != null)
            {
                encoder.charLeftOver = (char)0;
                encoder.m_charsUsed = (int)(chars-charStart);
            }

            return (int)(bytes - byteStart);
        }
    }

    // ---- Slow path: real fallback handling ----
    byte* byteEnd = bytes + byteCount;

    // Created on first use unless a leftover char forces it now.
    EncoderFallbackBuffer fallbackBuffer = null;

    if (pendingChar > 0)
    {
        Debug.Assert(encoder != null, "[Latin1Encoding.GetBytes]Expected encoder if we have charLeftOver");
        fallbackBuffer = encoder.FallbackBuffer;
        fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);

        // The leftover char is (asserted to be) a high surrogate; it is sent to the fallback.
        fallbackBuffer.InternalFallback(pendingChar, ref chars);

        // If the fallback output cannot fit now it never will, so always throw.
        if (fallbackBuffer.Remaining > byteEnd - bytes)
        {
            ThrowBytesOverflow(encoder, true);
        }
    }

    while (true)
    {
        // Fallback output, if any, is consumed before the input.
        char ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar();
        if (ch == 0)
        {
            if (chars >= charEnd)
            {
                break; // neither fallback chars nor input left
            }

            ch = *chars;
            chars++;
        }

        // Everything above 0xFF (surrogates included) must fall back.
        if (ch > 0xff)
        {
            if (fallbackBuffer == null)
            {
                fallbackBuffer = (encoder == null)
                    ? this.encoderFallback.CreateFallbackBuffer()
                    : encoder.FallbackBuffer;
                fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
            }

            fallbackBuffer.InternalFallback(ch, ref chars);

            // Each fallback char needs one output byte; continue only if they all fit.
            if (fallbackBuffer.Remaining <= byteEnd - bytes)
            {
                continue;
            }

            // They do not fit: un-consume the char and discard the fallback output.
            Debug.Assert(chars > charStart, "[Latin1Encoding.GetBytes]Expected chars to have advanced (fallback case)");
            chars--;
            fallbackBuffer.InternalReset();

            ThrowBytesOverflow(encoder, chars == charStart);
            break;
        }

        // Plain Latin-1 char: make sure there is room for one byte.
        if (bytes >= byteEnd)
        {
            Debug.Assert(fallbackBuffer == null || fallbackBuffer.bFallingBack == false,
                "[Latin1Encoding.GetBytes]Expected fallback to have throw initially if insufficient space");

            if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
            {
                Debug.Assert(chars > charStart, "[Latin1Encoding.GetBytes]Expected chars to have advanced (fallback case)");
                chars--; // this input char was not used
            }

            // May return rather than throw; if it returns, stop converting.
            ThrowBytesOverflow(encoder, chars == charStart);
            break;
        }

        *bytes = unchecked((byte)ch);
        bytes++;
    }

    if (encoder != null)
    {
        // If the fallback did not use the encoder, clear the leftover char.
        if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder)
        {
            encoder.charLeftOver = (char)0;
        }

        encoder.m_charsUsed = (int)(chars - charStart);
    }

    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
        "[Latin1Encoding.GetBytes]Expected Empty fallback buffer");

    return (int)(bytes - byteStart);
}