/// <summary>
/// Builds a buffer that tracks a byte input window and a char output window
/// for one decode operation, and wires up the decoder fallback buffer.
/// </summary>
/// <param name="enc">Encoding whose DecoderFallback is used when <paramref name="decoder"/> is null.</param>
/// <param name="decoder">Optional decoder; when non-null its FallbackBuffer is used.</param>
/// <param name="charStart">Start of the output chars.</param>
/// <param name="charCount">Number of chars available from <paramref name="charStart"/>.</param>
/// <param name="byteStart">Start of the input bytes.</param>
/// <param name="byteCount">Number of bytes available from <paramref name="byteStart"/>.</param>
[System.Security.SecurityCritical] // auto-generated
internal unsafe EncodingCharBuffer(EncodingNLS enc, DecoderNLS decoder, char* charStart, int charCount, byte* byteStart, int byteCount)
{
    _enc = enc;
    _decoder = decoder;

    // Output window: [charStart, charStart + charCount)
    _charStart = charStart;
    _chars = charStart;
    _charEnd = charStart + charCount;

    // Input window: [byteStart, byteStart + byteCount)
    _byteStart = byteStart;
    _bytes = byteStart;
    _byteEnd = byteStart + byteCount;

    // A decoder carries its own fallback buffer; without one, ask the encoding for a fresh buffer.
    _fallbackBuffer = (_decoder == null)
        ? enc.DecoderFallback.CreateFallbackBuffer()
        : _decoder.FallbackBuffer;

    // Fallback state is not expected to survive between GetChars/GetCharCount calls,
    // so the buffer should be empty at this point.
    Debug.Assert(_fallbackBuffer.Remaining == 0, "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");

    _fallbackBufferHelper = new DecoderFallbackBufferHelper(_fallbackBuffer);
    _fallbackBufferHelper.InternalInitialize(_bytes, _charEnd);
}
/// <summary>
/// Initializes the byte/char cursors for a single decode pass and selects the
/// fallback buffer (the decoder's own, or a new one created from the encoding).
/// </summary>
internal unsafe EncodingCharBuffer(EncodingNLS enc, DecoderNLS decoder, char* charStart, int charCount, byte* byteStart, int byteCount)
{
    _enc = enc;
    _decoder = decoder;

    // Char side: current position, start and end of the output range.
    _chars = charStart;
    _charStart = charStart;
    _charEnd = charStart + charCount;

    // Byte side: start, current position and end of the input range.
    _byteStart = byteStart;
    _bytes = byteStart;
    _byteEnd = byteStart + byteCount;

    _fallbackBuffer = _decoder != null
        ? _decoder.FallbackBuffer
        : enc.DecoderFallback.CreateFallbackBuffer();

    // Neither getting chars nor counting chars should leave fallback data
    // pending between calls, so nothing should remain in the buffer here.
    Debug.Assert(_fallbackBuffer.Remaining == 0, "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");

    _fallbackBufferHelper = new DecoderFallbackBufferHelper(_fallbackBuffer);
    _fallbackBufferHelper.InternalInitialize(_bytes, _charEnd);
}
/// <summary>
/// Counts the chars produced by decoding <paramref name="count"/> bytes, where every
/// byte of 128 or above is routed through the decoder fallback.
/// </summary>
/// <returns>
/// <paramref name="count"/> itself when a replacement fallback with MaxCharCount == 1 is in
/// effect; otherwise the byte count adjusted by what the fallback reports for each byte >= 128.
/// </returns>
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    DecoderReplacementFallback replacement;
    if (decoder == null)
    {
        replacement = this.DecoderFallback as DecoderReplacementFallback;
    }
    else
    {
        replacement = decoder.Fallback as DecoderReplacementFallback;
    }

    // One replacement char per bad byte: output length equals input length.
    if (replacement != null && replacement.MaxCharCount == 1)
    {
        return count;
    }

    DecoderFallbackBuffer fallbackBuffer = null;
    int charCount = count;              // start from one char per byte
    byte[] single = new byte[1];
    byte* byteEnd = bytes + count;

    while (bytes < byteEnd)
    {
        byte current = *bytes;
        ++bytes;

        if (current < 128)
        {
            continue;
        }

        // Create the fallback buffer lazily, on the first byte that needs it.
        if (fallbackBuffer == null)
        {
            if (decoder == null)
            {
                fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
            }
            else
            {
                fallbackBuffer = decoder.FallbackBuffer;
            }

            fallbackBuffer.InternalInitialize(byteEnd - count, (char*)null);
        }

        single[0] = current;

        // Swap the one char reserved for this byte for however many the fallback yields.
        charCount--;
        charCount += fallbackBuffer.InternalFallback(single, bytes);
    }

    return charCount;
}
/// <summary>
/// Counts the chars that decoding <paramref name="count"/> bytes would produce by
/// delegating to GetChars with a null output pointer and zero capacity.
/// </summary>
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
    Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");

    // Counting is GetChars run without an output buffer.
    int charsNeeded = GetChars(bytes, count, null, 0, baseDecoder);
    return charsNeeded;
}
/// <summary>
/// Counts the chars produced by decoding <paramref name="count"/> bytes through the
/// mapBytesToUnicode table; bytes whose table entry is 0xFFFD go through the decoder fallback.
/// </summary>
/// <returns>
/// <paramref name="count"/> when the active fallback is the Microsoft best-fit fallback or a
/// replacement fallback with MaxCharCount == 1; otherwise the fallback-adjusted count.
/// </returns>
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    base.CheckMemorySection();

    DecoderReplacementFallback replacement;
    bool bestFit;
    if (decoder != null)
    {
        replacement = decoder.Fallback as DecoderReplacementFallback;
        bestFit = decoder.Fallback.IsMicrosoftBestFitFallback;
    }
    else
    {
        replacement = base.DecoderFallback as DecoderReplacementFallback;
        bestFit = base.DecoderFallback.IsMicrosoftBestFitFallback;
    }

    // In both of these cases every byte yields exactly one char.
    if (bestFit)
    {
        return count;
    }

    if (replacement != null && replacement.MaxCharCount == 1)
    {
        return count;
    }

    DecoderFallbackBuffer fallbackBuffer = null;
    int charCount = count;              // one char per byte until a fallback says otherwise
    byte[] single = new byte[1];
    byte* byteEnd = bytes + count;

    while (bytes < byteEnd)
    {
        char mapped = this.mapBytesToUnicode[*bytes];
        bytes++;

        if (mapped != 0xfffd)
        {
            continue;
        }

        // First byte needing fallback: obtain and initialize the buffer.
        if (fallbackBuffer == null)
        {
            fallbackBuffer = (decoder == null)
                ? base.DecoderFallback.CreateFallbackBuffer()
                : decoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(byteEnd - count, null);
        }

        // bytes was already advanced, so the offending byte is the previous one.
        single[0] = *(bytes - 1);
        charCount--;
        charCount += fallbackBuffer.InternalFallback(single, bytes);
    }

    return charCount;
}
/// <summary>
/// Returns the number of chars that decoding <paramref name="count"/> bytes yields.
/// Each byte is looked up in mapBytesToUnicode; an entry of U+FFFD sends the byte
/// through the decoder fallback instead of counting it as one char.
/// </summary>
internal unsafe override int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    base.CheckMemorySection();

    bool bestFit;
    DecoderReplacementFallback replacement;
    if (decoder == null)
    {
        replacement = base.DecoderFallback as DecoderReplacementFallback;
        bestFit = base.DecoderFallback.IsMicrosoftBestFitFallback;
    }
    else
    {
        replacement = decoder.Fallback as DecoderReplacementFallback;
        bestFit = decoder.Fallback.IsMicrosoftBestFitFallback;
    }

    // Best-fit, or a single-char replacement: one char per input byte.
    bool oneCharPerByte = bestFit || (replacement != null && replacement.MaxCharCount == 1);
    if (oneCharPerByte)
    {
        return count;
    }

    byte* byteEnd = bytes + count;
    byte[] single = new byte[1];
    int charCount = count;
    DecoderFallbackBuffer fallbackBuffer = null;

    while (bytes < byteEnd)
    {
        char mapped = this.mapBytesToUnicode[*bytes];
        bytes++;

        if (mapped == '\uFFFD')
        {
            // Lazily obtain the fallback buffer the first time it is needed.
            if (fallbackBuffer == null)
            {
                if (decoder != null)
                {
                    fallbackBuffer = decoder.FallbackBuffer;
                }
                else
                {
                    fallbackBuffer = base.DecoderFallback.CreateFallbackBuffer();
                }

                fallbackBuffer.InternalInitialize(byteEnd - count, null);
            }

            // The byte just consumed sits one position behind the cursor.
            single[0] = *(bytes - 1);

            // Drop the char reserved for this byte and add what the fallback reports.
            charCount--;
            charCount += fallbackBuffer.InternalFallback(single, bytes);
        }
    }

    return charCount;
}
/// <summary>
/// Obtains a fallback buffer (the decoder's own, or a new one from the encoding's
/// DecoderFallback when no decoder is supplied) and stamps it with the encoding,
/// the decoder and the original byte count.
/// </summary>
/// <param name="originalByteCount">
/// Used only to work out the 'index' argument for the abstract Fallback method:
/// the index is this value minus 'bytes.Length', where bytes is expected to be
/// the whole remaining input buffer.
/// </param>
/// <returns>The initialized fallback buffer.</returns>
internal static DecoderFallbackBuffer CreateAndInitialize(Encoding encoding, DecoderNLS decoder, int originalByteCount)
{
    DecoderFallbackBuffer fallbackBuffer;
    if (decoder is null)
    {
        fallbackBuffer = encoding.DecoderFallback.CreateFallbackBuffer();
    }
    else
    {
        fallbackBuffer = decoder.FallbackBuffer;
    }

    fallbackBuffer._encoding = encoding;
    fallbackBuffer._decoder = decoder;
    fallbackBuffer._originalByteCount = originalByteCount;

    return fallbackBuffer;
}
/// <summary>
/// Counts the chars that decoding <paramref name="count"/> bytes produces; bytes of
/// 128 or above are counted via the decoder fallback rather than as one char each.
/// </summary>
internal unsafe override int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    DecoderReplacementFallback replacement = (decoder == null)
        ? (base.DecoderFallback as DecoderReplacementFallback)
        : (decoder.Fallback as DecoderReplacementFallback);

    // A single-char replacement keeps output length equal to input length.
    if (replacement != null && replacement.MaxCharCount == 1)
    {
        return count;
    }

    DecoderFallbackBuffer fallbackBuffer = null;
    int charCount = count;
    byte[] single = new byte[1];
    byte* byteEnd = bytes + count;

    for (; bytes < byteEnd;)
    {
        byte current = *bytes;
        bytes++;

        if (current >= 128)
        {
            if (fallbackBuffer == null)
            {
                fallbackBuffer = (decoder == null)
                    ? base.DecoderFallback.CreateFallbackBuffer()
                    : decoder.FallbackBuffer;

                // byteEnd - count is the position the input started at.
                fallbackBuffer.InternalInitialize(byteEnd - count, null);
            }

            single[0] = current;
            charCount--;    // give back the char assumed for this byte
            charCount += fallbackBuffer.InternalFallback(single, bytes);
        }
    }

    return charCount;
}
/// <summary>
/// Computes the decoded char count for <paramref name="count"/> bytes. Bytes below 0x80
/// count as one char; bytes at or above 0x80 are counted through the decoder fallback.
/// </summary>
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    DecoderReplacementFallback replacement;
    if (decoder != null)
    {
        replacement = decoder.Fallback as DecoderReplacementFallback;
    }
    else
    {
        replacement = base.DecoderFallback as DecoderReplacementFallback;
    }

    if (replacement != null && replacement.MaxCharCount == 1)
    {
        // Each invalid byte becomes exactly one char, so the length is unchanged.
        return count;
    }

    byte* byteEnd = bytes + count;
    int charCount = count;
    byte[] single = new byte[1];
    DecoderFallbackBuffer fallbackBuffer = null;

    while (bytes < byteEnd)
    {
        byte current = bytes[0];
        bytes++;

        if (current < 0x80)
        {
            continue;
        }

        if (fallbackBuffer == null)
        {
            if (decoder != null)
            {
                fallbackBuffer = decoder.FallbackBuffer;
            }
            else
            {
                fallbackBuffer = base.DecoderFallback.CreateFallbackBuffer();
            }

            fallbackBuffer.InternalInitialize(byteEnd - count, null);
        }

        single[0] = current;

        // Replace the reserved char with the fallback's reported char count.
        charCount--;
        charCount += fallbackBuffer.InternalFallback(single, bytes);
    }

    return charCount;
}
/// <summary>
/// Counts decoded chars for <paramref name="count"/> bytes using the mapBytesToUnicode table.
/// A table value of 65533 (0xFFFD) routes the byte through the decoder fallback.
/// </summary>
/// <returns>
/// <paramref name="count"/> when best-fit or a one-char replacement fallback applies;
/// otherwise the count adjusted by the fallback results.
/// </returns>
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    this.CheckMemorySection();

    DecoderReplacementFallback replacement;
    bool bestFit;
    if (decoder != null)
    {
        replacement = decoder.Fallback as DecoderReplacementFallback;
        bestFit = decoder.Fallback.IsMicrosoftBestFitFallback;
    }
    else
    {
        replacement = this.DecoderFallback as DecoderReplacementFallback;
        bestFit = this.DecoderFallback.IsMicrosoftBestFitFallback;
    }

    if (bestFit || (replacement != null && replacement.MaxCharCount == 1))
    {
        return count;
    }

    const int UnmappedMarker = 65533;   // 0xFFFD

    DecoderFallbackBuffer fallbackBuffer = null;
    int charCount = count;
    byte[] single = new byte[1];
    byte* byteEnd = bytes + count;

    while (bytes < byteEnd)
    {
        int mapped = (int)this.mapBytesToUnicode[*bytes];
        ++bytes;

        if (mapped != UnmappedMarker)
        {
            continue;
        }

        if (fallbackBuffer == null)
        {
            if (decoder == null)
            {
                fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
            }
            else
            {
                fallbackBuffer = decoder.FallbackBuffer;
            }

            fallbackBuffer.InternalInitialize(byteEnd - count, (char*)null);
        }

        // The cursor already moved on; the unmapped byte is the one before it.
        single[0] = *(bytes - 1);
        charCount--;
        charCount += fallbackBuffer.InternalFallback(single, bytes);
    }

    return charCount;
}
// Decodes GB18030 bytes into chars through an Encoding.EncodingCharBuffer and
// returns encodingCharBuffer.Count.
//
// baseDecoder is cast to GB18030Encoding.GB18030Decoder and may be null.
//
// Phase 1 - leftover bytes: when the decoder holds leftover bytes (bLeftOver1 != -1)
// they are loaded into num1..num4 (-1 means "empty slot") and processed first,
// pulling further bytes from the buffer when a sequence is still incomplete.
// If input runs out mid-sequence and MustFlush is false, the partial sequence is
// written back to the decoder (only when chars is non-null), m_bytesUsed is set,
// and the method returns early.
//
// Phase 2 - main loop over remaining input:
//   * byte <= sbyte.MaxValue (127): added directly as a char.
//   * lead byte + two-byte trailing byte: mapBytesToUnicode[(lead << 8) | trail].
//   * lead + four-byte trailing + lead + four-byte trailing: GetFourBytesOffset gives
//     an offset; <= 39419 maps through map4BytesToUnicode; 189000..1237575 becomes a
//     surrogate pair (55296 = 0xD800 high base, 56320 = 0xDC00 low base, 1024 per row);
//     anything else falls back on all four bytes.
//   * malformed sequences call AdjustBytes with a negative value and then fall back on
//     the first byte only - presumably so the remaining bytes are re-examined; confirm
//     against EncodingCharBuffer.AdjustBytes.
//   * truncated sequence at end of input with a decoder that is not flushing: the bytes
//     read so far are kept in num1..num4 (when chars is non-null) and the loop stops.
//
// Any AddChar/Fallback call returning false stops processing via break.
//
// On exit with a non-null decoder, num1..num4 are stored into bLeftOver1..4 only when
// chars is non-null (NOTE(review): a null chars looks like the counting path, where
// decoder state must not change - confirm with callers), and m_bytesUsed is always set
// from encodingCharBuffer.BytesUsed.
internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS baseDecoder) { GB18030Encoding.GB18030Decoder gb18030Decoder = (GB18030Encoding.GB18030Decoder)baseDecoder; Encoding.EncodingCharBuffer encodingCharBuffer = new Encoding.EncodingCharBuffer((Encoding)this, (DecoderNLS)gb18030Decoder, chars, charCount, bytes, byteCount); short num1 = -1; short num2 = -1; short num3 = -1; short num4 = -1; if (gb18030Decoder != null && (int)gb18030Decoder.bLeftOver1 != -1) { num1 = gb18030Decoder.bLeftOver1; num2 = gb18030Decoder.bLeftOver2; num3 = gb18030Decoder.bLeftOver3; num4 = gb18030Decoder.bLeftOver4; while ((int)num1 != -1) { if (!this.IsGBLeadByte(num1)) { if ((int)num1 <= (int)sbyte.MaxValue) { if (!encodingCharBuffer.AddChar((char)num1)) { break; } } else if (!encodingCharBuffer.Fallback((byte)num1)) { break; } num1 = num2; num2 = num3; num3 = num4; num4 = (short)-1; } else { while ((int)num2 == -1 || this.IsGBFourByteTrailing(num2) && (int)num4 == -1) { if (!encodingCharBuffer.MoreData) { if (!gb18030Decoder.MustFlush) { if ((IntPtr)chars != IntPtr.Zero) { gb18030Decoder.bLeftOver1 = num1; gb18030Decoder.bLeftOver2 = num2; gb18030Decoder.bLeftOver3 = num3; gb18030Decoder.bLeftOver4 = num4; } gb18030Decoder.m_bytesUsed = encodingCharBuffer.BytesUsed; return(encodingCharBuffer.Count); } break; } if ((int)num2 == -1) { num2 = (short)encodingCharBuffer.GetNextByte(); } else if ((int)num3 == -1) { num3 = (short)encodingCharBuffer.GetNextByte(); } else { num4 = (short)encodingCharBuffer.GetNextByte(); } } if (this.IsGBTwoByteTrailing(num2)) { int index = (int)num1 << 8 | (int)(byte)num2; if (encodingCharBuffer.AddChar(this.mapBytesToUnicode[index], 2)) { num1 = (short)-1; num2 = (short)-1; } else { break; } } else if (this.IsGBFourByteTrailing(num2) && this.IsGBLeadByte(num3) && this.IsGBFourByteTrailing(num4)) { int fourBytesOffset = this.GetFourBytesOffset(num1, num2, num3, num4); if (fourBytesOffset <= 39419) { if 
(!encodingCharBuffer.AddChar(this.map4BytesToUnicode[fourBytesOffset], 4)) { break; } } else if (fourBytesOffset >= 189000 && fourBytesOffset <= 1237575) { int num5 = fourBytesOffset - 189000; if (!encodingCharBuffer.AddChar((char)(55296 + num5 / 1024), (char)(56320 + num5 % 1024), 4)) { break; } } else if (!encodingCharBuffer.Fallback((byte)num1, (byte)num2, (byte)num3, (byte)num4)) { break; } num1 = (short)-1; num2 = (short)-1; num3 = (short)-1; num4 = (short)-1; } else if (encodingCharBuffer.Fallback((byte)num1)) { num1 = num2; num2 = num3; num3 = num4; num4 = (short)-1; } else { break; } } } } while (encodingCharBuffer.MoreData) { byte nextByte1 = encodingCharBuffer.GetNextByte(); if ((int)nextByte1 <= (int)sbyte.MaxValue) { if (!encodingCharBuffer.AddChar((char)nextByte1)) { break; } } else if (this.IsGBLeadByte((short)nextByte1)) { if (encodingCharBuffer.MoreData) { byte nextByte2 = encodingCharBuffer.GetNextByte(); if (this.IsGBTwoByteTrailing((short)nextByte2)) { int index = (int)nextByte1 << 8 | (int)nextByte2; if (!encodingCharBuffer.AddChar(this.mapBytesToUnicode[index], 2)) { break; } } else if (this.IsGBFourByteTrailing((short)nextByte2)) { if (encodingCharBuffer.EvenMoreData(2)) { byte nextByte3 = encodingCharBuffer.GetNextByte(); byte nextByte4 = encodingCharBuffer.GetNextByte(); if (this.IsGBLeadByte((short)nextByte3) && this.IsGBFourByteTrailing((short)nextByte4)) { int fourBytesOffset = this.GetFourBytesOffset((short)nextByte1, (short)nextByte2, (short)nextByte3, (short)nextByte4); if (fourBytesOffset <= 39419) { if (!encodingCharBuffer.AddChar(this.map4BytesToUnicode[fourBytesOffset], 4)) { break; } } else if (fourBytesOffset >= 189000 && fourBytesOffset <= 1237575) { int num5 = fourBytesOffset - 189000; if (!encodingCharBuffer.AddChar((char)(55296 + num5 / 1024), (char)(56320 + num5 % 1024), 4)) { break; } } else if (!encodingCharBuffer.Fallback(nextByte1, nextByte2, nextByte3, nextByte4)) { break; } } else { encodingCharBuffer.AdjustBytes(-3); 
if (!encodingCharBuffer.Fallback(nextByte1)) { break; } } } else { if (gb18030Decoder != null && !gb18030Decoder.MustFlush) { if ((IntPtr)chars != IntPtr.Zero) { num1 = (short)nextByte1; num2 = (short)nextByte2; num3 = !encodingCharBuffer.MoreData ? (short)-1 : (short)encodingCharBuffer.GetNextByte(); num4 = (short)-1; break; } break; } if (!encodingCharBuffer.Fallback(nextByte1, nextByte2)) { break; } } } else { encodingCharBuffer.AdjustBytes(-1); if (!encodingCharBuffer.Fallback(nextByte1)) { break; } } } else { if (gb18030Decoder != null && !gb18030Decoder.MustFlush) { if ((IntPtr)chars != IntPtr.Zero) { num1 = (short)nextByte1; num2 = (short)-1; num3 = (short)-1; num4 = (short)-1; break; } break; } if (!encodingCharBuffer.Fallback(nextByte1)) { break; } } } else if (!encodingCharBuffer.Fallback(nextByte1)) { break; } } if (gb18030Decoder != null) { if ((IntPtr)chars != IntPtr.Zero) { gb18030Decoder.bLeftOver1 = num1; gb18030Decoder.bLeftOver2 = num2; gb18030Decoder.bLeftOver3 = num3; gb18030Decoder.bLeftOver4 = num4; } gb18030Decoder.m_bytesUsed = encodingCharBuffer.BytesUsed; } return(encodingCharBuffer.Count); }
/// <summary>
/// Handles an output-buffer overflow during decoding. Calls the parameterless
/// ThrowCharsOverflow when there is no decoder, when the decoder is set to throw on
/// overflow, or when nothing was decoded; otherwise clears the decoder's must-flush state.
/// </summary>
/// <param name="decoder">Decoder in use, or null.</param>
/// <param name="nothingDecoded">True when no output was produced before running out of room.</param>
internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
{
    bool mustThrow = decoder == null || decoder.m_throwOnOverflow || nothingDecoded;

    if (mustThrow)
    {
        // Discard any pending fallback state before reporting the overflow.
        if (decoder != null && decoder.InternalHasFallbackBuffer)
        {
            decoder.FallbackBuffer.InternalReset();
        }

        // Special message to include the fallback type in case the fallback's
        // GetMaxCharCount is broken. This happens if the user has implemented a
        // decoder fallback with a broken GetMaxCharCount.
        // NOTE(review): this overload is presumably always-throwing, since decoder may be null below - confirm.
        ThrowCharsOverflow();
    }

    // Not throwing means we are in Convert and have to remember our flushing.
    decoder.ClearMustFlush();
}
/// <summary>
/// Internal char-count routine, invoked by other code that has already validated its arguments.
/// Returns <paramref name="count"/> unchanged.
/// </summary>
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    // Callers are internal and have checked these already, so only assert.
    Debug.Assert(bytes != null, "[Latin1Encoding.GetCharCount]bytes is null");
    Debug.Assert(count >= 0, "[Latin1Encoding.GetCharCount]byteCount is negative");

    // Single-byte input stays the same length: no surrogate pairs are produced and
    // no fallback is needed, because every Latin1 code point is a Unicode code point.
    int charCount = count;
    return charCount;
}
/// <summary>
/// Counts the chars produced by decoding <paramref name="count"/> ASCII bytes.
/// Bytes at or above 0x80 are counted through the decoder fallback.
/// </summary>
/// <returns>
/// <paramref name="count"/> when a replacement fallback with MaxCharCount == 1 is active;
/// otherwise the count adjusted by what the fallback reports for each byte >= 0x80.
/// </returns>
[System.Security.SecurityCritical] // auto-generated
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    // Internal callers have validated these already; assert only.
    Contract.Assert(bytes != null, "[ASCIIEncoding.GetCharCount]bytes is null");
    Contract.Assert(count >= 0, "[ASCIIEncoding.GetCharCount]byteCount is negative");

    // ASCII has no best-fit mapping, so only the replacement fallback needs checking.
    DecoderReplacementFallback replacement;
    if (decoder != null)
    {
        replacement = decoder.Fallback as DecoderReplacementFallback;
        Contract.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "[ASCIICodePageEncoding.GetCharCount]Expected empty fallback buffer");
    }
    else
    {
        replacement = this.DecoderFallback as DecoderReplacementFallback;
    }

    // A single-char replacement never changes the length: one char per byte.
    if (replacement != null && replacement.MaxCharCount == 1)
    {
        return count;
    }

    // The hard way: walk the input and ask the fallback about each invalid byte.
    DecoderFallbackBuffer fallbackBuffer = null;
    int total = count;                  // assume one char per byte to begin with
    byte[] single = new byte[1];
    byte* byteEnd = bytes + count;

    while (bytes < byteEnd)
    {
        byte current = *bytes;
        bytes++;

        if (current < 0x80)
        {
            continue;
        }

        // The fallback buffer is only needed once an invalid byte shows up.
        if (fallbackBuffer == null)
        {
            fallbackBuffer = (decoder == null)
                ? this.DecoderFallback.CreateFallbackBuffer()
                : decoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(byteEnd - count, null);
        }

        single[0] = current;
        total--;    // release the char reserved for this byte
        total += fallbackBuffer.InternalFallback(single, bytes);
    }

    // Nothing may be left pending in the fallback buffer.
    Contract.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0, "[ASCIIEncoding.GetCharCount]Expected Empty fallback buffer");

    return total;
}
/// <summary>
/// Counts the chars produced by decoding <paramref name="count"/> double-byte code page bytes,
/// taking into account a lead byte left over in the decoder from a previous call.
/// </summary>
/// <param name="bytes">Start of the input.</param>
/// <param name="count">Number of input bytes.</param>
/// <param name="baseDecoder">Optional decoder; cast to DBCSDecoder.</param>
/// <returns>The number of chars the input decodes to, including fallback output.</returns>
[System.Security.SecurityCritical] // auto-generated
public override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    // Internal callers have validated these already; assert only.
    Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetCharCount]bytes is null");
    Debug.Assert(count >= 0, "[DBCSCodePageEncoding.GetCharCount]byteCount is negative");

    CheckMemorySection();

    DBCSDecoder decoder = (DBCSDecoder)baseDecoder;

    byte* byteEnd = bytes + count;
    int total = count;                                  // start from one char per byte
    DecoderFallbackBuffer fallbackBuffer = null;        // obtained on first use

    // GetCharCount should start with nothing pending in the fallback buffer
    // (m_throwOnOverflow is irrelevant when only counting).
    Debug.Assert(decoder == null || !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer at start");

    DecoderFallbackBufferHelper fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);

    // A lead byte carried over from the previous call is consumed first.
    if (decoder != null && decoder.bLeftOver > 0)
    {
        if (count == 0)
        {
            // No new input: unless flushing, the leftover stays where it is.
            if (!decoder.MustFlush)
            {
                return 0;
            }

            Debug.Assert(fallbackBuffer == null, "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer");
            fallbackBuffer = decoder.FallbackBuffer;
            fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);
            fallbackHelper.InternalInitialize(bytes, null);

            byte[] pending = new byte[] { unchecked((byte)decoder.bLeftOver) };
            return fallbackHelper.InternalFallback(pending, bytes);
        }

        // Combine the leftover lead byte with the first new byte.
        int pair = (decoder.bLeftOver << 8) | (*bytes);
        bytes++;

        // One char is already counted for the byte just consumed; only an
        // unknown pair needs adjusting.
        if (mapBytesToUnicode[pair] == 0 && pair != 0)
        {
            total--;    // give back the preallocated char

            Debug.Assert(fallbackBuffer == null, "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer for unknown pair");
            fallbackBuffer = decoder.FallbackBuffer;
            fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);
            fallbackHelper.InternalInitialize(byteEnd - count, null);

            byte[] pending = new byte[] { unchecked((byte)(pair >> 8)), unchecked((byte)pair) };
            total += fallbackHelper.InternalFallback(pending, bytes);
        }
    }

    while (bytes < byteEnd)
    {
        int code = *bytes;
        bytes++;
        char mapped = mapBytesToUnicode[code];

        if (mapped == LEAD_BYTE_CHAR)
        {
            total--;    // a lead byte does not produce a char by itself

            if (bytes >= byteEnd)
            {
                // Input ended on a lead byte; a non-flushing decoder will hold on to it.
                if (decoder != null && !decoder.MustFlush)
                {
                    break;
                }

                // Flushing (or no decoder): count it again and treat it as unknown.
                total++;
                mapped = UNKNOWN_CHAR_FLAG;
            }
            else
            {
                code = (code << 8) | *bytes;
                bytes++;
                mapped = mapBytesToUnicode[code];
            }
        }

        // Known chars are already accounted for; only unknown ones need the fallback.
        if (mapped != UNKNOWN_CHAR_FLAG || code == 0)
        {
            continue;
        }

        if (fallbackBuffer == null)
        {
            fallbackBuffer = (decoder == null)
                ? DecoderFallback.CreateFallbackBuffer()
                : decoder.FallbackBuffer;
            fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);
            fallbackHelper.InternalInitialize(byteEnd - count, null);
        }

        total--;    // drop the char reserved for the unknown sequence

        byte[] unknown = (code < 0x100)
            ? new byte[] { unchecked((byte)code) }
            : new byte[] { unchecked((byte)(code >> 8)), unchecked((byte)code) };

        total += fallbackHelper.InternalFallback(unknown, bytes);
    }

    // Nothing should be left pending in the fallback buffer.
    Debug.Assert(decoder == null || !decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer at end");

    return total;
}
/// <summary>
/// Decodes ASCII bytes into chars. Bytes at or above 0x80 are invalid in ASCII and are
/// either replaced directly (fast path) or passed to the decoder fallback (slow path).
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="byteCount">Number of input bytes.</param>
/// <param name="chars">Start of the output chars.</param>
/// <param name="charCount">Capacity of the output, in chars.</param>
/// <param name="decoder">Optional decoder; when non-null its m_bytesUsed is updated.</param>
/// <returns>The number of chars written to <paramref name="chars"/>.</returns>
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
{
    byte* byteEnd = bytes + byteCount;
    byte* byteStart = bytes;
    char* charStart = chars;

    // ASCII doesn't do best fit, so only the replacement fallback needs to be checked.
    DecoderReplacementFallback fallback = null;
    if (decoder == null)
    {
        fallback = this.DecoderFallback as DecoderReplacementFallback;
    }
    else
    {
        fallback = decoder.Fallback as DecoderReplacementFallback;
    }

    if (fallback != null && fallback.MaxCharCount == 1)
    {
        // Fast path: every byte produces exactly one char.
        char replacementChar = fallback.DefaultString[0];

        // Need byteCount chars of room, otherwise the output buffer is too small.
        if (charCount < byteCount)
        {
            // Throws if it must (e.g. not even one char fits).
            ThrowCharsOverflow(decoder, charCount < 1);

            // Not throwing: convert only as many bytes as there is room for.
            byteEnd = bytes + charCount;
        }

        while (bytes < byteEnd)
        {
            byte b = *(bytes++);
            if (b >= 0x80)
            {
                // Invalid ASCII byte: emit the replacement char.
                *(chars++) = replacementChar;
            }
            else
            {
                *(chars++) = unchecked((char)b);
            }
        }

        if (decoder != null)
        {
            decoder.m_bytesUsed = (int)(bytes - byteStart);
        }

        return (int)(chars - charStart);
    }

    // Slow path: a fallback buffer decides what each invalid byte turns into.
    DecoderFallbackBuffer fallbackBuffer = null;
    byte[] byteBuffer = new byte[1];
    char* charEnd = chars + charCount;

    while (bytes < byteEnd)
    {
        byte b = *(bytes);
        bytes++;

        if (b >= 0x80)
        {
            // Invalid ASCII byte: obtain the fallback buffer on first use.
            if (fallbackBuffer == null)
            {
                if (decoder == null)
                {
                    fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
                }
                else
                {
                    fallbackBuffer = decoder.FallbackBuffer;
                }

                fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
            }

            byteBuffer[0] = b;

            // chars is only advanced when the fallback succeeds.
            if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
            {
                bytes--;                                            // byte was not consumed
                fallbackBuffer.InternalReset();                     // nothing was fallen back
                ThrowCharsOverflow(decoder, chars == charStart);    // throw?
                break;                                              // didn't throw: stop converting
            }
        }
        else
        {
            // Make sure there is room for one more char.
            if (chars >= charEnd)
            {
                bytes--;                                            // byte was not consumed
                ThrowCharsOverflow(decoder, chars == charStart);    // throw?
                break;                                              // didn't throw: stop converting
            }

            *(chars) = unchecked((char)b);
            chars++;
        }
    }

    // Report how much input was consumed.
    if (decoder != null)
    {
        decoder.m_bytesUsed = (int)(bytes - byteStart);
    }

    // The slow path previously fell off the end of the method without returning
    // a value; report the number of chars written, as the fast path does.
    return (int)(chars - charStart);
}
/// <summary>
/// Decodes ASCII bytes into chars. Bytes at or above 0x80 are replaced directly when a
/// single-char replacement fallback is active, and otherwise go through the fallback buffer.
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="byteCount">Number of input bytes.</param>
/// <param name="chars">Start of the output chars.</param>
/// <param name="charCount">Capacity of the output, in chars.</param>
/// <param name="decoder">Optional decoder; when non-null its m_bytesUsed is updated.</param>
/// <returns>The number of chars written to <paramref name="chars"/>.</returns>
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
{
    byte* byteEnd = bytes + byteCount;
    byte* byteStart = bytes;
    char* charStart = chars;

    DecoderReplacementFallback decoderFallback = null;
    if (decoder == null)
    {
        decoderFallback = base.DecoderFallback as DecoderReplacementFallback;
    }
    else
    {
        decoderFallback = decoder.Fallback as DecoderReplacementFallback;
    }

    if ((decoderFallback != null) && (decoderFallback.MaxCharCount == 1))
    {
        // Fast path: exactly one output char per input byte.
        char replacementChar = decoderFallback.DefaultString[0];

        if (charCount < byteCount)
        {
            base.ThrowCharsOverflow(decoder, charCount < 1);

            // Not throwing: convert only as many bytes as fit.
            byteEnd = bytes + charCount;
        }

        while (bytes < byteEnd)
        {
            // Read and write at the current positions, then advance. Advancing first
            // would skip the first input byte, read one byte past the end, and leave
            // the first output slot unwritten.
            byte b = bytes[0];
            bytes++;

            chars[0] = (b >= 0x80) ? replacementChar : (char)b;
            chars++;
        }

        if (decoder != null)
        {
            decoder.m_bytesUsed = (int)(bytes - byteStart);
        }

        // Pointer subtraction already yields a count of chars; no further division.
        return (int)(chars - charStart);
    }

    // Slow path: the fallback buffer decides what each invalid byte becomes.
    DecoderFallbackBuffer fallbackBuffer = null;
    byte[] byteBuffer = new byte[1];
    char* charEnd = chars + charCount;

    while (bytes < byteEnd)
    {
        byte b = bytes[0];
        bytes++;

        if (b >= 0x80)
        {
            if (fallbackBuffer == null)
            {
                if (decoder == null)
                {
                    fallbackBuffer = base.DecoderFallback.CreateFallbackBuffer();
                }
                else
                {
                    fallbackBuffer = decoder.FallbackBuffer;
                }

                fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
            }

            byteBuffer[0] = b;
            if (fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
            {
                continue;
            }

            // The fallback did not fit: the byte is unused, so step back and stop.
            bytes--;
            fallbackBuffer.InternalReset();
            base.ThrowCharsOverflow(decoder, chars == charStart);
            break;
        }

        if (chars >= charEnd)
        {
            // No room for this char: the byte is unused, so step back and stop.
            bytes--;
            base.ThrowCharsOverflow(decoder, chars == charStart);
            break;
        }

        chars[0] = (char)b;
        chars++;
    }

    if (decoder != null)
    {
        decoder.m_bytesUsed = (int)(bytes - byteStart);
    }

    // Element count, not byte count: dividing by sizeof(char) again would halve it.
    return (int)(chars - charStart);
}
/// <summary>
/// Decodes bytes into chars by widening each byte to the char with the same value.
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="byteCount">Number of input bytes.</param>
/// <param name="chars">Start of the output chars.</param>
/// <param name="charCount">Capacity of the output, in chars.</param>
/// <param name="decoder">Optional decoder; when non-null its m_bytesUsed is updated.</param>
/// <returns>The number of bytes converted, which equals the number of chars written.</returns>
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
{
    if (charCount < byteCount)
    {
        // Throws if it must (e.g. not even one char fits).
        this.ThrowCharsOverflow(decoder, charCount < 1);

        // Not throwing: convert only as many bytes as there is room for.
        byteCount = charCount;
    }

    byte* byteEnd = bytes + byteCount;
    while (bytes < byteEnd)
    {
        *chars = (char)*bytes;

        // Advance by ONE char. Pointer arithmetic on char* is in units of char, so
        // "chars += 2" would skip every other output slot and run past the buffer.
        chars++;
        bytes++;
    }

    if (decoder != null)
    {
        decoder.m_bytesUsed = byteCount;
    }

    return byteCount;
}
/// <summary>
/// Counts the chars that decoding <paramref name="count"/> bytes would produce.
/// Bytes below 0x80 count as one char each; bytes >= 0x80 count as whatever the fallback reports.
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="count">Number of input bytes.</param>
/// <param name="decoder">Optional decoder supplying the fallback; may be null.</param>
/// <returns>The resulting char count.</returns>
internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS decoder)
{
    // Pick the fallback from the decoder when there is one, otherwise from the encoding.
    DecoderReplacementFallback replacement =
        (decoder == null ? this.DecoderFallback : decoder.Fallback) as DecoderReplacementFallback;

    // A one-char replacement means every byte yields exactly one char.
    if (replacement != null && replacement.MaxCharCount == 1)
    {
        return count;
    }

    DecoderFallbackBuffer fallbackBuffer = null;
    int total = count;                    // start from one char per byte
    byte[] invalidByte = new byte[1];
    byte *end = bytes + count;

    while (bytes < end)
    {
        byte current = *bytes;
        bytes++;

        if (current < 0x80)
        {
            continue;
        }

        if (fallbackBuffer == null)
        {
            fallbackBuffer = decoder == null
                ? this.DecoderFallback.CreateFallbackBuffer()
                : decoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(end - count, null);
        }

        invalidByte[0] = current;
        total--;                          // take back the slot reserved for this byte
        total += fallbackBuffer.InternalFallback(invalidByte, bytes);
    }

    return total;
}
/// <summary>
/// Decodes 4-byte code units into UTF-16 chars, using the byte order selected by bigEndian.
/// Values above 0x10FFFF or in the surrogate range 0xD800-0xDFFF go through the decoder fallback.
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="byteCount">Number of input bytes.</param>
/// <param name="chars">Start of the output buffer.</param>
/// <param name="charCount">Capacity of the output buffer, in chars.</param>
/// <param name="baseDecoder">Optional decoder carrying a partial code unit between calls; may be null.</param>
/// <returns>Number of chars written.</returns>
/// <remarks>
/// Fixes relative to the previous (decompiled) text: input and output pointers were advanced before
/// being dereferenced (reading and writing one element too far), and the returned count was divided
/// by two although C# pointer subtraction already yields an element count. The little-endian shift
/// now widens to uint before shifting so it does not depend on an unchecked context.
/// </remarks>
internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS baseDecoder)
{
    UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
    char *charStart = chars;
    char *charEnd = chars + charCount;
    byte *byteStart = bytes;
    byte *byteEnd = bytes + byteCount;
    int readByteCount = 0;
    uint iChar = 0;
    DecoderFallbackBuffer fallbackBuffer = null;

    if (decoder != null)
    {
        // Resume a code unit that was split across calls.
        readByteCount = decoder.readByteCount;
        iChar = (uint)decoder.iChar;
        fallbackBuffer = baseDecoder.FallbackBuffer;
    }
    else
    {
        fallbackBuffer = base.decoderFallback.CreateFallbackBuffer();
    }
    fallbackBuffer.InternalInitialize(bytes, chars + charCount);

    while (bytes < byteEnd)
    {
        // Accumulate the current byte, then advance.
        if (this.bigEndian)
        {
            iChar = iChar << 8;
            iChar += *bytes;
        }
        else
        {
            iChar = iChar >> 8;
            iChar += (uint)(*bytes) << 0x18;
        }
        bytes++;
        readByteCount++;

        if (readByteCount >= 4)
        {
            readByteCount = 0;

            if ((iChar > 0x10ffff) || ((iChar >= 0xd800) && (iChar <= 0xdfff)))
            {
                // Invalid scalar value: rebuild the original byte order for the fallback.
                byte[] invalidBytes;
                if (this.bigEndian)
                {
                    invalidBytes = new byte[] { (byte)(iChar >> 0x18), (byte)(iChar >> 0x10), (byte)(iChar >> 8), (byte)iChar };
                }
                else
                {
                    invalidBytes = new byte[] { (byte)iChar, (byte)(iChar >> 8), (byte)(iChar >> 0x10), (byte)(iChar >> 0x18) };
                }

                if (!fallbackBuffer.InternalFallback(invalidBytes, bytes, ref chars))
                {
                    // Fallback output did not fit: un-read the code unit and stop (or throw).
                    bytes -= 4;
                    iChar = 0;
                    fallbackBuffer.InternalReset();
                    base.ThrowCharsOverflow(decoder, chars == charStart);
                    break;
                }
                iChar = 0;
            }
            else
            {
                if (iChar >= 0x10000)
                {
                    // Needs a surrogate pair, so two free slots are required.
                    if (chars >= (charEnd - 1))
                    {
                        bytes -= 4;
                        iChar = 0;
                        base.ThrowCharsOverflow(decoder, chars == charStart);
                        break;
                    }
                    *chars = this.GetHighSurrogate(iChar);
                    chars++;
                    iChar = this.GetLowSurrogate(iChar);
                }
                else if (chars >= charEnd)
                {
                    bytes -= 4;
                    iChar = 0;
                    base.ThrowCharsOverflow(decoder, chars == charStart);
                    break;
                }

                *chars = (char)iChar;
                chars++;
                iChar = 0;
            }
        }
    }

    // Leftover partial code unit: fall it back when there is no decoder or it must flush.
    if ((readByteCount > 0) && ((decoder == null) || decoder.MustFlush))
    {
        byte[] leftover = new byte[readByteCount];
        int remaining = readByteCount;
        if (!this.bigEndian)
        {
            while (remaining > 0)
            {
                leftover[--remaining] = (byte)(iChar >> 0x18);
                iChar = iChar << 8;
            }
        }
        else
        {
            while (remaining > 0)
            {
                leftover[--remaining] = (byte)iChar;
                iChar = iChar >> 8;
            }
        }

        if (!fallbackBuffer.InternalFallback(leftover, bytes, ref chars))
        {
            fallbackBuffer.InternalReset();
            base.ThrowCharsOverflow(decoder, chars == charStart);
        }
        else
        {
            readByteCount = 0;
            iChar = 0;
        }
    }

    if (decoder != null)
    {
        decoder.iChar = (int)iChar;
        decoder.readByteCount = readByteCount;
        decoder.m_bytesUsed = (int)(bytes - byteStart);
    }
    return (int)(chars - charStart);
}
/// <summary>
/// Counts the UTF-16 chars that decoding the given 4-byte code units would produce.
/// Values at or above 0x10000 count as two chars; invalid values count as the fallback reports.
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="count">Number of input bytes.</param>
/// <param name="baseDecoder">Optional decoder carrying a partial code unit between calls; may be null.</param>
/// <returns>The resulting char count.</returns>
/// <exception cref="ArgumentOutOfRangeException">The running count became negative (overflow).</exception>
/// <remarks>
/// Fix: the input pointer was advanced before being dereferenced, so the first byte was skipped and
/// one byte past the end was read. The little-endian shift now widens to uint before shifting.
/// </remarks>
internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS baseDecoder)
{
    UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
    int charCount = 0;
    byte *byteEnd = bytes + count;
    byte *byteStart = bytes;
    int readByteCount = 0;
    uint iChar = 0;
    DecoderFallbackBuffer fallbackBuffer = null;

    if (decoder != null)
    {
        // Resume a code unit that was split across calls.
        readByteCount = decoder.readByteCount;
        iChar = (uint)decoder.iChar;
        fallbackBuffer = decoder.FallbackBuffer;
    }
    else
    {
        fallbackBuffer = base.decoderFallback.CreateFallbackBuffer();
    }
    fallbackBuffer.InternalInitialize(byteStart, null);

    // A negative count means the total overflowed; stop early and report below.
    while ((bytes < byteEnd) && (charCount >= 0))
    {
        // Accumulate the current byte, then advance.
        if (this.bigEndian)
        {
            iChar = iChar << 8;
            iChar += *bytes;
        }
        else
        {
            iChar = iChar >> 8;
            iChar += (uint)(*bytes) << 0x18;
        }
        bytes++;
        readByteCount++;

        if (readByteCount >= 4)
        {
            readByteCount = 0;

            if ((iChar > 0x10ffff) || ((iChar >= 0xd800) && (iChar <= 0xdfff)))
            {
                // Invalid scalar value: rebuild the original byte order for the fallback.
                byte[] invalidBytes;
                if (this.bigEndian)
                {
                    invalidBytes = new byte[] { (byte)(iChar >> 0x18), (byte)(iChar >> 0x10), (byte)(iChar >> 8), (byte)iChar };
                }
                else
                {
                    invalidBytes = new byte[] { (byte)iChar, (byte)(iChar >> 8), (byte)(iChar >> 0x10), (byte)(iChar >> 0x18) };
                }
                charCount += fallbackBuffer.InternalFallback(invalidBytes, bytes);
                iChar = 0;
            }
            else
            {
                if (iChar >= 0x10000)
                {
                    charCount++;          // extra slot for the surrogate pair
                }
                charCount++;
                iChar = 0;
            }
        }
    }

    // Leftover partial code unit: fall it back when there is no decoder or it must flush.
    if ((readByteCount > 0) && ((decoder == null) || decoder.MustFlush))
    {
        byte[] leftover = new byte[readByteCount];
        if (!this.bigEndian)
        {
            while (readByteCount > 0)
            {
                leftover[--readByteCount] = (byte)(iChar >> 0x18);
                iChar = iChar << 8;
            }
        }
        else
        {
            while (readByteCount > 0)
            {
                leftover[--readByteCount] = (byte)iChar;
                iChar = iChar >> 8;
            }
        }
        charCount += fallbackBuffer.InternalFallback(leftover, bytes);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
    }
    return charCount;
}
/// <summary>
/// Decodes bytes through an EncodingCharBuffer. A 0x2B ('+') byte opens a base64 run, base64 bytes
/// accumulate 6 bits each and emit a char for every 16 bits, and any other byte closes the run.
/// Bytes >= 0x80 outside a run, and non-base64 bytes other than 0x2D ('-') inside one, are passed
/// to the buffer's fallback.
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="byteCount">Number of input bytes.</param>
/// <param name="chars">Start of the output buffer; decoder state is only saved when this is non-null.</param>
/// <param name="charCount">Capacity of the output buffer, in chars.</param>
/// <param name="baseDecoder">Optional decoder carrying bits/bitCount/firstByte between calls.</param>
/// <returns>The buffer's Count.</returns>
internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS baseDecoder)
{
    Decoder decoder = (Decoder)baseDecoder;
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    int bits = 0;
    int bitCount = -1;        // negative: not inside a base64 run
    bool firstByte = false;   // true right after the opening 0x2B
    if (decoder != null)
    {
        bits = decoder.bits;
        bitCount = decoder.bitCount;
        firstByte = decoder.firstByte;
    }

    // A whole char may already be pending from the previous call.
    if (bitCount >= 0x10)
    {
        if (!buffer.AddChar((char)((bits >> (bitCount - 0x10)) & 0xffff)))
        {
            base.ThrowCharsOverflow(decoder, true);
        }
        bitCount -= 0x10;
    }

    while (buffer.MoreData)
    {
        byte nextByte = buffer.GetNextByte();
        int codeUnit;

        if (bitCount >= 0)
        {
            sbyte sextet;
            if ((nextByte < 0x80) && ((sextet = this.base64Values[nextByte]) >= 0))
            {
                firstByte = false;
                bits = (bits << 6) | ((byte)sextet);
                bitCount += 6;
                if (bitCount < 0x10)
                {
                    continue;             // not enough bits for a char yet
                }
                codeUnit = (bits >> (bitCount - 0x10)) & 0xffff;
                bitCount -= 0x10;
            }
            else
            {
                // Anything that is not base64 ends the run.
                bitCount = -1;
                if (nextByte != 0x2d)
                {
                    if (!buffer.Fallback(nextByte))
                    {
                        break;
                    }
                    continue;
                }
                if (!firstByte)
                {
                    continue;             // 0x2D just terminates the run
                }
                codeUnit = 0x2b;          // 0x2B immediately followed by 0x2D emits 0x2B
            }
        }
        else if (nextByte == 0x2b)
        {
            bitCount = 0;
            firstByte = true;
            continue;
        }
        else if (nextByte >= 0x80)
        {
            if (!buffer.Fallback(nextByte))
            {
                break;
            }
            continue;
        }
        else
        {
            codeUnit = nextByte;
        }

        if ((codeUnit >= 0) && !buffer.AddChar((char)codeUnit))
        {
            // Output full: inside a run, re-adjust the byte position and keep the 16 bits pending.
            if (bitCount >= 0)
            {
                buffer.AdjustBytes(1);
                bitCount += 0x10;
            }
            break;
        }
    }

    if ((chars != null) && (decoder != null))
    {
        if (decoder.MustFlush)
        {
            decoder.bits = 0;
            decoder.bitCount = -1;
            decoder.firstByte = false;
        }
        else
        {
            decoder.bits = bits;
            decoder.bitCount = bitCount;
            decoder.firstByte = firstByte;
        }
        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
/// <summary>
/// Decodes single- and double-byte sequences through the mapBytesToUnicode table.
/// A table value of 0xFFFE marks a lead byte; a value of '\0' for a non-zero index is treated as
/// unmapped and routed through the decoder fallback.
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="byteCount">Number of input bytes.</param>
/// <param name="chars">Start of the output buffer.</param>
/// <param name="charCount">Capacity of the output buffer, in chars.</param>
/// <param name="baseDecoder">Optional decoder carrying a pending lead byte (bLeftOver); may be null.</param>
/// <returns>Number of chars written.</returns>
/// <remarks>
/// Fixes relative to the previous (decompiled) text: the output pointer was advanced before being
/// written (skipping slot 0 and writing one past the end), and the returned count was divided by
/// two although C# pointer subtraction already yields an element count.
/// </remarks>
internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS baseDecoder)
{
    base.CheckMemorySection();

    DBCSDecoder decoder = (DBCSDecoder)baseDecoder;
    byte *byteStart = bytes;
    byte *byteEnd = bytes + byteCount;
    char *charStart = chars;
    char *charEnd = chars + charCount;
    bool keepLeadByte = false;            // true when a trailing lead byte is stored in the decoder
    DecoderFallbackBuffer fallbackBuffer = null;

    // Finish a lead byte left over from the previous call.
    if ((decoder != null) && (decoder.bLeftOver > 0))
    {
        if (byteCount == 0)
        {
            if (!decoder.MustFlush)
            {
                return 0;
            }

            // Flushing with no trail byte: the lone lead byte is invalid.
            fallbackBuffer = decoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(bytes, charEnd);
            byte[] leadOnly = new byte[] { decoder.bLeftOver };
            if (!fallbackBuffer.InternalFallback(leadOnly, bytes, ref chars))
            {
                base.ThrowCharsOverflow(decoder, true);
            }
            decoder.bLeftOver = 0;
            return (int)(chars - charStart);
        }

        int pair = decoder.bLeftOver << 8;
        pair |= bytes[0];
        bytes++;

        char mapped = this.mapBytesToUnicode[pair];
        if ((mapped == '\0') && (pair != 0))
        {
            fallbackBuffer = decoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
            byte[] pairBytes = new byte[] { (byte)(pair >> 8), (byte)pair };
            if (!fallbackBuffer.InternalFallback(pairBytes, bytes, ref chars))
            {
                base.ThrowCharsOverflow(decoder, true);
            }
        }
        else
        {
            if (chars >= charEnd)
            {
                base.ThrowCharsOverflow(decoder, true);
            }
            *chars = mapped;
            chars++;
        }
    }

    while (bytes < byteEnd)
    {
        int index = bytes[0];
        bytes++;
        char ch = this.mapBytesToUnicode[index];

        if (ch == 0xfffe)
        {
            // Lead byte: needs a trail byte.
            if (bytes < byteEnd)
            {
                index = index << 8;
                index |= bytes[0];
                bytes++;
                ch = this.mapBytesToUnicode[index];
            }
            else if ((decoder == null) || decoder.MustFlush)
            {
                ch = '\0';                // no trail byte and nowhere to keep it: treat as unmapped
            }
            else
            {
                keepLeadByte = true;
                decoder.bLeftOver = (byte)index;
                break;
            }
        }

        if ((ch == '\0') && (index != 0))
        {
            if (fallbackBuffer == null)
            {
                if (decoder == null)
                {
                    fallbackBuffer = base.DecoderFallback.CreateFallbackBuffer();
                }
                else
                {
                    fallbackBuffer = decoder.FallbackBuffer;
                }
                fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
            }

            byte[] invalidBytes = null;
            if (index < 0x100)
            {
                invalidBytes = new byte[] { (byte)index };
            }
            else
            {
                invalidBytes = new byte[] { (byte)(index >> 8), (byte)index };
            }

            if (fallbackBuffer.InternalFallback(invalidBytes, bytes, ref chars))
            {
                continue;
            }

            // Fallback output did not fit: un-read the sequence and stop (or throw).
            bytes -= invalidBytes.Length;
            fallbackBuffer.InternalReset();
            base.ThrowCharsOverflow(decoder, bytes == byteStart);
            break;
        }

        if (chars >= charEnd)
        {
            // No room: un-read one or two bytes depending on the sequence length.
            bytes--;
            if (index >= 0x100)
            {
                bytes--;
            }
            base.ThrowCharsOverflow(decoder, bytes == byteStart);
            break;
        }

        *chars = ch;
        chars++;
    }

    if (decoder != null)
    {
        if (!keepLeadByte)
        {
            decoder.bLeftOver = 0;
        }
        decoder.m_bytesUsed = (int)(bytes - byteStart);
    }
    return (int)(chars - charStart);
}
// Counts the chars produced by decoding `count` bytes starting at `bytes`; written as a goto-driven state machine.
//
// State:
//   pSrc     - read cursor, walks from `bytes` to `a` (= bytes + count).
//   num      - running result; starts at `count` and is decremented as multi-byte sequences are recognised.
//   ch       - current partial sequence; seeded from decoder.bits when baseDecoder is non-null, and in that
//              case `ch >> 30` is subtracted from num up front.
//   fallback - created lazily (base.decoderFallback.CreateFallbackBuffer() when baseDecoder is null, otherwise
//              baseDecoder.FallbackBuffer) and initialised with (bytes, null).
//
// Labels:
//   Label_0027 - loop head: exits to Label_0336 at end of input; otherwise reads a first byte (ch == 0) or a
//                following byte, which must satisfy (b & -64) == 0x80.
//   Label_00C9 - invalid sequence: adds FallbackInvalidByteSequence(pSrc, ch, fallback) to num and clears ch.
//   Label_010D - classifies a byte > 0x7f using its 0x40/0x20/0x10 bits and packs marker bits into ch.
//   Label_0183 - scans ahead for bytes <= 0x7f; when more than 13 bytes remain it reads 2/4/8 bytes at a time
//                through ushort*/int* casts after stepping pSrc by its low address bits.
//   Label_0336 - end of input: returns num when ch == 0; otherwise adds `ch >> 30` back and, unless a decoder
//                is present and not flushing, adds the fallback count for the incomplete sequence.
//
// NOTE(review): the packed constants (0x504d0c00, 0x48228000, 0x800000, 0x20000000, 0x10000000) presumably
// encode the expected sequence length and bytes still needed - confirm against the matching GetChars.
// NOTE(review): `((int) pSrc)` truncates the pointer; only the low bits are inspected - confirm this is
// compiled in an unchecked context.
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder) { int num4; byte* pSrc = bytes; byte* a = pSrc + count; int num = count; int ch = 0; DecoderFallbackBuffer fallback = null; if (baseDecoder != null) { UTF8Decoder decoder = (UTF8Decoder) baseDecoder; ch = decoder.bits; num -= ch >> 30; } Label_0027: if (pSrc >= a) { goto Label_0336; } if (ch == 0) { ch = pSrc[0]; pSrc++; goto Label_010D; } int num3 = pSrc[0]; pSrc++; if ((num3 & -64) != 0x80) { pSrc--; num += ch >> 30; } else { ch = (ch << 6) | (num3 & 0x3f); if ((ch & 0x20000000) == 0) { if ((ch & 0x10000000) != 0) { if (((ch & 0x800000) != 0) || InRange(ch & 0x1f0, 0x10, 0x100)) { goto Label_0027; } } else if (((ch & 0x3e0) != 0) && ((ch & 0x3e0) != 0x360)) { goto Label_0027; } } else { if ((ch & 0x101f0000) == 0x10000000) { num--; } goto Label_0183; } } Label_00C9: if (fallback == null) { if (baseDecoder == null) { fallback = base.decoderFallback.CreateFallbackBuffer(); } else { fallback = baseDecoder.FallbackBuffer; } fallback.InternalInitialize(bytes, null); } num += this.FallbackInvalidByteSequence(pSrc, ch, fallback); ch = 0; goto Label_0027; Label_010D: if (ch > 0x7f) { num--; if ((ch & 0x40) == 0) { goto Label_00C9; } if ((ch & 0x20) != 0) { if ((ch & 0x10) != 0) { ch &= 15; if (ch > 4) { ch |= 240; goto Label_00C9; } ch |= 0x504d0c00; num--; } else { ch = (ch & 15) | 0x48228000; num--; } } else { ch &= 0x1f; if (ch <= 1) { ch |= 0xc0; goto Label_00C9; } ch |= 0x800000; } goto Label_0027; } Label_0183: num4 = PtrDiff(a, pSrc); if (num4 <= 13) { byte* numPtr3 = a; while (pSrc < numPtr3) { ch = pSrc[0]; pSrc++; if (ch > 0x7f) { goto Label_010D; } } ch = 0; goto Label_0336; } byte* numPtr4 = (pSrc + num4) - 7; while (pSrc < numPtr4) { int num6; ch = pSrc[0]; pSrc++; if (ch > 0x7f) { goto Label_025A; } if ((((int) pSrc) & 1) != 0) { ch = pSrc[0]; pSrc++; if (ch > 0x7f) { goto Label_025A; } } if ((((int) pSrc) & 2) != 0) { ch = *((ushort*) pSrc); if ((ch & 0x8080) != 0) { 
goto Label_0245; } pSrc += 2; } while (pSrc < numPtr4) { ch = *((int*) pSrc); int num5 = *((int*) (pSrc + 4)); if (((ch | num5) & -2139062144) != 0) { goto Label_0245; } pSrc += 8; if (pSrc >= numPtr4) { break; } ch = *((int*) pSrc); num5 = *((int*) (pSrc + 4)); if (((ch | num5) & -2139062144) != 0) { goto Label_0245; } pSrc += 8; } break; Label_0245: ch &= 0xff; pSrc++; if (ch <= 0x7f) { continue; } Label_025A: num6 = pSrc[0]; pSrc++; if (((ch & 0x40) == 0) || ((num6 & -64) != 0x80)) { goto Label_032A; } num6 &= 0x3f; if ((ch & 0x20) != 0) { num6 |= (ch & 15) << 6; if ((ch & 0x10) != 0) { ch = pSrc[0]; if (!InRange(num6 >> 4, 1, 0x10) || ((ch & -64) != 0x80)) { goto Label_032A; } num6 = (num6 << 6) | (ch & 0x3f); ch = pSrc[1]; if ((ch & -64) != 0x80) { goto Label_032A; } pSrc += 2; num--; } else { ch = pSrc[0]; if ((((num6 & 0x3e0) == 0) || ((num6 & 0x3e0) == 0x360)) || ((ch & -64) != 0x80)) { goto Label_032A; } pSrc++; num--; } } else if ((ch & 30) == 0) { goto Label_032A; } num--; } ch = 0; goto Label_0027; Label_032A: pSrc -= 2; ch = 0; goto Label_0027; Label_0336: if (ch == 0) { return num; } num += ch >> 30; if ((baseDecoder != null) && !baseDecoder.MustFlush) { return num; } if (fallback == null) { if (baseDecoder == null) { fallback = base.decoderFallback.CreateFallbackBuffer(); } else { fallback = baseDecoder.FallbackBuffer; } fallback.InternalInitialize(bytes, null); } return (num + this.FallbackInvalidByteSequence(pSrc, ch, fallback)); }
// Abstract pointer-based conversion entry point: takes an input byte pointer with a byte count, an output
// char pointer with a char count, and a DecoderNLS, and returns an int. Derived types supply the body.
// NOTE(review): presumably the return value is the number of chars written and `decoder` may be null when
// no state is carried between calls - confirm against the implementations.
public unsafe abstract int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS decoder);
/// <summary>
/// Obtains the char count by calling GetChars with a null output pointer and zero capacity.
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="count">Number of input bytes.</param>
/// <param name="baseDecoder">Decoder passed through to GetChars.</param>
/// <returns>Whatever GetChars returns for that call.</returns>
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    // NOTE(review): relies on GetChars treating a null output pointer as "count only" - confirm.
    char* noOutput = null;
    int result = GetChars(bytes, count, noOutput, 0, baseDecoder);
    return result;
}
/// <summary>
/// Counts the chars that decoding <paramref name="count"/> bytes of 16-bit code units would produce.
/// Starts from count / 2 and corrects for unpaired surrogates and odd trailing bytes, each of which is
/// replaced by whatever the decoder fallback reports.
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="count">Number of input bytes.</param>
/// <param name="baseDecoder">Optional decoder carrying a pending byte and/or high surrogate; may be null.</param>
/// <returns>The resulting char count.</returns>
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;

    byte* byteStart = bytes;
    byte* byteEnd = bytes + count;

    // End marker for the 8-bytes-at-a-time loop (it may never be used).
    ulong* longEnd = (ulong*)(byteEnd - 7);

    int pendingByte = -1;            // first half of a code unit, or -1 when none
    char pendingHigh = (char)0;      // unmatched high surrogate, or 0 when none
    int total = count >> 1;          // optimistic: one char per two bytes
    DecoderFallbackBuffer fallbackBuffer = null;

    if (decoder != null)
    {
        pendingByte = decoder.lastByte;
        pendingHigh = decoder.lastChar;

        // A carried-over high surrogate occupies a slot of its own.
        if (pendingHigh > 0)
        {
            total++;
        }

        // A carried-over byte plus an odd input length completes one more code unit.
        if (pendingByte >= 0 && (count & 1) == 1)
        {
            total++;
        }
    }

    while (bytes < byteEnd)
    {
        // Fast path: aligned, native byte order, nothing pending.
#if !NO_FAST_UNICODE_LOOP
#if BIGENDIAN
        if (bigEndian &&
#else // BIGENDIAN
        if (!bigEndian &&
#endif // BIGENDIAN
#if WIN64
            (unchecked((long)bytes) & 7) == 0 &&
#else
            (unchecked((int)bytes) & 3) == 0 &&
#endif // WIN64
            pendingByte == -1 && pendingHigh == 0)
        {
            ulong* quad = (ulong*)bytes;

            while (quad < longEnd)
            {
                // Only code units with the 0x8000 bit set can be surrogates.
                if ((0x8000800080008000 & *quad) != 0)
                {
                    // A zero 16-bit lane here means that code unit is in 0xD800-0xDFFF.
                    ulong surrogateMask = (0xf800f800f800f800 & *quad) ^ 0xd800d800d800d800;

                    if ((surrogateMask & 0xFFFF000000000000) == 0 ||
                        (surrogateMask & 0x0000FFFF00000000) == 0 ||
                        (surrogateMask & 0x00000000FFFF0000) == 0 ||
                        (surrogateMask & 0x000000000000FFFF) == 0)
                    {
                        // At least one surrogate: acceptable only as high/low/high/low.
#if BIGENDIAN
                        if (((0xfc00fc00fc00fc00 & *quad) ^ 0xd800dc00d800dc00) != 0)
#else
                        if (((0xfc00fc00fc00fc00 & *quad) ^ 0xdc00d800dc00d800) != 0)
#endif
                        {
                            // Let the slow loop sort the surrogates out.
                            break;
                        }
                    }
                }

                // These four code units need no correction.
                quad++;
            }

            bytes = (byte*)quad;

            if (bytes >= byteEnd)
            {
                break;
            }
        }
#endif // !NO_FAST_UNICODE_LOOP

        // First half of the code unit.
        if (pendingByte < 0)
        {
            pendingByte = *bytes++;
            if (bytes >= byteEnd)
            {
                break;
            }
        }

        // Second half; combine per byte order.
        byte second = *bytes;
        bytes++;
        char ch = bigEndian
            ? (char)(pendingByte << 8 | second)
            : (char)(second << 8 | pendingByte);
        pendingByte = -1;

        bool isSurrogate = ch >= 0xd800 && ch <= 0xdfff;

        if (!isSurrogate)
        {
            if (pendingHigh > 0)
            {
                // High surrogate followed by an ordinary char: replace the high surrogate.
                total--;
                total += CountFallbackChars(ref fallbackBuffer, decoder, byteStart, bytes, GetCodeUnitBytes(pendingHigh));
                pendingHigh = (char)0;
            }
            continue;
        }

        if (ch <= 0xdbff)
        {
            // High surrogate: a previous unmatched one is replaced, this one becomes pending.
            if (pendingHigh > 0)
            {
                total--;
                total += CountFallbackChars(ref fallbackBuffer, decoder, byteStart, bytes, GetCodeUnitBytes(pendingHigh));
            }
            pendingHigh = ch;
            continue;
        }

        if (pendingHigh == 0)
        {
            // Low surrogate with no preceding high surrogate: replace it.
            total--;
            total += CountFallbackChars(ref fallbackBuffer, decoder, byteStart, bytes, GetCodeUnitBytes(ch));
            continue;
        }

        // Valid pair; both halves are already counted.
        pendingHigh = (char)0;
    }

    // Nothing may stay pending when there is no decoder or it must flush.
    if (decoder == null || decoder.MustFlush)
    {
        if (pendingHigh > 0)
        {
            total--;
            total += CountFallbackChars(ref fallbackBuffer, decoder, byteStart, bytes, GetCodeUnitBytes(pendingHigh));
            pendingHigh = (char)0;
        }

        if (pendingByte >= 0)
        {
            total += CountFallbackChars(ref fallbackBuffer, decoder, byteStart, bytes, new byte[] { unchecked((byte)pendingByte) });
            pendingByte = -1;
        }
    }

    // A high surrogate that stays in the decoder is not output by this call.
    if (pendingHigh > 0)
    {
        total--;
    }

    return total;
}

// Returns the two bytes of a 16-bit code unit in this encoding's byte order.
private byte[] GetCodeUnitBytes(char codeUnit)
{
    if (bigEndian)
    {
        return new byte[] { unchecked((byte)(codeUnit >> 8)), unchecked((byte)codeUnit) };
    }
    return new byte[] { unchecked((byte)codeUnit), unchecked((byte)(codeUnit >> 8)) };
}

// Creates the fallback buffer on first use and returns the fallback's char count for invalidBytes.
private unsafe int CountFallbackChars(ref DecoderFallbackBuffer fallbackBuffer, UnicodeEncoding.Decoder decoder, byte* byteStart, byte* bytes, byte[] invalidBytes)
{
    if (fallbackBuffer == null)
    {
        if (decoder == null)
        {
            fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
        }
        else
        {
            fallbackBuffer = decoder.FallbackBuffer;
        }

        fallbackBuffer.InternalInitialize(byteStart, null);
    }

    return fallbackBuffer.InternalFallback(invalidBytes, bytes);
}
/// <summary>
/// Decodes bytes through an EncodingCharBuffer using the IndicMapping tables. Bytes below 160 pass
/// through unchanged; 0xEF starts a code-page switch; 0xE8, 0xE9 and 0xF0 combine with the following
/// byte; all other bytes are looked up in the table for the current code page.
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="byteCount">Number of input bytes.</param>
/// <param name="chars">Start of the output buffer; decoder state is only saved when this is non-null.</param>
/// <param name="charCount">Capacity of the output buffer, in chars.</param>
/// <param name="baseDecoder">Optional decoder carrying the code page and pending-byte flags; may be null.</param>
/// <returns>The buffer's Count.</returns>
/// <remarks>
/// Fix: the four special output characters are now written as explicit \u escapes. Two of them
/// (the zero-width characters emitted after 0xE8) had degraded to empty-looking char literals, and
/// the other two were raw non-ASCII literals that depend on the file's encoding.
/// NOTE(review): U+200C for 0xE8+0xE8 and U+200D for 0xE8+0xE9 follow the reference ISCII decoder -
/// confirm against the original source.
/// </remarks>
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    ISCIIDecoder decoder = (ISCIIDecoder) baseDecoder;
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    int currentCodePage = this.defaultCodePage;
    bool bLastATR = false;                      // previous byte was 0xEF
    bool bLastVirama = false;                   // previous byte was 0xE8
    bool bLastDevenagariStressAbbr = false;     // previous byte was the 0xF0 prefix
    char cLastCharForNextNukta = '\0';          // emitted if the next byte is 0xE9
    char cLastCharForNoNextNukta = '\0';        // emitted otherwise

    if (decoder != null)
    {
        currentCodePage = decoder.currentCodePage;
        bLastATR = decoder.bLastATR;
        bLastVirama = decoder.bLastVirama;
        bLastDevenagariStressAbbr = decoder.bLastDevenagariStressAbbr;
        cLastCharForNextNukta = decoder.cLastCharForNextNukta;
        cLastCharForNoNextNukta = decoder.cLastCharForNoNextNukta;
    }

    // One flag to test per byte instead of four.
    bool bLastSpecial = ((bLastVirama | bLastATR) | bLastDevenagariStressAbbr) | (cLastCharForNextNukta != '\0');

    int currentCodePageIndex = -1;
    if ((currentCodePage >= 2) && (currentCodePage <= 11))
    {
        currentCodePageIndex = IndicMappingIndex[currentCodePage];
    }

    while (buffer.MoreData)
    {
        byte b = buffer.GetNextByte();

        if (bLastSpecial)
        {
            bLastSpecial = false;

            if (bLastATR)
            {
                // 0x42-0x4B select a code page by low nibble.
                if ((b >= 0x42) && (b <= 0x4b))
                {
                    currentCodePage = b & 15;
                    currentCodePageIndex = IndicMappingIndex[currentCodePage];
                    bLastATR = false;
                    continue;
                }

                // 0x40 and 0x41 both revert to the encoding's default code page.
                if ((b == 0x40) || (b == 0x41))
                {
                    currentCodePage = this.defaultCodePage;
                    currentCodePageIndex = -1;
                    if ((currentCodePage >= 2) && (currentCodePage <= 11))
                    {
                        currentCodePageIndex = IndicMappingIndex[currentCodePage];
                    }
                    bLastATR = false;
                    continue;
                }

                // Not a recognised selector: fall back the 0xEF, then process b normally.
                if (!buffer.Fallback((byte) 0xef))
                {
                    break;
                }
                bLastATR = false;
            }
            else if (bLastVirama)
            {
                if (b == 0xe8)
                {
                    // 0xE8 0xE8 -> U+200C (zero width non-joiner)
                    if (!buffer.AddChar('\u200C'))
                    {
                        break;
                    }
                    bLastVirama = false;
                    continue;
                }

                if (b == 0xe9)
                {
                    // 0xE8 0xE9 -> U+200D (zero width joiner)
                    if (!buffer.AddChar('\u200D'))
                    {
                        break;
                    }
                    bLastVirama = false;
                    continue;
                }

                bLastVirama = false;
            }
            else if (bLastDevenagariStressAbbr)
            {
                if (b == 0xb8)
                {
                    // 0xF0 0xB8 -> U+0952
                    if (!buffer.AddChar('\u0952'))
                    {
                        break;
                    }
                    bLastDevenagariStressAbbr = false;
                    continue;
                }

                if (b == 0xbf)
                {
                    // 0xF0 0xBF -> U+0970
                    if (!buffer.AddChar('\u0970'))
                    {
                        break;
                    }
                    bLastDevenagariStressAbbr = false;
                    continue;
                }

                // Unknown extension: fall back the 0xF0 prefix, then process b normally.
                if (!buffer.Fallback((byte) 240))
                {
                    break;
                }
                bLastDevenagariStressAbbr = false;
            }
            else
            {
                // A char was held back waiting to see whether 0xE9 follows.
                if (b == 0xe9)
                {
                    if (!buffer.AddChar(cLastCharForNextNukta))
                    {
                        break;
                    }
                    cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
                    continue;
                }

                if (!buffer.AddChar(cLastCharForNoNextNukta))
                {
                    break;
                }
                cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
            }
        }

        // Bytes below 160 map to the same char value.
        if (b < 160)
        {
            if (buffer.AddChar((char) b))
            {
                continue;
            }
            break;
        }

        if (b == 0xef)
        {
            bLastATR = bLastSpecial = true;
        }
        else
        {
            // NOTE(review): currentCodePageIndex is -1 when the code page is outside 2..11 - confirm
            // callers never reach this lookup in that state.
            char ch = IndicMapping[currentCodePageIndex, 0, b - 160];
            char cAlt = IndicMapping[currentCodePageIndex, 1, b - 160];

            if ((cAlt == '\0') || (b == 0xe9))
            {
                // Plain mapping (or unmapped, which falls back).
                if (ch == '\0')
                {
                    if (buffer.Fallback(b))
                    {
                        continue;
                    }
                }
                else if (buffer.AddChar(ch))
                {
                    continue;
                }
                break;
            }

            if (b == 0xe8)
            {
                if (!buffer.AddChar(ch))
                {
                    break;
                }
                bLastVirama = bLastSpecial = true;
            }
            else
            {
                if ((cAlt & 0xf000) == 0)
                {
                    // Hold the char until the next byte shows which variant to emit.
                    bLastSpecial = true;
                    cLastCharForNextNukta = cAlt;
                    cLastCharForNoNextNukta = ch;
                    continue;
                }
                bLastDevenagariStressAbbr = bLastSpecial = true;
            }
        }
    }

    // Resolve pending state when there is no decoder or it must flush.
    if ((decoder == null) || decoder.MustFlush)
    {
        if (bLastATR)
        {
            if (buffer.Fallback((byte) 0xef))
            {
                bLastATR = false;
            }
            else
            {
                buffer.GetNextByte();
            }
        }
        else if (bLastDevenagariStressAbbr)
        {
            if (buffer.Fallback((byte) 240))
            {
                bLastDevenagariStressAbbr = false;
            }
            else
            {
                buffer.GetNextByte();
            }
        }
        else if (cLastCharForNoNextNukta != '\0')
        {
            if (buffer.AddChar(cLastCharForNoNextNukta))
            {
                cLastCharForNoNextNukta = cLastCharForNextNukta = '\0';
            }
            else
            {
                buffer.GetNextByte();
            }
        }
    }

    if ((decoder != null) && (chars != null))
    {
        if ((!decoder.MustFlush || (cLastCharForNoNextNukta != '\0')) || (bLastATR || bLastDevenagariStressAbbr))
        {
            // Keep state for the next call.
            decoder.currentCodePage = currentCodePage;
            decoder.bLastVirama = bLastVirama;
            decoder.bLastATR = bLastATR;
            decoder.bLastDevenagariStressAbbr = bLastDevenagariStressAbbr;
            decoder.cLastCharForNextNukta = cLastCharForNextNukta;
            decoder.cLastCharForNoNextNukta = cLastCharForNoNextNukta;
        }
        else
        {
            // Flushed cleanly: reset the decoder.
            decoder.currentCodePage = this.defaultCodePage;
            decoder.bLastVirama = false;
            decoder.bLastATR = false;
            decoder.bLastDevenagariStressAbbr = false;
            decoder.cLastCharForNextNukta = '\0';
            decoder.cLastCharForNoNextNukta = '\0';
        }
        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Workhorse
/// <summary>
/// Counts chars by delegating to GetChars with a null output pointer, which signals "count only".
/// </summary>
/// <param name="bytes">Start of the input bytes.</param>
/// <param name="count">Number of input bytes.</param>
/// <param name="baseDecoder">Decoder passed through to GetChars.</param>
/// <returns>The count reported by GetChars.</returns>
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    char* countOnly = null;
    int charsNeeded = GetChars(bytes, count, countOnly, 0, baseDecoder);
    return charsNeeded;
}
/// <summary>
/// Counts chars by delegating to GetChars with a null output pointer.
/// Arguments are only asserted because internal callers have already validated them.
/// </summary>
/// <param name="bytes">Start of the input bytes; must not be null.</param>
/// <param name="count">Number of input bytes; must not be negative.</param>
/// <param name="baseDecoder">Decoder passed through to GetChars.</param>
/// <returns>The count reported by GetChars.</returns>
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    Contract.Assert(bytes != null, "[ISO2022Encoding.GetCharCount]bytes is null");
    Contract.Assert(count >= 0, "[ISO2022Encoding.GetCharCount]byteCount is negative");

    // A null char* asks GetChars for the count only.
    char* countOnly = null;
    int charsNeeded = GetChars(bytes, count, countOnly, 0, baseDecoder);
    return charsNeeded;
}
/// <summary>
/// Sets up the byte and char cursors over the caller's buffers and picks the fallback buffer:
/// the decoder's when a decoder is supplied, otherwise a new one from the encoding's DecoderFallback.
/// </summary>
/// <param name="enc">Encoding that owns this conversion.</param>
/// <param name="decoder">Optional decoder; may be null.</param>
/// <param name="charStart">Start of the output buffer.</param>
/// <param name="charCount">Capacity of the output buffer, in chars.</param>
/// <param name="byteStart">Start of the input bytes.</param>
/// <param name="byteCount">Number of input bytes.</param>
[System.Security.SecurityCritical]  // auto-generated
internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount, byte* byteStart, int byteCount)
{
    this.enc = enc;
    this.decoder = decoder;

    // Input window.
    this.byteStart = byteStart;
    this.bytes = byteStart;
    this.byteEnd = byteStart + byteCount;

    // Output window.
    this.charStart = charStart;
    this.chars = charStart;
    this.charEnd = charStart + charCount;

    this.fallbackBuffer = (this.decoder == null)
        ? enc.DecoderFallback.CreateFallbackBuffer()
        : this.decoder.FallbackBuffer;

    // Fallback output is not expected to carry over between GetChars/GetCharCount calls.
    Contract.Assert(fallbackBuffer.Remaining == 0,
        "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");

    fallbackBuffer.InternalInitialize(bytes, charEnd);
}
/// <summary>
/// Dispatches to the code-page-specific decoding routine selected by CodePage.
/// Arguments are only asserted because internal callers have already validated them.
/// </summary>
/// <param name="bytes">Start of the input bytes; must not be null.</param>
/// <param name="byteCount">Number of input bytes; must not be negative.</param>
/// <param name="chars">Start of the output buffer.</param>
/// <param name="charCount">Capacity of the output buffer; must not be negative.</param>
/// <param name="baseDecoder">Decoder, cast to ISO2022Decoder and passed through.</param>
/// <returns>The selected routine's result, or 0 for an unexpected code page.</returns>
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    Contract.Assert(bytes != null, "[ISO2022Encoding.GetChars]bytes is null");
    Contract.Assert(byteCount >= 0, "[ISO2022Encoding.GetChars]byteCount is negative");
    Contract.Assert(charCount >= 0, "[ISO2022Encoding.GetChars]charCount is negative");

    ISO2022Decoder decoder = (ISO2022Decoder)baseDecoder;

    // 50227 has no case here: it is currently the same as 936.
    switch (CodePage)
    {
        case 50220:
        case 50221:
        case 50222:
            return GetCharsCP5022xJP(bytes, byteCount, chars, charCount, decoder);

        case 50225:
            return GetCharsCP50225KR(bytes, byteCount, chars, charCount, decoder);

        case 52936:
            return GetCharsCP52936(bytes, byteCount, chars, charCount, decoder);

        default:
            Contract.Assert(false, "[ISO2022Encoding.GetChars] had unexpected code page");
            return 0;
    }
}
/// <summary>
/// Converts bytes to chars one-for-one; any byte >= 0x80 is invalid and goes through the decoder
/// fallback. A one-char replacement fallback takes a fast path that needs no fallback buffer.
/// </summary>
/// <param name="bytes">Start of the input bytes; must not be null.</param>
/// <param name="byteCount">Number of input bytes.</param>
/// <param name="chars">Start of the output buffer; must not be null.</param>
/// <param name="charCount">Capacity of the output buffer, in chars.</param>
/// <param name="decoder">Optional decoder; when non-null it supplies the fallback and receives m_bytesUsed.</param>
/// <returns>Number of chars written.</returns>
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
{
    // Internal callers have validated the arguments already, so only assert.
    Contract.Assert(bytes != null, "[ASCIIEncoding.GetChars]bytes is null");
    Contract.Assert(byteCount >= 0, "[ASCIIEncoding.GetChars]byteCount is negative");
    Contract.Assert(chars != null, "[ASCIIEncoding.GetChars]chars is null");
    Contract.Assert(charCount >= 0, "[ASCIIEncoding.GetChars]charCount is negative");

    byte* firstByte = bytes;
    byte* byteLimit = bytes + byteCount;
    char* firstChar = chars;

    // Find out which fallback is in effect.
    DecoderReplacementFallback replacement;
    if (decoder != null)
    {
        replacement = decoder.Fallback as DecoderReplacementFallback;
        Contract.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
            decoder.FallbackBuffer.Remaining == 0,
            "[ASCIICodePageEncoding.GetChars]Expected empty fallback buffer");
    }
    else
    {
        replacement = this.DecoderFallback as DecoderReplacementFallback;
    }

    if (replacement != null && replacement.MaxCharCount == 1)
    {
        // Fast path: output length equals input length.
        char substitute = replacement.DefaultString[0];

        if (charCount < byteCount)
        {
            // Throws when required; otherwise convert only what fits.
            ThrowCharsOverflow(decoder, charCount < 1);
            byteLimit = bytes + charCount;
        }

        for (; bytes < byteLimit; bytes++, chars++)
        {
            byte value = *bytes;
            *chars = value >= 0x80 ? substitute : unchecked((char)value);
        }

        if (decoder != null)
        {
            decoder.m_bytesUsed = (int)(bytes - firstByte);
        }
        return (int)(chars - firstChar);
    }

    // Slow path: arbitrary fallback, buffer created on the first invalid byte.
    DecoderFallbackBuffer fallbackBuffer = null;
    byte[] invalidByte = new byte[1];
    char* charLimit = chars + charCount;

    while (bytes < byteLimit)
    {
        byte value = *bytes;
        bytes++;

        if (value < 0x80)
        {
            if (chars >= charLimit)
            {
                Contract.Assert(bytes > firstByte || chars == firstChar,
                    "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (normal case)");
                bytes--;                                          // byte was not consumed
                ThrowCharsOverflow(decoder, chars == firstChar);  // may throw
                break;
            }

            *chars = unchecked((char)value);
            chars++;
            continue;
        }

        if (fallbackBuffer == null)
        {
            fallbackBuffer = (decoder == null)
                ? this.DecoderFallback.CreateFallbackBuffer()
                : decoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(byteLimit - byteCount, charLimit);
        }

        invalidByte[0] = value;

        // chars is only advanced when the fallback succeeds.
        if (!fallbackBuffer.InternalFallback(invalidByte, bytes, ref chars))
        {
            Contract.Assert(bytes > firstByte || chars == firstChar,
                "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (fallback case)");
            bytes--;                                              // byte was not consumed
            fallbackBuffer.InternalReset();
            ThrowCharsOverflow(decoder, chars == firstChar);      // may throw
            break;
        }
    }

    if (decoder != null)
    {
        decoder.m_bytesUsed = (int)(bytes - firstByte);
    }

    Contract.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
        "[ASCIIEncoding.GetChars]Expected Empty fallback buffer");

    return (int)(chars - firstChar);
}
[System.Security.SecurityCritical] // auto-generated
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    Contract.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
    Contract.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");

    // Counting reuses the decoding routine: a null output pointer with zero
    // capacity is passed and whatever GetChars returns is the count.
    int charsNeeded = this.GetChars(bytes, count, null, 0, baseDecoder);
    return charsNeeded;
}
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
{
    // Callers are internal and have already validated the arguments.
    Debug.Assert(bytes != null, "[Latin1Encoding.GetChars]bytes is null");
    Debug.Assert(byteCount >= 0, "[Latin1Encoding.GetChars]byteCount is negative");
    Debug.Assert(chars != null, "[Latin1Encoding.GetChars]chars is null");
    Debug.Assert(charCount >= 0, "[Latin1Encoding.GetChars]charCount is negative");

    // One output char per input byte, so the output must hold byteCount chars.
    if (charCount < byteCount)
    {
        // Throws when required; otherwise only as many bytes as fit are decoded.
        ThrowCharsOverflow(decoder, charCount < 1);
        byteCount = charCount;
    }

    // Each byte value is widened to the char with the same numeric value.
    for (int i = 0; i < byteCount; i++)
    {
        chars[i] = unchecked((char)bytes[i]);
    }

    // Tell the decoder how much input was consumed.
    if (decoder != null)
    {
        decoder.m_bytesUsed = byteCount;
    }

    // Chars written equals bytes consumed.
    return byteCount;
}
// Decoder-aware char counting hook. This base version does not use the decoder;
// it simply forwards to the two-argument GetCharCount(byte*, int) overload.
internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    int charCount = GetCharCount(bytes, count);
    return charCount;
}
// Counts chars by running GetChars with a null output pointer and zero
// output capacity, returning whatever GetChars reports.
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    int charsNeeded = GetChars(bytes, count, (char*)null, 0, baseDecoder);
    return charsNeeded;
}
// Handles an output-buffer overflow while decoding.
// Raises the overflow (via the parameterless ThrowCharsOverflow) when there is no
// decoder, when the decoder is marked throw-on-overflow, or when nothing was decoded.
// Otherwise it clears the decoder's must-flush state and returns.
internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
{
    bool mustThrow = decoder == null || decoder.m_throwOnOverflow || nothingDecoded;

    if (mustThrow)
    {
        // Discard any pending fallback output before raising.
        if (decoder != null && decoder.InternalHasFallbackBuffer)
        {
            decoder.FallbackBuffer.InternalReset();
        }

        // NOTE(review): presumably this never returns; if it did with a null
        // decoder, the call below would dereference null - confirm.
        ThrowCharsOverflow();
    }

    decoder.ClearMustFlush();
}
// Sets up a cursor pair over an input byte range and an output char range,
// and picks/initializes the decoder fallback buffer used while decoding.
internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount, byte* byteStart, int byteCount)
{
    this.enc = enc;
    this.decoder = decoder;

    // Output range: current position starts at the beginning.
    this.charStart = charStart;
    this.chars = charStart;
    this.charEnd = charStart + charCount;

    // Input range: current position starts at the beginning.
    this.byteStart = byteStart;
    this.bytes = byteStart;
    this.byteEnd = byteStart + byteCount;

    // Use the decoder's fallback buffer when we have a decoder,
    // otherwise create a fresh one from the encoding's fallback.
    this.fallbackBuffer = decoder == null
        ? enc.DecoderFallback.CreateFallbackBuffer()
        : decoder.FallbackBuffer;

    this.fallbackBuffer.InternalInitialize(this.bytes, this.charEnd);
}
// Counts the chars produced by decoding `count` bytes with the byte->char table,
// where a table value of 0xfffe marks a byte that needs a second byte.
// Starts from one char per byte and adjusts as two-byte codes and fallbacks are seen.
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    base.CheckMemorySection();

    DBCSDecoder dbcsDecoder = (DBCSDecoder)baseDecoder;
    DecoderFallbackBuffer fallback = null;
    byte* inputEnd = bytes + count;
    int charTotal = count;

    // A byte left over from the previous call must be paired with the first new byte.
    if (dbcsDecoder != null && dbcsDecoder.bLeftOver > 0)
    {
        if (count == 0)
        {
            // No new input: the leftover only matters when flushing.
            if (!dbcsDecoder.MustFlush)
            {
                return 0;
            }

            fallback = dbcsDecoder.FallbackBuffer;
            fallback.InternalInitialize(bytes, null);
            byte[] leftoverOnly = new byte[] { dbcsDecoder.bLeftOver };
            return fallback.InternalFallback(leftoverOnly, bytes);
        }

        // Combine leftover (high) with the first input byte (low).
        int pairCode = (dbcsDecoder.bLeftOver << 8) | bytes[0];
        bytes++;

        // Unmapped (table holds 0 for a non-zero code): count the fallback output instead.
        if ((this.mapBytesToUnicode[pairCode] == '\0') && (pairCode != 0))
        {
            charTotal--;
            fallback = dbcsDecoder.FallbackBuffer;
            fallback.InternalInitialize(inputEnd - count, null);
            byte[] badPair = new byte[] { (byte)(pairCode >> 8), (byte)pairCode };
            charTotal += fallback.InternalFallback(badPair, bytes);
        }
    }

    while (bytes < inputEnd)
    {
        int code = bytes[0];
        bytes++;
        char mapped = this.mapBytesToUnicode[code];

        if (mapped == 0xfffe)
        {
            // Two bytes make one char, so one reserved char is released.
            charTotal--;

            if (bytes < inputEnd)
            {
                code = (code << 8) | bytes[0];
                bytes++;
                mapped = this.mapBytesToUnicode[code];
            }
            else
            {
                // Input ended after the first byte of a pair.
                if (dbcsDecoder != null && !dbcsDecoder.MustFlush)
                {
                    // The decoder will carry it to the next call.
                    return charTotal;
                }

                // Flushing (or no decoder): treat the lone byte as unmapped.
                charTotal++;
                mapped = '\0';
            }
        }

        if (mapped == '\0' && code != 0)
        {
            // Unmapped code: lazily set up the fallback buffer, then count its output.
            if (fallback == null)
            {
                fallback = dbcsDecoder == null
                    ? base.DecoderFallback.CreateFallbackBuffer()
                    : dbcsDecoder.FallbackBuffer;
                fallback.InternalInitialize(inputEnd - count, null);
            }

            charTotal--;

            byte[] badBytes = code < 0x100
                ? new byte[] { (byte)code }
                : new byte[] { (byte)(code >> 8), (byte)code };

            charTotal += fallback.InternalFallback(badBytes, bytes);
        }
    }

    return charTotal;
}
// Decoder-aware decoding hook. This base version does not use the decoder;
// it forwards to the four-argument GetChars(byte*, int, char*, int) overload.
internal virtual unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
{
    int charsWritten = GetChars(bytes, byteCount, chars, charCount);
    return charsWritten;
}
// Counts the chars that decoding `count` single-byte-code-page bytes would produce.
// Invoked by internal callers that have already validated the arguments.
public override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    Debug.Assert(bytes != null, "[SBCSCodePageEncoding.GetCharCount]bytes is null");
    Debug.Assert(count >= 0, "[SBCSCodePageEncoding.GetCharCount]byteCount is negative");

    CheckMemorySection();

    // Find out which kind of fallback is active: replacement, best fit, or something else.
    DecoderReplacementFallback replacement;
    bool bestFit;
    if (decoder != null)
    {
        replacement = decoder.Fallback as DecoderReplacementFallback;
        bestFit = decoder.Fallback is InternalDecoderBestFitFallback;
        Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
            decoder.FallbackBuffer.Remaining == 0,
            "[SBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start");
    }
    else
    {
        replacement = DecoderFallback as DecoderReplacementFallback;
        bestFit = DecoderFallback is InternalDecoderBestFitFallback;
    }

    // With best fit or a one-char replacement, every byte yields exactly one char.
    if (bestFit || (replacement != null && replacement.MaxCharCount == 1))
    {
        return count;
    }

    // Otherwise walk the input and ask the fallback how many chars each unknown byte costs.
    DecoderFallbackBuffer fallbackBuffer = null;
    DecoderFallbackBufferHelper helper = new DecoderFallbackBufferHelper(fallbackBuffer);

    int total = count;                  // start from one char per byte
    byte[] scratch = new byte[1];
    byte* inputEnd = bytes + count;

    while (bytes < inputEnd)
    {
        byte raw = *bytes;
        char mapped = _mapBytesToUnicode[raw];
        bytes++;

        if (mapped != UNKNOWN_CHAR)
        {
            continue;
        }

        // First unknown byte: create and initialize the fallback machinery.
        if (fallbackBuffer == null)
        {
            fallbackBuffer = decoder == null
                ? DecoderFallback.CreateFallbackBuffer()
                : decoder.FallbackBuffer;
            helper = new DecoderFallbackBufferHelper(fallbackBuffer);
            helper.InternalInitialize(inputEnd - count, null);
        }

        scratch[0] = raw;
        total--;                                            // release the char reserved for this byte
        total += helper.InternalFallback(scratch, bytes);   // add what the fallback produces
    }

    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
        "[SBCSEncoding.GetCharCount]Expected Empty fallback buffer at end");

    return total;
}
// Decoder-aware char counting over a raw byte range; implemented by derived encodings.
public abstract unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder);
// Counts the chars that decoding `count` ASCII bytes would produce.
// Invoked by internal callers that have already validated the arguments.
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    Debug.Assert(bytes != null, "[ASCIIEncoding.GetCharCount]bytes is null");
    Debug.Assert(count >= 0, "[ASCIIEncoding.GetCharCount]byteCount is negative");

    // Which replacement fallback (if any) applies: the decoder's, or this encoding's.
    DecoderReplacementFallback replacement =
        (decoder == null ? this.DecoderFallback : decoder.Fallback) as DecoderReplacementFallback;
    if (decoder != null)
    {
        Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
            decoder.FallbackBuffer.Remaining == 0,
            "[ASCIICodePageEncoding.GetCharCount]Expected empty fallback buffer");
    }

    // A one-char replacement means every byte maps to exactly one char.
    if (replacement != null && replacement.MaxCharCount == 1)
    {
        return count;
    }

    // Otherwise each byte >= 0x80 is priced by the fallback buffer.
    DecoderFallbackBuffer fallbackBuffer = null;
    int total = count;                  // start from one char per byte
    byte[] scratch = new byte[1];
    byte* cursor = bytes;
    byte* inputEnd = bytes + count;

    while (cursor < inputEnd)
    {
        byte current = *cursor;
        cursor++;

        if (current < 0x80)
        {
            continue;
        }

        // First invalid byte: obtain and initialize the fallback buffer.
        if (fallbackBuffer == null)
        {
            fallbackBuffer = decoder == null
                ? this.DecoderFallback.CreateFallbackBuffer()
                : decoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(bytes, null);
        }

        scratch[0] = current;
        total--;                                                    // release the char reserved for this byte
        total += fallbackBuffer.InternalFallback(scratch, cursor);  // add what the fallback produces
    }

    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
        "[ASCIIEncoding.GetCharCount]Expected Empty fallback buffer");

    return total;
}
[System.Security.SecurityCritical] // auto-generated
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    Contract.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
    Contract.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
    Contract.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");

    // A decoder is optional; when present it carries shift state between calls.
    UTF7Encoding.Decoder utf7Decoder = (UTF7Encoding.Decoder)baseDecoder;

    // Paired input/output cursor that also handles fallbacks and counting.
    Encoding.EncodingCharBuffer output = new Encoding.EncodingCharBuffer(
        this, utf7Decoder, chars, charCount, bytes, byteCount);

    // shiftBitCount < 0 means "not inside a modified base64 run".
    int shiftBits = 0;
    int shiftBitCount = -1;
    bool justSawPlus = false;
    if (utf7Decoder != null)
    {
        shiftBits = utf7Decoder.bits;
        shiftBitCount = utf7Decoder.bitCount;
        justSawPlus = utf7Decoder.firstByte;

        Contract.Assert(!justSawPlus || utf7Decoder.bitCount <= 0,
            "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
    }

    // A whole char may still be sitting in the remembered bits from the last call.
    if (shiftBitCount >= 16)
    {
        char carried = (char)((shiftBits >> (shiftBitCount - 16)) & 0xFFFF);
        if (!output.AddChar(carried))
        {
            // Always throws: at least one char of room is required, even for Convert.
            ThrowCharsOverflow(utf7Decoder, true);
        }

        shiftBitCount -= 16;
    }

    while (output.MoreData)
    {
        byte next = output.GetNextByte();
        int decoded;

        if (shiftBitCount < 0)
        {
            // Directly encoded text.
            if (next == '+')
            {
                // Start of a base64 run, or of the "+-" escape for a literal plus.
                shiftBitCount = 0;
                justSawPlus = true;
                continue;
            }

            if (next >= 0x80)
            {
                // Not valid in UTF-7: hand it to the fallback.
                if (!output.Fallback(next))
                {
                    break;      // out of room and not throwing
                }

                continue;
            }

            decoded = next;
        }
        else
        {
            // Inside a modified base64 run.
            sbyte sextet;
            if (next < 0x80 && ((sextet = base64Values[next]) >= 0))
            {
                justSawPlus = false;
                shiftBits = (shiftBits << 6) | ((byte)sextet);
                shiftBitCount += 6;

                if (shiftBitCount < 16)
                {
                    continue;   // not enough bits for a char yet
                }

                decoded = (shiftBits >> (shiftBitCount - 16)) & 0xFFFF;
                shiftBitCount -= 16;
            }
            else
            {
                // Any non-base64 byte ends the run.
                shiftBitCount = -1;

                if (next != '-')
                {
                    // Either >= 0x80 or simply not in the base64 table: fall back.
                    if (!output.Fallback(next))
                    {
                        break;  // out of room and not throwing
                    }

                    continue;
                }

                // '-' right after '+' encodes a literal plus; otherwise it only ends the run.
                if (!justSawPlus)
                {
                    continue;
                }

                decoded = '+';
            }
        }

        if (decoded >= 0 && !output.AddChar((char)decoded))
        {
            // No room. If the char came out of the bit accumulator we keep it there
            // (and keep its byte consumed) so it can be emitted on the next call.
            if (shiftBitCount >= 0)
            {
                output.AdjustBytes(+1);     // re-add the byte AddChar backed out when it failed
                shiftBitCount += 16;        // the char's bits are still pending
            }

            break;
        }
    }

    // Persist state only when really decoding (chars == null means counting).
    if (chars != null && utf7Decoder != null)
    {
        // MustFlush may have been cleared by ThrowCharsOverflow if we stopped early.
        bool flushing = utf7Decoder.MustFlush;

        // When flushing, leftover bits are dropped and the shift state is reset.
        utf7Decoder.bits = flushing ? 0 : shiftBits;
        utf7Decoder.bitCount = flushing ? -1 : shiftBitCount;
        utf7Decoder.firstByte = flushing ? false : justSawPlus;

        utf7Decoder.m_bytesUsed = output.BytesUsed;
    }

    return output.Count;
}
[System.Security.SecurityCritical] // auto-generated
// Counts the chars that decoding `count` UTF-8 bytes starting at `bytes` would produce,
// including whatever the decoder fallback reports for invalid byte sequences.
//
// bytes       - start of the input; also used as the origin when initializing the fallback buffer.
// count       - number of input bytes.
// baseDecoder - optional decoder; when present its saved `bits` are the starting state, its
//               FallbackBuffer is used, and its MustFlush decides whether a trailing partial
//               sequence is fallen back at the end.
//
// The running state lives in `ch`: 0 means no sequence is in progress; otherwise it holds the
// bits gathered so far plus marker flags (FinalByte, SupplimentarySeq, ThreeByteSeq) and, in
// the bits read by (ch >> 30), a pending adjustment to charCount.
// charCount starts at one char per byte and is corrected downwards as multi-byte sequences
// are recognized.
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    Contract.Assert(count >=0, "[UTF8Encoding.GetCharCount]count >=0");
    Contract.Assert(bytes!=null, "[UTF8Encoding.GetCharCount]bytes!=null");

    // Initialize stuff
    byte *pSrc = bytes;
    byte *pEnd = pSrc+count;

    // Start by assuming we have as many as count, charCount always includes the adjustment
    // for the character being decoded
    int charCount = count;
    int ch = 0;
    DecoderFallbackBuffer fallback = null;

    if (baseDecoder != null) {
        UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
        ch = decoder.bits;
        charCount -= (ch >> 30);        // Adjust char count for # of expected bytes and expected output chars.

        // Shouldn't have anything in fallback buffer for GetCharCount
        // (don't have to check m_throwOnOverflow for count)
        Contract.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at start");
    }

    for (;;)
    {
        // SLOWLOOP: does all range checks, handles all special cases, but it is slow

        if (pSrc >= pEnd) {
            break;
        }

        if (ch == 0) {
            // no pending bits
            goto ReadChar;
        }

        // read next byte. The JIT optimization seems to be getting confused when
        // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
        int cha = *pSrc;
        pSrc++;

        // we are expecting to see trailing bytes like 10vvvvvv
        if ((cha & unchecked((sbyte)0xC0)) != 0x80) {
            // This can be a valid starting byte for another UTF8 byte sequence, so let's put
            // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
            pSrc--;
            charCount += (ch >> 30);
            goto InvalidByteSequence;
        }

        // fold in the new byte
        ch = (ch << 6) | (cha & 0x3F);

        if ((ch & FinalByte) == 0) {
            // Sequence not complete yet: only 3- and 4-byte sequences can get here.
            Contract.Assert( (ch & (SupplimentarySeq | ThreeByteSeq)) != 0, "[UTF8Encoding.GetChars]Invariant volation");

            if ((ch & SupplimentarySeq) != 0) {
                if ((ch & (FinalByte >> 6)) != 0) {
                    // this is 3rd byte (of 4 byte supplementary) - nothing to do
                    continue;
                }

                // 2nd byte, check for non-shortest form of supplementary char and the valid
                // supplementary characters in range 0x010000 - 0x10FFFF at the same time
                if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
                    goto InvalidByteSequence;
                }
            }
            else {
                // Must be 2nd byte of a 3-byte sequence
                // check for non-shortest form of 3 byte seq
                if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
                    (ch & (0xF800 >> 6) ) == (0xD800 >> 6))     // illegal individually encoded surrogate
                {
                    goto InvalidByteSequence;
                }
            }
            continue;
        }

        // ready to punch

        // adjust for surrogates in non-shortest form
        if ((ch & (SupplimentarySeq | 0x1F0000)) == SupplimentarySeq) {
            charCount--;
        }
        goto EncodeChar;

    InvalidByteSequence:
        // this code fragment should be close to the gotos referencing it
        // Have to do fallback for invalid bytes
        if (fallback == null)
        {
            // Lazily obtain the fallback buffer: a fresh one without a decoder, else the decoder's.
            if (baseDecoder == null)
                fallback = this.decoderFallback.CreateFallbackBuffer();
            else
                fallback = baseDecoder.FallbackBuffer;
            fallback.InternalInitialize(bytes, null);
        }
        charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);

        // Drop the bad sequence and resume with no pending bits.
        ch = 0;
        continue;

    ReadChar:
        ch = *pSrc;
        pSrc++;

    ProcessChar:
        if (ch > 0x7F) {
            // If its > 0x7F, its start of a new multi-byte sequence

            // Long sequence, so unreserve our char.
            charCount--;

            // bit 6 has to be non-zero for start of multibyte chars.
            if ((ch & 0x40) == 0) {
                // Unexpected trail byte
                goto InvalidByteSequence;
            }

            // start a new long code
            if ((ch & 0x20) != 0) {
                if ((ch & 0x10) != 0) {
                    // 4 byte encoding - supplementary character (2 surrogates)

                    ch &= 0x0F;

                    // check that bit 4 is zero and the valid supplementary character
                    // range 0x000000 - 0x10FFFF at the same time
                    if (ch > 0x04) {
                        // Restore the lead byte's high nibble before handing ch to the fallback path.
                        ch |= 0xf0;
                        goto InvalidByteSequence;
                    }

                    // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
                    // Final byte flag, count fix if we don't make final byte & supplementary sequence flag.
                    ch |= (FinalByte >> 3*6) |  // Final byte is 3 more bytes from now
                          (1 << 30) |           // If it dies on next byte we'll need an extra char
                          (3 << (30-2*6)) |     // If it dies on last byte we'll need to subtract a char
                        (SupplimentarySeq) | (SupplimentarySeq >> 6) |
                        (SupplimentarySeq >> 2*6) | (SupplimentarySeq >> 3*6);

                    // Our character count will be 2 characters for these 4 bytes, so subtract another char
                    charCount--;
                }
                else {
                    // 3 byte encoding
                    // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
                    ch = (ch & 0x0F) | ( (FinalByte >> 2*6) | (1 << 30) |
                        (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2*6) );

                    // We'll expect 1 character for these 3 bytes, so subtract another char.
                    charCount--;
                }
            }
            else {
                // 2 byte encoding

                ch &= 0x1F;

                // check for non-shortest form
                if (ch <= 1) {
                    // Restore the lead byte's high bits before handing ch to the fallback path.
                    ch |= 0xc0;
                    goto InvalidByteSequence;
                }

                // Add bit flags so we'll be flagged correctly
                ch |= (FinalByte >> 6);
            }
            continue;
        }

    EncodeChar:

#if FASTLOOP
        // Optional fast path (compiled only when FASTLOOP is defined): scans runs of
        // ASCII several bytes at a time and handles complete multi-byte sequences inline.
        int availableBytes = PtrDiff(pEnd, pSrc);

        // don't fall into the fast decoding loop if we don't have enough bytes
        if (availableBytes <= 13) {
            // try to get over the remainder of the ascii characters fast though
            byte* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
            while (pSrc < pLocalEnd) {
                ch = *pSrc;
                pSrc++;

                if (ch > 0x7F)
                    goto ProcessChar;
            }
            // we are done
            ch = 0;
            break;
        }

        // To compute the upper bound, assume that all characters are ASCII characters at this point,
        //  the boundary will be decreased for every non-ASCII character we encounter
        // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
        byte *pStop = pSrc + availableBytes - 7;

        while (pSrc < pStop) {
            ch = *pSrc;
            pSrc++;

            if (ch > 0x7F) {
                goto LongCode;
            }

            // get pSrc 2-byte aligned
            if ((unchecked((int)pSrc) & 0x1) != 0) {
                ch = *pSrc;
                pSrc++;
                if (ch > 0x7F) {
                    goto LongCode;
                }
            }

            // get pSrc 4-byte aligned
            if ((unchecked((int)pSrc) & 0x2) != 0) {
                ch = *(ushort*)pSrc;
                if ((ch & 0x8080) != 0) {
                    goto LongCodeWithMask16;
                }
                pSrc += 2;
            }

            // Run 8 + 8 characters at a time!
            while (pSrc < pStop) {
                ch = *(int*)pSrc;
                int chb = *(int*)(pSrc+4);
                // Any high bit set in these 8 bytes means a non-ASCII byte is present.
                if (((ch | chb) & unchecked((int)0x80808080)) != 0) {
                    goto LongCodeWithMask32;
                }
                pSrc += 8;

                // This is a really small loop - unroll it
                if (pSrc >= pStop)
                    break;

                ch = *(int*)pSrc;
                chb = *(int*)(pSrc+4);
                if (((ch | chb) & unchecked((int)0x80808080)) != 0) {
                    goto LongCodeWithMask32;
                }
                pSrc += 8;
            }
            break;

            // Reduce the multi-byte read in ch to the single byte at pSrc,
            // then step past it; if that byte is ASCII, keep scanning.
#if BIGENDIAN
        LongCodeWithMask32:
            // be careful about the sign extension
            ch = (int)(((uint)ch) >> 16);
        LongCodeWithMask16:
            ch = (int)(((uint)ch) >> 8);
#else // BIGENDIAN
        LongCodeWithMask32:
        LongCodeWithMask16:
            ch &= 0xFF;
#endif // BIGENDIAN
            pSrc++;
            if (ch <= 0x7F) {
                continue;
            }

        LongCode:
            // ch is the lead byte; chc is the byte that follows it.
            int chc = *pSrc;
            pSrc++;

            if (
                // bit 6 of the lead byte has to be set; a clear bit 6 means a stray trail byte
                (ch & 0x40) == 0 ||
                // we are expecting to see trailing bytes like 10vvvvvv
                (chc & unchecked((sbyte)0xC0)) != 0x80)
            {
                goto BadLongCode;
            }

            chc &= 0x3F;

            // start a new long code
            if ((ch & 0x20) != 0) {

                // fold the first two bytes together
                chc |= (ch & 0x0F) << 6;

                if ((ch & 0x10) != 0) {
                    // 4 byte encoding - surrogate
                    ch = *pSrc;
                    if (
                        // check that bit 4 is zero, the non-shortest form of surrogate
                        // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
                        !InRange(chc >> 4, 0x01, 0x10) ||
                        // we are expecting to see trailing bytes like 10vvvvvv
                        (ch & unchecked((sbyte)0xC0)) != 0x80 )
                    {
                        goto BadLongCode;
                    }

                    chc = (chc << 6) | (ch & 0x3F);

                    ch = *(pSrc+1);
                    // we are expecting to see trailing bytes like 10vvvvvv
                    if ((ch & unchecked((sbyte)0xC0)) != 0x80) {
                        goto BadLongCode;
                    }
                    pSrc += 2;

                    // extra byte
                    charCount--;
                }
                else {
                    // 3 byte encoding
                    ch = *pSrc;
                    if (
                        // check for non-shortest form of 3 byte seq
                        (chc & (0x1F << 5)) == 0 ||
                        // Can't have surrogates here.
                        (chc & (0xF800 >> 6) ) == (0xD800 >> 6) ||
                        // we are expecting to see trailing bytes like 10vvvvvv
                        (ch & unchecked((sbyte)0xC0)) != 0x80 )
                    {
                        goto BadLongCode;
                    }
                    pSrc++;

                    // extra byte
                    charCount--;
                }
            }
            else {
                // 2 byte encoding

                // check for non-shortest form
                if ((ch & 0x1E) == 0) {
                    goto BadLongCode;
                }
            }

            // extra byte
            charCount--;
        }
#endif // FASTLOOP

        // no pending bits at this point
        ch = 0;
        continue;

    BadLongCode:
        // Back up over the two bytes consumed at LongCode and restart with no
        // pending bits, so the checked loop above re-reads them.
        pSrc -= 2;
        ch = 0;
        continue;
    }

    // May have a problem if we have to flush
    if (ch != 0)
    {
        // We were already adjusting for these, so need to unadjust
        charCount += (ch >> 30);
        if (baseDecoder == null || baseDecoder.MustFlush)
        {
            // Have to do fallback for invalid bytes
            if (fallback == null)
            {
                if (baseDecoder == null)
                    fallback = this.decoderFallback.CreateFallbackBuffer();
                else
                    fallback = baseDecoder.FallbackBuffer;
                fallback.InternalInitialize(bytes, null);
            }
            charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
        }
    }

    // Shouldn't have anything in fallback buffer for GetCharCount
    // (don't have to check m_throwOnOverflow for count)
    Contract.Assert(fallback == null || fallback.Remaining == 0, "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at end");

    return charCount;
}
// Decodes UTF-7 bytes into chars (or just counts them when chars is null),
// carrying base64 shift state in the optional decoder between calls.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
    Debug.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
    Debug.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");

    UTF7Encoding.Decoder utf7Decoder = (UTF7Encoding.Decoder)baseDecoder;

    // Paired input/output cursor that also handles fallbacks and counting.
    Encoding.EncodingCharBuffer output = new Encoding.EncodingCharBuffer(
        this, utf7Decoder, chars, charCount, bytes, byteCount);

    // accumulatedCount < 0 means "not inside a modified base64 run".
    int accumulator = 0;
    int accumulatedCount = -1;
    bool plusJustSeen = false;
    if (utf7Decoder != null)
    {
        accumulator = utf7Decoder.bits;
        accumulatedCount = utf7Decoder.bitCount;
        plusJustSeen = utf7Decoder.firstByte;

        Debug.Assert(!plusJustSeen || utf7Decoder.bitCount <= 0,
            "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
    }

    // Emit a char that could not be written at the end of the previous call.
    if (accumulatedCount >= 16)
    {
        char carried = (char)((accumulator >> (accumulatedCount - 16)) & 0xFFFF);
        if (!output.AddChar(carried))
        {
            // Always throws: at least one char of room is required, even for Convert.
            ThrowCharsOverflow(utf7Decoder, true);
        }

        accumulatedCount -= 16;
    }

    while (output.MoreData)
    {
        byte next = output.GetNextByte();
        int decoded;

        if (accumulatedCount < 0)
        {
            // Directly encoded text.
            if (next == '+')
            {
                // Opens a base64 run, or begins the "+-" escape for a literal plus.
                accumulatedCount = 0;
                plusJustSeen = true;
                continue;
            }

            if (next >= 0x80)
            {
                // Not valid in UTF-7: hand it to the fallback.
                if (!output.Fallback(next))
                {
                    break;      // out of room and not throwing
                }

                continue;
            }

            decoded = next;
        }
        else
        {
            // Inside a modified base64 run.
            sbyte sextet;
            if (next < 0x80 && ((sextet = _base64Values[next]) >= 0))
            {
                plusJustSeen = false;
                accumulator = (accumulator << 6) | ((byte)sextet);
                accumulatedCount += 6;

                if (accumulatedCount < 16)
                {
                    continue;   // still short of a full char
                }

                decoded = (accumulator >> (accumulatedCount - 16)) & 0xFFFF;
                accumulatedCount -= 16;
            }
            else
            {
                // Any non-base64 byte terminates the run.
                accumulatedCount = -1;

                if (next != '-')
                {
                    // Either >= 0x80 or simply not in the base64 table: fall back.
                    if (!output.Fallback(next))
                    {
                        break;  // out of room and not throwing
                    }

                    continue;
                }

                // '-' directly after '+' is a literal plus; otherwise it only closes the run.
                if (!plusJustSeen)
                {
                    continue;
                }

                decoded = '+';
            }
        }

        if (decoded >= 0 && !output.AddChar((char)decoded))
        {
            // No room. A char taken from the accumulator stays there (with its byte
            // still counted as consumed) so the next call can emit it.
            if (accumulatedCount >= 0)
            {
                output.AdjustBytes(+1);     // re-add the byte AddChar backed out when it failed
                accumulatedCount += 16;     // the char's bits are still pending
            }

            break;
        }
    }

    // Persist state only when really decoding (chars == null means counting).
    if (chars != null && utf7Decoder != null)
    {
        // MustFlush may have been cleared by ThrowCharsOverflow if we stopped early.
        bool flushing = utf7Decoder.MustFlush;

        // When flushing, leftover bits are dropped and the shift state is reset.
        utf7Decoder.bits = flushing ? 0 : accumulator;
        utf7Decoder.bitCount = flushing ? -1 : accumulatedCount;
        utf7Decoder.firstByte = flushing ? false : plusJustSeen;

        utf7Decoder._bytesUsed = output.BytesUsed;
    }

    return output.Count;
}
// Decodes ISCII bytes into chars (or counts them when chars is null).
// Local state mirrors the ISCIIDecoder fields of the same names so that a
// sequence split across calls can be resumed.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    ISCIIEncoding.ISCIIDecoder isciiDecoder = (ISCIIEncoding.ISCIIDecoder)baseDecoder;
    Encoding.EncodingCharBuffer output = new Encoding.EncodingCharBuffer(
        (Encoding)this, (DecoderNLS)isciiDecoder, chars, charCount, bytes, byteCount);

    // Start from defaults, then overlay whatever the decoder remembered.
    int currentCodePage = this.defaultCodePage;
    bool lastAtr = false;
    bool lastVirama = false;
    bool lastDevenagariStressAbbr = false;
    char lastCharForNextNukta = '\0';
    char lastCharForNoNextNukta = '\0';

    if (isciiDecoder != null)
    {
        currentCodePage = isciiDecoder.currentCodePage;
        lastAtr = isciiDecoder.bLastATR;
        lastVirama = isciiDecoder.bLastVirama;
        lastDevenagariStressAbbr = isciiDecoder.bLastDevenagariStressAbbr;
        lastCharForNextNukta = isciiDecoder.cLastCharForNextNukta;
        lastCharForNoNextNukta = isciiDecoder.cLastCharForNoNextNukta;
    }

    // True whenever the previous byte left something that the next byte must resolve.
    bool hasPendingState = lastVirama || lastAtr || lastDevenagariStressAbbr || lastCharForNextNukta != '\0';

    // Index into the mapping table for the active code page (-1 when outside 2..11).
    int mappingIndex = -1;
    if (currentCodePage >= 2 && currentCodePage <= 11)
    {
        mappingIndex = ISCIIEncoding.IndicMappingIndex[currentCodePage];
    }

    while (output.MoreData)
    {
        byte current = output.GetNextByte();

        if (hasPendingState)
        {
            hasPendingState = false;

            if (lastAtr)
            {
                // Previous byte was 239: this byte may select a code page.
                if (current >= 66 && current <= 75)
                {
                    currentCodePage = current & 15;
                    mappingIndex = ISCIIEncoding.IndicMappingIndex[currentCodePage];
                    lastAtr = false;
                    continue;
                }

                if (current == 64 || current == 65)
                {
                    // Both values switch back to the default code page.
                    currentCodePage = this.defaultCodePage;
                    mappingIndex = -1;
                    if (currentCodePage >= 2 && currentCodePage <= 11)
                    {
                        mappingIndex = ISCIIEncoding.IndicMappingIndex[currentCodePage];
                    }

                    lastAtr = false;
                    continue;
                }

                // Anything else: fall back the 239 byte, then process `current` normally below.
                if (!output.Fallback((byte)239))
                {
                    break;
                }

                lastAtr = false;
            }
            else if (lastVirama)
            {
                if (current == 232)
                {
                    if (!output.AddChar('\x200C'))
                    {
                        break;
                    }

                    lastVirama = false;
                    continue;
                }

                if (current == 233)
                {
                    if (!output.AddChar('\x200D'))
                    {
                        break;
                    }

                    lastVirama = false;
                    continue;
                }

                // No special follower: process `current` normally below.
                lastVirama = false;
            }
            else if (lastDevenagariStressAbbr)
            {
                if (current == 184)
                {
                    if (!output.AddChar('॒'))
                    {
                        break;
                    }

                    lastDevenagariStressAbbr = false;
                    continue;
                }

                if (current == 191)
                {
                    if (!output.AddChar('॰'))
                    {
                        break;
                    }

                    lastDevenagariStressAbbr = false;
                    continue;
                }

                // Anything else: fall back the 240 byte, then process `current` normally below.
                if (!output.Fallback((byte)240))
                {
                    break;
                }

                lastDevenagariStressAbbr = false;
            }
            else
            {
                // A char with two candidate outputs is pending; 233 selects the first one.
                if (current == 233)
                {
                    if (!output.AddChar(lastCharForNextNukta))
                    {
                        break;
                    }

                    lastCharForNextNukta = lastCharForNoNextNukta = '\0';
                    continue;
                }

                // Otherwise emit the second candidate and process `current` normally below.
                if (!output.AddChar(lastCharForNoNextNukta))
                {
                    break;
                }

                lastCharForNextNukta = lastCharForNoNextNukta = '\0';
            }
        }

        if (current < 160)
        {
            // Bytes below 160 map to the char with the same value.
            if (!output.AddChar((char)current))
            {
                break;
            }
        }
        else if (current == 239)
        {
            // Remember it; the next byte decides what it means.
            lastAtr = hasPendingState = true;
        }
        else
        {
            char primary = ISCIIEncoding.IndicMapping[mappingIndex, 0, current - 160];
            char secondary = ISCIIEncoding.IndicMapping[mappingIndex, 1, current - 160];

            if (secondary == 0 || current == 233)
            {
                // Single mapping: emit it, or fall back when the table has no entry.
                if (primary == 0)
                {
                    if (!output.Fallback(current))
                    {
                        break;
                    }
                }
                else if (!output.AddChar(primary))
                {
                    break;
                }
            }
            else if (current == 232)
            {
                // Emit now, but the following byte may modify it.
                if (!output.AddChar(primary))
                {
                    break;
                }

                lastVirama = hasPendingState = true;
            }
            else if ((secondary & 61440) == 0)
            {
                // Hold both candidates until the next byte is seen.
                hasPendingState = true;
                lastCharForNextNukta = secondary;
                lastCharForNoNextNukta = primary;
            }
            else
            {
                lastDevenagariStressAbbr = hasPendingState = true;
            }
        }
    }

    // At end of input, resolve pending state if we are flushing (or have no decoder).
    if (isciiDecoder == null || isciiDecoder.MustFlush)
    {
        if (lastAtr)
        {
            if (output.Fallback((byte)239))
            {
                lastAtr = false;
            }
            else
            {
                // Result intentionally discarded; the call is made only for its effect on the buffer.
                output.GetNextByte();
            }
        }
        else if (lastDevenagariStressAbbr)
        {
            if (output.Fallback((byte)240))
            {
                lastDevenagariStressAbbr = false;
            }
            else
            {
                output.GetNextByte();
            }
        }
        else if (lastCharForNoNextNukta != 0)
        {
            if (output.AddChar(lastCharForNoNextNukta))
            {
                lastCharForNoNextNukta = lastCharForNextNukta = '\0';
            }
            else
            {
                output.GetNextByte();
            }
        }
    }

    // Persist state only when really decoding (a null chars pointer means counting).
    if (isciiDecoder != null && chars != null)
    {
        bool keepState = !isciiDecoder.MustFlush
            || lastCharForNoNextNukta != '\0'
            || lastAtr
            || lastDevenagariStressAbbr;

        if (keepState)
        {
            isciiDecoder.currentCodePage = currentCodePage;
            isciiDecoder.bLastVirama = lastVirama;
            isciiDecoder.bLastATR = lastAtr;
            isciiDecoder.bLastDevenagariStressAbbr = lastDevenagariStressAbbr;
            isciiDecoder.cLastCharForNextNukta = lastCharForNextNukta;
            isciiDecoder.cLastCharForNoNextNukta = lastCharForNoNextNukta;
        }
        else
        {
            // Fully flushed: return the decoder to its initial state.
            isciiDecoder.currentCodePage = this.defaultCodePage;
            isciiDecoder.bLastVirama = false;
            isciiDecoder.bLastATR = false;
            isciiDecoder.bLastDevenagariStressAbbr = false;
            isciiDecoder.cLastCharForNextNukta = '\0';
            isciiDecoder.cLastCharForNoNextNukta = '\0';
        }

        isciiDecoder.m_bytesUsed = output.BytesUsed;
    }

    return output.Count;
}
/// <summary>
/// Decodes single-byte code page data into chars by table lookup. Bytes whose table entry is
/// UNKNOWN_CHAR are replaced directly (fast path) or handed to the decoder fallback (slow path).
/// </summary>
/// <param name="bytes">Start of the input bytes; asserted non-null.</param>
/// <param name="byteCount">Number of input bytes; asserted non-negative.</param>
/// <param name="chars">Start of the output buffer; asserted non-null.</param>
/// <param name="charCount">Capacity of the output buffer in chars; asserted non-negative.</param>
/// <param name="decoder">Optional decoder supplying the fallback and receiving m_bytesUsed; may be null.</param>
/// <returns>The number of chars written to <paramref name="chars"/>.</returns>
public override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS decoder)
{
    // Arguments were validated by the internal caller, so only debug-time checks are made here.
    Debug.Assert(bytes != null, "[SBCSCodePageEncoding.GetChars]bytes is null");
    Debug.Assert(byteCount >= 0, "[SBCSCodePageEncoding.GetChars]byteCount is negative");
    Debug.Assert(chars != null, "[SBCSCodePageEncoding.GetChars]chars is null");
    Debug.Assert(charCount >= 0, "[SBCSCodePageEncoding.GetChars]charCount is negative");

    CheckMemorySection();

    byte* inputStart = bytes;
    byte* inputEnd = bytes + byteCount;
    char* outputStart = chars;

    // The decoder's fallback wins when a decoder is supplied; otherwise use the encoding's own.
    DecoderFallback activeFallback;
    if (decoder != null)
    {
        activeFallback = decoder.Fallback;
        Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "[SBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start");
    }
    else
    {
        activeFallback = this.DecoderFallback;
    }

    DecoderReplacementFallback replacement = activeFallback as DecoderReplacementFallback;
    bool bestFit = activeFallback is InternalDecoderBestFitFallback;

    // Fast path: best fit, or a replacement fallback that yields exactly one char per bad byte,
    // means every input byte produces exactly one output char and no fallback buffer is needed.
    if (bestFit || (replacement != null && replacement.MaxCharCount == 1))
    {
        // With no replacement fallback (the best-fit case) unknown bytes become '?'.
        char substitute = (replacement == null) ? '?' : replacement.DefaultString[0];

        if (charCount < byteCount)
        {
            // Not enough room for everything: throw when required, otherwise
            // decode only as many bytes as there are output slots.
            ThrowCharsOverflow(decoder, charCount < 1);
            inputEnd = bytes + charCount;
        }

        for (; bytes < inputEnd; bytes++, chars++)
        {
            char mapped;
            if (bestFit)
            {
                if (arrayBytesBestFit == null)
                {
                    ReadBestFitTable();
                }

                mapped = arrayBytesBestFit[*bytes];
            }
            else
            {
                mapped = _mapBytesToUnicode[*bytes];
            }

            // UNKNOWN_CHAR marks a byte the table has no mapping for.
            *chars = (mapped == UNKNOWN_CHAR) ? substitute : mapped;
        }

        // One char per byte, so bytes consumed equals chars produced.
        if (decoder != null)
        {
            decoder.m_bytesUsed = (int)(bytes - inputStart);
        }

        return (int)(chars - outputStart);
    }

    // Slow path: unknown bytes go through a fallback buffer, created on first use.
    DecoderFallbackBuffer fallbackBuffer = null;
    DecoderFallbackBufferHelper helper = new DecoderFallbackBufferHelper(null);
    byte[] single = new byte[1];
    char* outputEnd = chars + charCount;

    while (bytes < inputEnd)
    {
        byte current = *bytes;
        char mapped = _mapBytesToUnicode[current];
        bytes++;

        if (mapped != UNKNOWN_CHAR)
        {
            if (chars >= outputEnd)
            {
                // Output is full: un-consume this byte, then either throw or stop.
                Debug.Assert(bytes > inputStart, "[SBCSCodePageEncoding.GetChars]Expected bytes to have advanced already (known byte)");
                bytes--;
                ThrowCharsOverflow(decoder, bytes == inputStart);
                break;
            }

            *chars = mapped;
            chars++;
            continue;
        }

        if (fallbackBuffer == null)
        {
            fallbackBuffer = (decoder == null) ? DecoderFallback.CreateFallbackBuffer() : decoder.FallbackBuffer;
            helper = new DecoderFallbackBufferHelper(fallbackBuffer);

            // inputEnd is never adjusted on this path, so inputStart is the original buffer start.
            helper.InternalInitialize(inputStart, outputEnd);
        }

        Debug.Assert(bytes > inputStart, "[SBCSCodePageEncoding.GetChars]Expected bytes to have advanced already (unknown byte)");
        single[0] = current;

        // The fallback advances chars only when its whole output fits.
        if (!helper.InternalFallback(single, bytes, ref chars))
        {
            // The byte was not decoded: un-consume it, discard the fallback state, then throw or stop.
            bytes--;
            helper.InternalReset();
            ThrowCharsOverflow(decoder, bytes == inputStart);
            break;
        }
    }

    if (decoder != null)
    {
        decoder.m_bytesUsed = (int)(bytes - inputStart);
    }

    // Nothing may be left pending in the fallback buffer when GetChars returns.
    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0, "[SBCSEncoding.GetChars]Expected Empty fallback buffer at end");

    return (int)(chars - outputStart);
}
/// <summary>
/// Counts the chars that decoding the given UTF-32 bytes would produce, including any chars
/// reported by the fallback for invalid or incomplete code units.
/// </summary>
/// <param name="bytes">Start of the input bytes; asserted non-null.</param>
/// <param name="count">Number of input bytes; asserted non-negative.</param>
/// <param name="baseDecoder">Optional UTF32Decoder carrying partially read bytes from an earlier call; may be null.</param>
/// <returns>The number of chars required.</returns>
/// <exception cref="ArgumentOutOfRangeException">The running count wrapped to a negative value.</exception>
internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS baseDecoder)
{
    Debug.Assert(bytes != null, "[UTF32Encoding.GetCharCount]bytes!=null");
    Debug.Assert(count >= 0, "[UTF32Encoding.GetCharCount]count >=0");

    UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

    byte* inputStart = bytes;
    byte* inputEnd = bytes + count;

    int total = 0;          // chars counted so far
    int pendingBytes = 0;   // bytes of the current code unit read so far (0..3)
    uint codePoint = 0;     // code unit being assembled

    DecoderFallbackBuffer fallbackBuffer;
    if (decoder == null)
    {
        fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
    }
    else
    {
        // Resume from whatever the decoder saved on a previous call.
        pendingBytes = decoder.readByteCount;
        codePoint = (uint)decoder.iChar;
        fallbackBuffer = decoder.FallbackBuffer;

        Debug.Assert(fallbackBuffer.Remaining == 0, "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
    }

    fallbackBuffer.InternalInitialize(inputStart, null);

    // A negative total means the counter wrapped; leave the loop and report it below.
    while (bytes < inputEnd && total >= 0)
    {
        byte next = *bytes;
        bytes++;

        if (bigEndian)
        {
            // Earlier bytes are more significant: shift left, add at the bottom.
            codePoint = (codePoint << 8) + next;
        }
        else
        {
            // Earlier bytes are less significant: shift right, add at the top.
            codePoint = (codePoint >> 8) + ((uint)next << 24);
        }

        pendingBytes++;
        if (pendingBytes < 4)
        {
            continue;
        }

        pendingBytes = 0;

        bool invalid = codePoint > 0x10FFFF || (codePoint >= 0xD800 && codePoint <= 0xDFFF);
        if (invalid)
        {
            // Hand the four offending bytes, in input order, to the fallback.
            byte[] rejected = bigEndian
                ? new byte[] { unchecked ((byte)(codePoint >> 24)), unchecked ((byte)(codePoint >> 16)), unchecked ((byte)(codePoint >> 8)), unchecked ((byte)(codePoint)) }
                : new byte[] { unchecked ((byte)(codePoint)), unchecked ((byte)(codePoint >> 8)), unchecked ((byte)(codePoint >> 16)), unchecked ((byte)(codePoint >> 24)) };

            total += fallbackBuffer.InternalFallback(rejected, bytes);
        }
        else
        {
            // Values of 0x10000 and above need a surrogate pair, i.e. two chars.
            total += (codePoint >= 0x10000) ? 2 : 1;
        }

        codePoint = 0;
    }

    // An incomplete trailing code unit is reported to the fallback only when flushing.
    if (pendingBytes > 0 && (decoder == null || decoder.MustFlush))
    {
        byte[] leftover = new byte[pendingBytes];
        for (int i = pendingBytes - 1; i >= 0; i--)
        {
            if (bigEndian)
            {
                leftover[i] = unchecked ((byte)codePoint);
                codePoint >>= 8;
            }
            else
            {
                leftover[i] = unchecked ((byte)(codePoint >> 24));
                codePoint <<= 8;
            }
        }

        total += fallbackBuffer.InternalFallback(leftover, bytes);
    }

    if (total < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
    }

    Debug.Assert(fallbackBuffer.Remaining == 0, "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");

    return total;
}
/// <summary>
/// Decodes ASCII bytes into chars. Bytes of 0x80 and above are not valid ASCII and are either
/// replaced directly (fast path) or handed to the decoder fallback (slow path).
/// </summary>
/// <param name="bytes">Start of the input bytes; asserted non-null.</param>
/// <param name="byteCount">Number of input bytes; asserted non-negative.</param>
/// <param name="chars">Start of the output buffer; asserted non-null.</param>
/// <param name="charCount">Capacity of the output buffer in chars; asserted non-negative.</param>
/// <param name="decoder">Optional decoder supplying the fallback and receiving m_bytesUsed; may be null.</param>
/// <returns>The number of chars written to <paramref name="chars"/>.</returns>
internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS decoder)
{
    // Arguments were validated by the internal caller, so only debug-time checks are made here.
    Debug.Assert(bytes != null, "[ASCIIEncoding.GetChars]bytes is null");
    Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetChars]byteCount is negative");
    Debug.Assert(chars != null, "[ASCIIEncoding.GetChars]chars is null");
    Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetChars]charCount is negative");

    byte* inputStart = bytes;
    byte* inputEnd = bytes + byteCount;
    char* outputStart = chars;

    // There is no best-fit handling here; only the replacement fallback matters for the fast path.
    DecoderReplacementFallback replacement;
    if (decoder != null)
    {
        replacement = decoder.Fallback as DecoderReplacementFallback;
        Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "[ASCIICodePageEncoding.GetChars]Expected empty fallback buffer");
    }
    else
    {
        replacement = this.DecoderFallback as DecoderReplacementFallback;
    }

    // Fast path: a replacement fallback producing exactly one char per bad byte means
    // every input byte produces exactly one output char.
    if (replacement != null && replacement.MaxCharCount == 1)
    {
        char substitute = replacement.DefaultString[0];

        if (charCount < byteCount)
        {
            // Not enough room for everything: throw when required, otherwise
            // decode only as many bytes as there are output slots.
            ThrowCharsOverflow(decoder, charCount < 1);
            inputEnd = bytes + charCount;
        }

        for (; bytes < inputEnd; bytes++, chars++)
        {
            byte value = *bytes;
            *chars = (value >= 0x80) ? substitute : unchecked ((char)value);
        }

        // One char per byte, so bytes consumed equals chars produced.
        if (decoder != null)
        {
            decoder.m_bytesUsed = (int)(bytes - inputStart);
        }

        return (int)(chars - outputStart);
    }

    // Slow path: invalid bytes go through a fallback buffer, created on first use.
    DecoderFallbackBuffer fallbackBuffer = null;
    byte[] single = new byte[1];
    char* outputEnd = chars + charCount;

    while (bytes < inputEnd)
    {
        byte value = *bytes;
        bytes++;

        if (value < 0x80)
        {
            if (chars >= outputEnd)
            {
                // Output is full: un-consume this byte, then either throw or stop.
                Debug.Assert(bytes > inputStart || chars == outputStart, "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (normal case)");
                bytes--;
                ThrowCharsOverflow(decoder, chars == outputStart);
                break;
            }

            *chars = unchecked ((char)value);
            chars++;
            continue;
        }

        if (fallbackBuffer == null)
        {
            fallbackBuffer = (decoder == null) ? this.DecoderFallback.CreateFallbackBuffer() : decoder.FallbackBuffer;

            // inputEnd is never adjusted on this path, so inputStart is the original buffer start.
            fallbackBuffer.InternalInitialize(inputStart, outputEnd);
        }

        single[0] = value;

        // Pass a copy by reference rather than chars itself so chars can stay in a register.
        char* fallbackTarget = chars;
        bool fellBack = fallbackBuffer.InternalFallback(single, bytes, ref fallbackTarget);
        chars = fallbackTarget;

        if (!fellBack)
        {
            // The byte was not decoded: un-consume it, discard the fallback state, then throw or stop.
            Debug.Assert(bytes > inputStart || chars == outputStart, "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (fallback case)");
            bytes--;
            fallbackBuffer.InternalReset();
            ThrowCharsOverflow(decoder, chars == outputStart);
            break;
        }
    }

    if (decoder != null)
    {
        decoder.m_bytesUsed = (int)(bytes - inputStart);
    }

    // Nothing may be left pending in the fallback buffer when GetChars returns.
    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0, "[ASCIIEncoding.GetChars]Expected Empty fallback buffer");

    return (int)(chars - outputStart);
}
/// <summary>
/// Decodes UTF-32 bytes into UTF-16 chars. Four bytes are assembled into one code unit
/// (big- or little-endian according to the bigEndian field); values above 0x10FFFF or inside
/// 0xD800-0xDFFF are handed to the decoder fallback, and values of 0x10000 and above are
/// written as two chars.
/// </summary>
/// <param name="bytes">Start of the input bytes; asserted non-null.</param>
/// <param name="byteCount">Number of input bytes; asserted non-negative.</param>
/// <param name="chars">Start of the output buffer; asserted non-null.</param>
/// <param name="charCount">Capacity of the output buffer in chars; asserted non-negative.</param>
/// <param name="baseDecoder">
/// Optional UTF32Decoder. When supplied, partially read bytes are resumed from and saved back to
/// it (iChar, readByteCount), and m_bytesUsed is set to the number of bytes consumed.
/// </param>
/// <returns>The number of chars written to <paramref name="chars"/>.</returns>
internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS baseDecoder)
{
    Debug.Assert(chars != null, "[UTF32Encoding.GetChars]chars!=null");
    Debug.Assert(bytes != null, "[UTF32Encoding.GetChars]bytes!=null");
    Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetChars]byteCount >=0");
    Debug.Assert(charCount >= 0, "[UTF32Encoding.GetChars]charCount >=0");

    UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

    char *charStart = chars;
    char *charEnd = chars + charCount;

    byte *byteStart = bytes;
    byte *byteEnd = bytes + byteCount;

    // Partial code unit state: bytes read so far (0..3) and the value assembled from them.
    int readCount = 0;
    uint iChar = 0;

    DecoderFallbackBuffer fallbackBuffer = null;

    // Copy of chars passed by reference to the fallback, so chars itself is never passed by ref.
    char *charsForFallback;

    if (decoder != null)
    {
        // Resume from whatever the decoder saved on a previous call.
        readCount = decoder.readByteCount;
        iChar = (uint)decoder.iChar;
        fallbackBuffer = baseDecoder.FallbackBuffer;

        Debug.Assert(fallbackBuffer.Remaining == 0, "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
    }
    else
    {
        fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
    }

    fallbackBuffer.InternalInitialize(bytes, chars + charCount);

    while (bytes < byteEnd)
    {
        if (bigEndian)
        {
            // Earlier bytes are more significant: shift left, add at the bottom.
            iChar <<= 8;
            iChar += *(bytes++);
        }
        else
        {
            // Earlier bytes are less significant: shift right, add at the top.
            iChar >>= 8;
            iChar += (uint)(*(bytes++)) << 24;
        }

        readCount++;

        // Keep reading until a full four-byte unit is available.
        if (readCount < 4)
        {
            continue;
        }

        readCount = 0;

        // Out of range or a lone surrogate value: not a valid UTF-32 code unit.
        if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
        {
            // Hand the four offending bytes, in input order, to the fallback.
            byte[] fallbackBytes;
            if (bigEndian)
            {
                fallbackBytes = new byte[] { unchecked ((byte)(iChar >> 24)), unchecked ((byte)(iChar >> 16)), unchecked ((byte)(iChar >> 8)), unchecked ((byte)(iChar)) };
            }
            else
            {
                fallbackBytes = new byte[] { unchecked ((byte)(iChar)), unchecked ((byte)(iChar >> 8)), unchecked ((byte)(iChar >> 16)), unchecked ((byte)(iChar >> 24)) };
            }

            // chars only moves forward if the fallback succeeds.
            charsForFallback = chars;
            bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
            chars = charsForFallback;

            if (!fallbackResult)
            {
                // Either four bytes were read from this buffer (so bytes -= 4 is in range),
                // or nothing was output and ThrowCharsOverflow will throw.
                Debug.Assert(bytes >= byteStart + 4 || chars == charStart, "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
                bytes -= 4;     // un-consume the unit
                iChar = 0;      // nothing remembered
                fallbackBuffer.InternalReset();
                ThrowCharsOverflow(decoder, chars == charStart);
                break;
            }

            iChar = 0;
            continue;
        }

        if (iChar >= 0x10000)
        {
            // Needs two chars; make sure both fit.
            if (chars >= charEnd - 1)
            {
                Debug.Assert(bytes >= byteStart + 4 || chars == charStart, "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
                bytes -= 4;     // un-consume the unit
                iChar = 0;      // nothing remembered
                ThrowCharsOverflow(decoder, chars == charStart);
                break;
            }

            *(chars++) = GetHighSurrogate(iChar);
            iChar = GetLowSurrogate(iChar);
        }
        else if (chars >= charEnd)
        {
            // No room for a single char.
            Debug.Assert(bytes >= byteStart + 4 || chars == charStart, "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
            bytes -= 4;         // un-consume the unit
            iChar = 0;          // nothing remembered
            ThrowCharsOverflow(decoder, chars == charStart);
            break;
        }

        // Write the single char, or the second half of the pair.
        *(chars++) = (char)iChar;

        iChar = 0;
    }

    // An incomplete trailing unit is handed to the fallback only when flushing.
    if (readCount > 0 && (decoder == null || decoder.MustFlush))
    {
        byte[] fallbackBytes = new byte[readCount];

        // Extract from scratch copies: if the fallback fails without throwing, readCount and
        // iChar are saved back to the decoder below and must still describe the pending bytes.
        int tempCount = readCount;
        uint tempChar = iChar;
        if (bigEndian)
        {
            while (tempCount > 0)
            {
                fallbackBytes[--tempCount] = unchecked ((byte)tempChar);
                tempChar >>= 8;
            }
        }
        else
        {
            while (tempCount > 0)
            {
                fallbackBytes[--tempCount] = unchecked ((byte)(tempChar >> 24));
                tempChar <<= 8;
            }
        }

        charsForFallback = chars;
        bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
        chars = charsForFallback;

        if (!fallbackResult)
        {
            // Could not fall back: throw if required, otherwise keep the pending state
            // so the next call can try again.
            fallbackBuffer.InternalReset();
            ThrowCharsOverflow(decoder, chars == charStart);
        }
        else
        {
            // The pending bytes were handled; clear the state.
            readCount = 0;
            iChar = 0;
        }
    }

    // Save any partial unit and report how many bytes were consumed.
    if (decoder != null)
    {
        decoder.iChar = (int)iChar;
        decoder.readByteCount = readCount;
        decoder.m_bytesUsed = (int)(bytes - byteStart);
    }

    Debug.Assert(fallbackBuffer.Remaining == 0, "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");

    return (int)(chars - charStart);
}
/// <summary>
/// Decodes UTF-8 bytes into chars. A slow loop handles every byte with full range checks and
/// keeps partially read sequences in <c>ch</c>; when FASTLOOP is defined, a fast loop handles
/// runs of ASCII and well-formed multi-byte sequences. Invalid sequences are passed to
/// FallbackInvalidByteSequence.
/// </summary>
/// <param name="bytes">Start of the input bytes; asserted non-null.</param>
/// <param name="byteCount">Number of input bytes; asserted non-negative.</param>
/// <param name="chars">Start of the output buffer; asserted non-null.</param>
/// <param name="charCount">Capacity of the output buffer in chars; asserted non-negative.</param>
/// <param name="baseDecoder">
/// Optional UTF8Decoder. When supplied, pending bits are resumed from and saved back to
/// <c>bits</c>, and <c>m_bytesUsed</c> is set to the number of bytes consumed.
/// </param>
/// <returns>PtrDiff(pTarget, chars) — presumably the number of chars written; PtrDiff is defined elsewhere.</returns>
[System.Security.SecurityCritical] // auto-generated
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    Contract.Assert(chars!=null, "[UTF8Encoding.GetChars]chars!=null");
    Contract.Assert(byteCount >=0, "[UTF8Encoding.GetChars]count >=0");
    Contract.Assert(charCount >=0, "[UTF8Encoding.GetChars]charCount >=0");
    Contract.Assert(bytes!=null, "[UTF8Encoding.GetChars]bytes!=null");

    // Read and write cursors, with their respective limits.
    byte *pSrc = bytes;
    char *pTarget = chars;

    byte *pEnd = pSrc+byteCount;
    char *pAllocatedBufferEnd = pTarget+charCount;

    // Pending bits of a partially read sequence, plus state flags (FinalByte, SupplimentarySeq,
    // ThreeByteSeq are constants defined elsewhere). Zero means nothing is pending.
    int ch = 0;

    // Created lazily, the first time an invalid sequence is seen.
    DecoderFallbackBuffer fallback = null;
    if (baseDecoder != null)
    {
        UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
        ch = decoder.bits;

        // Shouldn't have anything in fallback buffer for GetChars
        // (don't have to check m_throwOnOverflow for chars, we always use all or none so always should be empty)
        Contract.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "[UTF8Encoding.GetChars]Expected empty fallback buffer at start");
    }

    for (;;)
    {
        // SLOWLOOP: does all range checks, handles all special cases, but it is slow

        if (pSrc >= pEnd)
        {
            break;
        }

        if (ch == 0)
        {
            // no pending bits
            goto ReadChar;
        }

        // read next byte. The JIT optimization seems to be getting confused when
        // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
        int cha = *pSrc;
        pSrc++;

        // we are expecting to see trailing bytes like 10vvvvvv
        if ((cha & unchecked((sbyte)0xC0)) != 0x80)
        {
            // This can be a valid starting byte for another UTF8 byte sequence, so let's put
            // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
            pSrc--;
            goto InvalidByteSequence;
        }

        // fold in the new byte
        ch = (ch << 6) | (cha & 0x3F);

        if ((ch & FinalByte) == 0)
        {
            // Not at last byte yet
            Contract.Assert( (ch & (SupplimentarySeq | ThreeByteSeq)) != 0, "[UTF8Encoding.GetChars]Invariant volation");

            if ((ch & SupplimentarySeq) != 0)
            {
                // It's a 4-byte supplementary sequence
                if ((ch & (FinalByte >> 6)) != 0)
                {
                    // this is 3rd byte of 4 byte sequence - nothing to do
                    continue;
                }

                // 2nd byte of 4 bytes
                // check for non-shortest form of surrogate and the valid surrogate
                // range 0x000000 - 0x10FFFF at the same time
                if (!InRange(ch & 0x1F0, 0x10, 0x100))
                {
                    goto InvalidByteSequence;
                }
            }
            else
            {
                // Must be 2nd byte of a 3-byte sequence
                // check for non-shortest form of 3 byte seq
                if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
                    (ch & (0xF800 >> 6) ) == (0xD800 >> 6))     // illegal individually encoded surrogate
                {
                    goto InvalidByteSequence;
                }
            }
            continue;
        }

        // ready to punch

        // surrogate in shortest form?
        // Might be possible to get rid of this? Already did non-shortest check for 4-byte sequence when reading 2nd byte?
        if ((ch & (SupplimentarySeq | 0x1F0000)) > SupplimentarySeq)
        {
            // let the range check for the second char throw the exception
            if (pTarget < pAllocatedBufferEnd)
            {
                // Write the high surrogate now; the low surrogate is left in ch for EncodeChar.
                *pTarget = (char)( ((ch >> 10) & 0x7FF) + unchecked((short)((CharUnicodeInfo.HIGH_SURROGATE_START - (0x10000 >> 10)))) );
                pTarget++;

                ch = (ch & 0x3FF) + unchecked((int)(CharUnicodeInfo.LOW_SURROGATE_START));
            }
        }

        goto EncodeChar;

    InvalidByteSequence:
        // this code fragment should be close to the gotos referencing it
        // Have to do fallback for invalid bytes
        if (fallback == null)
        {
            if (baseDecoder == null)
                fallback = this.decoderFallback.CreateFallbackBuffer();
            else
                fallback = baseDecoder.FallbackBuffer;
            fallback.InternalInitialize(bytes, pAllocatedBufferEnd);
        }
        // This'll back us up the appropriate # of bytes if we didn't get anywhere
        if (!FallbackInvalidByteSequence(ref pSrc, ch, fallback, ref pTarget))
        {
            // Ran out of buffer space
            // Need to throw an exception?
            Contract.Assert(pSrc >= bytes || pTarget == chars, "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer after fallback");
            fallback.InternalReset();
            ThrowCharsOverflow(baseDecoder, pTarget == chars);
            ch = 0;
            break;
        }
        Contract.Assert(pSrc >= bytes, "[UTF8Encoding.GetChars]Expected invalid byte sequence to have remained within the byte array");
        ch = 0;
        continue;

    ReadChar:
        ch = *pSrc;
        pSrc++;

    ProcessChar:
        if (ch > 0x7F)
        {
            // If it's > 0x7F, it's the start of a new multi-byte sequence

            // bit 6 has to be non-zero
            if ((ch & 0x40) == 0)
            {
                goto InvalidByteSequence;
            }

            // start a new long code
            if ((ch & 0x20) != 0)
            {
                if ((ch & 0x10) != 0)
                {
                    // 4 byte encoding - supplementary character (2 surrogates)

                    ch &= 0x0F;

                    // check that bit 4 is zero and the valid supplementary character
                    // range 0x000000 - 0x10FFFF at the same time
                    if (ch > 0x04)
                    {
                        // Restore the lead byte's high bits before handing it to the fallback.
                        ch |= 0xf0;
                        goto InvalidByteSequence;
                    }

                    ch |= (FinalByte >> 3*6) | (1 << 30) | (3 << (30-2*6)) | (SupplimentarySeq) | (SupplimentarySeq >> 6) | (SupplimentarySeq >> 2*6) | (SupplimentarySeq >> 3*6);
                }
                else
                {
                    // 3 byte encoding
                    ch = (ch & 0x0F) | ( (FinalByte >> 2*6) | (1 << 30) | (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2*6) );
                }
            }
            else
            {
                // 2 byte encoding

                ch &= 0x1F;

                // check for non-shortest form
                if (ch <= 1)
                {
                    // Restore the lead byte's high bits before handing it to the fallback.
                    ch |= 0xc0;
                    goto InvalidByteSequence;
                }

                ch |= (FinalByte >> 6);
            }
            continue;
        }

    EncodeChar:
        // write the pending character
        if (pTarget >= pAllocatedBufferEnd)
        {
            // Fix chars so we make sure to throw if we didn't output anything
            ch &= 0x1fffff;

            // Back pSrc up by the length of the sequence that produced ch, so the
            // whole sequence is left unconsumed.
            if (ch > 0x7f)
            {
                if (ch > 0x7ff)
                {
                    if (ch >= CharUnicodeInfo.LOW_SURROGATE_START && ch <= CharUnicodeInfo.LOW_SURROGATE_END)
                    {
                        pSrc--;     // It was 4 bytes
                        pTarget--;  // 1 was stored already, but we can't remember 1/2, so back up
                    }
                    else if (ch > 0xffff)
                    {
                        pSrc--;     // It was 4 bytes, nothing was stored
                    }
                    pSrc--;         // It was at least 3 bytes
                }
                pSrc--;             // It was at least 2 bytes
            }
            pSrc--;

            // Throw that we don't have enough room (pSrc could be < bytes if we had started to process
            // a 4 byte sequence already)
            Contract.Assert(pSrc >= bytes || pTarget == chars, "[UTF8Encoding.GetChars]Expected pSrc to be within input buffer or throw due to no output]");
            ThrowCharsOverflow(baseDecoder, pTarget == chars);

            // Don't store ch in decoder, we already backed up to its start
            ch = 0;

            // Didn't throw, just use this buffer size.
            break;
        }
        *pTarget = (char)ch;
        pTarget++;

#if FASTLOOP
        int availableChars = PtrDiff(pAllocatedBufferEnd, pTarget);
        int availableBytes = PtrDiff(pEnd, pSrc);

        // don't fall into the fast decoding loop if we don't have enough bytes
        // Test for availableChars is done because pStop would be <= pTarget.
        if (availableBytes <= 13)
        {
            // we may need as many as 1 character per byte
            if (availableChars < availableBytes)
            {
                // not enough output room. no pending bits at this point
                ch = 0;
                continue;
            }

            // try to get over the remainder of the ascii characters fast though
            byte* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
            while (pSrc < pLocalEnd)
            {
                ch = *pSrc;
                pSrc++;

                if (ch > 0x7F)
                    goto ProcessChar;

                *pTarget = (char)ch;
                pTarget++;
            }
            // we are done
            ch = 0;
            break;
        }

        // we may need as many as 1 character per byte, so reduce the byte count if necessary.
        // If availableChars is too small, pStop will be before pTarget and we won't do fast loop.
        if (availableChars < availableBytes)
        {
            availableBytes = availableChars;
        }

        // To compute the upper bound, assume that all characters are ASCII characters at this point,
        // the boundary will be decreased for every non-ASCII character we encounter
        // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
        char *pStop = pTarget + availableBytes - 7;

        while (pTarget < pStop)
        {
            ch = *pSrc;
            pSrc++;

            if (ch > 0x7F)
            {
                goto LongCode;
            }
            *pTarget = (char)ch;
            pTarget++;

            // get pSrc to be 2-byte aligned
            if ((unchecked((int)pSrc) & 0x1) != 0)
            {
                ch = *pSrc;
                pSrc++;
                if (ch > 0x7F)
                {
                    goto LongCode;
                }
                *pTarget = (char)ch;
                pTarget++;
            }

            // get pSrc to be 4-byte aligned
            if ((unchecked((int)pSrc) & 0x2) != 0)
            {
                ch = *(ushort*)pSrc;
                if ((ch & 0x8080) != 0)
                {
                    goto LongCodeWithMask16;
                }

                // Unfortunately, this is endianness sensitive
#if BIGENDIAN
                *pTarget = (char)((ch >> 8) & 0x7F);
                pSrc += 2;
                *(pTarget+1) = (char)(ch & 0x7F);
                pTarget += 2;
#else // BIGENDIAN
                *pTarget = (char)(ch & 0x7F);
                pSrc += 2;
                *(pTarget+1) = (char)((ch >> 8) & 0x7F);
                pTarget += 2;
#endif // BIGENDIAN
            }

            // Run 8 characters at a time!
            while (pTarget < pStop)
            {
                ch = *(int*)pSrc;
                int chb = *(int*)(pSrc+4);

                // Any byte with its high bit set means non-ASCII; leave the unrolled loop.
                if (((ch | chb) & unchecked((int)0x80808080)) != 0)
                {
                    goto LongCodeWithMask32;
                }

                // Unfortunately, this is endianness sensitive
#if BIGENDIAN
                *pTarget = (char)((ch >> 24) & 0x7F);
                *(pTarget+1) = (char)((ch >> 16) & 0x7F);
                *(pTarget+2) = (char)((ch >> 8) & 0x7F);
                *(pTarget+3) = (char)(ch & 0x7F);
                pSrc += 8;
                *(pTarget+4) = (char)((chb >> 24) & 0x7F);
                *(pTarget+5) = (char)((chb >> 16) & 0x7F);
                *(pTarget+6) = (char)((chb >> 8) & 0x7F);
                *(pTarget+7) = (char)(chb & 0x7F);
                pTarget += 8;
#else // BIGENDIAN
                *pTarget = (char)(ch & 0x7F);
                *(pTarget+1) = (char)((ch >> 8) & 0x7F);
                *(pTarget+2) = (char)((ch >> 16) & 0x7F);
                *(pTarget+3) = (char)((ch >> 24) & 0x7F);
                pSrc += 8;
                *(pTarget+4) = (char)(chb & 0x7F);
                *(pTarget+5) = (char)((chb >> 8) & 0x7F);
                *(pTarget+6) = (char)((chb >> 16) & 0x7F);
                *(pTarget+7) = (char)((chb >> 24) & 0x7F);
                pTarget += 8;
#endif // BIGENDIAN
            }
            break;

            // Reduce the multi-byte read in ch to its first byte in memory order, then consume it.
#if BIGENDIAN
        LongCodeWithMask32:
            // be careful about the sign extension
            ch = (int)(((uint)ch) >> 16);
        LongCodeWithMask16:
            ch = (int)(((uint)ch) >> 8);
#else // BIGENDIAN
        LongCodeWithMask32:
        LongCodeWithMask16:
            ch &= 0xFF;
#endif // BIGENDIAN
            pSrc++;
            if (ch <= 0x7F)
            {
                *pTarget = (char)ch;
                pTarget++;
                continue;
            }

        LongCode:
            int chc = *pSrc;
            pSrc++;

            if (
                // bit 6 has to be zero
                (ch & 0x40) == 0 ||
                // we are expecting to see trailing bytes like 10vvvvvv
                (chc & unchecked((sbyte)0xC0)) != 0x80)
            {
                goto BadLongCode;
            }
            chc &= 0x3F;

            // start a new long code
            if ((ch & 0x20) != 0)
            {
                // fold the first two bytes together
                chc |= (ch & 0x0F) << 6;

                if ((ch & 0x10) != 0)
                {
                    // 4 byte encoding - surrogate
                    ch = *pSrc;
                    if (
                        // check that bit 4 is zero, the non-shortest form of surrogate
                        // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
                        !InRange(chc >> 4, 0x01, 0x10) ||
                        // we are expecting to see trailing bytes like 10vvvvvv
                        (ch & unchecked((sbyte)0xC0)) != 0x80 )
                    {
                        goto BadLongCode;
                    }

                    chc = (chc << 6) | (ch & 0x3F);

                    ch = *(pSrc+1);
                    // we are expecting to see trailing bytes like 10vvvvvv
                    if ((ch & unchecked((sbyte)0xC0)) != 0x80)
                    {
                        goto BadLongCode;
                    }
                    pSrc += 2;

                    ch = (chc << 6) | (ch & 0x3F);

                    // Write the high surrogate now; the low surrogate is written after the if/else.
                    *pTarget = (char)( ((ch >> 10) & 0x7FF) + unchecked((short)(CharUnicodeInfo.HIGH_SURROGATE_START - (0x10000 >> 10))) );
                    pTarget++;

                    ch = (ch & 0x3FF) + unchecked((short)(CharUnicodeInfo.LOW_SURROGATE_START));

                    // extra byte, we're already planning 2 chars for 2 of these bytes,
                    // but the big loop is testing the target against pStop, so we need
                    // to subtract 2 more or we risk overrunning the input. Subtract
                    // one here and one below.
                    pStop--;
                }
                else
                {
                    // 3 byte encoding
                    ch = *pSrc;
                    if (
                        // check for non-shortest form of 3 byte seq
                        (chc & (0x1F << 5)) == 0 ||
                        // Can't have surrogates here.
                        (chc & (0xF800 >> 6) ) == (0xD800 >> 6) ||
                        // we are expecting to see trailing bytes like 10vvvvvv
                        (ch & unchecked((sbyte)0xC0)) != 0x80 )
                    {
                        goto BadLongCode;
                    }
                    pSrc++;

                    ch = (chc << 6) | (ch & 0x3F);

                    // extra byte, we're only expecting 1 char for each of these 3 bytes,
                    // but the loop is testing the target (not source) against pStop, so
                    // we need to subtract 2 more or we risk overrunning the input.
                    // Subtract 1 here and one more below
                    pStop--;
                }
            }
            else
            {
                // 2 byte encoding

                ch &= 0x1F;

                // check for non-shortest form
                if (ch <= 1)
                {
                    goto BadLongCode;
                }
                ch = (ch << 6) | chc;
            }

            *pTarget = (char)ch;
            pTarget++;

            // extra byte, we're only expecting 1 char for each of these 2 bytes,
            // but the loop is testing the target (not source) against pStop.
            // subtract an extra count from pStop so that we don't overrun the input.
            pStop--;
        }
#endif // FASTLOOP

        Contract.Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetChars]pTarget <= pAllocatedBufferEnd");

        // no pending bits at this point
        ch = 0;
        continue;

    BadLongCode:
        // Un-consume the two bytes read for the rejected sequence and clear the pending bits,
        // so the slow loop re-reads them from the lead byte.
        pSrc -= 2;
        ch = 0;
        continue;
    }

    // Input ended in the middle of a sequence: when flushing, the leftover bits are invalid.
    if (ch != 0 && (baseDecoder == null || baseDecoder.MustFlush))
    {
        // Have to do fallback for invalid bytes
        if (fallback == null)
        {
            if (baseDecoder == null)
                fallback = this.decoderFallback.CreateFallbackBuffer();
            else
                fallback = baseDecoder.FallbackBuffer;
            fallback.InternalInitialize(bytes, pAllocatedBufferEnd);
        }

        // This'll back us up the appropriate # of bytes if we didn't get anywhere
        if (!FallbackInvalidByteSequence(ref pSrc, ch, fallback, ref pTarget))
        {
            Contract.Assert(pSrc >= bytes || pTarget == chars, "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer while flushing");

            // Ran out of buffer space
            // Need to throw an exception?
            fallback.InternalReset();
            ThrowCharsOverflow(baseDecoder, pTarget == chars);
        }
        Contract.Assert(pSrc >= bytes, "[UTF8Encoding.GetChars]Expected flushing invalid byte sequence to have remained within the byte array");
        ch = 0;
    }

    if (baseDecoder != null)
    {
        UTF8Decoder decoder = (UTF8Decoder)baseDecoder;

        // If we're storing flush data we expect all bits to be used or else
        // we're stuck in the middle of a conversion
        Contract.Assert(!baseDecoder.MustFlush || ch == 0 || !baseDecoder.m_throwOnOverflow, "[UTF8Encoding.GetChars]Expected no must flush or no left over bits or no throw on overflow.");

        // Remember our leftover bits.
        decoder.bits = ch;

        baseDecoder.m_bytesUsed = (int)(pSrc - bytes);
    }

    // Shouldn't have anything in fallback buffer for GetChars
    // (don't have to check m_throwOnOverflow for chars)
    Contract.Assert(fallback == null || fallback.Remaining == 0, "[UTF8Encoding.GetChars]Expected empty fallback buffer at end");

    return PtrDiff(pTarget, chars);
}
// Counts how many UTF-16 chars decoding `count` UTF-32 bytes at `bytes` would produce.
//
// bytes       - start of the input; asserted non-null.
// count       - number of input bytes; asserted non-negative.
// baseDecoder - optional UTF32Decoder. When present, its partially read code unit
//               (readByteCount / iChar) is continued and its fallback buffer is used.
//               This method only reads that state; it never writes it back.
//
// Returns the char count, including whatever the decoder fallback reports for invalid
// or truncated sequences. Throws ArgumentOutOfRangeException if the running count
// goes negative (int overflow).
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    BCLDebug.Assert(bytes!=null, "[UTF32Encoding.GetCharCount]bytes!=null");
    BCLDebug.Assert(count >=0, "[UTF32Encoding.GetCharCount]count >=0");

    UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

    byte* inputStart = bytes;
    byte* inputEnd = bytes + count;

    int total = 0;          // chars counted so far
    int pendingBytes = 0;   // bytes accumulated toward the current 4-byte unit
    uint scalar = 0;        // value being assembled from those bytes

    DecoderFallbackBuffer fallbackBuffer;
    if (decoder == null)
    {
        fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
    }
    else
    {
        // Continue from whatever the previous call left half-read.
        pendingBytes = decoder.readByteCount;
        scalar = (uint)decoder.iChar;
        fallbackBuffer = decoder.FallbackBuffer;

        // Shouldn't have anything in fallback buffer for GetCharCount
        // (don't have to check m_throwOnOverflow for chars or count)
        BCLDebug.Assert(fallbackBuffer.Remaining == 0, "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
    }

    // Counting only, so there is no output buffer to hand to the fallback.
    fallbackBuffer.InternalInitialize(inputStart, null);

    // Stop early once the count has wrapped negative; the overflow is reported below.
    while (bytes < inputEnd && total >= 0)
    {
        byte next = *(bytes++);
        if (bigEndian)
        {
            // Big endian: shift what we have up and put the new byte at the bottom.
            scalar <<= 8;
            scalar += next;
        }
        else
        {
            // Little endian: shift what we have down and put the new byte at the top.
            scalar >>= 8;
            scalar += (uint)next << 24;
        }

        // Keep reading until a whole 4-byte unit is available.
        if (++pendingBytes < 4)
        {
            continue;
        }
        pendingBytes = 0;

        bool isScalarValue = scalar <= 0x10FFFF && (scalar < 0xD800 || scalar > 0xDFFF);
        if (isScalarValue)
        {
            // Values from 0x10000 up need a surrogate pair in UTF-16.
            total += (scalar >= 0x10000) ? 2 : 1;
        }
        else
        {
            // Out of range or a surrogate code point: hand the four bytes, in
            // stream order, to the fallback and count what it reports.
            byte[] invalid = this.bigEndian
                ? new byte[] { unchecked((byte)(scalar>>24)), unchecked((byte)(scalar>>16)),
                               unchecked((byte)(scalar>>8)), unchecked((byte)(scalar)) }
                : new byte[] { unchecked((byte)(scalar)), unchecked((byte)(scalar>>8)),
                               unchecked((byte)(scalar>>16)), unchecked((byte)(scalar>>24)) };

            total += fallbackBuffer.InternalFallback(invalid, bytes);
        }

        // Either way, start the next unit from scratch.
        scalar = 0;
    }

    // A truncated trailing unit is only reported when there is no decoder to
    // carry it over, or the decoder has been asked to flush.
    if (pendingBytes > 0 && (decoder == null || decoder.MustFlush))
    {
        byte[] truncated = new byte[pendingBytes];
        for (int i = pendingBytes - 1; i >= 0; i--)
        {
            if (this.bigEndian)
            {
                truncated[i] = unchecked((byte)scalar);
                scalar >>= 8;
            }
            else
            {
                truncated[i] = unchecked((byte)(scalar>>24));
                scalar <<= 8;
            }
        }

        total += fallbackBuffer.InternalFallback(truncated, bytes);
    }

    // Check for overflows.
    if (total < 0)
    {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
    }

    // Shouldn't have anything in fallback buffer for GetCharCount
    // (don't have to check m_throwOnOverflow for chars or count)
    BCLDebug.Assert(fallbackBuffer.Remaining == 0, "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");

    return total;
}
// Decodes UTF-16 bytes (byte order selected by bigEndian) into chars.
//
// bytes / byteCount - input buffer and its length in bytes.
// chars / charCount - output buffer and its capacity in chars.
// baseDecoder       - optional UnicodeEncoding.Decoder. When present, a dangling odd byte
//                     (lastByte) and a pending high surrogate (lastChar) are resumed from it
//                     and stored back on exit, along with m_bytesUsed.
//
// Returns the number of chars written. Lone or mis-ordered surrogates, and (when flushing)
// a trailing odd byte, go to the decoder fallback. When the output runs out,
// ThrowCharsOverflow is called; per the comments at the call sites it may throw or may
// return, in which case decoding stops and the unconsumed bytes are backed out.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;

    // Need last vars
    // lastByte: -1 means "no pending odd byte"; lastChar: 0 means "no pending high surrogate".
    int lastByte = -1;
    char lastChar = (char)0;

    // Get our decoder (but don't clear it yet)
    if (decoder != null)
    {
        lastByte = decoder.lastByte;
        lastChar = decoder.lastChar;
    }

    // For fallback we may need a fallback buffer (created lazily on first use)
    DecoderFallbackBuffer fallbackBuffer = null;

    byte* byteEnd = bytes + byteCount;
    char* charEnd = chars + charCount;
    byte* byteStart = bytes;
    char* charStart = chars;

    while (bytes < byteEnd)
    {
        // If we're aligned then maybe we can do it fast
        // This'll hurt if we're unaligned because we'll always test but never be aligned
        // The fast path is only entered when the stream byte order matches the build's
        // byte order (BIGENDIAN symbol) and no state is pending.
#if !NO_FAST_UNICODE_LOOP
#if BIGENDIAN
        if (bigEndian &&
#else // BIGENDIAN
        if (!bigEndian &&
#endif // BIGENDIAN
#if WIN64 // win64 has to be long aligned
            (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
#else
            (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
#endif // WIN64
            lastByte == -1 && lastChar == 0)
        {
            // Need -1 to check 2 at a time. If we have an even #, longChars will go
            // from longEnd - 1/2 long to longEnd + 1/2 long. If we're odd, longChars
            // will go from longEnd - 1 long to longEnd. (Might not get to use this)
            // We can only go iCount units (limited by shorter of char or byte buffers.
            ulong* longEnd = (ulong*)(bytes - 7 + (((byteEnd - bytes) >> 1 < charEnd - chars) ? (byteEnd - bytes) : (charEnd - chars) << 1));

            // Need new char* so we can check 4 at a time
            ulong* longBytes = (ulong*)bytes;
            ulong* longChars = (ulong*)chars;

            while (longBytes < longEnd)
            {
                // See if we potentially have surrogates (0x8000 bit set)
                // (We're either big endian on a big endian machine or little endian on
                // a little endian machine so this'll work)
                if ((0x8000800080008000 & *longBytes) != 0)
                {
                    // See if any of these are high or low surrogates (0xd800 - 0xdfff). If the high
                    // 5 bits looks like 11011, then its a high or low surrogate.
                    // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
                    // Note that we expect BMP characters to be more common than surrogates
                    // & each char with 11111... then ^ with 11011. Zeroes then indicate surrogates
                    ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;

                    // Check each of the 4 chars. 0 for those 16 bits means it was a surrogate
                    // but no clue if they're high or low.
                    // If each of the 4 characters are non-zero, then none are surrogates.
                    if ((uTemp & 0xFFFF000000000000) == 0 ||
                        (uTemp & 0x0000FFFF00000000) == 0 ||
                        (uTemp & 0x00000000FFFF0000) == 0 ||
                        (uTemp & 0x000000000000FFFF) == 0)
                    {
                        // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
                        // or if there's 1 or 4 surrogates

                        // If they happen to be high/low/high/low, we may as well continue. Check the next
                        // bit to see if its set (low) or not (high) in the right pattern
#if BIGENDIAN
                        if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
#else
                        if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
#endif
                        {
                            // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
                            // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.

                            // Drop out to the slow loop to resolve the surrogates
                            break;
                        }
                        // else they are all surrogates in High/Low/High/Low order, so we can use them.
                    }
                    // else none are surrogates, so we can use them.
                }
                // else all < 0x8000 so we can use them

                // We can use these 4 chars.
                *longChars = *longBytes;
                longBytes++;
                longChars++;
            }

            // Resume the byte-at-a-time loop from wherever the fast path stopped.
            chars = (char*)longChars;
            bytes = (byte*)longBytes;

            if (bytes >= byteEnd)
                break;
        }
#endif // !NO_FAST_UNICODE_LOOP

        // Get 1st byte
        if (lastByte < 0)
        {
            lastByte = *bytes++;
            continue;
        }

        // Get full char: combine the pending byte with the next one in stream byte order
        char ch;
        if (bigEndian)
        {
            ch = (char)(lastByte << 8 | *(bytes++));
        }
        else
        {
            ch = (char)(*(bytes++) << 8 | lastByte);
        }
        lastByte = -1;

        // See if the char's valid
        if (ch >= 0xd800 && ch <= 0xdfff)
        {
            // Was it a high surrogate?
            if (ch <= 0xdbff)
            {
                // Its a high surrogate, if we had one then do fallback for previous one
                if (lastChar > 0)
                {
                    // Get fallback for previous high surrogate
                    // Note we have to reconstruct bytes because some may have been in decoder
                    byte[] byteBuffer = null;
                    if (bigEndian)
                    {
                        byteBuffer = new byte[]
                            { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
                    }
                    else
                    {
                        byteBuffer = new byte[]
                            { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
                    }

                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                            fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = decoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(byteStart, charEnd);
                    }

                    if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
                    {
                        bytes -= 2;                                       // didn't use these 2 bytes
                        fallbackBuffer.InternalReset();
                        ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                        break;                                          // couldn't fallback but didn't throw
                    }
                }

                // Ignore the previous high surrogate which fell back already,
                // yet remember the current high surrogate for next time.
                lastChar = ch;
                continue;
            }

            // Its a low surrogate
            if (lastChar == 0)
            {
                // Expected a previous high surrogate
                // Get fallback for this low surrogate
                // Note we have to reconstruct bytes because some may have been in decoder
                byte[] byteBuffer = null;
                if (bigEndian)
                {
                    byteBuffer = new byte[]
                        { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
                }
                else
                {
                    byteBuffer = new byte[]
                        { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
                }

                if (fallbackBuffer == null)
                {
                    if (decoder == null)
                        fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                    else
                        fallbackBuffer = decoder.FallbackBuffer;

                    // Set our internal fallback interesting things.
                    fallbackBuffer.InternalInitialize(byteStart, charEnd);
                }

                if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
                {
                    bytes -= 2;                                       // didn't use these 2 bytes
                    fallbackBuffer.InternalReset();
                    ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                    break;                                          // couldn't fallback but didn't throw
                }

                // Didn't throw, ignore this one (we already did its fallback)
                continue;
            }

            // Valid surrogate pair, add our lastChar (will need 2 chars)
            if (chars >= charEnd - 1)
            {
                bytes -= 2;                                       // didn't use these 2 bytes
                ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                // Leave lastChar for next call to Convert()
                break;                                          // couldn't fallback but didn't throw
            }

            // Emit the high surrogate here; the low surrogate (ch) is written by the
            // common "add it" code at the bottom of the loop.
            *chars++ = lastChar;
            lastChar = (char)0;
        }
        else if (lastChar > 0)
        {
            // Had a high surrogate, expected a low surrogate, fall back the high surrogate.
            byte[] byteBuffer = null;
            if (bigEndian)
            {
                byteBuffer = new byte[]
                    { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
            }
            else
            {
                byteBuffer = new byte[]
                    { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
            }

            if (fallbackBuffer == null)
            {
                if (decoder == null)
                    fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                else
                    fallbackBuffer = decoder.FallbackBuffer;

                // Set our internal fallback interesting things.
                fallbackBuffer.InternalInitialize(byteStart, charEnd);
            }

            if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
            {
                bytes -= 2;                                       // didn't use these 2 bytes
                fallbackBuffer.InternalReset();
                ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                break;                                          // couldn't fallback but didn't throw
            }

            // Not left over now, clear previous high surrogate and continue to add current char
            lastChar = (char)0;
        }

        // Valid char, room for it?
        if (chars >= charEnd)
        {
            bytes -= 2;                                       // didn't use these bytes
            ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
            break;                                          // couldn't fallback but didn't throw
        }

        // add it
        *chars++ = ch;
    }

    // Flush step: with no decoder, or a decoder that must flush, pending state cannot be
    // carried to a later call, so fall back any hanging high surrogate and odd byte now.
    if (decoder == null || decoder.MustFlush)
    {
        if (lastChar > 0)
        {
            // No hanging high surrogates allowed, do fallback and remove count for it
            byte[] byteBuffer = null;
            if (bigEndian)
            {
                byteBuffer = new byte[]
                    { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
            }
            else
            {
                byteBuffer = new byte[]
                    { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
            }

            if (fallbackBuffer == null)
            {
                if (decoder == null)
                    fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                else
                    fallbackBuffer = decoder.FallbackBuffer;

                // Set our internal fallback interesting things.
                fallbackBuffer.InternalInitialize(byteStart, charEnd);
            }

            if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
            {
                // 2 bytes couldn't fall back
                // We either advanced bytes or chars should == charStart and throw below
                // The pointer is moved back only for the duration of the ThrowCharsOverflow
                // call and restored afterwards.
                bytes -= 2;                                       // didn't use these bytes
                if (lastByte >= 0)
                    bytes--;                                    // had an extra last byte hanging around
                fallbackBuffer.InternalReset();
                ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                // We'll remember these in our decoder though
                bytes += 2;
                if (lastByte >= 0)
                    bytes++;
                goto End;
            }

            // done with this one
            lastChar = (char)0;
        }

        if (lastByte >= 0)
        {
            if (fallbackBuffer == null)
            {
                if (decoder == null)
                    fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                else
                    fallbackBuffer = decoder.FallbackBuffer;

                // Set our internal fallback interesting things.
                fallbackBuffer.InternalInitialize(byteStart, charEnd);
            }

            // No hanging odd bytes allowed if must flush
            if (!fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref chars))
            {
                // odd byte couldn't fall back
                bytes--;                                        // didn't use this byte
                fallbackBuffer.InternalReset();
                ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                // didn't throw, but we'll remember it in the decoder
                bytes++;
                goto End;
            }

            // Didn't fail, clear buffer
            lastByte = -1;
        }
    }

End:

    // Remember our decoder if we must: store bytes consumed and any still-pending state
    if (decoder != null)
    {
        decoder.m_bytesUsed = (int)(bytes - byteStart);
        decoder.lastChar = lastChar;
        decoder.lastByte = lastByte;
    }

    // Used to do this the old way
    // System.IO.__UnmanagedMemoryStream.memcpyimpl((byte*)chars, bytes, byteCount);

    // Number of chars written to the output buffer
    return (int)(chars - charStart);
}
// Decodes UTF-32 bytes (byte order selected by bigEndian) into UTF-16 chars.
//
// bytes / byteCount - input buffer and its length in bytes (asserted non-null / non-negative).
// chars / charCount - output buffer and its capacity in chars (asserted non-null / non-negative).
// baseDecoder       - optional UTF32Decoder. When present, a partially read code unit
//                     (readByteCount / iChar) is resumed from it, and the leftover state
//                     plus m_bytesUsed are written back before returning.
//
// Returns the number of chars written. Values above 0x10FFFF, surrogate code points and
// (when flushing) a truncated trailing unit go to the decoder fallback. When the output
// buffer is exhausted ThrowCharsOverflow is called; per the comments at the call sites it
// may throw or may return, in which case decoding stops with the unconsumed bytes backed out.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    BCLDebug.Assert(chars!=null, "[UTF32Encoding.GetChars]chars!=null");
    BCLDebug.Assert(bytes!=null, "[UTF32Encoding.GetChars]bytes!=null");
    BCLDebug.Assert(byteCount >=0, "[UTF32Encoding.GetChars]byteCount >=0");
    BCLDebug.Assert(charCount >=0, "[UTF32Encoding.GetChars]charCount >=0");

    UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

    // None so far!
    char* charStart = chars;
    char* charEnd = chars + charCount;

    byte* byteStart = bytes;
    byte* byteEnd = bytes + byteCount;

    // See if there's anything in our decoder (but don't clear it yet)
    int readCount = 0;   // bytes accumulated toward the current 4-byte unit
    uint iChar = 0;      // value being assembled from those bytes

    // For fallback we may need a fallback buffer
    DecoderFallbackBuffer fallbackBuffer = null;

    // See if there's anything in our decoder
    if (decoder != null)
    {
        readCount = decoder.readByteCount;
        iChar = (uint)decoder.iChar;
        fallbackBuffer = baseDecoder.FallbackBuffer;

        // Shouldn't have anything in fallback buffer for GetChars
        // (don't have to check m_throwOnOverflow for chars)
        BCLDebug.Assert(fallbackBuffer.Remaining == 0, "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
    }
    else
    {
        fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
    }

    // Set our internal fallback interesting things.
    fallbackBuffer.InternalInitialize(bytes, chars + charCount);

    // Loop through our input, 4 bytes (one UTF-32 code unit) at a time!
    while (bytes < byteEnd)
    {
        // Get our next byte
        if(bigEndian)
        {
            // Scoot left and add it to the bottom
            iChar <<= 8;
            iChar += *(bytes++);
        }
        else
        {
            // Scoot right and add it to the top
            iChar >>= 8;
            iChar += (uint)(*(bytes++)) << 24;
        }

        readCount++;

        // See if we have all the bytes yet
        if (readCount < 4)
            continue;

        // Have the bytes
        readCount = 0;

        // See if its valid to encode
        if ( iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
        {
            // Need to fall back these 4 bytes (rebuilt in stream order)
            byte[] fallbackBytes;
            if (this.bigEndian)
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
                    unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
            }
            else
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
                    unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
            }

            // Chars won't be updated unless this works.
            if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars))
            {
                // Couldn't fallback, throw or wait til next time
                // We either read enough bytes for bytes-=4 to work, or we're
                // going to throw in ThrowCharsOverflow because chars == charStart
                BCLDebug.Assert(bytes >= byteStart + 4 || chars == charStart,
                    "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
                bytes-=4;                                       // get back to where we were
                iChar=0;                                        // Remembering nothing
                fallbackBuffer.InternalReset();
                ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                break;                                          // Stop here, didn't throw
            }

            // Ignore the illegal character
            iChar = 0;
            continue;
        }

        // Ok, we have something we can add to our output
        if (iChar >= 0x10000)
        {
            // Surrogates take 2
            if (chars >= charEnd - 1)
            {
                // Throwing or stopping
                // We either read enough bytes for bytes-=4 to work, or we're
                // going to throw in ThrowCharsOverflow because chars == charStart
                BCLDebug.Assert(bytes >= byteStart + 4 || chars == charStart,
                    "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
                bytes-=4;                                       // get back to where we were
                iChar=0;                                        // Remembering nothing
                ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                break;                                          // Stop here, didn't throw
            }

            // Write the high half now; the low half goes out through the common store below.
            *(chars++) = GetHighSurrogate(iChar);
            iChar = GetLowSurrogate(iChar);
        }
        // Bounds check for normal character
        else if (chars >= charEnd)
        {
            // Throwing or stopping
            // We either read enough bytes for bytes-=4 to work, or we're
            // going to throw in ThrowCharsOverflow because chars == charStart
            BCLDebug.Assert(bytes >= byteStart + 4 || chars == charStart,
                "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
            bytes-=4;                                       // get back to where we were
            iChar=0;                                        // Remembering nothing
            ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
            break;                                          // Stop here, didn't throw
        }

        // Add the rest of the surrogate or our normal character
        *(chars++) = (char)iChar;

        // iChar is back to 0
        iChar = 0;
    }

    // See if we have something left over that has to be decoded
    if (readCount > 0 && (decoder == null || decoder.MustFlush))
    {
        // Oops, there's something left over with no place to go.
        byte[] fallbackBytes = new byte[readCount];
        int tempCount = readCount;

        // Unpack from a scratch copy. If the fallback below fails without throwing,
        // iChar and readCount are stored back into the decoder further down, so iChar
        // must still hold the partially read unit rather than a shifted-out zero.
        uint tempChar = iChar;
        if (this.bigEndian)
        {
            while(tempCount > 0)
            {
                fallbackBytes[--tempCount] = unchecked((byte)tempChar);
                tempChar >>= 8;
            }
        }
        else
        {
            while (tempCount > 0)
            {
                fallbackBytes[--tempCount] = unchecked((byte)(tempChar>>24));
                tempChar <<= 8;
            }
        }

        if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars))
        {
            // Couldn't fallback.
            fallbackBuffer.InternalReset();
            ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
            // Stop here, didn't throw; readCount and iChar are left intact for the next call
        }
        else
        {
            // Don't clear our decoder unless we could fall it back.
            // If we caught the if above, then we're a convert() and will catch this next time.
            readCount = 0;
            iChar = 0;
        }
    }

    // Remember any left over stuff, clearing buffer as well for MustFlush
    if (decoder != null)
    {
        decoder.iChar = (int)iChar;
        decoder.readByteCount = readCount;
        decoder.m_bytesUsed = (int)(bytes - byteStart);
    }

    // Shouldn't have anything in fallback buffer for GetChars
    // (don't have to check m_throwOnOverflow for chars)
    BCLDebug.Assert(fallbackBuffer.Remaining == 0, "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");

    // Return our count
    return (int)(chars - charStart);
}
// Decodes double-byte code page bytes into chars through the mapBytesToUnicode table.
//
// bytes / byteCount - input buffer and its length in bytes.
// chars / charCount - output buffer and its capacity in chars.
// baseDecoder       - optional DBCSDecoder. A lead byte left over from a previous call
//                     (bLeftOver) is combined with the first new byte; a lead byte at the
//                     very end of the input is stored back unless flushing.
//
// Returns the number of chars written. Unmapped byte sequences go to the decoder fallback.
// On output exhaustion ThrowCharsOverflow is called; per the comments at the call sites it
// may throw or may return, in which case the unconsumed bytes are backed out and decoding stops.
[System.Security.SecurityCritical]  // auto-generated
public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    // Parameters were validated by the internal caller; these are debug-only checks.
    Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetChars]bytes is null");
    Debug.Assert(byteCount >= 0, "[DBCSCodePageEncoding.GetChars]byteCount is negative");
    Debug.Assert(chars != null, "[DBCSCodePageEncoding.GetChars]chars is null");
    Debug.Assert(charCount >= 0, "[DBCSCodePageEncoding.GetChars]charCount is negative");

    CheckMemorySection();

    DBCSDecoder decoder = (DBCSDecoder)baseDecoder;

    // Buffer boundaries
    byte* inputStart = bytes;
    byte* inputEnd = bytes + byteCount;
    char* outputStart = chars;
    char* outputEnd = chars + charCount;

    // Set when a trailing lead byte has been parked in the decoder during this call.
    bool leadByteStored = false;

    // Created lazily, the first time a fallback is actually needed.
    DecoderFallbackBuffer fallbackBuffer = null;

    // Shouldn't have anything in fallback buffer for GetChars
    Debug.Assert(decoder == null || !decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
        "[DBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start");

    DecoderFallbackBufferHelper fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);

    // First deal with a lead byte carried over from the previous call.
    if (decoder != null && decoder.bLeftOver > 0)
    {
        if (byteCount == 0)
        {
            // Nothing new to pair it with; unless flushing, just keep waiting.
            if (!decoder.MustFlush)
            {
                return 0;
            }

            // Flushing: the lone lead byte goes to the fallback.
            Debug.Assert(fallbackBuffer == null, "[DBCSCodePageEncoding.GetChars]Expected empty fallback");
            fallbackBuffer = decoder.FallbackBuffer;
            fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);
            fallbackHelper.InternalInitialize(bytes, outputEnd);

            // This is the first output of the call, so running out of room is hopeless.
            byte[] orphanLead = new byte[] { unchecked((byte)decoder.bLeftOver) };
            if (!fallbackHelper.InternalFallback(orphanLead, bytes, ref chars))
            {
                ThrowCharsOverflow(decoder, true);
            }

            decoder.bLeftOver = 0;
            return (int)(chars - outputStart);
        }

        // Pair the stored lead byte with the first incoming byte and look the pair up.
        int carriedPair = (decoder.bLeftOver << 8) | (*bytes);
        bytes++;

        char carriedChar = mapBytesToUnicode[carriedPair];
        bool carriedIsMapped = carriedChar != UNKNOWN_CHAR_FLAG || carriedPair == 0;
        if (carriedIsMapped)
        {
            // First char of the call: no room at all means nothing can be done.
            if (chars >= outputEnd)
            {
                ThrowCharsOverflow(decoder, true);
            }

            *(chars++) = carriedChar;
        }
        else
        {
            Debug.Assert(fallbackBuffer == null, "[DBCSCodePageEncoding.GetChars]Expected empty fallback for two bytes");
            fallbackBuffer = decoder.FallbackBuffer;
            fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);
            fallbackHelper.InternalInitialize(inputEnd - byteCount, outputEnd);

            byte[] carriedBytes = new byte[] { unchecked((byte)(carriedPair >> 8)), unchecked((byte)carriedPair) };
            if (!fallbackHelper.InternalFallback(carriedBytes, bytes, ref chars))
            {
                ThrowCharsOverflow(decoder, true);
            }
        }
    }

    // Main loop over the remaining input.
    while (bytes < inputEnd)
    {
        // Read and advance as two statements (the original notes this is faster than *bytes++).
        int code = *bytes;
        bytes++;

        char c = mapBytesToUnicode[code];

        if (c == LEAD_BYTE_CHAR)
        {
            if (bytes >= inputEnd)
            {
                // Lead byte is the last input byte.
                if (decoder != null && !decoder.MustFlush)
                {
                    // Park it in the decoder for the next call.
                    leadByteStored = true;
                    decoder.bLeftOver = (byte)code;
                    break;
                }

                // No decoder, or flushing: treat it as unknown so it falls back.
                c = UNKNOWN_CHAR_FLAG;
            }
            else
            {
                // Pull in the trail byte and look up the two-byte code.
                code <<= 8;
                code |= *bytes;
                bytes++;
                c = mapBytesToUnicode[code];
            }
        }

        if (c != UNKNOWN_CHAR_FLAG || code == 0)
        {
            // Mapped character: store it if there is room.
            if (chars >= outputEnd)
            {
                // May or may not throw, but we didn't get these byte(s)
                Debug.Assert(bytes > inputStart,
                    "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for lead byte");
                bytes--;                                            // unused byte
                if (code >= 0x100)
                {
                    Debug.Assert(bytes > inputStart,
                        "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for trail byte");
                    bytes--;                                        // 2nd unused byte
                }
                ThrowCharsOverflow(decoder, bytes == inputStart);   // throw?
                break;                                              // don't throw, but stop loop
            }

            *(chars++) = c;
            continue;
        }

        // Unknown sequence: hand it to the fallback.
        if (fallbackBuffer == null)
        {
            fallbackBuffer = (decoder == null)
                ? DecoderFallback.CreateFallbackBuffer()
                : decoder.FallbackBuffer;
            fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);
            fallbackHelper.InternalInitialize(inputEnd - byteCount, outputEnd);
        }

        byte[] unknownBytes = (code < 0x100)
            ? new byte[] { unchecked((byte)code) }
            : new byte[] { unchecked((byte)(code >> 8)), unchecked((byte)code) };

        if (!fallbackHelper.InternalFallback(unknownBytes, bytes, ref chars))
        {
            // May or may not throw, but we didn't get these byte(s)
            Debug.Assert(bytes >= inputStart + unknownBytes.Length,
                "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for fallback");
            bytes -= unknownBytes.Length;                           // didn't use these byte(s)
            fallbackHelper.InternalReset();                         // Didn't fall this back
            ThrowCharsOverflow(decoder, bytes == inputStart);       // throw?
            break;                                                  // don't throw, but stop loop
        }
    }

    // Update the decoder: clear a stale lead byte unless one was stored above,
    // and report how much input was consumed.
    if (decoder != null)
    {
        if (!leadByteStored)
        {
            decoder.bLeftOver = 0;
        }

        decoder.m_bytesUsed = (int)(bytes - inputStart);
    }

    // Shouldn't have anything in fallback buffer for GetChars
    Debug.Assert(decoder == null || !decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
        "[DBCSCodePageEncoding.GetChars]Expected empty fallback buffer at end");

    // Return length of our output
    return (int)(chars - outputStart);
}
// Returns the number of chars that decoding `count` bytes at `bytes` would produce.
// Counting is delegated to GetChars by passing a null output buffer of capacity zero.
[System.Security.SecurityCritical]  // auto-generated
public override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    char* noOutput = null;
    int charsNeeded = GetChars(bytes, count, noOutput, 0, baseDecoder);
    return charsNeeded;
}
// Decodes ISCII bytes into chars (or only counts them when chars is null).
//
// For decoding, the following interesting rules apply:
//      Virama followed by another Virama or Nukta becomes Virama + ZWNJ or Virama + ZWJ
//      ATR is followed by a byte to switch code pages ("fonts")
//      Devanagari F0, B8 -> \u0952
//      Devanagari F0, BF -> \u0970
//      Some characters followed by E9 become a different character instead.
//
// bytes / byteCount - input buffer and its length in bytes.
// chars / charCount - output buffer and its capacity; chars may be null to count only.
// baseDecoder       - optional ISCIIDecoder holding the current code page and the
//                     "last byte was special" state between calls.
//
// Returns buffer.Count. Whenever an AddChar/Fallback on the buffer reports failure the
// loop stops with the pending state intact, so a decoder can resume on the next call.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    // Just need to ASSERT, this is called by something else internal that checked parameters already
    // Allow null chars for counting
    BCLDebug.Assert(bytes != null, "[ISCIIEncoding.GetChars]bytes is null");
    BCLDebug.Assert(byteCount >= 0, "[ISCIIEncoding.GetChars]byteCount is negative");
    // BCLDebug.Assert(chars != null, "[ISCIIEncoding.GetChars]chars is null");
    BCLDebug.Assert(charCount >= 0, "[ISCIIEncoding.GetChars]charCount is negative");

    // Need the ISCII Decoder
    ISCIIDecoder decoder = (ISCIIDecoder) baseDecoder;

    // Get our info.
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    int currentCodePage = this.defaultCodePage;
    bool bLastATR = false;
    bool bLastVirama = false;
    bool bLastDevenagariStressAbbr = false;
    char cLastCharForNextNukta = '\0';
    char cLastCharForNoNextNukta = '\0';

    // See if there's anything in our decoder
    if (decoder != null)
    {
        currentCodePage = decoder.currentCodePage;
        bLastATR = decoder.bLastATR;
        bLastVirama = decoder.bLastVirama;
        bLastDevenagariStressAbbr = decoder.bLastDevenagariStressAbbr;
        cLastCharForNextNukta = decoder.cLastCharForNextNukta;
        cLastCharForNoNextNukta = decoder.cLastCharForNoNextNukta;
    }

    bool bLastSpecial = bLastVirama | bLastATR | bLastDevenagariStressAbbr |
        (cLastCharForNextNukta != '\0');

    // Get our current code page index (some code pages are dups)
    int currentCodePageIndex = -1;
    BCLDebug.Assert(currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi,
        "[ISCIIEncoding.GetChars]Decoder code page must be >= Devanagari and <= Punjabi, not " + currentCodePage);

    if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
    {
        currentCodePageIndex = IndicMappingIndex[currentCodePage];
    }

    // Loop through our input
    while (buffer.MoreData)
    {
        byte b = buffer.GetNextByte();

        // See if last one was special
        if (bLastSpecial)
        {
            // Now it won't be
            bLastSpecial = false;

            // One and only one of our flags should be set
            BCLDebug.Assert(((bLastVirama ? 1 : 0) + (bLastATR ? 1 : 0) +
                       (bLastDevenagariStressAbbr ? 1 : 0) +
                       ((cLastCharForNextNukta > 0) ? 1 : 0)) == 1,
                String.Format(CultureInfo.InvariantCulture,
                    "[ISCIIEncoding.GetChars]Special cases require 1 and only 1 special case flag: LastATR {0} Dev. {1} Nukta {2}",
                    bLastATR, bLastDevenagariStressAbbr, cLastCharForNextNukta));

            // If the last one was an ATR, then we'll have to do ATR stuff
            if (bLastATR)
            {
                // No longer last ATR, we know it wasn't bLastVirama
                bLastATR = false;

                // We only support Devanagari - Punjabi
                if (b >= (0x40 | CodeDevanagari) && b <= (0x40 | CodePunjabi))
                {
                    // Remember the code page
                    currentCodePage = b & 0xf;
                    currentCodePageIndex = IndicMappingIndex[currentCodePage];
                    continue;
                }

                // Change back to default?
                if (b == 0x40)
                {
                    currentCodePage = this.defaultCodePage;
                    currentCodePageIndex = -1;

                    if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
                    {
                        currentCodePageIndex = IndicMappingIndex[currentCodePage];
                    }
                    continue;
                }

                // We don't support Roman
                if (b == 0x41)
                {
                    currentCodePage = this.defaultCodePage;
                    currentCodePageIndex = -1;

                    if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
                    {
                        currentCodePageIndex = IndicMappingIndex[currentCodePage];
                    }

                    // Even though we don't know how to support Roman, windows didn't add a ? so we don't either.
                    continue;
                }

                // Other code pages & ATR codes not supported, fallback the ATR.
                // If the fallback cannot be emitted, stop like every other failed
                // AddChar/Fallback in this method and keep the ATR pending, so the
                // flush/state-saving code below still sees it.
                // NOTE(review): assumes a failed Fallback backs out the current byte,
                // as the comments in the flush section below describe - confirm
                // against EncodingCharBuffer.
                if (!buffer.Fallback(ControlATR))
                {
                    bLastATR = true;
                    break;
                }

                // turn off things
                bLastVirama = false;
                bLastATR = false;
                bLastDevenagariStressAbbr = false;
                cLastCharForNextNukta = (char)0;
                cLastCharForNoNextNukta = (char)0;

                // Keep processing this byte
            }
            else if (bLastVirama)
            {
                // If last was Virama, then we might need ZWNJ or ZWJ instead
                if (b == Virama)
                {
                    // If no room, then stop
                    if (!buffer.AddChar(ZWNJ))
                        break;
                    bLastVirama = false;
                    continue;
                }

                if (b == Nukta)
                {
                    // If no room, then stop
                    if (!buffer.AddChar(ZWJ))
                        break;
                    bLastVirama = false;
                    continue;
                }

                bLastVirama = false;
            }
            else if (bLastDevenagariStressAbbr)
            {
                // Last byte was an f0 (ext).
                // If current is b8 or bf, then we have 952 or 970. Otherwise fallback
                if (b == 0xb8)
                {
                    // It was a b8
                    if (!buffer.AddChar('\x0952'))      // Devanagari stress sign anudatta
                        break;
                    bLastDevenagariStressAbbr = false;
                    continue;
                }

                if (b == 0xbf)
                {
                    // It was a bf
                    if (!buffer.AddChar('\x0970'))      // Devanagari abbr. sign
                        break;
                    bLastDevenagariStressAbbr = false;
                    continue;
                }

                // Wasn't an expected pattern, do fallback for f0 (ext) and keep going.
                // Only clear the flag once the fallback has actually been emitted; on
                // failure stop with the state intact, as the b8/bf branches above do.
                if (!buffer.Fallback(DevenagariExt))
                    break;
                bLastDevenagariStressAbbr = false;

                // Keep processing this byte
            }
            else
            {
                // We were checking for next char being a nukta
                BCLDebug.Assert(cLastCharForNextNukta > 0 && cLastCharForNoNextNukta > 0,
                    "[ISCIIEncoding.GetChars]No other special case found, but cLastCharFor(No)NextNukta variable(s) aren't set.");

                // We'll either add combined char or last char
                if (b == Nukta)
                {
                    // We combine nukta with previous char
                    if (!buffer.AddChar(cLastCharForNextNukta))
                        break;

                    // Done already
                    cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
                    continue;
                }

                // No Nukta, just add last character and keep processing current byte
                if (!buffer.AddChar(cLastCharForNoNextNukta))
                    break;

                cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';

                // Keep processing this byte
            }
        }

        // Now bLastSpecial should be false and all flags false.
        BCLDebug.Assert (!bLastSpecial && !bLastDevenagariStressAbbr && !bLastVirama && !bLastATR &&
                  cLastCharForNextNukta == '\0',
                  "[ISCIIEncoding.GetChars]No special state for last code point should exist at this point.");

        // If its a simple byte, just add it
        if (b < MultiByteBegin)
        {
            if (!buffer.AddChar((char)b))
                break;
            continue;
        }

        // See if its an ATR marker
        if (b == ControlATR)
        {
            bLastATR = bLastSpecial = true;
            continue;
        }

        BCLDebug.Assert (currentCodePageIndex != -1, "[ISCIIEncoding.GetChars]Expected valid currentCodePageIndex != -1");

        // ch is the plain mapping; cAlt is the alternate mapping for this byte.
        char ch = IndicMapping[currentCodePageIndex, 0, b - MultiByteBegin];
        char cAlt = IndicMapping[currentCodePageIndex, 1, b - MultiByteBegin];

        // If no 2nd char, just add it, also lonely Nuktas get added as well.
        if (cAlt == 0 || b == Nukta)
        {
            // If it was an unknown character do fallback

            // ? if not known.
            if (ch == 0)
            {
                if (!buffer.Fallback(b))
                    break;
            }
            else
            {
                if (!buffer.AddChar(ch))
                    break;
            }
            continue;
        }

        // if b == Virama set last Virama so we can do ZWJ or ZWNJ next time if needed.
        if (b == Virama)
        {
            if (!buffer.AddChar(ch))
                break;
            bLastVirama = bLastSpecial = true;
            continue;
        }

        // See if its one that changes with a Nukta
        if ((cAlt & 0xF000) == 0)
        {
            // It could change if next char is a nukta; defer the output until we see it
            bLastSpecial = true;
            cLastCharForNextNukta = cAlt;
            cLastCharForNoNextNukta = ch;
            continue;
        }

        // We must be the Devenagari special case for F0, B8 & F0, BF
        BCLDebug.Assert(currentCodePage == CodeDevanagari && b == DevenagariExt,
            String.Format(CultureInfo.InvariantCulture,
                "[ISCIIEncoding.GetChars] Devenagari special case must {0} not {1} or in Devanagari code page {2} not {3}.",
                DevenagariExt, b, CodeDevanagari, currentCodePage));
        bLastDevenagariStressAbbr = bLastSpecial = true;
    }

    // If we don't have a decoder, or if we had to flush, then we need to get rid
    // of last ATR, LastNoNextNukta and LastDevenagariExt.
    if (decoder == null || decoder.MustFlush)
    {
        // If these fail (because of Convert with insufficient buffer), then they'll turn off MustFlush as well.
        if (bLastATR)
        {
            // Have to add ATR fallback
            if (buffer.Fallback(ControlATR))
                bLastATR = false;
            else
                // If not successful, convert will maintain state for next time, also
                // AddChar will have decremented our byte count, however we need it to remain the same
                buffer.GetNextByte();
        }
        else if (bLastDevenagariStressAbbr)
        {
            // Have to do fallback for DevenagariExt
            if (buffer.Fallback(DevenagariExt))
                bLastDevenagariStressAbbr = false;
            else
                // If not successful, convert will maintain state for next time, also
                // AddChar will have decremented our byte count, however we need it to remain the same
                buffer.GetNextByte();
        }
        else if (cLastCharForNoNextNukta != '\0')
        {
            // Have to add our last char because there was no next nukta
            if (buffer.AddChar(cLastCharForNoNextNukta))
                cLastCharForNoNextNukta = cLastCharForNextNukta = '\0';
            else
                // If not successful, convert will maintain state for next time, also
                // AddChar will have decremented our byte count, however we need it to remain the same
                buffer.GetNextByte();
        }
        // LastVirama is unimportant for flushing decoder.
    }

    // Remember any left over stuff
    // (only remember if we aren't counting)
    if (decoder != null && chars != null)
    {
        // If not flushing or have state (from convert) then need to remember state
        if (!decoder.MustFlush ||
            cLastCharForNoNextNukta != '\0' || bLastATR || bLastDevenagariStressAbbr)
        {
            // Either not flushing or had state (from convert)
            BCLDebug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISCIIEncoding.GetChars]Expected no state or not converting or not flushing");
            decoder.currentCodePage = currentCodePage;
            decoder.bLastVirama = bLastVirama;
            decoder.bLastATR = bLastATR;
            decoder.bLastDevenagariStressAbbr = bLastDevenagariStressAbbr;
            decoder.cLastCharForNextNukta = cLastCharForNextNukta;
            decoder.cLastCharForNoNextNukta = cLastCharForNoNextNukta;
        }
        else
        {
            // Fully flushed: reset the decoder to its defaults
            decoder.currentCodePage = this.defaultCodePage;
            decoder.bLastVirama = false;
            decoder.bLastATR = false;
            decoder.bLastDevenagariStressAbbr = false;
            decoder.cLastCharForNextNukta = '\0';
            decoder.cLastCharForNoNextNukta = '\0';
        }
        decoder.m_bytesUsed = buffer.BytesUsed;
    }
    // Otherwise we already did fallback and added extra things

    // Return the # of characters we found
    return buffer.Count;
}
/// <summary>
/// Decodes GB18030 bytes into chars, or only counts the resulting chars when
/// <paramref name="chars"/> is null.
/// </summary>
/// <param name="bytes">Start of the input bytes; asserted non-null.</param>
/// <param name="byteCount">Number of input bytes; asserted non-negative.</param>
/// <param name="chars">Start of the output chars, or null to count only (decoder leftover bytes are then left untouched).</param>
/// <param name="charCount">Number of output chars available; asserted non-negative.</param>
/// <param name="baseDecoder">Optional decoder (cast to GB18030Decoder) whose leftover bytes are consumed first and updated on exit.</param>
/// <returns>The char count reported by the working EncodingCharBuffer.</returns>
[System.Security.SecurityCritical]  // auto-generated
public override unsafe int GetChars(byte* bytes, int byteCount,
                                    char* chars, int charCount, DecoderNLS baseDecoder)
{
    // Internal callers validated the arguments already, so only assert here.
    // chars is intentionally not asserted: a null chars pointer means "count only".
    Debug.Assert(bytes != null, "[GB18030Encoding.GetChars]bytes is null");
    Debug.Assert(byteCount >= 0, "[GB18030Encoding.GetChars]byteCount is negative");
    Debug.Assert(charCount >= 0, "[GB18030Encoding.GetChars]charCount is negative");

    GB18030Decoder decoder = (GB18030Decoder)baseDecoder;
    EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    // Working copies of the decoder's leftover bytes (-1 == empty slot). The decoder
    // itself is only written back at the exits, and only when not counting.
    short pending1 = -1;
    short pending2 = -1;
    short pending3 = -1;
    short pending4 = -1;

    // Phase 1: drain whatever a previous call left in the decoder.
    if (decoder != null && decoder.bLeftOver1 != -1)
    {
        pending1 = decoder.bLeftOver1;
        pending2 = decoder.bLeftOver2;
        pending3 = decoder.bLeftOver3;
        pending4 = decoder.bLeftOver4;

        // May need several passes: a pending 4 byte sequence can turn out to be invalid,
        // in which case its bytes are re-examined one position further down.
        while (pending1 != -1)
        {
            if (!IsGBLeadByte(pending1))
            {
                // Not a lead byte: emit it as ASCII, or hand it to the fallback.
                bool emitted = pending1 <= 0x7f
                    ? buffer.AddChar((char)pending1)
                    : buffer.Fallback((byte)pending1);
                if (!emitted)
                {
                    break;
                }

                // Shift the remaining pending bytes down one slot and retry.
                pending1 = pending2;
                pending2 = pending3;
                pending3 = pending4;
                pending4 = -1;
                continue;
            }

            // Top up from the input until we hold 2 bytes, or 4 when the second byte
            // is a four-byte trailing byte.
            while (pending2 == -1 || (IsGBFourByteTrailing(pending2) && pending4 == -1))
            {
                if (buffer.MoreData)
                {
                    // Fill the first empty slot.
                    short next = buffer.GetNextByte();
                    if (pending2 == -1)
                    {
                        pending2 = next;
                    }
                    else if (pending3 == -1)
                    {
                        pending3 = next;
                    }
                    else
                    {
                        pending4 = next;
                    }

                    continue;
                }

                if (decoder.MustFlush)
                {
                    // Input exhausted and flushing: fall through with the empty slots
                    // still -1 so the code below deals with the incomplete sequence.
                    break;
                }

                // Input exhausted, no flush: keep the partial sequence for the next call
                // (but never touch decoder leftovers while merely counting).
                if (chars != null)
                {
                    decoder.bLeftOver1 = pending1;
                    decoder.bLeftOver2 = pending2;
                    decoder.bLeftOver3 = pending3;
                    decoder.bLeftOver4 = pending4;
                }

                decoder.m_bytesUsed = buffer.BytesUsed;
                return buffer.Count;
            }

            if (IsGBTwoByteTrailing(pending2))
            {
                // Two-byte sequence.
                int twoByteIndex = (pending1 << 8) | unchecked((byte)pending2);
                if (!buffer.AddChar(mapBytesToUnicode[twoByteIndex], 2))
                {
                    break;
                }

                pending1 = -1;
                pending2 = -1;
                continue;
            }

            if (IsGBFourByteTrailing(pending2) &&
                IsGBLeadByte(pending3) &&
                IsGBFourByteTrailing(pending4))
            {
                // Four-byte sequence.
                int offset = GetFourBytesOffset(pending1, pending2, pending3, pending4);
                bool stored;

                if (offset <= GBLast4ByteCode)
                {
                    // Single char taken from the four-byte table.
                    stored = buffer.AddChar(map4BytesToUnicode[offset], 4);
                }
                else if (offset >= GBSurrogateOffset && offset <= GBLastSurrogateOffset)
                {
                    // Emitted as a surrogate pair (two chars).
                    offset -= GBSurrogateOffset;
                    stored = buffer.AddChar(unchecked((char)(0xd800 + (offset / 0x400))),
                                            unchecked((char)(0xdc00 + (offset % 0x400))), 4);
                }
                else
                {
                    // Well-formed sequence with no mapping: fall back on all four bytes.
                    stored = buffer.Fallback((byte)pending1, (byte)pending2, (byte)pending3, (byte)pending4);
                }

                if (!stored)
                {
                    break;
                }

                pending1 = -1;
                pending2 = -1;
                pending3 = -1;
                pending4 = -1;
                continue;
            }

            // Invalid sequence: fall back on the first byte only, then re-examine the rest.
            if (!buffer.Fallback((byte)pending1))
            {
                break;
            }

            pending1 = pending2;
            pending2 = pending3;
            pending3 = pending4;
            pending4 = -1;
        }
    }

    // Phase 2: decode the remaining input.
    while (buffer.MoreData)
    {
        byte first = buffer.GetNextByte();

        // ASCII goes straight through.
        if (first <= 0x7f)
        {
            if (!buffer.AddChar((char)first))
            {
                break;
            }

            continue;
        }

        // Neither ASCII nor a lead byte.
        if (!IsGBLeadByte(first))
        {
            if (!buffer.Fallback(first))
            {
                break;
            }

            continue;
        }

        // Lead byte is the last input byte.
        if (!buffer.MoreData)
        {
            if (decoder != null && !decoder.MustFlush)
            {
                // Carry it over to the next call (only remembered when not counting).
                if (chars != null)
                {
                    pending1 = first;
                    pending2 = -1;
                    pending3 = -1;
                    pending4 = -1;
                }

                break;
            }

            if (!buffer.Fallback(first))
            {
                break;
            }

            continue;
        }

        byte second = buffer.GetNextByte();

        if (IsGBTwoByteTrailing(second))
        {
            // Two-byte sequence.
            int twoByteIndex = (first << 8) | second;
            if (!buffer.AddChar(mapBytesToUnicode[twoByteIndex], 2))
            {
                break;
            }

            continue;
        }

        if (!IsGBFourByteTrailing(second))
        {
            // Unknown pair: un-read the second byte, fall back on the lead byte alone.
            buffer.AdjustBytes(-1);
            if (!buffer.Fallback(first))
            {
                break;
            }

            continue;
        }

        // Looks like a four-byte sequence, but fewer than two more bytes are available.
        if (!buffer.EvenMoreData(2))
        {
            if (decoder != null && !decoder.MustFlush)
            {
                // Carry the partial sequence over (only remembered when not counting),
                // including a third byte if one is still available.
                if (chars != null)
                {
                    pending1 = first;
                    pending2 = second;
                    pending3 = buffer.MoreData ? (short)buffer.GetNextByte() : (short)-1;
                    pending4 = -1;
                }

                break;
            }

            // Cannot be carried over: fall back on the two bytes read so far.
            if (!buffer.Fallback(first, second))
            {
                break;
            }

            continue;
        }

        byte third = buffer.GetNextByte();
        byte fourth = buffer.GetNextByte();

        if (!(IsGBLeadByte(third) && IsGBFourByteTrailing(fourth)))
        {
            // Not a valid four-byte sequence: un-read the last three bytes and
            // fall back on the first byte alone.
            buffer.AdjustBytes(-3);
            if (!buffer.Fallback(first))
            {
                break;
            }

            continue;
        }

        int fourByteOffset = GetFourBytesOffset(first, second, third, fourth);
        bool accepted;

        if (fourByteOffset <= GBLast4ByteCode)
        {
            // Single char taken from the four-byte table.
            accepted = buffer.AddChar(map4BytesToUnicode[fourByteOffset], 4);
        }
        else if (fourByteOffset >= GBSurrogateOffset && fourByteOffset <= GBLastSurrogateOffset)
        {
            // Emitted as a surrogate pair (two chars).
            fourByteOffset -= GBSurrogateOffset;
            accepted = buffer.AddChar(unchecked((char)(0xd800 + (fourByteOffset / 0x400))),
                                      unchecked((char)(0xdc00 + (fourByteOffset % 0x400))), 4);
        }
        else
        {
            // Well-formed sequence with no mapping: fall back on all four bytes.
            accepted = buffer.Fallback(first, second, third, fourth);
        }

        if (!accepted)
        {
            break;
        }
    }

    // Publish the outcome to the decoder. Leftover bytes are only written when
    // actually converting (chars != null); bytes-used is always reported.
    if (decoder != null)
    {
        if (chars != null)
        {
            decoder.bLeftOver1 = pending1;
            decoder.bLeftOver2 = pending2;
            decoder.bLeftOver3 = pending3;
            decoder.bLeftOver4 = pending4;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}