Example #1
0
        [System.Security.SecurityCritical]  // auto-generated
        internal unsafe EncodingCharBuffer(EncodingNLS enc, DecoderNLS decoder, char* charStart, int charCount, byte* byteStart, int byteCount)
        {
            // Keep hold of the encoding and the (possibly null) decoder we are working for.
            _decoder = decoder;
            _enc = enc;

            // Input window: the read cursor begins at the first byte, the end is start + byteCount.
            _byteStart = byteStart;
            _byteEnd = byteStart + byteCount;
            _bytes = byteStart;

            // Output window: the write cursor begins at the first char, the end is start + charCount.
            _charStart = charStart;
            _charEnd = charStart + charCount;
            _chars = charStart;

            // With a decoder we share its fallback buffer; without one the encoding's
            // fallback creates a new buffer for this call.
            _fallbackBuffer = (_decoder != null)
                ? _decoder.FallbackBuffer
                : enc.DecoderFallback.CreateFallbackBuffer();

            // Neither GetChars nor GetCharCount is expected to carry fallback output
            // across calls, so nothing should be pending in the buffer at this point.
            Debug.Assert(_fallbackBuffer.Remaining == 0,
                "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");

            // Wrap the buffer in a helper and hand it the current byte position and the char limit.
            _fallbackBufferHelper = new DecoderFallbackBufferHelper(_fallbackBuffer);
            _fallbackBufferHelper.InternalInitialize(_bytes, _charEnd);
        }
Example #2
0
        internal unsafe EncodingCharBuffer(EncodingNLS enc, DecoderNLS decoder, char *charStart, int charCount, byte *byteStart, int byteCount)
        {
            // Owner encoding and optional decoder.
            _enc = enc;
            _decoder = decoder;

            // Character (output) range and current write position.
            _charStart = charStart;
            _chars = charStart;
            _charEnd = charStart + charCount;

            // Byte (input) range and current read position.
            _byteStart = byteStart;
            _bytes = byteStart;
            _byteEnd = byteStart + byteCount;

            // No decoder: ask the encoding's fallback for a new buffer.
            // Decoder present: use the buffer it already owns.
            DecoderFallbackBuffer buffer = (_decoder == null)
                ? enc.DecoderFallback.CreateFallbackBuffer()
                : _decoder.FallbackBuffer;
            _fallbackBuffer = buffer;

            // Fallback output is not expected to survive between GetChars / GetCharCount
            // calls, so the buffer should have nothing remaining here.
            Debug.Assert(_fallbackBuffer.Remaining == 0,
                         "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");

            // The helper is initialised with the current read position and the end of the char range.
            _fallbackBufferHelper = new DecoderFallbackBufferHelper(_fallbackBuffer);
            _fallbackBufferHelper.InternalInitialize(_bytes, _charEnd);
        }
Example #3
0
        // Counts the chars that decoding `count` bytes starting at `bytes` would produce.
        // Bytes below 128 count as one char each; every other byte is sized by the
        // decoder fallback (the decoder's when one is given, otherwise the encoding's).
        internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS decoder)
        {
            // Select the fallback and see whether it is a replacement fallback.
            DecoderReplacementFallback replacement;
            if (decoder == null)
            {
                replacement = this.DecoderFallback as DecoderReplacementFallback;
            }
            else
            {
                replacement = decoder.Fallback as DecoderReplacementFallback;
            }

            // A replacement fallback with MaxCharCount == 1 keeps the result at exactly `count`.
            if (replacement != null && replacement.MaxCharCount == 1)
            {
                return count;
            }

            DecoderFallbackBuffer fallbackBuffer = null;   // created on first use
            int total = count;                             // start from one char per byte
            byte[] single = new byte[1];                   // scratch array handed to the fallback
            byte* end = bytes + count;
            byte* cursor = bytes;

            while (cursor < end)
            {
                byte current = *cursor;
                ++cursor;

                if (current < 128)
                {
                    continue;
                }

                if (fallbackBuffer == null)
                {
                    if (decoder == null)
                    {
                        fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
                    }
                    else
                    {
                        fallbackBuffer = decoder.FallbackBuffer;
                    }
                    fallbackBuffer.InternalInitialize(end - count, (char *)null);
                }

                // Swap the one char reserved for this byte for whatever the fallback reports.
                single[0] = current;
                total = total - 1 + fallbackBuffer.InternalFallback(single, cursor);
            }

            return total;
        }
Example #4
0
        // Returns the char count for `count` bytes at `bytes` by delegating to GetChars
        // with a null output pointer and a zero output length.
        internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS baseDecoder)
        {
            Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
            Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");

            // Counting is done by GetChars itself when it is given no char buffer.
            int charsNeeded = GetChars(bytes, count, null, 0, baseDecoder);
            return charsNeeded;
        }
Example #5
0
        // Counts the chars produced by decoding `count` bytes at `bytes` through the
        // mapBytesToUnicode table. Bytes whose table entry is U+FFFD are sized by the
        // decoder fallback instead of counting as one char.
        internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS decoder)
        {
            base.CheckMemorySection();

            // Read the fallback from the decoder when present, otherwise from the encoding.
            DecoderReplacementFallback replacement;
            bool bestFit;
            if (decoder != null)
            {
                replacement = decoder.Fallback as DecoderReplacementFallback;
                bestFit = decoder.Fallback.IsMicrosoftBestFitFallback;
            }
            else
            {
                replacement = base.DecoderFallback as DecoderReplacementFallback;
                bestFit = base.DecoderFallback.IsMicrosoftBestFitFallback;
            }

            // Best-fit fallback, or a replacement fallback with MaxCharCount == 1:
            // the answer is simply `count`.
            bool oneCharPerByte = bestFit || ((replacement != null) && (replacement.MaxCharCount == 1));
            if (oneCharPerByte)
            {
                return count;
            }

            DecoderFallbackBuffer buffer = null;   // created on first use
            int total = count;                     // start from one char per byte
            byte[] single = new byte[1];           // scratch array handed to the fallback
            byte* end = bytes + count;
            byte* cursor = bytes;

            while (cursor < end)
            {
                byte raw = *cursor;
                char mapped = this.mapBytesToUnicode[raw];
                cursor++;

                if (mapped != 0xfffd)
                {
                    continue;
                }

                if (buffer == null)
                {
                    buffer = (decoder == null)
                        ? base.DecoderFallback.CreateFallbackBuffer()
                        : decoder.FallbackBuffer;
                    buffer.InternalInitialize(end - count, null);
                }

                // Give back the char reserved for this byte, then add the fallback's count.
                single[0] = raw;
                total--;
                total += buffer.InternalFallback(single, cursor);
            }

            return total;
        }
        // Counts the chars produced by decoding `count` bytes at `bytes` through the
        // mapBytesToUnicode table.
        //
        // bytes   - first byte to examine
        // count   - number of bytes to examine
        // decoder - optional decoder; when null the encoding's own DecoderFallback is used
        //
        // Returns `count` directly when the fallback is best-fit or a replacement fallback
        // with MaxCharCount == 1; otherwise one char per byte, except that each byte whose
        // table entry is U+FFFD contributes whatever the fallback buffer reports.
        internal unsafe override int GetCharCount(byte *bytes, int count, DecoderNLS decoder)
        {
            base.CheckMemorySection();
            DecoderReplacementFallback decoderReplacementFallback;
            bool isMicrosoftBestFitFallback;

            if (decoder == null)
            {
                decoderReplacementFallback = (base.DecoderFallback as DecoderReplacementFallback);
                isMicrosoftBestFitFallback = base.DecoderFallback.IsMicrosoftBestFitFallback;
            }
            else
            {
                decoderReplacementFallback = (decoder.Fallback as DecoderReplacementFallback);
                isMicrosoftBestFitFallback = decoder.Fallback.IsMicrosoftBestFitFallback;
            }
            if (isMicrosoftBestFitFallback || (decoderReplacementFallback != null && decoderReplacementFallback.MaxCharCount == 1))
            {
                return(count);
            }
            DecoderFallbackBuffer decoderFallbackBuffer = null;   // created on first use
            int num = count;                                      // start from one char per byte

            byte[] array = new byte[1];                           // scratch array handed to the fallback
            byte * ptr   = bytes + count;

            while (bytes < ptr)
            {
                char c = this.mapBytesToUnicode[*bytes];
                bytes++;
                // Written as an escape rather than a raw U+FFFD literal so the comparison
                // does not depend on how the source file is encoded or displayed.
                if (c == '\uFFFD')
                {
                    if (decoderFallbackBuffer == null)
                    {
                        if (decoder == null)
                        {
                            decoderFallbackBuffer = base.DecoderFallback.CreateFallbackBuffer();
                        }
                        else
                        {
                            decoderFallbackBuffer = decoder.FallbackBuffer;
                        }
                        // ptr - count is the original start of the input.
                        decoderFallbackBuffer.InternalInitialize(ptr - count, null);
                    }
                    // bytes has already advanced, so the offending byte is at bytes - 1.
                    array[0] = *(bytes - 1);
                    num--;   // give back the char reserved for this byte
                    num += decoderFallbackBuffer.InternalFallback(array, bytes);
                }
            }
            return(num);
        }
Example #7
0
        internal static DecoderFallbackBuffer CreateAndInitialize(Encoding encoding, DecoderNLS decoder, int originalByteCount)
        {
            // Without a decoder the encoding's fallback creates a new buffer; with one,
            // the decoder's existing buffer is returned.
            DecoderFallbackBuffer buffer;
            if (decoder is null)
            {
                buffer = encoding.DecoderFallback.CreateFallbackBuffer();
            }
            else
            {
                buffer = decoder.FallbackBuffer;
            }

            // originalByteCount exists only so the 'index' argument for the abstract Fallback
            // method can be worked out later: it is originalByteCount minus 'bytes.Length',
            // where bytes is expected to be the whole remaining input buffer.
            buffer._originalByteCount = originalByteCount;
            buffer._decoder = decoder;
            buffer._encoding = encoding;

            return buffer;
        }
Example #8
0
        // Counts the chars that decoding `count` bytes at `bytes` would produce: one per
        // byte below 128, and the fallback's reported count for every other byte.
        internal unsafe override int GetCharCount(byte *bytes, int count, DecoderNLS decoder)
        {
            // Pick the fallback from the decoder when present, otherwise from the encoding.
            DecoderReplacementFallback replacement = (decoder == null)
                ? (base.DecoderFallback as DecoderReplacementFallback)
                : (decoder.Fallback as DecoderReplacementFallback);

            // Replacement fallback with MaxCharCount == 1: the result is `count`.
            if (replacement != null && replacement.MaxCharCount == 1)
            {
                return count;
            }

            DecoderFallbackBuffer buffer = null;   // created on first use
            int total = count;                     // start from one char per byte
            byte[] single = new byte[1];           // scratch array handed to the fallback
            byte* end = bytes + count;

            for (byte* cursor = bytes; cursor < end;)
            {
                byte value = *cursor;
                cursor++;

                if (value < 128)
                {
                    continue;
                }

                if (buffer == null)
                {
                    buffer = (decoder == null)
                        ? base.DecoderFallback.CreateFallbackBuffer()
                        : decoder.FallbackBuffer;
                    buffer.InternalInitialize(end - count, null);
                }

                // Give back the char reserved for this byte, then add the fallback's count.
                single[0] = value;
                total--;
                total += buffer.InternalFallback(single, cursor);
            }

            return total;
        }
        // Returns how many chars `count` bytes at `bytes` decode to. Bytes under 0x80 are
        // one char each; bytes at or above 0x80 are sized through the decoder fallback buffer.
        internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS decoder)
        {
            DecoderReplacementFallback replacement;
            if (decoder != null)
            {
                replacement = decoder.Fallback as DecoderReplacementFallback;
            }
            else
            {
                replacement = base.DecoderFallback as DecoderReplacementFallback;
            }

            // With a replacement fallback whose MaxCharCount is 1 the count equals the input length.
            bool sameLength = (replacement != null) && (replacement.MaxCharCount == 1);
            if (sameLength)
            {
                return count;
            }

            DecoderFallbackBuffer buffer = null;   // created on first use
            int total = count;                     // start from one char per byte
            byte[] single = new byte[1];           // scratch array handed to the fallback
            byte* limit = bytes + count;

            while (bytes < limit)
            {
                byte value = *bytes;
                bytes++;

                if (value >= 0x80)
                {
                    if (buffer == null)
                    {
                        buffer = (decoder != null)
                            ? decoder.FallbackBuffer
                            : base.DecoderFallback.CreateFallbackBuffer();
                        // limit - count is the original start of the input.
                        buffer.InternalInitialize(limit - count, null);
                    }

                    // Replace the reserved char with the fallback's reported count.
                    single[0] = value;
                    total = total - 1 + buffer.InternalFallback(single, bytes);
                }
            }

            return total;
        }
        // Counts the chars produced by decoding `count` bytes at `bytes` through the
        // mapBytesToUnicode table; a table entry of 0xFFFD (65533) sends the byte to the
        // decoder fallback for sizing.
        internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS decoder)
        {
            this.CheckMemorySection();

            // Read the fallback from the decoder when present, otherwise from the encoding.
            DecoderReplacementFallback replacement;
            bool bestFit;
            if (decoder != null)
            {
                replacement = decoder.Fallback as DecoderReplacementFallback;
                bestFit = decoder.Fallback.IsMicrosoftBestFitFallback;
            }
            else
            {
                replacement = this.DecoderFallback as DecoderReplacementFallback;
                bestFit = this.DecoderFallback.IsMicrosoftBestFitFallback;
            }

            // Best-fit, or a replacement fallback with MaxCharCount == 1: return `count`.
            if (bestFit || (replacement != null && replacement.MaxCharCount == 1))
            {
                return count;
            }

            DecoderFallbackBuffer buffer = null;   // created on first use
            int total = count;                     // start from one char per byte
            byte[] single = new byte[1];           // scratch array handed to the fallback
            byte* end = bytes + count;

            while (bytes < end)
            {
                int mapped = (int)this.mapBytesToUnicode[*bytes];
                ++bytes;

                if (mapped != 65533)
                {
                    continue;
                }

                if (buffer == null)
                {
                    if (decoder == null)
                    {
                        buffer = this.DecoderFallback.CreateFallbackBuffer();
                    }
                    else
                    {
                        buffer = decoder.FallbackBuffer;
                    }
                    buffer.InternalInitialize(end - count, (char *)null);
                }

                // bytes has already advanced, so the byte being sized sits at bytes - 1.
                single[0] = *(bytes - 1);
                total = total - 1 + buffer.InternalFallback(single, bytes);
            }

            return total;
        }
Example #11
0
        // Decodes GB18030 bytes into chars through an EncodingCharBuffer and returns the
        // buffer's Count.
        //
        // bytes/byteCount  - input bytes
        // chars/charCount  - output buffer; chars may be a null pointer, in which case the
        //                    decoder's leftover-byte state is never written
        //                    (NOTE(review): presumably the count-only path - confirm with callers)
        // baseDecoder      - optional decoder, cast to GB18030Decoder; it carries up to four
        //                    leftover bytes (bLeftOver1..4) between calls
        //
        // Sequence forms handled below: a single byte <= 127, a lead byte followed by a
        // two-byte trailing byte, or a four-byte sequence (lead, four-byte trailing, lead,
        // four-byte trailing). Anything else goes through the buffer's Fallback.
        internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS baseDecoder)
        {
            GB18030Encoding.GB18030Decoder gb18030Decoder     = (GB18030Encoding.GB18030Decoder)baseDecoder;
            Encoding.EncodingCharBuffer    encodingCharBuffer = new Encoding.EncodingCharBuffer((Encoding)this, (DecoderNLS)gb18030Decoder, chars, charCount, bytes, byteCount);
            // num1..num4 hold the bytes of a sequence in progress; -1 means "slot empty".
            short num1 = -1;
            short num2 = -1;
            short num3 = -1;
            short num4 = -1;

            // Phase 1: finish whatever sequence the decoder left over from the previous call.
            if (gb18030Decoder != null && (int)gb18030Decoder.bLeftOver1 != -1)
            {
                num1 = gb18030Decoder.bLeftOver1;
                num2 = gb18030Decoder.bLeftOver2;
                num3 = gb18030Decoder.bLeftOver3;
                num4 = gb18030Decoder.bLeftOver4;
                // Keep going until the first slot is empty, i.e. all leftovers are consumed.
                while ((int)num1 != -1)
                {
                    if (!this.IsGBLeadByte(num1))
                    {
                        // Not a lead byte: values up to 127 are emitted as-is, anything else falls back.
                        // NOTE(review): a false return from AddChar/Fallback presumably means the
                        // output could not take the char - confirm against EncodingCharBuffer.
                        if ((int)num1 <= (int)sbyte.MaxValue)
                        {
                            if (!encodingCharBuffer.AddChar((char)num1))
                            {
                                break;
                            }
                        }
                        else if (!encodingCharBuffer.Fallback((byte)num1))
                        {
                            break;
                        }
                        // Drop the consumed byte and shift the remaining ones down one slot.
                        num1 = num2;
                        num2 = num3;
                        num3 = num4;
                        num4 = (short)-1;
                    }
                    else
                    {
                        // Lead byte: pull input until there is a second byte, and - if that second
                        // byte is a four-byte trailing byte - until the fourth slot is filled too.
                        while ((int)num2 == -1 || this.IsGBFourByteTrailing(num2) && (int)num4 == -1)
                        {
                            if (!encodingCharBuffer.MoreData)
                            {
                                // Input exhausted mid-sequence.
                                if (!gb18030Decoder.MustFlush)
                                {
                                    // Not flushing: remember the partial sequence (only when an
                                    // output buffer was supplied), record bytes used, and return.
                                    if ((IntPtr)chars != IntPtr.Zero)
                                    {
                                        gb18030Decoder.bLeftOver1 = num1;
                                        gb18030Decoder.bLeftOver2 = num2;
                                        gb18030Decoder.bLeftOver3 = num3;
                                        gb18030Decoder.bLeftOver4 = num4;
                                    }
                                    gb18030Decoder.m_bytesUsed = encodingCharBuffer.BytesUsed;
                                    return(encodingCharBuffer.Count);
                                }
                                // Flushing: stop waiting and process what we have below.
                                break;
                            }
                            // Fill the first empty slot among num2, num3, num4.
                            if ((int)num2 == -1)
                            {
                                num2 = (short)encodingCharBuffer.GetNextByte();
                            }
                            else if ((int)num3 == -1)
                            {
                                num3 = (short)encodingCharBuffer.GetNextByte();
                            }
                            else
                            {
                                num4 = (short)encodingCharBuffer.GetNextByte();
                            }
                        }
                        if (this.IsGBTwoByteTrailing(num2))
                        {
                            // Two-byte sequence: table index is (lead << 8) | trail.
                            int index = (int)num1 << 8 | (int)(byte)num2;
                            if (encodingCharBuffer.AddChar(this.mapBytesToUnicode[index], 2))
                            {
                                num1 = (short)-1;
                                num2 = (short)-1;
                            }
                            else
                            {
                                break;
                            }
                        }
                        else if (this.IsGBFourByteTrailing(num2) && this.IsGBLeadByte(num3) && this.IsGBFourByteTrailing(num4))
                        {
                            // Well-formed four-byte sequence.
                            int fourBytesOffset = this.GetFourBytesOffset(num1, num2, num3, num4);
                            if (fourBytesOffset <= 39419)
                            {
                                // Offsets up to 39419 are looked up in map4BytesToUnicode.
                                if (!encodingCharBuffer.AddChar(this.map4BytesToUnicode[fourBytesOffset], 4))
                                {
                                    break;
                                }
                            }
                            else if (fourBytesOffset >= 189000 && fourBytesOffset <= 1237575)
                            {
                                // Offsets 189000..1237575 are emitted as two chars:
                                // 55296 (0xD800) + n / 1024 and 56320 (0xDC00) + n % 1024.
                                int num5 = fourBytesOffset - 189000;
                                if (!encodingCharBuffer.AddChar((char)(55296 + num5 / 1024), (char)(56320 + num5 % 1024), 4))
                                {
                                    break;
                                }
                            }
                            else if (!encodingCharBuffer.Fallback((byte)num1, (byte)num2, (byte)num3, (byte)num4))
                            {
                                // Offset outside both ranges: all four bytes fall back together.
                                break;
                            }
                            // Sequence consumed: clear all four slots.
                            num1 = (short)-1;
                            num2 = (short)-1;
                            num3 = (short)-1;
                            num4 = (short)-1;
                        }
                        else if (encodingCharBuffer.Fallback((byte)num1))
                        {
                            // Malformed sequence: only the lead byte falls back; the rest shift
                            // down and are re-examined on the next iteration.
                            num1 = num2;
                            num2 = num3;
                            num3 = num4;
                            num4 = (short)-1;
                        }
                        else
                        {
                            break;
                        }
                    }
                }
            }
            // Phase 2: decode the remaining input directly from the buffer.
            while (encodingCharBuffer.MoreData)
            {
                byte nextByte1 = encodingCharBuffer.GetNextByte();
                if ((int)nextByte1 <= (int)sbyte.MaxValue)
                {
                    // Values up to 127 are emitted as-is.
                    if (!encodingCharBuffer.AddChar((char)nextByte1))
                    {
                        break;
                    }
                }
                else if (this.IsGBLeadByte((short)nextByte1))
                {
                    if (encodingCharBuffer.MoreData)
                    {
                        byte nextByte2 = encodingCharBuffer.GetNextByte();
                        if (this.IsGBTwoByteTrailing((short)nextByte2))
                        {
                            // Two-byte sequence: table index is (lead << 8) | trail.
                            int index = (int)nextByte1 << 8 | (int)nextByte2;
                            if (!encodingCharBuffer.AddChar(this.mapBytesToUnicode[index], 2))
                            {
                                break;
                            }
                        }
                        else if (this.IsGBFourByteTrailing((short)nextByte2))
                        {
                            // Possible four-byte sequence: two more bytes are required.
                            if (encodingCharBuffer.EvenMoreData(2))
                            {
                                byte nextByte3 = encodingCharBuffer.GetNextByte();
                                byte nextByte4 = encodingCharBuffer.GetNextByte();
                                if (this.IsGBLeadByte((short)nextByte3) && this.IsGBFourByteTrailing((short)nextByte4))
                                {
                                    // Same three-way split on the offset as in phase 1.
                                    int fourBytesOffset = this.GetFourBytesOffset((short)nextByte1, (short)nextByte2, (short)nextByte3, (short)nextByte4);
                                    if (fourBytesOffset <= 39419)
                                    {
                                        if (!encodingCharBuffer.AddChar(this.map4BytesToUnicode[fourBytesOffset], 4))
                                        {
                                            break;
                                        }
                                    }
                                    else if (fourBytesOffset >= 189000 && fourBytesOffset <= 1237575)
                                    {
                                        int num5 = fourBytesOffset - 189000;
                                        if (!encodingCharBuffer.AddChar((char)(55296 + num5 / 1024), (char)(56320 + num5 % 1024), 4))
                                        {
                                            break;
                                        }
                                    }
                                    else if (!encodingCharBuffer.Fallback(nextByte1, nextByte2, nextByte3, nextByte4))
                                    {
                                        break;
                                    }
                                }
                                else
                                {
                                    // Bytes 3/4 do not complete the sequence: adjust the byte position
                                    // by -3 and fall back on the lead byte alone.
                                    // NOTE(review): AdjustBytes(-3) presumably rewinds so bytes 2-4
                                    // are read again - confirm against EncodingCharBuffer.
                                    encodingCharBuffer.AdjustBytes(-3);
                                    if (!encodingCharBuffer.Fallback(nextByte1))
                                    {
                                        break;
                                    }
                                }
                            }
                            else
                            {
                                // Fewer than two bytes remain for a four-byte sequence.
                                if (gb18030Decoder != null && !gb18030Decoder.MustFlush)
                                {
                                    // A non-flushing decoder stops here either way; the partial
                                    // sequence is stashed only when an output buffer was supplied.
                                    if ((IntPtr)chars != IntPtr.Zero)
                                    {
                                        num1 = (short)nextByte1;
                                        num2 = (short)nextByte2;
                                        num3 = !encodingCharBuffer.MoreData ? (short)-1 : (short)encodingCharBuffer.GetNextByte();
                                        num4 = (short)-1;
                                        break;
                                    }
                                    break;
                                }
                                // No decoder, or flushing: the two bytes read so far fall back.
                                if (!encodingCharBuffer.Fallback(nextByte1, nextByte2))
                                {
                                    break;
                                }
                            }
                        }
                        else
                        {
                            // Second byte is not a valid trailing byte: adjust the byte position
                            // by -1 and fall back on the lead byte alone.
                            encodingCharBuffer.AdjustBytes(-1);
                            if (!encodingCharBuffer.Fallback(nextByte1))
                            {
                                break;
                            }
                        }
                    }
                    else
                    {
                        // Lead byte was the last byte of the input.
                        if (gb18030Decoder != null && !gb18030Decoder.MustFlush)
                        {
                            // A non-flushing decoder stops here either way; the lead byte is
                            // stashed only when an output buffer was supplied.
                            if ((IntPtr)chars != IntPtr.Zero)
                            {
                                num1 = (short)nextByte1;
                                num2 = (short)-1;
                                num3 = (short)-1;
                                num4 = (short)-1;
                                break;
                            }
                            break;
                        }
                        if (!encodingCharBuffer.Fallback(nextByte1))
                        {
                            break;
                        }
                    }
                }
                else if (!encodingCharBuffer.Fallback(nextByte1))
                {
                    // Above 127 and not a lead byte: fall back on the single byte.
                    break;
                }
            }
            // Persist state on the decoder: leftover slots only when an output buffer was
            // supplied, bytes used always.
            if (gb18030Decoder != null)
            {
                if ((IntPtr)chars != IntPtr.Zero)
                {
                    gb18030Decoder.bLeftOver1 = num1;
                    gb18030Decoder.bLeftOver2 = num2;
                    gb18030Decoder.bLeftOver3 = num3;
                    gb18030Decoder.bLeftOver4 = num4;
                }
                gb18030Decoder.m_bytesUsed = encodingCharBuffer.BytesUsed;
            }
            return(encodingCharBuffer.Count);
        }
Example #12
0
        internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
        {
            // We report the overflow when there is no decoder, when the decoder asks for
            // overflow to throw, or when nothing at all was decoded.
            bool reportOverflow = decoder == null || decoder.m_throwOnOverflow || nothingDecoded;

            if (reportOverflow)
            {
                // Reset the decoder's fallback buffer first, if it has one.
                bool hasFallbackBuffer = decoder != null && decoder.InternalHasFallbackBuffer;
                if (hasFallbackBuffer)
                {
                    decoder.FallbackBuffer.InternalReset();
                }

                // Special message that includes the fallback type, for the case where a user
                // has implemented a decoder fallback with a broken GetMaxCharCount.
                // NOTE(review): the parameterless overload is presumably expected to throw,
                // so execution should not continue past this call - confirm.
                ThrowCharsOverflow();
            }

            // We did not throw, so we are in convert and have to remember our flushing.
            decoder.ClearMustFlush();
        }
Example #13
0
        // Internal entry point; callers are expected to have validated the arguments already.
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
        {
            // Assertions only: the arguments should already have been checked upstream.
            Debug.Assert(bytes != null, "[Latin1Encoding.GetCharCount]bytes is null");
            Debug.Assert(count >= 0, "[Latin1Encoding.GetCharCount]byteCount is negative");

            // The char count equals the byte count: a single-byte encoding never yields
            // surrogate pairs, and no fallback is needed because every Latin1 code point
            // is a Unicode code point.
            int charCount = count;
            return charCount;
        }
Example #14
0
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
        {
            // Arguments were validated by our internal callers; assert rather than throw.
            Contract.Assert(bytes != null, "[ASCIIEncoding.GetCharCount]bytes is null");
            Contract.Assert(count >= 0, "[ASCIIEncoding.GetCharCount]byteCount is negative");

            // ASCII has no best fit, so the only question is which decoder fallback applies.
            DecoderReplacementFallback fallback;
            if (decoder != null)
            {
                fallback = decoder.Fallback as DecoderReplacementFallback;
                Contract.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
                    decoder.FallbackBuffer.Remaining == 0,
                    "[ASCIICodePageEncoding.GetCharCount]Expected empty fallback buffer");
            }
            else
            {
                fallback = this.DecoderFallback as DecoderReplacementFallback;
            }

            // A replacement fallback with MaxCharCount == 1 keeps the output the same length
            // as the input: single-byte data never maps to surrogate pairs.
            bool sameLength = fallback != null && fallback.MaxCharCount == 1;
            if (sameLength)
            {
                return count;
            }

            // Slow path. The fallback buffer is created only if a byte actually needs it.
            DecoderFallbackBuffer fallbackBuffer = null;

            // Start by assuming one char per byte and correct the total as we go.
            int total = count;
            byte[] single = new byte[1];

            byte* end = bytes + count;

            while (bytes < end)
            {
                // Read and advance as two statements (the original notes this is faster than *bytes++).
                byte value = *bytes;
                bytes++;

                // Bytes below 0x80 are plain ASCII and already counted.
                if (value < 0x80)
                {
                    continue;
                }

                if (fallbackBuffer == null)
                {
                    fallbackBuffer = (decoder == null)
                        ? this.DecoderFallback.CreateFallbackBuffer()
                        : decoder.FallbackBuffer;
                    fallbackBuffer.InternalInitialize(end - count, null);
                }

                // Release the char reserved for this byte, then add what the fallback reports.
                single[0] = value;
                total--;
                total += fallbackBuffer.InternalFallback(single, bytes);
            }

            // Nothing may be left pending in the fallback buffer when we finish.
            Contract.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
                "[ASCIIEncoding.GetCharCount]Expected Empty fallback buffer");

            return total;
        }
Example #15
0
        /// <summary>
        /// Computes how many chars decoding <paramref name="count"/> bytes starting at
        /// <paramref name="bytes"/> would produce, without writing any output.
        /// </summary>
        /// <param name="bytes">Start of the input bytes (asserted non-null).</param>
        /// <param name="count">Number of input bytes (asserted non-negative).</param>
        /// <param name="baseDecoder">
        /// Optional decoder, cast to <c>DBCSDecoder</c>; when present its <c>bLeftOver</c> byte is
        /// paired with the first input byte and its <c>MustFlush</c> flag decides how a trailing
        /// lead byte is treated.
        /// </param>
        /// <returns>
        /// The char count: one per input byte to begin with, then corrected for two-byte
        /// sequences and for whatever the fallback reports for unmapped input.
        /// </returns>
        [System.Security.SecurityCritical]  // auto-generated
        public override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
        {
            // Callers are internal and have validated the arguments already, so only assert.
            Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetCharCount]bytes is null");
            Debug.Assert(count >= 0, "[DBCSCodePageEncoding.GetCharCount]byteCount is negative");

            CheckMemorySection();

            DBCSDecoder decoder = (DBCSDecoder)baseDecoder;

            // Created lazily: only needed once an unmapped sequence shows up.
            DecoderFallbackBuffer fbBuffer = null;

            byte* byteEnd = bytes + count;

            // Optimistic estimate of one char per byte; adjusted downwards/upwards below.
            int charCount = count;

            // Counting never carries fallback state between calls
            // (m_throwOnOverflow is irrelevant when only counting).
            Debug.Assert(decoder == null ||
                !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
                "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer at start");

            DecoderFallbackBufferHelper fbHelper = new DecoderFallbackBufferHelper(fbBuffer);

            // A byte left over from the previous call is combined with the first new byte.
            if (decoder != null && decoder.bLeftOver > 0)
            {
                if (count == 0)
                {
                    // Nothing new arrived; the pending byte only matters when flushing.
                    if (!decoder.MustFlush)
                    {
                        return 0;
                    }

                    Debug.Assert(fbBuffer == null,
                        "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer");
                    fbBuffer = decoder.FallbackBuffer;
                    fbHelper = new DecoderFallbackBufferHelper(fbBuffer);
                    fbHelper.InternalInitialize(bytes, null);

                    // The lone pending byte goes through the fallback on its own.
                    byte[] pendingByte = new byte[] { unchecked((byte)decoder.bLeftOver) };
                    return fbHelper.InternalFallback(pendingByte, bytes);
                }

                // Form the 16-bit lookup value: pending byte high, first input byte low.
                int pair = decoder.bLeftOver << 8;
                pair |= (*bytes);
                bytes++;

                // The slot reserved for the consumed input byte covers a mapped result;
                // only the unmapped case needs a correction.
                char pairChar = mapBytesToUnicode[pair];
                if (pairChar == 0 && pair != 0)
                {
                    // Give the reserved slot back; the fallback reports its own count.
                    charCount--;

                    Debug.Assert(fbBuffer == null,
                        "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer for unknown pair");
                    fbBuffer = decoder.FallbackBuffer;
                    fbHelper = new DecoderFallbackBufferHelper(fbBuffer);
                    fbHelper.InternalInitialize(byteEnd - count, null);

                    byte[] pairBytes = new byte[] { unchecked((byte)(pair >> 8)), unchecked((byte)pair) };
                    charCount += fbHelper.InternalFallback(pairBytes, bytes);
                }
            }

            // Main scan over the remaining input.
            while (bytes < byteEnd)
            {
                // Read, then advance (kept as two statements like the surrounding code).
                int code = *bytes;
                bytes++;
                char mapped = mapBytesToUnicode[code];

                if (mapped == LEAD_BYTE_CHAR)
                {
                    // A lead byte produces no char by itself: release its reserved slot.
                    charCount--;

                    if (bytes >= byteEnd)
                    {
                        // Input ended in the middle of a pair.
                        if (decoder != null && !decoder.MustFlush)
                        {
                            // The decoder will hold on to the lead byte for the next call.
                            break;
                        }

                        // Flushing: re-reserve the slot and route the lone lead byte to the fallback.
                        charCount++;
                        mapped = UNKNOWN_CHAR_FLAG;
                    }
                    else
                    {
                        // Trail byte available: extend to the two-byte lookup value.
                        code <<= 8;
                        code |= *bytes;
                        bytes++;
                        mapped = mapBytesToUnicode[code];
                    }
                }

                // Mapped chars (and a genuine zero input) are already accounted for.
                if (mapped != UNKNOWN_CHAR_FLAG || code == 0)
                {
                    continue;
                }

                if (fbBuffer == null)
                {
                    fbBuffer = (decoder == null)
                        ? DecoderFallback.CreateFallbackBuffer()
                        : decoder.FallbackBuffer;
                    fbHelper = new DecoderFallbackBufferHelper(fbBuffer);
                    fbHelper.InternalInitialize(byteEnd - count, null);
                }

                // Swap the reserved slot for whatever the fallback says it will emit.
                charCount--;
                byte[] unmapped = (code < 0x100)
                    ? new byte[] { unchecked((byte)code) }
                    : new byte[] { unchecked((byte)(code >> 8)), unchecked((byte)code) };
                charCount += fbHelper.InternalFallback(unmapped, bytes);
            }

            // No fallback chars should be left pending once counting is done.
            Debug.Assert(decoder == null || !decoder.m_throwOnOverflow ||
                !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
                "[DBCSCodePageEncoding.GetCharCount]Expected empty fallback buffer at end");

            return charCount;
        }
Example #16
0
        /// <summary>
        /// Decodes ASCII bytes into chars. Bytes of 0x80 and above are invalid in ASCII and are
        /// either replaced directly (single-char replacement fallback) or handed to the
        /// decoder fallback buffer.
        /// </summary>
        /// <param name="bytes">Start of the input bytes.</param>
        /// <param name="byteCount">Number of input bytes.</param>
        /// <param name="chars">Start of the output buffer.</param>
        /// <param name="charCount">Capacity of the output buffer, in chars.</param>
        /// <param name="decoder">
        /// Optional decoder; when non-null it supplies the fallback and receives the number of
        /// bytes consumed in <c>m_bytesUsed</c>.
        /// </param>
        /// <returns>The number of chars written to <paramref name="chars"/>.</returns>
        internal override unsafe int GetChars(byte *bytes, int byteCount,
                                              char *chars, int charCount, DecoderNLS decoder)
        {
            // Do it fast way if using ? replacement fallback
            byte *byteEnd   = bytes + byteCount;
            byte *byteStart = bytes;
            char *charStart = chars;

            // Note: ASCII doesn't do best fit, but we have to fallback if they use something > 0x7f
            // Only need decoder fallback buffer if not using ? fallback.
            DecoderReplacementFallback fallback = null;

            if (decoder == null)
            {
                fallback = this.DecoderFallback as DecoderReplacementFallback;
            }
            else
            {
                fallback = decoder.Fallback as DecoderReplacementFallback;
            }

            if (fallback != null && fallback.MaxCharCount == 1)
            {
                // Fast path: every input byte yields exactly one output char.
                char replacementChar = fallback.DefaultString[0];

                // Need byteCount chars, otherwise too small buffer
                if (charCount < byteCount)
                {
                    // Need at least 1 output char, throw if must throw
                    ThrowCharsOverflow(decoder, charCount < 1);

                    // Not throwing, use what we can
                    byteEnd = bytes + charCount;
                }

                // Quick loop, just do the single-char replacement for invalid bytes.
                while (bytes < byteEnd)
                {
                    byte b = *(bytes++);
                    if (b >= 0x80)
                    {
                        // This is an invalid byte in the ASCII encoding.
                        *(chars++) = replacementChar;
                    }
                    else
                    {
                        *(chars++) = unchecked ((char)b);
                    }
                }

                // bytes & chars used are the same
                if (decoder != null)
                {
                    decoder.m_bytesUsed = (int)(bytes - byteStart);
                }
                return((int)(chars - charStart));
            }

            // Slower way's going to need a fallback buffer
            DecoderFallbackBuffer fallbackBuffer = null;

            byte[] byteBuffer = new byte[1];
            char * charEnd    = chars + charCount;

            // Not quite so fast loop
            while (bytes < byteEnd)
            {
                // Faster if don't use *bytes++;
                byte b = *(bytes);
                bytes++;

                if (b >= 0x80)
                {
                    // This is an invalid byte in the ASCII encoding.
                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                        {
                            fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
                        }
                        else
                        {
                            fallbackBuffer = decoder.FallbackBuffer;
                        }
                        fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
                    }

                    // Use fallback buffer
                    byteBuffer[0] = b;

                    // Note that chars won't get updated unless this succeeds
                    if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
                    {
                        bytes--;                                            // unused byte
                        fallbackBuffer.InternalReset();                     // Didn't fall this back
                        ThrowCharsOverflow(decoder, chars == charStart);    // throw?
                        break;                                              // don't throw, but stop loop
                    }
                }
                else
                {
                    // Make sure we have buffer space
                    if (chars >= charEnd)
                    {
                        bytes--;                                            // unused byte
                        ThrowCharsOverflow(decoder, chars == charStart);    // throw?
                        break;                                              // don't throw, but stop loop
                    }

                    *(chars) = unchecked ((char)b);
                    chars++;
                }
            }

            // Might have had decoder fallback stuff.
            if (decoder != null)
            {
                decoder.m_bytesUsed = (int)(bytes - byteStart);
            }

            // The slow path previously fell off the end of the method without returning a
            // value; report the number of chars written, exactly as the fast path does.
            return((int)(chars - charStart));
        }
        /// <summary>
        /// Decodes bytes into chars, treating every byte of 0x80 or above as invalid: it is
        /// either replaced by the single replacement char or passed to the decoder fallback buffer.
        /// </summary>
        /// <param name="bytes">Start of the input bytes.</param>
        /// <param name="byteCount">Number of input bytes.</param>
        /// <param name="chars">Start of the output buffer.</param>
        /// <param name="charCount">Capacity of the output buffer, in chars.</param>
        /// <param name="decoder">
        /// Optional decoder; when non-null it supplies the fallback and receives the number of
        /// bytes consumed in <c>m_bytesUsed</c>.
        /// </param>
        /// <returns>The number of chars written to <paramref name="chars"/>.</returns>
        internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS decoder)
        {
            byte *byteEnd   = bytes + byteCount;
            byte *byteStart = bytes;
            char *charStart = chars;
            DecoderReplacementFallback decoderFallback = null;

            if (decoder == null)
            {
                decoderFallback = base.DecoderFallback as DecoderReplacementFallback;
            }
            else
            {
                decoderFallback = decoder.Fallback as DecoderReplacementFallback;
            }

            if ((decoderFallback != null) && (decoderFallback.MaxCharCount == 1))
            {
                // Fast path: one output char per input byte.
                char replacement = decoderFallback.DefaultString[0];
                if (charCount < byteCount)
                {
                    // NOTE(review): ThrowCharsOverflow is defined elsewhere; presumably it throws
                    // when nothing fits. If it returns, decode only as much as fits.
                    base.ThrowCharsOverflow(decoder, charCount < 1);
                    byteEnd = bytes + charCount;
                }

                while (bytes < byteEnd)
                {
                    // Read the current byte BEFORE advancing. Advancing first skipped the first
                    // input byte and read one byte past the end of the input.
                    byte b = *bytes;
                    bytes++;

                    // Likewise store into the current slot, then advance, so chars[0] is
                    // written and nothing lands past the end of the output buffer.
                    *chars = (b >= 0x80) ? replacement : (char)b;
                    chars++;
                }

                if (decoder != null)
                {
                    decoder.m_bytesUsed = (int)(bytes - byteStart);
                }

                // Pointer subtraction already yields an element count; dividing by
                // sizeof(char) again would halve the result.
                return((int)(chars - charStart));
            }

            // Slow path: invalid bytes go through a fallback buffer, created on first use.
            DecoderFallbackBuffer fallbackBuffer = null;

            byte[] byteBuffer = new byte[1];
            char * charEnd    = chars + charCount;

            while (bytes < byteEnd)
            {
                byte b = *bytes;
                bytes++;

                if (b >= 0x80)
                {
                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                        {
                            fallbackBuffer = base.DecoderFallback.CreateFallbackBuffer();
                        }
                        else
                        {
                            fallbackBuffer = decoder.FallbackBuffer;
                        }
                        fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
                    }

                    byteBuffer[0] = b;
                    if (fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
                    {
                        continue;
                    }

                    // Fallback output did not fit: un-consume the byte and stop.
                    bytes--;
                    fallbackBuffer.InternalReset();
                    base.ThrowCharsOverflow(decoder, chars == charStart);
                    break;
                }

                if (chars >= charEnd)
                {
                    // Output is full: un-consume the byte and stop.
                    bytes--;
                    base.ThrowCharsOverflow(decoder, chars == charStart);
                    break;
                }

                *chars = (char)b;
                chars++;
            }

            if (decoder != null)
            {
                decoder.m_bytesUsed = (int)(bytes - byteStart);
            }
            return((int)(chars - charStart));
        }
Example #18
0
 /// <summary>
 /// Converts bytes to chars by widening each byte to the char with the same numeric value.
 /// </summary>
 /// <param name="bytes">Start of the input bytes.</param>
 /// <param name="byteCount">Number of input bytes.</param>
 /// <param name="chars">Start of the output buffer.</param>
 /// <param name="charCount">Capacity of the output buffer, in chars.</param>
 /// <param name="decoder">Optional decoder; when non-null its <c>m_bytesUsed</c> is set to the bytes consumed.</param>
 /// <returns>The number of chars written, which equals the number of bytes consumed.</returns>
 internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS decoder)
 {
     if (charCount < byteCount)
     {
         // NOTE(review): ThrowCharsOverflow is defined elsewhere; presumably it throws when
         // nothing fits. If it returns, convert only as many bytes as there are output slots.
         this.ThrowCharsOverflow(decoder, charCount < 1);
         byteCount = charCount;
     }

     byte *byteEnd = bytes + byteCount;
     while (bytes < byteEnd)
     {
         *chars = (char)*bytes;

         // Advance by ONE char. 'chars' is a char*, so "+= 2" skipped every other output
         // slot and ran past the end of the buffer.
         chars++;
         bytes++;
     }

     if (decoder != null)
     {
         decoder.m_bytesUsed = byteCount;
     }
     return(byteCount);
 }
Example #19
0
        /// <summary>
        /// Internal worker that reports how many chars decoding <paramref name="count"/> ASCII
        /// bytes would produce. Bytes of 0x80 and above are counted via the decoder fallback.
        /// </summary>
        /// <param name="bytes">Start of the input bytes.</param>
        /// <param name="count">Number of input bytes.</param>
        /// <param name="decoder">Optional decoder; when non-null its fallback is used instead of the encoding's.</param>
        /// <returns>The number of chars the input would decode to.</returns>
        internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS decoder)
        {
            // ASCII has no best-fit mapping, so the only question is which fallback applies.
            DecoderReplacementFallback replacement = (decoder == null)
                ? this.DecoderFallback as DecoderReplacementFallback
                : decoder.Fallback as DecoderReplacementFallback;

            // With a one-char replacement every byte maps to exactly one char.
            if (replacement != null && replacement.MaxCharCount == 1)
            {
                return(count);
            }

            // Otherwise walk the input; the fallback buffer is created on the first invalid byte.
            DecoderFallbackBuffer fbBuffer = null;

            // Start from one char per byte and correct for each invalid byte.
            int total = count;

            byte[] single = new byte[1];
            byte * stop   = bytes + count;

            while (bytes < stop)
            {
                // Read, then advance (kept as two statements).
                byte current = *bytes;
                bytes++;

                // Valid ASCII is already covered by the initial estimate.
                if (current < 0x80)
                {
                    continue;
                }

                if (fbBuffer == null)
                {
                    fbBuffer = (decoder == null)
                        ? this.DecoderFallback.CreateFallbackBuffer()
                        : decoder.FallbackBuffer;
                    fbBuffer.InternalInitialize(stop - count, null);
                }

                // Trade the slot reserved for this byte for whatever the fallback reports.
                single[0] = current;
                total--;
                total += fbBuffer.InternalFallback(single, bytes);
            }

            return(total);
        }
Example #20
0
        /// <summary>
        /// Decodes 4-byte code units (big- or little-endian according to <c>bigEndian</c>) into
        /// chars. Values of 0x10000 and above are written as a surrogate pair; values above
        /// 0x10FFFF or inside 0xD800-0xDFFF are passed to the decoder fallback.
        /// </summary>
        /// <param name="bytes">Start of the input bytes.</param>
        /// <param name="byteCount">Number of input bytes.</param>
        /// <param name="chars">Start of the output buffer.</param>
        /// <param name="charCount">Capacity of the output buffer, in chars.</param>
        /// <param name="baseDecoder">
        /// Optional decoder, cast to <c>UTF32Decoder</c>; carries a partially read code unit
        /// (<c>iChar</c>, <c>readByteCount</c>) between calls and receives <c>m_bytesUsed</c>.
        /// </param>
        /// <returns>The number of chars written to <paramref name="chars"/>.</returns>
        internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS baseDecoder)
        {
            UTF32Decoder          decoder        = (UTF32Decoder)baseDecoder;
            char *                charStart      = chars;
            char *                charEnd        = chars + charCount;
            byte *                byteStart      = bytes;
            byte *                byteEnd        = bytes + byteCount;
            int                   readByteCount  = 0;
            uint                  iChar          = 0;
            DecoderFallbackBuffer fallbackBuffer = null;

            if (decoder != null)
            {
                // Resume a code unit that was split across calls.
                readByteCount  = decoder.readByteCount;
                iChar          = (uint)decoder.iChar;
                fallbackBuffer = baseDecoder.FallbackBuffer;
            }
            else
            {
                fallbackBuffer = base.decoderFallback.CreateFallbackBuffer();
            }
            fallbackBuffer.InternalInitialize(bytes, chars + charCount);

            while (bytes < byteEnd)
            {
                // Accumulate the CURRENT byte, then advance. Advancing first dropped the first
                // input byte and read one byte past the end of the input.
                if (this.bigEndian)
                {
                    iChar = iChar << 8;
                    iChar += bytes[0];
                }
                else
                {
                    iChar = iChar >> 8;
                    // Widen to uint before shifting so the top bit never passes through a signed int.
                    iChar += ((uint)bytes[0]) << 0x18;
                }
                bytes++;

                readByteCount++;
                if (readByteCount < 4)
                {
                    continue;
                }

                // A full 4-byte unit is available.
                readByteCount = 0;
                if ((iChar > 0x10ffff) || ((iChar >= 0xd800) && (iChar <= 0xdfff)))
                {
                    // Invalid value: hand the four bytes, in input order, to the fallback.
                    byte[] invalidBytes;
                    if (this.bigEndian)
                    {
                        invalidBytes = new byte[] { (byte)(iChar >> 0x18), (byte)(iChar >> 0x10), (byte)(iChar >> 8), (byte)iChar };
                    }
                    else
                    {
                        invalidBytes = new byte[] { (byte)iChar, (byte)(iChar >> 8), (byte)(iChar >> 0x10), (byte)(iChar >> 0x18) };
                    }
                    if (!fallbackBuffer.InternalFallback(invalidBytes, bytes, ref chars))
                    {
                        // Fallback output did not fit: un-consume this unit and stop.
                        bytes -= 4;
                        iChar  = 0;
                        fallbackBuffer.InternalReset();
                        base.ThrowCharsOverflow(decoder, chars == charStart);
                        break;
                    }
                    iChar = 0;
                }
                else
                {
                    if (iChar >= 0x10000)
                    {
                        // Needs two output slots (high + low surrogate).
                        if (chars >= (charEnd - 1))
                        {
                            bytes -= 4;
                            iChar  = 0;
                            base.ThrowCharsOverflow(decoder, chars == charStart);
                            break;
                        }

                        // Store into the current slot, THEN advance; advancing first left
                        // chars[0] unwritten and wrote past the end of the buffer.
                        chars[0] = this.GetHighSurrogate(iChar);
                        chars++;
                        iChar    = this.GetLowSurrogate(iChar);
                    }
                    else if (chars >= charEnd)
                    {
                        bytes -= 4;
                        iChar  = 0;
                        base.ThrowCharsOverflow(decoder, chars == charStart);
                        break;
                    }

                    chars[0] = (char)iChar;
                    chars++;
                    iChar    = 0;
                }
            }

            // Leftover partial unit: only falls back when there is no decoder or it must flush.
            if ((readByteCount > 0) && ((decoder == null) || decoder.MustFlush))
            {
                byte[] partialBytes = new byte[readByteCount];
                int    remaining    = readByteCount;
                if (!this.bigEndian)
                {
                    while (remaining > 0)
                    {
                        partialBytes[--remaining] = (byte)(iChar >> 0x18);
                        iChar                     = iChar << 8;
                    }
                }
                else
                {
                    while (remaining > 0)
                    {
                        partialBytes[--remaining] = (byte)iChar;
                        iChar                     = iChar >> 8;
                    }
                }
                if (!fallbackBuffer.InternalFallback(partialBytes, bytes, ref chars))
                {
                    fallbackBuffer.InternalReset();
                    base.ThrowCharsOverflow(decoder, chars == charStart);
                }
                else
                {
                    readByteCount = 0;
                    iChar         = 0;
                }
            }

            if (decoder != null)
            {
                decoder.iChar         = (int)iChar;
                decoder.readByteCount = readByteCount;
                decoder.m_bytesUsed   = (int)(bytes - byteStart);
            }

            // Pointer subtraction already yields a char count; dividing by sizeof(char)
            // again would halve the result.
            return((int)(chars - charStart));
        }
Example #21
0
        /// <summary>
        /// Counts the chars that decoding <paramref name="count"/> bytes of 4-byte code units
        /// (big- or little-endian according to <c>bigEndian</c>) would produce.
        /// </summary>
        /// <param name="bytes">Start of the input bytes.</param>
        /// <param name="count">Number of input bytes.</param>
        /// <param name="baseDecoder">
        /// Optional decoder, cast to <c>UTF32Decoder</c>; supplies a partially read code unit
        /// (<c>iChar</c>, <c>readByteCount</c>) and the fallback buffer. Its state is not modified here.
        /// </param>
        /// <returns>The number of chars the input would decode to.</returns>
        /// <exception cref="ArgumentOutOfRangeException">The running count became negative (overflowed).</exception>
        internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS baseDecoder)
        {
            UTF32Decoder          decoder        = (UTF32Decoder)baseDecoder;
            int                   charCount      = 0;
            byte *                byteEnd        = bytes + count;
            byte *                byteStart      = bytes;
            int                   readByteCount  = 0;
            uint                  iChar          = 0;
            DecoderFallbackBuffer fallbackBuffer = null;

            if (decoder != null)
            {
                // Resume a code unit that was split across calls.
                readByteCount  = decoder.readByteCount;
                iChar          = (uint)decoder.iChar;
                fallbackBuffer = decoder.FallbackBuffer;
            }
            else
            {
                fallbackBuffer = base.decoderFallback.CreateFallbackBuffer();
            }
            fallbackBuffer.InternalInitialize(byteStart, null);

            // Stop early if the count has overflowed into negative territory.
            while ((bytes < byteEnd) && (charCount >= 0))
            {
                // Accumulate the CURRENT byte, then advance. Advancing first dropped the first
                // input byte and read one byte past the end of the input.
                if (this.bigEndian)
                {
                    iChar = iChar << 8;
                    iChar += bytes[0];
                }
                else
                {
                    iChar = iChar >> 8;
                    // Widen to uint before shifting so the top bit never passes through a signed int.
                    iChar += ((uint)bytes[0]) << 0x18;
                }
                bytes++;

                readByteCount++;
                if (readByteCount < 4)
                {
                    continue;
                }

                // A full 4-byte unit is available.
                readByteCount = 0;
                if ((iChar > 0x10ffff) || ((iChar >= 0xd800) && (iChar <= 0xdfff)))
                {
                    // Invalid value: the fallback reports how many chars it would emit.
                    byte[] invalidBytes;
                    if (this.bigEndian)
                    {
                        invalidBytes = new byte[] { (byte)(iChar >> 0x18), (byte)(iChar >> 0x10), (byte)(iChar >> 8), (byte)iChar };
                    }
                    else
                    {
                        invalidBytes = new byte[] { (byte)iChar, (byte)(iChar >> 8), (byte)(iChar >> 0x10), (byte)(iChar >> 0x18) };
                    }
                    charCount += fallbackBuffer.InternalFallback(invalidBytes, bytes);
                    iChar      = 0;
                }
                else
                {
                    // Values of 0x10000 and above take two chars (a surrogate pair).
                    if (iChar >= 0x10000)
                    {
                        charCount++;
                    }
                    charCount++;
                    iChar = 0;
                }
            }

            // Leftover partial unit: only counted when there is no decoder or it must flush.
            if ((readByteCount > 0) && ((decoder == null) || decoder.MustFlush))
            {
                byte[] partialBytes = new byte[readByteCount];
                if (!this.bigEndian)
                {
                    while (readByteCount > 0)
                    {
                        partialBytes[--readByteCount] = (byte)(iChar >> 0x18);
                        iChar = iChar << 8;
                    }
                }
                else
                {
                    while (readByteCount > 0)
                    {
                        partialBytes[--readByteCount] = (byte)iChar;
                        iChar = iChar >> 8;
                    }
                }
                charCount += fallbackBuffer.InternalFallback(partialBytes, bytes);
            }

            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
            }
            return(charCount);
        }
Example #22
0
        /// <summary>
        /// Decodes bytes into chars through an <c>EncodingCharBuffer</c>. Bytes are either
        /// copied directly or, after a 0x2B ('+') byte, accumulated six bits at a time via
        /// <c>base64Values</c> until sixteen bits are available for one char.
        /// </summary>
        /// <param name="bytes">Start of the input bytes.</param>
        /// <param name="byteCount">Number of input bytes.</param>
        /// <param name="chars">Start of the output buffer (may be null).</param>
        /// <param name="charCount">Capacity of the output buffer, in chars.</param>
        /// <param name="baseDecoder">
        /// Optional decoder, cast to <c>Decoder</c>; carries <c>bits</c>, <c>bitCount</c> and
        /// <c>firstByte</c> between calls and receives <c>m_bytesUsed</c>.
        /// </param>
        /// <returns>The value of <c>buffer.Count</c> after decoding.</returns>
        internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS baseDecoder)
        {
            Decoder decoder = (Decoder)baseDecoder;

            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

            // State defaults for a stateless call; bitCount < 0 means "not in a base64 run".
            int  bits      = 0;
            int  bitCount  = -1;
            bool firstByte = false;

            if (decoder != null)
            {
                bits      = decoder.bits;
                bitCount  = decoder.bitCount;
                firstByte = decoder.firstByte;
            }

            // A complete char may still be pending from the previous call.
            if (bitCount >= 16)
            {
                if (!buffer.AddChar((char)((bits >> (bitCount - 16)) & 0xffff)))
                {
                    base.ThrowCharsOverflow(decoder, true);
                }
                bitCount -= 16;
            }

            while (buffer.MoreData)
            {
                int  decoded;
                byte current = buffer.GetNextByte();

                if (bitCount >= 0)
                {
                    // Inside a base64 run.
                    sbyte sextet;
                    if ((current < 0x80) && ((sextet = this.base64Values[current]) >= 0))
                    {
                        firstByte = false;
                        bits      = (bits << 6) | ((byte)sextet);
                        bitCount += 6;
                        if (bitCount < 16)
                        {
                            // Not enough bits for a char yet.
                            continue;
                        }
                        decoded   = (bits >> (bitCount - 16)) & 0xffff;
                        bitCount -= 16;
                    }
                    else
                    {
                        // Any non-base64 byte ends the run.
                        bitCount = -1;
                        if (current != 0x2d)
                        {
                            if (!buffer.Fallback(current))
                            {
                                break;
                            }
                            continue;
                        }

                        // 0x2D ('-') right after the opening 0x2B ('+') yields a literal '+';
                        // otherwise the terminator produces no output.
                        if (!firstByte)
                        {
                            continue;
                        }
                        decoded = 0x2b;
                    }
                }
                else if (current == 0x2b)
                {
                    // 0x2B ('+') opens a base64 run.
                    bitCount  = 0;
                    firstByte = true;
                    continue;
                }
                else if (current >= 0x80)
                {
                    // Bytes of 0x80 and above outside a run go to the fallback.
                    if (!buffer.Fallback(current))
                    {
                        break;
                    }
                    continue;
                }
                else
                {
                    // Direct byte: copied through unchanged.
                    decoded = current;
                }

                if ((decoded >= 0) && !buffer.AddChar((char)decoded))
                {
                    // Output is full. If a run is active, adjust the buffer's byte position by 1
                    // and put the sixteen bits back so the char is still pending.
                    if (bitCount >= 0)
                    {
                        buffer.AdjustBytes(1);
                        bitCount += 16;
                    }
                    break;
                }
            }

            // Persist state only when actually converting (chars != null) with a decoder.
            if ((chars != null) && (decoder != null))
            {
                if (!decoder.MustFlush)
                {
                    decoder.bits      = bits;
                    decoder.bitCount  = bitCount;
                    decoder.firstByte = firstByte;
                }
                else
                {
                    decoder.bits      = 0;
                    decoder.bitCount  = -1;
                    decoder.firstByte = false;
                }
                decoder.m_bytesUsed = buffer.BytesUsed;
            }
            return(buffer.Count);
        }
Example #23
0
        internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS baseDecoder)
        {
            base.CheckMemorySection();
            DBCSDecoder           decoder        = (DBCSDecoder)baseDecoder;
            byte *                numPtr         = bytes;
            byte *                numPtr2        = bytes + byteCount;
            char *                chPtr          = chars;
            char *                charEnd        = chars + charCount;
            bool                  flag           = false;
            DecoderFallbackBuffer fallbackBuffer = null;

            if ((decoder != null) && (decoder.bLeftOver > 0))
            {
                if (byteCount == 0)
                {
                    if (!decoder.MustFlush)
                    {
                        return(0);
                    }
                    fallbackBuffer = decoder.FallbackBuffer;
                    fallbackBuffer.InternalInitialize(bytes, charEnd);
                    byte[] buffer2 = new byte[] { decoder.bLeftOver };
                    if (!fallbackBuffer.InternalFallback(buffer2, bytes, ref chars))
                    {
                        base.ThrowCharsOverflow(decoder, true);
                    }
                    decoder.bLeftOver = 0;
                    return((int)((long)((chars - chPtr) / 2)));
                }
                int index = decoder.bLeftOver << 8;
                index |= bytes[0];
                bytes++;
                char ch = this.mapBytesToUnicode[index];
                if ((ch == '\0') && (index != 0))
                {
                    fallbackBuffer = decoder.FallbackBuffer;
                    fallbackBuffer.InternalInitialize(numPtr2 - byteCount, charEnd);
                    byte[] buffer3 = new byte[] { (byte)(index >> 8), (byte)index };
                    if (!fallbackBuffer.InternalFallback(buffer3, bytes, ref chars))
                    {
                        base.ThrowCharsOverflow(decoder, true);
                    }
                }
                else
                {
                    if (chars >= charEnd)
                    {
                        base.ThrowCharsOverflow(decoder, true);
                    }
                    chars++;
                    chars[0] = ch;
                }
            }
            while (bytes < numPtr2)
            {
                int num2 = bytes[0];
                bytes++;
                char ch2 = this.mapBytesToUnicode[num2];
                if (ch2 == 0xfffe)
                {
                    if (bytes < numPtr2)
                    {
                        num2  = num2 << 8;
                        num2 |= bytes[0];
                        bytes++;
                        ch2 = this.mapBytesToUnicode[num2];
                    }
                    else if ((decoder == null) || decoder.MustFlush)
                    {
                        ch2 = '\0';
                    }
                    else
                    {
                        flag = true;
                        decoder.bLeftOver = (byte)num2;
                        break;
                    }
                }
                if ((ch2 == '\0') && (num2 != 0))
                {
                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                        {
                            fallbackBuffer = base.DecoderFallback.CreateFallbackBuffer();
                        }
                        else
                        {
                            fallbackBuffer = decoder.FallbackBuffer;
                        }
                        fallbackBuffer.InternalInitialize(numPtr2 - byteCount, charEnd);
                    }
                    byte[] buffer4 = null;
                    if (num2 < 0x100)
                    {
                        buffer4 = new byte[] { (byte)num2 };
                    }
                    else
                    {
                        buffer4 = new byte[] { (byte)(num2 >> 8), (byte)num2 };
                    }
                    if (fallbackBuffer.InternalFallback(buffer4, bytes, ref chars))
                    {
                        continue;
                    }
                    bytes -= buffer4.Length;
                    fallbackBuffer.InternalReset();
                    base.ThrowCharsOverflow(decoder, bytes == numPtr);
                    break;
                }
                if (chars >= charEnd)
                {
                    bytes--;
                    if (num2 >= 0x100)
                    {
                        bytes--;
                    }
                    base.ThrowCharsOverflow(decoder, bytes == numPtr);
                    break;
                }
                chars++;
                chars[0] = ch2;
            }
            if (decoder != null)
            {
                if (!flag)
                {
                    decoder.bLeftOver = 0;
                }
                decoder.m_bytesUsed = (int)((long)((bytes - numPtr) / 1));
            }
            return((int)((long)((chars - chPtr) / 2)));
        }
 // Returns the number of chars that decoding `count` bytes starting at `bytes` would yield;
 // no char output is written. `baseDecoder` may be null; when present it is cast to
 // UTF8Decoder and its `bits` field seeds the partially decoded state held in `ch`.
 // The running total `num` starts at `count` (one char per byte) and is decremented as bytes
 // turn out to belong to multi-byte sequences; invalid sequences add whatever
 // FallbackInvalidByteSequence returns.
 // NOTE(review): the shape of this code (goto labels, numN locals) looks like decompiler
 // output; the label names carry no meaning of their own.
 internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
 {
     int num4;
     // pSrc is the read cursor, a is one past the last input byte.
     byte* pSrc = bytes;
     byte* a = pSrc + count;
     int num = count;
     // ch == 0 means "no sequence in progress"; otherwise it holds the accumulated bits of
     // the sequence being decoded plus marker bits in its upper part.
     int ch = 0;
     DecoderFallbackBuffer fallback = null;
     if (baseDecoder != null)
     {
         UTF8Decoder decoder = (UTF8Decoder) baseDecoder;
         ch = decoder.bits;
         // The top bits of the carried-over state (ch >> 30) are subtracted up front and added
         // back if the sequence is abandoned (see the additions of ch >> 30 further down).
         num -= ch >> 30;
     }
 // Slow loop: handles one byte at a time with all checks.
 Label_0027:
     if (pSrc >= a)
     {
         goto Label_0336;
     }
     if (ch == 0)
     {
         // Nothing pending: read a byte and classify it as a lead byte.
         ch = pSrc[0];
         pSrc++;
         goto Label_010D;
     }
     // A sequence is pending: the next byte must be a trail byte.
     int num3 = pSrc[0];
     pSrc++;
     // -64 is 0xFFFFFFC0, so this tests that the top two bits are 10 (a 10xxxxxx trail byte).
     if ((num3 & -64) != 0x80)
     {
         // Not a trail byte: un-read it so it is processed again as a possible lead byte,
         // restore the count adjustment, and fall through to the invalid-sequence handling.
         pSrc--;
         num += ch >> 30;
     }
     else
     {
         // Fold the six payload bits of the trail byte into the accumulator.
         ch = (ch << 6) | (num3 & 0x3f);
         // Bit 0x20000000 clear: the sequence is treated as still incomplete.
         // NOTE(review): the marker bits come from the constants OR-ed in at Label_010D;
         // their individual meanings are inferred from usage here — confirm against the class constants.
         if ((ch & 0x20000000) == 0)
         {
             if ((ch & 0x10000000) != 0)
             {
                 // Keep going if bit 0x800000 is set or the InRange check on (ch & 0x1f0) passes;
                 // otherwise fall through to the invalid-sequence handling.
                 if (((ch & 0x800000) != 0) || InRange(ch & 0x1f0, 0x10, 0x100))
                 {
                     goto Label_0027;
                 }
             }
             // Rejects the accumulated value when bits 0x3e0 are all zero or equal 0x360.
             // NOTE(review): presumably the overlong and surrogate-range checks for a
             // three-byte sequence — confirm.
             else if (((ch & 0x3e0) != 0) && ((ch & 0x3e0) != 0x360))
             {
                 goto Label_0027;
             }
         }
         else
         {
             // Sequence complete.
             // NOTE(review): the extra decrement presumably corrects the count for a
             // four-byte sequence whose value does not need two chars — confirm.
             if ((ch & 0x101f0000) == 0x10000000)
             {
                 num--;
             }
             goto Label_0183;
         }
     }
 // Invalid byte sequence: lazily obtain the fallback buffer (a new one when there is no
 // decoder, the decoder's own otherwise), add the fallback's char count, and reset the state.
 Label_00C9:
     if (fallback == null)
     {
         if (baseDecoder == null)
         {
             fallback = base.decoderFallback.CreateFallbackBuffer();
         }
         else
         {
             fallback = baseDecoder.FallbackBuffer;
         }
         fallback.InternalInitialize(bytes, null);
     }
     num += this.FallbackInvalidByteSequence(pSrc, ch, fallback);
     ch = 0;
     goto Label_0027;
 // Classify the byte in ch as a lead byte. Values <= 0x7f fall through to the fast path below.
 Label_010D:
     if (ch > 0x7f)
     {
         // A non-ASCII byte is not a char on its own.
         num--;
         if ((ch & 0x40) == 0)
         {
             // 10xxxxxx: a trail byte where a lead byte was expected.
             goto Label_00C9;
         }
         if ((ch & 0x20) != 0)
         {
             if ((ch & 0x10) != 0)
             {
                 // 1111xxxx lead byte: keep the low four bits; values above 4 are rejected
                 // (the original byte value is restored first so the fallback sees it).
                 ch &= 15;
                 if (ch > 4)
                 {
                     ch |= 240;
                     goto Label_00C9;
                 }
                 ch |= 0x504d0c00;
                 num--;
             }
             else
             {
                 // 1110xxxx lead byte: keep the low four bits and set the marker bits.
                 ch = (ch & 15) | 0x48228000;
                 num--;
             }
         }
         else
         {
             // 110xxxxx lead byte: payload values 0 and 1 are rejected
             // (the original byte value is restored first so the fallback sees it).
             ch &= 0x1f;
             if (ch <= 1)
             {
                 ch |= 0xc0;
                 goto Label_00C9;
             }
             ch |= 0x800000;
         }
         goto Label_0027;
     }
 // Fast path entry: num4 is the number of bytes left.
 Label_0183:
     num4 = PtrDiff(a, pSrc);
     if (num4 <= 13)
     {
         // Short remainder: scan byte by byte; ASCII bytes need no count adjustment.
         byte* numPtr3 = a;
         while (pSrc < numPtr3)
         {
             ch = pSrc[0];
             pSrc++;
             if (ch > 0x7f)
             {
                 goto Label_010D;
             }
         }
         ch = 0;
         goto Label_0336;
     }
     // numPtr4 == a - 7, so the two 4-byte reads below stay inside the input while pSrc < numPtr4.
     byte* numPtr4 = (pSrc + num4) - 7;
     while (pSrc < numPtr4)
     {
         int num6;
         ch = pSrc[0];
         pSrc++;
         if (ch > 0x7f)
         {
             goto Label_025A;
         }
         // Consume single bytes / a byte pair until pSrc is 4-byte aligned.
         if ((((int) pSrc) & 1) != 0)
         {
             ch = pSrc[0];
             pSrc++;
             if (ch > 0x7f)
             {
                 goto Label_025A;
             }
         }
         if ((((int) pSrc) & 2) != 0)
         {
             ch = *((ushort*) pSrc);
             if ((ch & 0x8080) != 0)
             {
                 goto Label_0245;
             }
             pSrc += 2;
         }
         // Eight bytes per step; -2139062144 is 0x80808080, i.e. the high bit of each byte.
         while (pSrc < numPtr4)
         {
             ch = *((int*) pSrc);
             int num5 = *((int*) (pSrc + 4));
             if (((ch | num5) & -2139062144) != 0)
             {
                 goto Label_0245;
             }
             pSrc += 8;
             if (pSrc >= numPtr4)
             {
                 break;
             }
             ch = *((int*) pSrc);
             num5 = *((int*) (pSrc + 4));
             if (((ch | num5) & -2139062144) != 0)
             {
                 goto Label_0245;
             }
             pSrc += 8;
         }
         break;
     // A multi-byte load contained a non-ASCII byte: take its low byte as the next input byte.
     // NOTE(review): taking the low byte assumes little-endian loads — confirm for the target platform.
     Label_0245:
         ch &= 0xff;
         pSrc++;
         if (ch <= 0x7f)
         {
             continue;
         }
     // Inline decoding of a multi-byte sequence: ch is the lead byte, num6 the first trail byte.
     Label_025A:
         num6 = pSrc[0];
         pSrc++;
         if (((ch & 0x40) == 0) || ((num6 & -64) != 0x80))
         {
             goto Label_032A;
         }
         num6 &= 0x3f;
         if ((ch & 0x20) != 0)
         {
             // Three- or four-byte lead: fold the lead's low four bits above the trail bits.
             num6 |= (ch & 15) << 6;
             if ((ch & 0x10) != 0)
             {
                 // Four-byte form: range-check the partial value and require two more trail bytes.
                 ch = pSrc[0];
                 if (!InRange(num6 >> 4, 1, 0x10) || ((ch & -64) != 0x80))
                 {
                     goto Label_032A;
                 }
                 num6 = (num6 << 6) | (ch & 0x3f);
                 ch = pSrc[1];
                 if ((ch & -64) != 0x80)
                 {
                     goto Label_032A;
                 }
                 pSrc += 2;
                 num--;
             }
             else
             {
                 // Three-byte form: same 0x3e0 checks as in the slow loop, plus one more trail byte.
                 ch = pSrc[0];
                 if ((((num6 & 0x3e0) == 0) || ((num6 & 0x3e0) == 0x360)) || ((ch & -64) != 0x80))
                 {
                     goto Label_032A;
                 }
                 pSrc++;
                 num--;
             }
         }
         // Two-byte form: reject lead bytes whose bits 0x1e are all zero.
         else if ((ch & 30) == 0)
         {
             goto Label_032A;
         }
         num--;
     }
     ch = 0;
     goto Label_0027;
 // The inline decode failed: rewind over the lead byte and first trail byte so that the
 // slow loop re-processes them with full handling.
 Label_032A:
     pSrc -= 2;
     ch = 0;
     goto Label_0027;
 // End of input.
 Label_0336:
     if (ch == 0)
     {
         return num;
     }
     // A sequence is still in progress: restore the adjustment taken for it.
     num += ch >> 30;
     // With a decoder that is not flushing, nothing more is added for the pending sequence.
     if ((baseDecoder != null) && !baseDecoder.MustFlush)
     {
         return num;
     }
     // Otherwise the incomplete sequence goes through the invalid-sequence fallback.
     if (fallback == null)
     {
         if (baseDecoder == null)
         {
             fallback = base.decoderFallback.CreateFallbackBuffer();
         }
         else
         {
             fallback = baseDecoder.FallbackBuffer;
         }
         fallback.InternalInitialize(bytes, null);
     }
     return (num + this.FallbackInvalidByteSequence(pSrc, ch, fallback));
 }
Example #25
0
 // Abstract decode entry point with no body here. The GetCharCount override that follows
 // calls it with chars == null and charCount == 0 and returns its result as the char count.
 public unsafe abstract int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS decoder);
 internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
 {
     // Counting is delegated to GetChars: it is handed a null output pointer with zero
     // capacity, and whatever it returns is reported as the char count.
     int charsNeeded = GetChars(bytes, count, null, 0, baseDecoder);
     return charsNeeded;
 }
Example #27
0
        // Counts the chars that decoding `count` bytes starting at `bytes` would produce.
        // baseDecoder may be null; when present it supplies a pending odd byte (lastByte) and a
        // pending high surrogate (lastChar) from an earlier call, and its MustFlush decides whether
        // dangling state at the end of the input is sent to the decoder fallback.
        // The estimate starts at count / 2 and is corrected as unpaired surrogates are replaced by
        // their fallback output. No decoder field is assigned in this method.
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
        {
            UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;

            byte* byteEnd = bytes + count;
            byte* byteStart = bytes;

            // Need last vars
            // (lastByte == -1 means no pending byte; lastChar == 0 means no pending high surrogate)
            int lastByte = -1;
            char lastChar = (char)0;

            // Start by assuming same # of chars as bytes
            // (two bytes per char, hence the shift)
            int charCount = count >> 1;

            // Need -1 to check 2 at a time.  If we have an even #, longBytes will go
            // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longBytes
            // will go from longEnd - 1 long to longEnd. (Might not get to use this)
            ulong* longEnd = (ulong*)(byteEnd - 7);

            // For fallback we may need a fallback buffer
            DecoderFallbackBuffer fallbackBuffer = null;

            if (decoder != null)
            {
                lastByte = decoder.lastByte;
                lastChar = decoder.lastChar;

                // Assume extra char if last char was around
                if (lastChar > 0)
                    charCount++;

                // Assume extra char if extra last byte makes up odd # of input bytes
                if (lastByte >= 0 && (count & 1) == 1)
                {
                    charCount++;
                }
            }

            while (bytes < byteEnd)
            {
                // If we're aligned then maybe we can do it fast
                // This'll hurt if we're unaligned because we'll always test but never be aligned
                // The fast loop is only entered when the data's byte order matches the build's
                // BIGENDIAN setting and there is no pending byte or high surrogate.
#if !NO_FAST_UNICODE_LOOP
#if BIGENDIAN
                if (bigEndian &&
#else // BIGENDIAN
                if (!bigEndian &&
#endif // BIGENDIAN
#if WIN64 // win64 has to be long aligned
                    (unchecked((long)bytes) & 7) == 0 &&
#else
 (unchecked((int)bytes) & 3) == 0 &&
#endif // WIN64
 lastByte == -1 && lastChar == 0)
                {
                    // Need new char* so we can check 4 at a time
                    ulong* longBytes = (ulong*)bytes;

                    while (longBytes < longEnd)
                    {
                        // See if we potentially have surrogates (0x8000 bit set)
                        // (We're either big endian on a big endian machine or little endian on
                        // a little endian machine so this'll work)
                        if ((0x8000800080008000 & *longBytes) != 0)
                        {
                            // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
                            // 5 bits looks like 11011, then its a high or low surrogate.
                            // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
                            // Note that we expect BMP characters to be more common than surrogates
                            // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
                            ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;

                            // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
                            // but no clue if they're high or low.
                            // If each of the 4 characters are non-zero, then none are surrogates.
                            if ((uTemp & 0xFFFF000000000000) == 0 ||
                                (uTemp & 0x0000FFFF00000000) == 0 ||
                                (uTemp & 0x00000000FFFF0000) == 0 ||
                                (uTemp & 0x000000000000FFFF) == 0)
                            {
                                // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
                                // or if there's 1 or 4 surrogates

                                // If they happen to be high/low/high/low, we may as well continue.  Check the next
                                // bit to see if its set (low) or not (high) in the right pattern
#if BIGENDIAN
                                if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
#else
                                if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
#endif
                                {
                                    // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
                                    // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.

                                    // Drop out to the slow loop to resolve the surrogates
                                    break;
                                }
                                // else they are all surrogates in High/Low/High/Low order, so we can use them.
                            }
                            // else none are surrogates, so we can use them.
                        }
                        // else all < 0x8000 so we can use them

                        // We can use these 4 chars.
                        longBytes++;
                    }

                    bytes = (byte*)longBytes;

                    if (bytes >= byteEnd)
                        break;
                }
#endif // !NO_FAST_UNICODE_LOOP

                // Get 1st byte
                // (skipped when a byte is already pending; a lone trailing byte stays in lastByte)
                if (lastByte < 0)
                {
                    lastByte = *bytes++;
                    if (bytes >= byteEnd) break;
                }

                // Get full char
                char ch;
                if (bigEndian)
                {
                    ch = (char)(lastByte << 8 | *(bytes++));
                }
                else
                {
                    ch = (char)(*(bytes++) << 8 | lastByte);
                }
                lastByte = -1;

                // See if the char's valid
                if (ch >= 0xd800 && ch <= 0xdfff)
                {
                    // Was it a high surrogate?
                    if (ch <= 0xdbff)
                    {
                        // Its a high surrogate, if we had one then do fallback for previous one
                        if (lastChar > 0)
                        {
                            // Ignore previous bad high surrogate
                            charCount--;

                            // Get fallback for previous high surrogate
                            // Note we have to reconstruct bytes because some may have been in decoder
                            byte[] byteBuffer = null;
                            if (bigEndian)
                            {
                                byteBuffer = new byte[] { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
                            }
                            else
                            {
                                byteBuffer = new byte[] { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };

                            }

                            // Lazily obtain the fallback buffer: a new one without a decoder,
                            // the decoder's own otherwise.
                            if (fallbackBuffer == null)
                            {
                                if (decoder == null)
                                    fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                                else
                                    fallbackBuffer = decoder.FallbackBuffer;

                                // Set our internal fallback interesting things.
                                fallbackBuffer.InternalInitialize(byteStart, null);
                            }

                            // Get fallback.
                            charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
                        }

                        // Ignore the last one which fell back already,
                        // and remember the new high surrogate
                        lastChar = ch;
                        continue;
                    }

                    // Its a low surrogate
                    if (lastChar == 0)
                    {
                        // Expected a previous high surrogate
                        charCount--;

                        // Get fallback for this low surrogate
                        // Note we have to reconstruct bytes because some may have been in decoder
                        byte[] byteBuffer = null;
                        if (bigEndian)
                        {
                            byteBuffer = new byte[] { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
                        }
                        else
                        {
                            byteBuffer = new byte[] { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };

                        }

                        if (fallbackBuffer == null)
                        {
                            if (decoder == null)
                                fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                            else
                                fallbackBuffer = decoder.FallbackBuffer;

                            // Set our internal fallback interesting things.
                            fallbackBuffer.InternalInitialize(byteStart, null);
                        }

                        charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);

                        // Ignore this one (we already did its fallback)
                        continue;
                    }

                    // Valid surrogate pair, already counted.
                    lastChar = (char)0;
                }
                else if (lastChar > 0)
                {
                    // Had a high surrogate, expected a low surrogate
                    // Uncount the last high surrogate
                    charCount--;

                    // fall back the high surrogate.
                    byte[] byteBuffer = null;
                    if (bigEndian)
                    {
                        byteBuffer = new byte[] { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
                    }
                    else
                    {
                        byteBuffer = new byte[] { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };

                    }

                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                            fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = decoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(byteStart, null);
                    }

                    // Already subtracted high surrogate
                    charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);

                    // Not left over now, clear previous high surrogate and continue to add current char
                    lastChar = (char)0;
                }

                // Valid char, already counted
            }

            // Extra space if we can't use decoder
            // (no decoder, or a flushing decoder: leftover state cannot be carried to a later call)
            if (decoder == null || decoder.MustFlush)
            {
                if (lastChar > 0)
                {
                    // No hanging high surrogates allowed, do fallback and remove count for it
                    charCount--;
                    byte[] byteBuffer = null;
                    if (bigEndian)
                    {
                        byteBuffer = new byte[] { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
                    }
                    else
                    {
                        byteBuffer = new byte[] { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };

                    }

                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                            fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = decoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(byteStart, null);
                    }

                    charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);

                    lastChar = (char)0;
                }

                if (lastByte >= 0)
                {
                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                            fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = decoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(byteStart, null);
                    }

                    // No hanging odd bytes allowed if must flush
                    charCount += fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes);
                    lastByte = -1;
                }
            }

            // If we had a high surrogate left over, we can't count it
            // (only reachable with a non-flushing decoder, since the block above clears lastChar)
            if (lastChar > 0)
                charCount--;

            return charCount;
        }
 // Decodes byteCount bytes at `bytes` through an EncodingCharBuffer built over `chars` /
 // charCount and returns buffer.Count. State that can span calls (current code page, a pending
 // 0xef / 0xe8 / stress-abbreviation byte, and the two pending nukta candidate chars) is loaded
 // from the ISCIIDecoder when one is supplied, and written back at the end only when `chars`
 // is non-null.
 // NOTE(review): local names such as flag4 / num2 look like decompiler output.
 internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
 {
     ISCIIDecoder decoder = (ISCIIDecoder) baseDecoder;
     Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);
     // The local defaultCodePage is the *current* code page; it starts as the encoding's default
     // and may be replaced by the decoder's saved value or by a code-page switch in the input.
     int defaultCodePage = this.defaultCodePage;
     bool bLastATR = false;
     bool bLastVirama = false;
     bool bLastDevenagariStressAbbr = false;
     char cLastCharForNextNukta = '\0';
     char cLastCharForNoNextNukta = '\0';
     if (decoder != null)
     {
         defaultCodePage = decoder.currentCodePage;
         bLastATR = decoder.bLastATR;
         bLastVirama = decoder.bLastVirama;
         bLastDevenagariStressAbbr = decoder.bLastDevenagariStressAbbr;
         cLastCharForNextNukta = decoder.cLastCharForNextNukta;
         cLastCharForNoNextNukta = decoder.cLastCharForNoNextNukta;
     }
     // flag4: some piece of pending state must be resolved before the next byte is mapped.
     bool flag4 = ((bLastVirama | bLastATR) | bLastDevenagariStressAbbr) | (cLastCharForNextNukta != '\0');
     // num2: first index into IndicMapping for the current code page; stays -1 when the code
     // page is outside 2..11.
     int num2 = -1;
     if ((defaultCodePage >= 2) && (defaultCodePage <= 11))
     {
         num2 = IndicMappingIndex[defaultCodePage];
     }
     while (buffer.MoreData)
     {
         byte nextByte = buffer.GetNextByte();
         if (flag4)
         {
             flag4 = false;
             if (bLastATR)
             {
                 // Previous byte was 0xef: this byte selects a code page.
                 // 0x42..0x4b: switch to code page (nextByte & 15).
                 if ((nextByte >= 0x42) && (nextByte <= 0x4b))
                 {
                     defaultCodePage = nextByte & 15;
                     num2 = IndicMappingIndex[defaultCodePage];
                     bLastATR = false;
                     continue;
                 }
                 // 0x40: switch back to the encoding's default code page.
                 if (nextByte == 0x40)
                 {
                     defaultCodePage = this.defaultCodePage;
                     num2 = -1;
                     if ((defaultCodePage >= 2) && (defaultCodePage <= 11))
                     {
                         num2 = IndicMappingIndex[defaultCodePage];
                     }
                     bLastATR = false;
                     continue;
                 }
                 // 0x41: handled exactly like 0x40 (back to the default code page).
                 if (nextByte == 0x41)
                 {
                     defaultCodePage = this.defaultCodePage;
                     num2 = -1;
                     if ((defaultCodePage >= 2) && (defaultCodePage <= 11))
                     {
                         num2 = IndicMappingIndex[defaultCodePage];
                     }
                     bLastATR = false;
                     continue;
                 }
                 // Any other selector: fall back the pending 0xef, then keep processing this byte.
                 if (!buffer.Fallback((byte) 0xef))
                 {
                     break;
                 }
                 bLastATR = false;
             }
             else if (bLastVirama)
             {
                 // Previous byte was 0xe8 (already emitted). A following 0xe8 or 0xe9 adds one
                 // extra char; anything else just clears the flag and is processed normally.
                 // NOTE(review): the two literals below are invisible characters, presumably
                 // U+200C (zero width non-joiner) and U+200D (zero width joiner) — confirm.
                 if (nextByte == 0xe8)
                 {
                     if (!buffer.AddChar('‌'))
                     {
                         break;
                     }
                     bLastVirama = false;
                     continue;
                 }
                 if (nextByte == 0xe9)
                 {
                     if (!buffer.AddChar('‍'))
                     {
                         break;
                     }
                     bLastVirama = false;
                     continue;
                 }
                 bLastVirama = false;
             }
             else if (bLastDevenagariStressAbbr)
             {
                 // A byte flagged as a stress/abbreviation prefix is pending: 0xb8 and 0xbf
                 // complete it; otherwise byte 240 (0xf0) is sent to the fallback and this byte
                 // is processed normally.
                 // NOTE(review): the literals are presumably U+0952 and U+0970 — confirm.
                 if (nextByte == 0xb8)
                 {
                     if (!buffer.AddChar('॒'))
                     {
                         break;
                     }
                     bLastDevenagariStressAbbr = false;
                     continue;
                 }
                 if (nextByte == 0xbf)
                 {
                     if (!buffer.AddChar('॰'))
                     {
                         break;
                     }
                     bLastDevenagariStressAbbr = false;
                     continue;
                 }
                 if (!buffer.Fallback((byte) 240))
                 {
                     break;
                 }
                 bLastDevenagariStressAbbr = false;
             }
             else
             {
                 // A char with two candidate mappings is pending: 0xe9 selects the "next nukta"
                 // form and is consumed; any other byte emits the plain form and is then
                 // processed normally.
                 if (nextByte == 0xe9)
                 {
                     if (!buffer.AddChar(cLastCharForNextNukta))
                     {
                         break;
                     }
                     cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
                     continue;
                 }
                 if (!buffer.AddChar(cLastCharForNoNextNukta))
                 {
                     break;
                 }
                 cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
             }
         }
         // Bytes below 160 (0xa0) map straight to the char with the same value.
         if (nextByte < 160)
         {
             if (buffer.AddChar((char) nextByte))
             {
                 continue;
             }
             break;
         }
         if (nextByte == 0xef)
         {
             // Code-page selector prefix: remember it and interpret the next byte above.
             bLastATR = flag4 = true;
         }
         else
         {
             // ch is the primary mapping, ch4 the secondary one for this byte.
             // NOTE(review): num2 is used as an index without a check here; it is -1 when the
             // current code page is outside 2..11 — confirm callers guarantee a valid code page.
             char ch = IndicMapping[num2, 0, nextByte - 160];
             char ch4 = IndicMapping[num2, 1, nextByte - 160];
             // No secondary mapping (or the byte is 0xe9 itself): emit the primary mapping,
             // or fall back when there is none.
             if ((ch4 == '\0') || (nextByte == 0xe9))
             {
                 if (ch == '\0')
                 {
                     if (buffer.Fallback(nextByte))
                     {
                         continue;
                     }
                 }
                 else if (buffer.AddChar(ch))
                 {
                     continue;
                 }
                 break;
             }
             if (nextByte == 0xe8)
             {
                 // Emit the primary mapping now; the next byte may add a joiner (see above).
                 if (!buffer.AddChar(ch))
                 {
                     break;
                 }
                 bLastVirama = flag4 = true;
             }
             else
             {
                 // Secondary mapping with the top nibble clear: defer output until the next
                 // byte shows which of the two forms to emit.
                 if ((ch4 & 0xf000) == 0)
                 {
                     flag4 = true;
                     cLastCharForNextNukta = ch4;
                     cLastCharForNoNextNukta = ch;
                     continue;
                 }
                 bLastDevenagariStressAbbr = flag4 = true;
             }
         }
     }
     // End of input with no decoder, or with a flushing decoder: resolve pending state now.
     // In each branch, when the fallback/add fails the state is kept and buffer.GetNextByte()
     // is called.
     // NOTE(review): the purpose of that GetNextByte() call is not visible here, and a pending
     // 0xe8 (bLastVirama) is not handled in this chain — confirm both are intended.
     if ((decoder == null) || decoder.MustFlush)
     {
         if (bLastATR)
         {
             if (buffer.Fallback((byte) 0xef))
             {
                 bLastATR = false;
             }
             else
             {
                 buffer.GetNextByte();
             }
         }
         else if (bLastDevenagariStressAbbr)
         {
             if (buffer.Fallback((byte) 240))
             {
                 bLastDevenagariStressAbbr = false;
             }
             else
             {
                 buffer.GetNextByte();
             }
         }
         else if (cLastCharForNoNextNukta != '\0')
         {
             if (buffer.AddChar(cLastCharForNoNextNukta))
             {
                 cLastCharForNoNextNukta = cLastCharForNextNukta = '\0';
             }
             else
             {
                 buffer.GetNextByte();
             }
         }
     }
     // Persist state only for a real conversion (chars != null), not for a count-only call.
     if ((decoder != null) && (chars != null))
     {
         // Keep the working state when not flushing, or when something is still pending;
         // otherwise reset the decoder to its initial state.
         if ((!decoder.MustFlush || (cLastCharForNoNextNukta != '\0')) || (bLastATR || bLastDevenagariStressAbbr))
         {
             decoder.currentCodePage = defaultCodePage;
             decoder.bLastVirama = bLastVirama;
             decoder.bLastATR = bLastATR;
             decoder.bLastDevenagariStressAbbr = bLastDevenagariStressAbbr;
             decoder.cLastCharForNextNukta = cLastCharForNextNukta;
             decoder.cLastCharForNoNextNukta = cLastCharForNoNextNukta;
         }
         else
         {
             decoder.currentCodePage = this.defaultCodePage;
             decoder.bLastVirama = false;
             decoder.bLastATR = false;
             decoder.bLastDevenagariStressAbbr = false;
             decoder.cLastCharForNextNukta = '\0';
             decoder.cLastCharForNoNextNukta = '\0';
         }
         decoder.m_bytesUsed = buffer.BytesUsed;
     }
     return buffer.Count;
 }
 // Counting entry point: does no decoding of its own.
 internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
 {
     // GetChars is called with a null chars pointer and zero capacity, which asks it
     // for the count only; its result is returned unchanged.
     int required = this.GetChars(bytes, count, null, 0, baseDecoder);
     return required;
 }
Example #30
0
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
        {
            // Only internal callers reach this method and they have validated the
            // arguments already, so the checks are assertions rather than exceptions.
            Contract.Assert(null != bytes, "[ISO2022Encoding.GetCharCount]bytes is null");
            Contract.Assert(0 <= count, "[ISO2022Encoding.GetCharCount]byteCount is negative");

            // Counting mode: hand GetChars a null char* with zero capacity and report its result.
            int charsRequired = this.GetChars(bytes, count, null, 0, baseDecoder);
            return charsRequired;
        }
Example #31
0
            [System.Security.SecurityCritical]  // auto-generated
            internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
                                                    byte* byteStart, int byteCount)
            {
                // Owning encoding and the (optional) stateful decoder.
                this.enc = enc;
                this.decoder = decoder;

                // Input window: cursor starts at byteStart, end is byteCount bytes further on.
                this.byteStart = byteStart;
                this.bytes = byteStart;
                this.byteEnd = byteStart + byteCount;

                // Output window: cursor starts at charStart, end is charCount chars further on.
                this.charStart = charStart;
                this.chars = charStart;
                this.charEnd = charStart + charCount;

                // With no decoder a fresh fallback buffer is created from the encoding's
                // decoder fallback; otherwise the decoder's own buffer is used.
                this.fallbackBuffer = (this.decoder == null)
                    ? enc.DecoderFallback.CreateFallbackBuffer()
                    : this.decoder.FallbackBuffer;

                // GetChars / GetCharCount are not expected to carry fallback output over from a
                // previous call, so the buffer should be empty at this point.
                Contract.Assert(this.fallbackBuffer.Remaining == 0,
                    "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
                this.fallbackBuffer.InternalInitialize(this.bytes, this.charEnd);
            }
Example #32
0
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            // The internal caller has already validated the parameters; assert only.
            // chars is deliberately not asserted: the count-only path passes null.
            Contract.Assert(null != bytes, "[ISO2022Encoding.GetChars]bytes is null");
            Contract.Assert(0 <= byteCount, "[ISO2022Encoding.GetChars]byteCount is negative");
            Contract.Assert(0 <= charCount, "[ISO2022Encoding.GetChars]charCount is negative");

            // Narrow the decoder to the ISO-2022 specific type the helpers expect.
            ISO2022Decoder decoder = (ISO2022Decoder)baseDecoder;

            // Dispatch to the code-page specific worker and hand back its count.
            switch (CodePage)
            {
                case 50220:
                case 50221:
                case 50222:
                    return GetCharsCP5022xJP(bytes, byteCount, chars, charCount, decoder);

                case 50225:
                    return GetCharsCP50225KR(bytes, byteCount, chars, charCount, decoder);

                // There is no case for 50227: it is currently the same as 936, so no
                // dedicated worker is dispatched from here.

                case 52936:
                    return GetCharsCP52936(bytes, byteCount, chars, charCount, decoder);

                default:
                    // Unknown code page: assert, and report zero chars.
                    Contract.Assert(false, "[ISO2022Encoding.GetChars] had unexpected code page");
                    return 0;
            }
        }
Example #33
0
        /// <summary>
        /// Decodes ASCII bytes into chars. Bytes below 0x80 are copied as-is; bytes of 0x80
        /// or above are either replaced by a single replacement char (fast path) or handed
        /// to the decoder fallback buffer (slow path).
        /// </summary>
        /// <param name="bytes">Start of the input bytes (asserted non-null).</param>
        /// <param name="byteCount">Number of input bytes (asserted non-negative).</param>
        /// <param name="chars">Start of the output buffer (asserted non-null).</param>
        /// <param name="charCount">Capacity of the output buffer in chars (asserted non-negative).</param>
        /// <param name="decoder">
        /// Optional decoder. When non-null its fallback is used instead of this encoding's,
        /// and its <c>m_bytesUsed</c> is set to the number of input bytes consumed.
        /// </param>
        /// <returns>The number of chars written to <paramref name="chars"/>.</returns>
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS decoder)
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            Contract.Assert(bytes != null, "[ASCIIEncoding.GetChars]bytes is null");
            Contract.Assert(byteCount >= 0, "[ASCIIEncoding.GetChars]byteCount is negative");
            Contract.Assert(chars != null, "[ASCIIEncoding.GetChars]chars is null");
            Contract.Assert(charCount >= 0, "[ASCIIEncoding.GetChars]charCount is negative");

            // Remember where input and output started so the amounts used can be computed at the end.
            byte* byteEnd = bytes + byteCount;
            byte* byteStart = bytes;
            char* charStart = chars;

            // Note: ASCII doesn't do best fit, but we have to fallback if they use something > 0x7f
            // Only need decoder fallback buffer if not using ? fallback.
            // ASCII doesn't do best fit, so don't have to check for it, find out which decoder fallback we're using
            DecoderReplacementFallback fallback = null;

            if (decoder == null)
                fallback = this.DecoderFallback as DecoderReplacementFallback;
            else
            {
                fallback = decoder.Fallback as DecoderReplacementFallback;
                Contract.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
                    decoder.FallbackBuffer.Remaining == 0,
                    "[ASCIICodePageEncoding.GetChars]Expected empty fallback buffer");
            }

            // Fast path: a replacement fallback that yields exactly one char per invalid byte,
            // so output length always equals the number of bytes consumed.
            if (fallback != null && fallback.MaxCharCount == 1)
            {
                // Try it the fast way
                char replacementChar = fallback.DefaultString[0];

                // Need byteCount chars, otherwise too small buffer
                if (charCount < byteCount)
                {
                    // Need at least 1 output char, throw if must throw
                    ThrowCharsOverflow(decoder, charCount < 1);

                    // Not throwing, use what we can: only consume as many bytes as there are output chars
                    byteEnd = bytes + charCount;
                }

                // Quick loop, just do '?' replacement because we don't have fallbacks for decodings.
                while (bytes < byteEnd)
                {
                    byte b = *(bytes++);
                    if (b >= 0x80)
                        // This is an invalid byte in the ASCII encoding.
                        *(chars++) = replacementChar;
                    else
                        *(chars++) = unchecked((char)b);
                }

                // bytes & chars used are the same
                if (decoder != null)
                    decoder.m_bytesUsed = (int)(bytes - byteStart);
                return (int)(chars - charStart);
            }

            // Slower way's going to need a fallback buffer (created lazily on the first invalid byte)
            DecoderFallbackBuffer fallbackBuffer = null;
            byte[] byteBuffer = new byte[1];
            char*   charEnd = chars + charCount;

            // Not quite so fast loop
            while (bytes < byteEnd)
            {
                // Faster if don't use *bytes++;
                byte b = *(bytes);
                bytes++;

                if (b >= 0x80)
                {
                    // This is an invalid byte in the ASCII encoding.
                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                            fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = decoder.FallbackBuffer;
                        // byteEnd - byteCount is the original start of the input
                        fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
                    }

                    // Use fallback buffer
                    byteBuffer[0] = b;

                    // Note that chars won't get updated unless this succeeds
                    if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
                    {
                        // May or may not throw, but we didn't get this byte
                        Contract.Assert(bytes > byteStart || chars == charStart,
                            "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (fallback case)");
                        bytes--;                                            // unused byte
                        fallbackBuffer.InternalReset();                     // Didn't fall this back
                        ThrowCharsOverflow(decoder, chars == charStart);    // throw?
                        break;                                              // don't throw, but stop loop
                    }
                }
                else
                {
                    // Make sure we have buffer space
                    if (chars >= charEnd)
                    {
                        Contract.Assert(bytes > byteStart || chars == charStart,
                            "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (normal case)");
                        bytes--;                                            // unused byte
                        ThrowCharsOverflow(decoder, chars == charStart);    // throw?
                        break;                                              // don't throw, but stop loop
                    }

                    *(chars) = unchecked((char)b);
                    chars++;
                }
            }

            // Might have had decoder fallback stuff; report how many input bytes were consumed.
            if (decoder != null)
                decoder.m_bytesUsed = (int)(bytes - byteStart);

            // Expect Empty fallback buffer for GetChars
            Contract.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
                "[ASCIIEncoding.GetChars]Expected Empty fallback buffer");

            return (int)(chars - charStart);
        }
Example #34
0
        /// <summary>
        /// Returns the result of running <c>GetChars</c> over the input with no output buffer
        /// (null pointer, zero capacity), which is how this encoding counts chars.
        /// </summary>
        /// <param name="bytes">Start of the input bytes (asserted non-null).</param>
        /// <param name="count">Number of input bytes (asserted non-negative).</param>
        /// <param name="baseDecoder">Decoder state, forwarded unchanged to <c>GetChars</c>.</param>
        /// <returns>The value returned by <c>GetChars</c>.</returns>
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
        {
            Contract.Assert(count >=0, "[UTF7Encoding.GetCharCount]count >=0");
            Contract.Assert(bytes!=null, "[UTF7Encoding.GetCharCount]bytes!=null");

            // A null output pointer with zero capacity requests counting only.
            char* noOutput = null;
            int charsNeeded = GetChars(bytes, count, noOutput, 0, baseDecoder);
            return charsNeeded;
        }
Example #35
0
        /// <summary>
        /// Decodes Latin1 bytes by widening each byte to the char with the same value.
        /// </summary>
        /// <param name="bytes">Start of the input bytes (asserted non-null).</param>
        /// <param name="byteCount">Number of input bytes (asserted non-negative).</param>
        /// <param name="chars">Start of the output buffer (asserted non-null).</param>
        /// <param name="charCount">Capacity of the output buffer in chars (asserted non-negative).</param>
        /// <param name="decoder">
        /// Optional decoder; when non-null its <c>m_bytesUsed</c> receives the number of bytes converted.
        /// </param>
        /// <returns>The number of chars written, which equals the number of bytes converted.</returns>
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS decoder)
        {
            // Internal callers have validated the arguments already, so assertions suffice.
            Debug.Assert(bytes != null, "[Latin1Encoding.GetChars]bytes is null");
            Debug.Assert(byteCount >= 0, "[Latin1Encoding.GetChars]byteCount is negative");
            Debug.Assert(chars != null, "[Latin1Encoding.GetChars]chars is null");
            Debug.Assert(charCount >= 0, "[Latin1Encoding.GetChars]charCount is negative");

            // One output char is needed per input byte.
            int convertCount = byteCount;
            if (charCount < convertCount)
            {
                // Output is too small; this call decides whether that is an error.
                ThrowCharsOverflow(decoder, charCount < 1);

                // It did not throw, so convert only as much as fits.
                convertCount = charCount;
            }

            // Every byte maps straight to a char, so no fallback handling is required.
            for (int i = 0; i < convertCount; i++)
            {
                chars[i] = unchecked((char)bytes[i]);
            }

            // Report the consumed input to the decoder, if there is one.
            if (decoder != null)
            {
                decoder.m_bytesUsed = convertCount;
            }

            return convertCount;
        }
 /// <summary>
 /// Default decoder-aware overload: ignores <paramref name="decoder"/> and forwards to
 /// the two-argument <c>GetCharCount</c>.
 /// </summary>
 /// <param name="bytes">Start of the input bytes.</param>
 /// <param name="count">Number of input bytes.</param>
 /// <param name="decoder">Not used by this implementation.</param>
 /// <returns>The value returned by <c>GetCharCount(bytes, count)</c>.</returns>
 internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
 {
     int charCount = GetCharCount(bytes, count);
     return charCount;
 }
Example #37
0
 /// <summary>
 /// Returns the result of calling <c>GetChars</c> with a null output buffer and zero capacity.
 /// </summary>
 /// <param name="bytes">Start of the input bytes.</param>
 /// <param name="count">Number of input bytes.</param>
 /// <param name="baseDecoder">Decoder state, forwarded unchanged to <c>GetChars</c>.</param>
 /// <returns>The value returned by <c>GetChars</c>.</returns>
 internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS baseDecoder)
 {
     // No output buffer: null pointer, zero capacity.
     char *noOutput = null;
     return GetChars(bytes, count, noOutput, 0, baseDecoder);
 }
 /// <summary>
 /// Handles an output-buffer overflow during decoding. Escalates to the parameterless
 /// <c>ThrowCharsOverflow()</c> when there is no decoder, when the decoder has
 /// <c>m_throwOnOverflow</c> set, or when nothing was decoded; otherwise only clears the
 /// decoder's must-flush state.
 /// </summary>
 /// <param name="decoder">Decoder in use, or null.</param>
 /// <param name="nothingDecoded">True when no output was produced at all.</param>
 internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
 {
     bool escalate = decoder == null || decoder.m_throwOnOverflow || nothingDecoded;

     if (escalate)
     {
         // Reset any fallback buffer the decoder already has before escalating.
         bool hasFallbackBuffer = decoder != null && decoder.InternalHasFallbackBuffer;
         if (hasFallbackBuffer)
         {
             decoder.FallbackBuffer.InternalReset();
         }

         // Presumably always throws - its body is not visible here.
         this.ThrowCharsOverflow();
     }

     // Reached on the non-escalating path, where decoder is known to be non-null.
     decoder.ClearMustFlush();
 }
 /// <summary>
 /// Sets up a buffer pairing an input byte range with an output char range, and picks
 /// and initializes the decoder fallback buffer to use.
 /// </summary>
 /// <param name="enc">Encoding whose <c>DecoderFallback</c> is used when no decoder is given.</param>
 /// <param name="decoder">Optional decoder; when non-null its <c>FallbackBuffer</c> is used.</param>
 /// <param name="charStart">Start of the output chars.</param>
 /// <param name="charCount">Number of output chars available after <paramref name="charStart"/>.</param>
 /// <param name="byteStart">Start of the input bytes.</param>
 /// <param name="byteCount">Number of input bytes available after <paramref name="byteStart"/>.</param>
 internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount, byte* byteStart, int byteCount)
 {
     this.enc = enc;
     this.decoder = decoder;

     // Output range: current position, start, and one-past-the-end.
     this.chars = charStart;
     this.charStart = charStart;
     this.charEnd = charStart + charCount;

     // Input range: start, current position, and one-past-the-end.
     this.byteStart = byteStart;
     this.bytes = byteStart;
     this.byteEnd = byteStart + byteCount;

     // Prefer the decoder's fallback buffer; otherwise create one from the encoding.
     this.fallbackBuffer = (decoder != null)
         ? decoder.FallbackBuffer
         : enc.DecoderFallback.CreateFallbackBuffer();

     this.fallbackBuffer.InternalInitialize(byteStart, this.charEnd);
 }
Example #40
0
        /// <summary>
        /// Counts the chars that decoding <paramref name="count"/> bytes would produce for a
        /// double-byte code page, including a byte left over in the decoder from a previous
        /// call and any chars produced by the decoder fallback for unmapped byte sequences.
        /// </summary>
        /// <param name="bytes">Start of the input bytes.</param>
        /// <param name="count">Number of input bytes.</param>
        /// <param name="baseDecoder">Optional decoder state; cast to <c>DBCSDecoder</c>.</param>
        /// <returns>The computed char count.</returns>
        /// <remarks>
        /// NOTE(review): the local names (num, numPtr, num3, ch2) look like decompiler output;
        /// the comments below describe them by how they are used.
        /// </remarks>
        internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS baseDecoder)
        {
            base.CheckMemorySection();
            DBCSDecoder           decoder        = (DBCSDecoder)baseDecoder;
            DecoderFallbackBuffer fallbackBuffer = null;
            // numPtr is one past the last input byte; num is the running char count,
            // which starts at one char per input byte and is adjusted below.
            byte *numPtr = bytes + count;
            int   num    = count;

            // A byte left over in the decoder is combined with the first byte of this input.
            if ((decoder != null) && (decoder.bLeftOver > 0))
            {
                if (count == 0)
                {
                    // No new input: nothing to count unless the decoder has to flush.
                    if (!decoder.MustFlush)
                    {
                        return(0);
                    }
                    // Flushing with only the left-over byte: the count is whatever the fallback yields for it.
                    fallbackBuffer = decoder.FallbackBuffer;
                    fallbackBuffer.InternalInitialize(bytes, null);
                    byte[] buffer2 = new byte[] { decoder.bLeftOver };
                    return(fallbackBuffer.InternalFallback(buffer2, bytes));
                }
                // Build the two-byte table index: left-over byte high, first input byte low.
                int index = decoder.bLeftOver << 8;
                index |= bytes[0];
                bytes++;
                // A '\0' entry for a non-zero index is treated as unmapped.
                if ((this.mapBytesToUnicode[index] == '\0') && (index != 0))
                {
                    // Drop the char already counted for this byte and add what the fallback yields instead.
                    num--;
                    fallbackBuffer = decoder.FallbackBuffer;
                    // numPtr - count is the original start of the input
                    fallbackBuffer.InternalInitialize(numPtr - count, null);
                    byte[] buffer3 = new byte[] { (byte)(index >> 8), (byte)index };
                    num += fallbackBuffer.InternalFallback(buffer3, bytes);
                }
            }
            while (bytes < numPtr)
            {
                int num3 = bytes[0];
                bytes++;
                char ch2 = this.mapBytesToUnicode[num3];
                // A table entry of 0xfffe means this byte must be combined with the following byte.
                if (ch2 == 0xfffe)
                {
                    // Two input bytes make one lookup, so take back one of the pre-counted chars.
                    num--;
                    if (bytes < numPtr)
                    {
                        // Combine with the next byte and look up the two-byte index.
                        num3  = num3 << 8;
                        num3 |= bytes[0];
                        bytes++;
                        ch2 = this.mapBytesToUnicode[num3];
                    }
                    else
                    {
                        // Input ended after the first byte of a pair.
                        if ((decoder != null) && !decoder.MustFlush)
                        {
                            // Not flushing: return the count without this trailing byte.
                            return(num);
                        }
                        // Flushing or no decoder: re-count the byte and force the unmapped path below.
                        num++;
                        ch2 = '\0';
                    }
                }
                // A '\0' result for non-zero input bytes is treated as unmapped and goes through the fallback.
                if ((ch2 == '\0') && (num3 != 0))
                {
                    // Fallback buffer is obtained lazily, the first time it is needed.
                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                        {
                            fallbackBuffer = base.DecoderFallback.CreateFallbackBuffer();
                        }
                        else
                        {
                            fallbackBuffer = decoder.FallbackBuffer;
                        }
                        // numPtr - count is the original start of the input
                        fallbackBuffer.InternalInitialize(numPtr - count, null);
                    }
                    // Replace the pre-counted char with however many chars the fallback yields.
                    num--;
                    byte[] buffer4 = null;
                    if (num3 < 0x100)
                    {
                        // Single unmapped byte
                        buffer4 = new byte[] { (byte)num3 };
                    }
                    else
                    {
                        // Unmapped two-byte sequence, high byte first
                        buffer4 = new byte[] { (byte)(num3 >> 8), (byte)num3 };
                    }
                    num += fallbackBuffer.InternalFallback(buffer4, bytes);
                }
            }
            return(num);
        }
 /// <summary>
 /// Default decoder-aware overload: ignores <paramref name="decoder"/> and forwards to
 /// the four-argument <c>GetChars</c>.
 /// </summary>
 /// <param name="bytes">Start of the input bytes.</param>
 /// <param name="byteCount">Number of input bytes.</param>
 /// <param name="chars">Start of the output buffer.</param>
 /// <param name="charCount">Capacity of the output buffer.</param>
 /// <param name="decoder">Not used by this implementation.</param>
 /// <returns>The value returned by <c>GetChars(bytes, byteCount, chars, charCount)</c>.</returns>
 internal virtual unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
 {
     int charsDecoded = GetChars(bytes, byteCount, chars, charCount);
     return charsDecoded;
 }
Example #42
0
        /// <summary>
        /// Counts the chars that decoding <paramref name="count"/> bytes would produce for a
        /// single-byte code page. Each byte counts as one char unless it maps to
        /// <c>UNKNOWN_CHAR</c>, in which case the decoder fallback decides how many chars it yields.
        /// </summary>
        /// <param name="bytes">Start of the input bytes (asserted non-null).</param>
        /// <param name="count">Number of input bytes (asserted non-negative).</param>
        /// <param name="decoder">Optional decoder; when non-null its fallback is used instead of the encoding's.</param>
        /// <returns>The computed char count.</returns>
        public override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS decoder)
        {
            // Called internally with pre-validated arguments, so assertions suffice.
            Debug.Assert(bytes != null, "[SBCSCodePageEncoding.GetCharCount]bytes is null");
            Debug.Assert(count >= 0, "[SBCSCodePageEncoding.GetCharCount]byteCount is negative");

            CheckMemorySection();

            // Work out which fallback applies: best fit, single-char replacement, or something else.
            DecoderReplacementFallback replacement;
            bool bestFit;

            if (decoder != null)
            {
                replacement = decoder.Fallback as DecoderReplacementFallback;
                bestFit     = decoder.Fallback is InternalDecoderBestFitFallback;
                Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
                             decoder.FallbackBuffer.Remaining == 0,
                             "[SBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start");
            }
            else
            {
                replacement = DecoderFallback as DecoderReplacementFallback;
                bestFit     = DecoderFallback is InternalDecoderBestFitFallback;
            }

            // With best fit or a one-char replacement, every byte yields exactly one char.
            if (bestFit || (replacement != null && replacement.MaxCharCount == 1))
            {
                return count;
            }

            // Otherwise walk the input. The fallback buffer is only fetched if an unknown byte shows up.
            DecoderFallbackBuffer       fallbackBuffer = null;
            DecoderFallbackBufferHelper fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);

            // Start from one char per byte and correct for each fallback.
            int total = count;
            byte[] unknownByte = new byte[1];
            byte *inputStart = bytes;
            byte *inputEnd   = bytes + count;

            while (bytes < inputEnd)
            {
                byte current = *bytes;
                bytes++;

                if (_mapBytesToUnicode[current] != UNKNOWN_CHAR)
                {
                    // Mapped byte: already counted.
                    continue;
                }

                if (fallbackBuffer == null)
                {
                    if (decoder != null)
                    {
                        fallbackBuffer = decoder.FallbackBuffer;
                    }
                    else
                    {
                        fallbackBuffer = DecoderFallback.CreateFallbackBuffer();
                    }

                    fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);

                    // Initialize the helper with the original start of the input.
                    fallbackHelper.InternalInitialize(inputStart, null);
                }

                // Swap the char reserved for this byte for whatever the fallback yields.
                unknownByte[0] = current;
                total--;
                total += fallbackHelper.InternalFallback(unknownByte, bytes);
            }

            // Fallback buffer must be empty
            Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
                         "[SBCSEncoding.GetCharCount]Expected Empty fallback buffer at end");

            return total;
        }
Example #43
0
 /// <summary>
 /// Decoder-aware char counting entry point that derived types must implement.
 /// </summary>
 /// <param name="bytes">Start of the input bytes.</param>
 /// <param name="count">Number of input bytes.</param>
 /// <param name="decoder">Decoder supplied by the caller.</param>
 /// <returns>A char count, as defined by the implementing type.</returns>
 public abstract unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder);
Example #44
0
        /// <summary>
        /// Counts the chars that decoding <paramref name="count"/> ASCII bytes would produce.
        /// Each byte counts as one char unless it is 0x80 or above, in which case the decoder
        /// fallback decides how many chars it yields.
        /// </summary>
        /// <param name="bytes">Start of the input bytes (asserted non-null).</param>
        /// <param name="count">Number of input bytes (asserted non-negative).</param>
        /// <param name="decoder">Optional decoder; when non-null its fallback is used instead of the encoding's.</param>
        /// <returns>The computed char count.</returns>
        internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS decoder)
        {
            // Called internally with pre-validated arguments, so assertions suffice.
            Debug.Assert(bytes != null, "[ASCIIEncoding.GetCharCount]bytes is null");
            Debug.Assert(count >= 0, "[ASCIIEncoding.GetCharCount]byteCount is negative");

            // ASCII has no best fit, so only the replacement fallback needs to be detected.
            DecoderReplacementFallback replacement;

            if (decoder != null)
            {
                replacement = decoder.Fallback as DecoderReplacementFallback;
                Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
                             decoder.FallbackBuffer.Remaining == 0,
                             "[ASCIICodePageEncoding.GetCharCount]Expected empty fallback buffer");
            }
            else
            {
                replacement = this.DecoderFallback as DecoderReplacementFallback;
            }

            // A one-char replacement means every byte yields exactly one char.
            if (replacement != null && replacement.MaxCharCount == 1)
            {
                return count;
            }

            // Otherwise walk the input. The fallback buffer is only fetched if an invalid byte shows up.
            DecoderFallbackBuffer fallbackBuffer = null;

            // Start from one char per byte and correct for each fallback.
            int total = count;
            byte[] invalidByte = new byte[1];
            byte *inputStart = bytes;
            byte *inputEnd   = bytes + count;

            while (bytes < inputEnd)
            {
                byte current = *bytes;
                bytes++;

                if (current < 0x80)
                {
                    // Valid ASCII: already counted.
                    continue;
                }

                if (fallbackBuffer == null)
                {
                    if (decoder != null)
                    {
                        fallbackBuffer = decoder.FallbackBuffer;
                    }
                    else
                    {
                        fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
                    }

                    // Initialize the buffer with the original start of the input.
                    fallbackBuffer.InternalInitialize(inputStart, null);
                }

                // Swap the char reserved for this byte for whatever the fallback yields.
                invalidByte[0] = current;
                total--;
                total += fallbackBuffer.InternalFallback(invalidByte, bytes);
            }

            // Fallback buffer must be empty
            Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
                         "[ASCIIEncoding.GetCharCount]Expected Empty fallback buffer");

            return total;
        }
Example #45
0
        /// <summary>
        /// Decodes UTF-7 bytes into chars, switching between directly encoded bytes and
        /// modified base 64 sections, and carrying partial state (accumulated bits, bit count,
        /// first-byte flag) in the decoder between calls.
        /// </summary>
        /// <param name="bytes">Start of the input bytes (asserted non-null).</param>
        /// <param name="byteCount">Number of input bytes (asserted non-negative).</param>
        /// <param name="chars">
        /// Start of the output buffer. When null, decoder state is not written back
        /// (see the final block), which is how the counting path uses this method.
        /// </param>
        /// <param name="charCount">Capacity of the output buffer (asserted non-negative).</param>
        /// <param name="baseDecoder">Optional decoder state; cast to <c>UTF7Encoding.Decoder</c>.</param>
        /// <returns>The buffer's <c>Count</c> after processing.</returns>
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            Contract.Assert(byteCount >=0, "[UTF7Encoding.GetChars]byteCount >=0");
            Contract.Assert(bytes!=null, "[UTF7Encoding.GetChars]bytes!=null");
            Contract.Assert(charCount >=0, "[UTF7Encoding.GetChars]charCount >=0");

            // Might use a decoder
            UTF7Encoding.Decoder decoder = (UTF7Encoding.Decoder) baseDecoder;

            // Get our output buffer info.
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
                this, decoder, chars, charCount, bytes, byteCount);

            // Get decoder info.
            // bits accumulates base 64 payload; bitCount is the number of pending bits,
            // with a negative value meaning "not inside a base 64 section";
            // firstByte is true right after the '+' that opens a section.
            int bits = 0;
            int bitCount = -1;
            bool firstByte = false;
            if (decoder != null)
            {
                bits = decoder.bits;
                bitCount = decoder.bitCount;
                firstByte = decoder.firstByte;

                Contract.Assert(firstByte == false || decoder.bitCount <= 0,
                    "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
            }

            // We may have had bits in the decoder that we couldn't output last time, so do so now
            if (bitCount >= 16)
            {
                // Check our decoder buffer
                if (!buffer.AddChar((char)((bits >> (bitCount - 16)) & 0xFFFF)))
                    ThrowCharsOverflow(decoder, true);  // Always throw, they need at least 1 char even in Convert

                // Used this one, clean up extra bits
                bitCount -= 16;
            }

            // Loop through the input
            while (buffer.MoreData)
            {
                byte currentByte = buffer.GetNextByte();
                int c;

                if (bitCount >= 0)
                {
                    //
                    // Modified base 64 encoding.
                    //
                    sbyte v;
                    if (currentByte < 0x80 && ((v = base64Values[currentByte]) >=0))
                    {
                        // Valid base 64 digit: shift its 6 bits into the accumulator.
                        firstByte = false;
                        bits = (bits << 6) | ((byte)v);
                        bitCount += 6;
                        if (bitCount >= 16)
                        {
                            // A full 16-bit code unit is available; extract it from the top of the pending bits.
                            c = (bits >> (bitCount - 16)) & 0xFFFF;
                            bitCount -= 16;
                        }
                        // If not enough bits just continue
                        else continue;
                    }
                    else
                    {
                        // If it wasn't a base 64 byte, everything's going to turn off base 64 mode
                        bitCount = -1;

                        if (currentByte != '-')
                        {
                            // Either >= 0x80 or not in the base 64 table (the first if statement failed).
                            // We need this check since the base64Values[b] check above needs b <= 0x7f.
                            // This is not a valid base 64 byte.  Terminate the shifted-sequence and
                            // emit this byte.

                            // not in base 64 table
                            // According to the RFC 1642 and the example code of UTF-7
                            // in Unicode 2.0, we should just zero-extend the invalid UTF7 byte

                            // Chars won't be updated unless this works, try to fallback
                            if (!buffer.Fallback(currentByte))
                                break;                                          // Stop here, didn't throw

                            // Used that byte, we're done with it
                            continue;
                        }

                        //
                        // The encoding for '+' is "+-".
                        //
                        if (firstByte) c = '+';
                        // We just turn it off if not emitting a +, so we're done.
                        else continue;
                    }
                    //
                    // End of modified base 64 encoding block.
                    //
                }
                else if (currentByte == '+')
                {
                    //
                    // Found the start of a modified base 64 encoding block or a plus sign.
                    //
                    bitCount = 0;
                    firstByte = true;
                    continue;
                }
                else
                {
                    // Normal character
                    if (currentByte >= 0x80)
                    {
                        // Try to fallback
                        if (!buffer.Fallback(currentByte))
                            break;                                          // Stop here, didn't throw

                        // Done falling back
                        continue;
                    }

                    // Use the normal character
                    c = currentByte;
                }

                if (c >= 0)
                {
                    // Check our buffer
                    if (!buffer.AddChar((char)c))
                    {
                        // No room.  If it was a plain char we'll try again later.
                        // Note, we'll consume this byte and stick it in decoder, even if we can't output it
                        if (bitCount >= 0)                                  // Can we remember this byte (char)
                        {
                            buffer.AdjustBytes(+1);                         // Need to readd the byte that AddChar subtracted when it failed
                            bitCount += 16;                                 // We'll still need that char we have in our bits
                        }
                        break;                                              // didn't throw, stop
                    }
                }
            }

            // Stick stuff in the decoder if we can (chars == null if counting, so don't store decoder)
            if (chars != null && decoder != null)
            {
                // MustFlush?  (Could've been cleared by ThrowCharsOverflow if Convert & didn't reach end of buffer)
                if (decoder.MustFlush)
                {
                    // RFC doesn't specify what would happen if we have non-0 leftover bits, we just drop them
                    decoder.bits = 0;
                    decoder.bitCount = -1;
                    decoder.firstByte = false;
                }
                else
                {
                    // Not flushing: remember the partial state for the next call.
                    decoder.bits = bits;
                    decoder.bitCount = bitCount;
                    decoder.firstByte = firstByte;
                }
                decoder.m_bytesUsed = buffer.BytesUsed;
            }
            // else ignore any hanging bits.

            // Return our count
            return buffer.Count;
        }
Example #46
0
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
        {
            Contract.Assert(count >=0, "[UTF8Encoding.GetCharCount]count >=0");
            Contract.Assert(bytes!=null, "[UTF8Encoding.GetCharCount]bytes!=null");

            // Initialize stuff
            byte *pSrc = bytes;
            byte *pEnd = pSrc+count;

            // Start by assuming we have as many as count, charCount always includes the adjustment
            // for the character being decoded
            int charCount = count;
            int ch = 0;
            DecoderFallbackBuffer fallback = null;

            if (baseDecoder != null) {
                UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
                ch = decoder.bits;
                charCount -= (ch >> 30);        // Adjust char count for # of expected bytes and expected output chars.

                // Shouldn't have anything in fallback buffer for GetCharCount
                // (don't have to check m_throwOnOverflow for count)
                Contract.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
                    "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at start");
            }

            for (;;)
            {
                // SLOWLOOP: does all range checks, handles all special cases, but it is slow

                if (pSrc >= pEnd) {
                    break;
                }

                if (ch == 0) {
                    // no pending bits
                    goto ReadChar;
                }

                // read next byte. The JIT optimization seems to be getting confused when
                // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
                int cha = *pSrc;
                pSrc++;

                // we are expecting to see trailing bytes like 10vvvvvv
                if ((cha & unchecked((sbyte)0xC0)) != 0x80) {
                    // This can be a valid starting byte for another UTF8 byte sequence, so let's put
                    // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
                    pSrc--;
                    charCount += (ch >> 30);
                    goto InvalidByteSequence;
                }

                // fold in the new byte
                ch = (ch << 6) | (cha & 0x3F);

                if ((ch & FinalByte) == 0) {
                    Contract.Assert( (ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
                        "[UTF8Encoding.GetChars]Invariant volation");

                    if ((ch & SupplimentarySeq) != 0) {
                        if ((ch & (FinalByte >> 6)) != 0) {
                            // this is 3rd byte (of 4 byte supplimentary) - nothing to do
                            continue;
                        }

                        // 2nd byte, check for non-shortest form of supplimentary char and the valid
                        // supplimentary characters in range 0x010000 - 0x10FFFF at the same time
                        if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
                            goto InvalidByteSequence;
                        }
                    }
                    else {
                        // Must be 2nd byte of a 3-byte sequence
                        // check for non-shortest form of 3 byte seq
                        if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
                            (ch & (0xF800 >> 6) ) == (0xD800 >> 6))     // illegal individually encoded surrogate
                        {
                            goto InvalidByteSequence;
                        }
                    }
                    continue;
                }

                // ready to punch

                // adjust for surrogates in non-shortest form
                if ((ch & (SupplimentarySeq | 0x1F0000)) == SupplimentarySeq) {
                    charCount--;
                }
                goto EncodeChar;

            InvalidByteSequence:
                // this code fragment should be close to the gotos referencing it
                // Have to do fallback for invalid bytes
                if (fallback == null)
                {
                    if (baseDecoder == null)
                        fallback = this.decoderFallback.CreateFallbackBuffer();
                    else
                        fallback = baseDecoder.FallbackBuffer;
                    fallback.InternalInitialize(bytes, null);
                }
                charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);

                ch = 0;
                continue;

            ReadChar:
                ch = *pSrc;
                pSrc++;

            ProcessChar:
                if (ch > 0x7F) {
                    // If its > 0x7F, its start of a new multi-byte sequence

                    // Long sequence, so unreserve our char.
                    charCount--;

                    // bit 6 has to be non-zero for start of multibyte chars.
                    if ((ch & 0x40) == 0) {
                        // Unexpected trail byte
                        goto InvalidByteSequence;
                    }

                    // start a new long code
                    if ((ch & 0x20) != 0) {
                        if ((ch & 0x10) != 0) {
                            // 4 byte encoding - supplimentary character (2 surrogates)

                            ch &= 0x0F;

                            // check that bit 4 is zero and the valid supplimentary character
                            // range 0x000000 - 0x10FFFF at the same time
                            if (ch > 0x04) {
                                ch |= 0xf0;
                                goto InvalidByteSequence;
                            }

                            // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
                            // Final byte flag, count fix if we don't make final byte & supplimentary sequence flag.
                            ch |= (FinalByte >> 3*6) |  // Final byte is 3 more bytes from now
                                  (1 << 30) |           // If it dies on next byte we'll need an extra char
                                  (3 << (30-2*6)) |     // If it dies on last byte we'll need to subtract a char
                                (SupplimentarySeq) | (SupplimentarySeq >> 6) |
                                (SupplimentarySeq >> 2*6) | (SupplimentarySeq >> 3*6);

                            // Our character count will be 2 characters for these 4 bytes, so subtract another char
                            charCount--;
                        }
                        else {
                            // 3 byte encoding
                            // Add bit flags so that when we check new characters & rotate we'll be flagged correctly.
                            ch = (ch & 0x0F) | ( (FinalByte >> 2*6) | (1 << 30) |
                                (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2*6) );

                            // We'll expect 1 character for these 3 bytes, so subtract another char.
                            charCount--;
                        }
                    }
                    else {
                        // 2 byte encoding

                        ch &= 0x1F;

                        // check for non-shortest form
                        if (ch <= 1) {
                            ch |= 0xc0;
                            goto InvalidByteSequence;
                        }

                        // Add bit flags so we'll be flagged correctly
                        ch |= (FinalByte >> 6);
                    }
                    continue;
                }

            EncodeChar:

#if FASTLOOP
                int availableBytes = PtrDiff(pEnd, pSrc);

                // don't fall into the fast decoding loop if we don't have enough bytes
                if (availableBytes <= 13) {
                    // try to get over the remainder of the ascii characters fast though
                    byte* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
                    while (pSrc < pLocalEnd) {
                        ch = *pSrc;
                        pSrc++;

                        if (ch > 0x7F)
                            goto ProcessChar;
                    }
                    // we are done
                    ch = 0;
                    break;
                }

                // To compute the upper bound, assume that all characters are ASCII characters at this point,
                //  the boundary will be decreased for every non-ASCII character we encounter
                // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
                byte *pStop = pSrc + availableBytes - 7;

                while (pSrc < pStop) {
                    ch = *pSrc;
                    pSrc++;

                    if (ch > 0x7F) {
                        goto LongCode;
                    }

                    // get pSrc 2-byte aligned
                    if ((unchecked((int)pSrc) & 0x1) != 0) {
                        ch = *pSrc;
                        pSrc++;
                        if (ch > 0x7F) {
                            goto LongCode;
                        }
                    }

                    // get pSrc 4-byte aligned
                    if ((unchecked((int)pSrc) & 0x2) != 0) {
                        ch = *(ushort*)pSrc;
                        if ((ch & 0x8080) != 0) {
                            goto LongCodeWithMask16;
                        }
                        pSrc += 2;
                    }

                    // Run 8 + 8 characters at a time!
                    while (pSrc < pStop) {
                        ch = *(int*)pSrc;
                        int chb = *(int*)(pSrc+4);
                        if (((ch | chb) & unchecked((int)0x80808080)) != 0) {
                            goto LongCodeWithMask32;
                        }
                        pSrc += 8;

                        // This is a really small loop - unroll it
                        if (pSrc >= pStop)
                            break;

                        ch = *(int*)pSrc;
                        chb = *(int*)(pSrc+4);
                        if (((ch | chb) & unchecked((int)0x80808080)) != 0) {
                            goto LongCodeWithMask32;
                        }
                        pSrc += 8;
                    }
                    break;

#if BIGENDIAN
                LongCodeWithMask32:
                    // be careful about the sign extension
                    ch = (int)(((uint)ch) >> 16);
                LongCodeWithMask16:
                    ch = (int)(((uint)ch) >> 8);
#else // BIGENDIAN
                LongCodeWithMask32:
                LongCodeWithMask16:
                    ch &= 0xFF;
#endif // BIGENDIAN
                    pSrc++;
                    if (ch <= 0x7F) {
                        continue;
                    }

                LongCode:
                    int chc = *pSrc;
                    pSrc++;

                    if (
                        // bit 6 has to be zero
                        (ch & 0x40) == 0 ||
                        // we are expecting to see trailing bytes like 10vvvvvv
                        (chc & unchecked((sbyte)0xC0)) != 0x80)
                    {
                        goto BadLongCode;
                    }

                    chc &= 0x3F;

                    // start a new long code
                    if ((ch & 0x20) != 0) {

                        // fold the first two bytes together
                        chc |= (ch & 0x0F) << 6;

                        if ((ch & 0x10) != 0) {
                            // 4 byte encoding - surrogate
                            ch = *pSrc;
                            if (
                                // check that bit 4 is zero, the non-shortest form of surrogate
                                // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
                                !InRange(chc >> 4, 0x01, 0x10) ||
                                // we are expecting to see trailing bytes like 10vvvvvv
                                (ch & unchecked((sbyte)0xC0)) != 0x80 )
                            {
                                goto BadLongCode;
                            }

                            chc = (chc << 6) | (ch & 0x3F);

                            ch = *(pSrc+1);
                            // we are expecting to see trailing bytes like 10vvvvvv
                            if ((ch & unchecked((sbyte)0xC0)) != 0x80) {
                                goto BadLongCode;
                            }
                            pSrc += 2;

                            // extra byte
                            charCount--;
                        }
                        else {
                            // 3 byte encoding
                            ch = *pSrc;
                            if (
                                // check for non-shortest form of 3 byte seq
                                (chc & (0x1F << 5)) == 0 ||
                                // Can't have surrogates here.
                                (chc & (0xF800 >> 6) ) == (0xD800 >> 6) ||
                                // we are expecting to see trailing bytes like 10vvvvvv
                                (ch & unchecked((sbyte)0xC0)) != 0x80 )
                            {
                                goto BadLongCode;
                            }
                            pSrc++;

                            // extra byte
                            charCount--;
                        }
                    }
                    else {
                        // 2 byte encoding

                        // check for non-shortest form
                        if ((ch & 0x1E) == 0) {
                            goto BadLongCode;
                        }
                    }

                    // extra byte
                    charCount--;
                }
#endif // FASTLOOP

                // no pending bits at this point
                ch = 0;
                continue;

            BadLongCode:
                pSrc -= 2;
                ch = 0;
                continue;
            }

            // May have a problem if we have to flush
            if (ch != 0)
            {
                // We were already adjusting for these, so need to unadjust
                charCount += (ch >> 30);
                if (baseDecoder == null || baseDecoder.MustFlush)
                {
                    // Have to do fallback for invalid bytes
                    if (fallback == null)
                    {
                        if (baseDecoder == null)
                            fallback = this.decoderFallback.CreateFallbackBuffer();
                        else
                            fallback = baseDecoder.FallbackBuffer;
                        fallback.InternalInitialize(bytes, null);
                    }
                    charCount += FallbackInvalidByteSequence(pSrc, ch, fallback);
                }
            }

            // Shouldn't have anything in fallback buffer for GetCharCount
            // (don't have to check m_throwOnOverflow for count)
            Contract.Assert(fallback == null || fallback.Remaining == 0,
                "[UTF8Encoding.GetCharCount]Expected empty fallback buffer at end");

            return charCount;
        }
Example #47
0
        /// <summary>
        /// Decodes UTF-7 input into chars, resuming from (and, when real output was
        /// requested, saving back to) the shift state held by an optional decoder.
        /// </summary>
        /// <param name="bytes">First input byte; asserted non-null.</param>
        /// <param name="byteCount">Number of input bytes; asserted non-negative.</param>
        /// <param name="chars">First output char. When null, no decoder state is written back.</param>
        /// <param name="charCount">Output capacity in chars; asserted non-negative.</param>
        /// <param name="baseDecoder">Optional decoder (cast to <c>UTF7Encoding.Decoder</c>); may be null.</param>
        /// <returns>The char buffer's <c>Count</c> at the point decoding stopped.</returns>
        internal override unsafe int GetChars(byte *bytes, int byteCount,
                                              char *chars, int charCount, DecoderNLS baseDecoder)
        {
            Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
            Debug.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
            Debug.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");

            // A stateful decoder is optional
            UTF7Encoding.Decoder decoder = (UTF7Encoding.Decoder)baseDecoder;

            // Wraps both the input bytes and the output chars
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
                this, decoder, chars, charCount, bytes, byteCount);

            // Shift state: accumBits < 0 means "not inside a modified base 64 run"
            int  accum      = 0;
            int  accumBits  = -1;
            bool atRunStart = false;

            if (decoder != null)
            {
                accum      = decoder.bits;
                accumBits  = decoder.bitCount;
                atRunStart = decoder.firstByte;

                Debug.Assert(!atRunStart || decoder.bitCount <= 0,
                             "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
            }

            // A complete char may still be sitting in the remembered bits from a previous call
            if (accumBits >= 16)
            {
                char carried = (char)((accum >> (accumBits - 16)) & 0xFFFF);
                if (!buffer.AddChar(carried))
                {
                    // Always throw, they need at least 1 char even in Convert
                    ThrowCharsOverflow(decoder, true);
                }
                accumBits -= 16;
            }

            while (buffer.MoreData)
            {
                byte b = buffer.GetNextByte();
                int  decoded;

                if (accumBits < 0)
                {
                    // Outside a base 64 run: '+' opens a run, everything else is taken directly
                    if (b == '+')
                    {
                        accumBits  = 0;
                        atRunStart = true;
                        continue;
                    }

                    if (b >= 0x80)
                    {
                        // Not a plain 7-bit byte, let the fallback deal with it
                        if (buffer.Fallback(b))
                        {
                            continue;
                        }
                        break;                                              // Fallback refused without throwing
                    }

                    decoded = b;
                }
                else
                {
                    // Inside a modified base 64 run
                    sbyte sextet;
                    if (b < 0x80 && ((sextet = _base64Values[b]) >= 0))
                    {
                        atRunStart = false;
                        accum      = (accum << 6) | ((byte)sextet);
                        accumBits += 6;

                        if (accumBits < 16)
                        {
                            continue;                                       // Not a whole char yet
                        }

                        // Peel the oldest 16 bits off as one char
                        accumBits -= 16;
                        decoded    = (accum >> accumBits) & 0xFFFF;
                    }
                    else
                    {
                        // Any byte that is not a base 64 digit ends the run
                        accumBits = -1;

                        if (b != '-')
                        {
                            // Neither a base 64 digit nor the '-' terminator: hand it to the fallback
                            if (buffer.Fallback(b))
                            {
                                continue;
                            }
                            break;                                          // Fallback refused without throwing
                        }

                        if (!atRunStart)
                        {
                            // A '-' after real base 64 data only closes the run
                            continue;
                        }

                        // "+-" is the encoding of a literal '+'
                        decoded = '+';
                    }
                }

                // Every path that reaches here produced a non-negative value
                // (a masked 16-bit quantity, '+', or a byte below 0x80).
                if (buffer.AddChar((char)decoded))
                {
                    continue;
                }

                // Output is full.  A char pulled out of the bit accumulator can be kept for
                // the next call; per the AdjustBytes call, the byte is counted as consumed.
                if (accumBits >= 0)
                {
                    buffer.AdjustBytes(+1);                                 // Re-add the byte the failed AddChar subtracted
                    accumBits += 16;                                        // Those 16 bits are still owed
                }
                break;                                                      // Stop without throwing
            }

            // Only persist state when producing real output with a decoder (chars == null when counting)
            if (chars != null && decoder != null)
            {
                // MustFlush may have been cleared by ThrowCharsOverflow during Convert
                bool flushing = decoder.MustFlush;

                // When flushing, leftover bits are simply dropped and the decoder is reset
                decoder.bits      = flushing ? 0 : accum;
                decoder.bitCount  = flushing ? -1 : accumBits;
                decoder.firstByte = !flushing && atRunStart;

                decoder._bytesUsed = buffer.BytesUsed;
            }
            // Otherwise any dangling bits are ignored

            return buffer.Count;
        }
Example #48
0
        /// <summary>
        /// Decodes ISCII bytes into chars through the <c>IndicMapping</c> tables, carrying
        /// multi-byte state (code page, and whether the previous byte was an ATR, a virama,
        /// a stress/abbreviation lead, or a char awaiting a possible nukta) across calls
        /// through the optional decoder.
        /// </summary>
        /// <param name="bytes">First input byte.</param>
        /// <param name="byteCount">Number of input bytes.</param>
        /// <param name="chars">First output char. When null, decoder state is not written back.</param>
        /// <param name="charCount">Output capacity in chars.</param>
        /// <param name="baseDecoder">Optional decoder (cast to <c>ISCIIEncoding.ISCIIDecoder</c>); may be null.</param>
        /// <returns>The char buffer's <c>Count</c> at the point decoding stopped.</returns>
        internal override unsafe int GetChars(byte *bytes, int byteCount, char *chars, int charCount, DecoderNLS baseDecoder)
        {
            // Special byte values.  Names follow the decoder state each one drives in this method.
            const byte AtrByte        = 0xEF;   // 239: next byte is read as a code page selector
            const byte StressAbbrByte = 0xF0;   // 240: what gets fallen back for an unresolved stress/abbreviation lead
            const byte ViramaByte     = 0xE8;   // 232
            const byte NuktaByte      = 0xE9;   // 233

            ISCIIEncoding.ISCIIDecoder decoder = (ISCIIEncoding.ISCIIDecoder)baseDecoder;
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

            // Working copies of the decoder state (defaults apply when there is no decoder)
            int  codePage          = this.defaultCodePage;
            bool lastWasAtr        = false;
            bool lastWasVirama     = false;
            bool lastWasStressAbbr = false;
            char charIfNukta       = '\0';
            char charIfNoNukta     = '\0';

            if (decoder != null)
            {
                codePage          = decoder.currentCodePage;
                lastWasAtr        = decoder.bLastATR;
                lastWasVirama     = decoder.bLastVirama;
                lastWasStressAbbr = decoder.bLastDevenagariStressAbbr;
                charIfNukta       = decoder.cLastCharForNextNukta;
                charIfNoNukta     = decoder.cLastCharForNoNextNukta;
            }

            // True whenever the previous byte left something to resolve with the current one
            bool pendingSpecial = lastWasVirama || lastWasAtr || lastWasStressAbbr || charIfNukta != '\0';

            // Only code pages 2..11 have a mapping table slot
            int mappingIndex = (codePage >= 2 && codePage <= 11) ? ISCIIEncoding.IndicMappingIndex[codePage] : -1;

            while (buffer.MoreData)
            {
                byte b = buffer.GetNextByte();

                if (pendingSpecial)
                {
                    pendingSpecial = false;

                    if (lastWasAtr)
                    {
                        // 0x42..0x4B: switch to the code page held in the low nibble
                        if (b >= 0x42 && b <= 0x4B)
                        {
                            codePage     = b & 0x0F;
                            mappingIndex = ISCIIEncoding.IndicMappingIndex[codePage];
                            lastWasAtr   = false;
                            continue;
                        }

                        // 0x40 and 0x41 both go back to the default code page
                        if (b == 0x40 || b == 0x41)
                        {
                            codePage     = this.defaultCodePage;
                            mappingIndex = (codePage >= 2 && codePage <= 11) ? ISCIIEncoding.IndicMappingIndex[codePage] : -1;
                            lastWasAtr   = false;
                            continue;
                        }

                        // Unrecognized selector: fall back the ATR itself, then process this byte normally
                        if (!buffer.Fallback(AtrByte))
                        {
                            break;
                        }
                        lastWasAtr = false;
                    }
                    else if (lastWasVirama)
                    {
                        // A virama followed by 232 or 233 emits U+200C or U+200D respectively
                        if (b == ViramaByte || b == NuktaByte)
                        {
                            char joiner = (b == ViramaByte) ? '\x200C' : '\x200D';
                            if (!buffer.AddChar(joiner))
                            {
                                break;
                            }
                            lastWasVirama = false;
                            continue;
                        }

                        // Anything else: nothing extra to emit, process this byte normally
                        lastWasVirama = false;
                    }
                    else if (lastWasStressAbbr)
                    {
                        // 184 and 191 each complete the pair with their own single char
                        if (b == 0xB8 || b == 0xBF)
                        {
                            char mark = (b == 0xB8) ? '॒' : '॰';
                            if (!buffer.AddChar(mark))
                            {
                                break;
                            }
                            lastWasStressAbbr = false;
                            continue;
                        }

                        // Otherwise fall back the lead byte, then process this byte normally
                        if (!buffer.Fallback(StressAbbrByte))
                        {
                            break;
                        }
                        lastWasStressAbbr = false;
                    }
                    else
                    {
                        // A char was held back waiting to see whether 233 follows
                        if (b == NuktaByte)
                        {
                            if (!buffer.AddChar(charIfNukta))
                            {
                                break;
                            }
                            charIfNukta = charIfNoNukta = '\0';
                            continue;
                        }

                        if (!buffer.AddChar(charIfNoNukta))
                        {
                            break;
                        }
                        charIfNukta = charIfNoNukta = '\0';
                    }
                }

                // Bytes below 0xA0 map straight through to the same char value
                if (b < 0xA0)
                {
                    if (!buffer.AddChar((char)b))
                    {
                        break;
                    }
                    continue;
                }

                if (b == AtrByte)
                {
                    lastWasAtr = pendingSpecial = true;
                    continue;
                }

                // Table lookup: slot 0 is the plain char, slot 1 carries the alternate/marker
                char primary   = ISCIIEncoding.IndicMapping[mappingIndex, 0, b - 0xA0];
                char alternate = ISCIIEncoding.IndicMapping[mappingIndex, 1, b - 0xA0];

                if (alternate == '\0' || b == NuktaByte)
                {
                    // Simple one-to-one mapping, or no mapping at all
                    bool accepted = (primary == '\0') ? buffer.Fallback(b) : buffer.AddChar(primary);
                    if (!accepted)
                    {
                        break;
                    }
                }
                else if (b == ViramaByte)
                {
                    if (!buffer.AddChar(primary))
                    {
                        break;
                    }
                    lastWasVirama = pendingSpecial = true;
                }
                else if ((alternate & 0xF000) == 0)
                {
                    // Hold the char back until the next byte shows which form to emit
                    pendingSpecial = true;
                    charIfNukta    = alternate;
                    charIfNoNukta  = primary;
                }
                else
                {
                    lastWasStressAbbr = pendingSpecial = true;
                }
            }

            // End of input: resolve whatever is still pending if this is a flush (or stateless) call
            if (decoder == null || decoder.MustFlush)
            {
                // NOTE(review): the GetNextByte calls below discard their result; presumably they
                // re-advance the byte position after a failed Fallback/AddChar moved it back -
                // confirm against EncodingCharBuffer.
                if (lastWasAtr)
                {
                    if (buffer.Fallback(AtrByte))
                    {
                        lastWasAtr = false;
                    }
                    else
                    {
                        buffer.GetNextByte();
                    }
                }
                else if (lastWasStressAbbr)
                {
                    if (buffer.Fallback(StressAbbrByte))
                    {
                        lastWasStressAbbr = false;
                    }
                    else
                    {
                        buffer.GetNextByte();
                    }
                }
                else if (charIfNoNukta != '\0')
                {
                    if (buffer.AddChar(charIfNoNukta))
                    {
                        charIfNoNukta = charIfNukta = '\0';
                    }
                    else
                    {
                        buffer.GetNextByte();
                    }
                }
            }

            // Write state back only when producing real output with a decoder
            if (decoder != null && chars != null)
            {
                // Keep the state if not flushing, or if the flush above left something unresolved
                bool keepState = !decoder.MustFlush || charIfNoNukta != '\0' || lastWasAtr || lastWasStressAbbr;

                if (keepState)
                {
                    decoder.currentCodePage           = codePage;
                    decoder.bLastVirama               = lastWasVirama;
                    decoder.bLastATR                  = lastWasAtr;
                    decoder.bLastDevenagariStressAbbr = lastWasStressAbbr;
                    decoder.cLastCharForNextNukta     = charIfNukta;
                    decoder.cLastCharForNoNextNukta   = charIfNoNukta;
                }
                else
                {
                    decoder.currentCodePage           = this.defaultCodePage;
                    decoder.bLastVirama               = false;
                    decoder.bLastATR                  = false;
                    decoder.bLastDevenagariStressAbbr = false;
                    decoder.cLastCharForNextNukta     = '\0';
                    decoder.cLastCharForNoNextNukta   = '\0';
                }

                decoder.m_bytesUsed = buffer.BytesUsed;
            }

            return buffer.Count;
        }
Example #49
0
        public override unsafe int GetChars(byte *bytes, int byteCount,
                                            char *chars, int charCount, DecoderNLS decoder)
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            Debug.Assert(bytes != null, "[SBCSCodePageEncoding.GetChars]bytes is null");
            Debug.Assert(byteCount >= 0, "[SBCSCodePageEncoding.GetChars]byteCount is negative");
            Debug.Assert(chars != null, "[SBCSCodePageEncoding.GetChars]chars is null");
            Debug.Assert(charCount >= 0, "[SBCSCodePageEncoding.GetChars]charCount is negative");

            CheckMemorySection();

            // See if we have best fit
            bool bUseBestFit = false;

            // Do it fast way if using ? replacement or best fit fallbacks
            byte *byteEnd   = bytes + byteCount;
            byte *byteStart = bytes;
            char *charStart = chars;

            // Only need decoder fallback buffer if not using default replacement fallback or best fit fallback.
            DecoderReplacementFallback fallback = null;

            if (decoder == null)
            {
                fallback    = DecoderFallback as DecoderReplacementFallback;
                bUseBestFit = DecoderFallback is InternalDecoderBestFitFallback;
            }
            else
            {
                fallback    = decoder.Fallback as DecoderReplacementFallback;
                bUseBestFit = decoder.Fallback is InternalDecoderBestFitFallback;
                Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
                             decoder.FallbackBuffer.Remaining == 0,
                             "[SBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start");
            }

            if (bUseBestFit || (fallback != null && fallback.MaxCharCount == 1))
            {
                // Try it the fast way
                char replacementChar;
                if (fallback == null)
                {
                    replacementChar = '?';  // Best fit always has ? for fallback for SBCS
                }
                else
                {
                    replacementChar = fallback.DefaultString[0];
                }

                // Need byteCount chars, otherwise too small buffer
                if (charCount < byteCount)
                {
                    // Need at least 1 output byte, throw if must throw
                    ThrowCharsOverflow(decoder, charCount < 1);

                    // Not throwing, use what we can
                    byteEnd = bytes + charCount;
                }

                // Quick loop, just do '?' replacement because we don't have fallbacks for decodings.
                while (bytes < byteEnd)
                {
                    char c;
                    if (bUseBestFit)
                    {
                        if (arrayBytesBestFit == null)
                        {
                            ReadBestFitTable();
                        }
                        c = arrayBytesBestFit[*bytes];
                    }
                    else
                    {
                        c = _mapBytesToUnicode[*bytes];
                    }
                    bytes++;

                    if (c == UNKNOWN_CHAR)
                    {
                        // This is an invalid byte in the ASCII encoding.
                        *chars = replacementChar;
                    }
                    else
                    {
                        *chars = c;
                    }
                    chars++;
                }

                // bytes & chars used are the same
                if (decoder != null)
                {
                    decoder.m_bytesUsed = (int)(bytes - byteStart);
                }
                return((int)(chars - charStart));
            }

            // Slower way's going to need a fallback buffer
            DecoderFallbackBuffer fallbackBuffer = null;

            byte[] byteBuffer = new byte[1];
            char * charEnd    = chars + charCount;

            DecoderFallbackBufferHelper fallbackHelper = new DecoderFallbackBufferHelper(null);

            // Not quite so fast loop
            while (bytes < byteEnd)
            {
                // Faster if don't use *bytes++;
                char c = _mapBytesToUnicode[*bytes];
                bytes++;

                // See if it was unknown
                if (c == UNKNOWN_CHAR)
                {
                    // Make sure we have a fallback buffer
                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                        {
                            fallbackBuffer = DecoderFallback.CreateFallbackBuffer();
                        }
                        else
                        {
                            fallbackBuffer = decoder.FallbackBuffer;
                        }

                        fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);

                        fallbackHelper.InternalInitialize(byteEnd - byteCount, charEnd);
                    }

                    // Use fallback buffer
                    Debug.Assert(bytes > byteStart,
                                 "[SBCSCodePageEncoding.GetChars]Expected bytes to have advanced already (unknown byte)");
                    byteBuffer[0] = *(bytes - 1);
                    // Fallback adds fallback to chars, but doesn't increment chars unless the whole thing fits.
                    if (!fallbackHelper.InternalFallback(byteBuffer, bytes, ref chars))
                    {
                        // May or may not throw, but we didn't get this byte
                        bytes--;                                            // unused byte
                        fallbackHelper.InternalReset();                     // Didn't fall this back
                        ThrowCharsOverflow(decoder, bytes == byteStart);    // throw?
                        break;                                              // don't throw, but stop loop
                    }
                }
                else
                {
                    // Make sure we have buffer space
                    if (chars >= charEnd)
                    {
                        Debug.Assert(bytes > byteStart,
                                     "[SBCSCodePageEncoding.GetChars]Expected bytes to have advanced already (known byte)");
                        bytes--;                                            // unused byte
                        ThrowCharsOverflow(decoder, bytes == byteStart);    // throw?
                        break;                                              // don't throw, but stop loop
                    }

                    *(chars) = c;
                    chars++;
                }
            }

            // Might have had decoder fallback stuff.
            if (decoder != null)
            {
                decoder.m_bytesUsed = (int)(bytes - byteStart);
            }

            // Expect Empty fallback buffer for GetChars
            Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
                         "[SBCSEncoding.GetChars]Expected Empty fallback buffer at end");

            return((int)(chars - charStart));
        }
Example #50
0
        // Counts how many UTF-16 chars decoding `count` bytes starting at `bytes` would yield.
        //
        //   bytes       - first input byte (asserted non-null).
        //   count       - number of input bytes (asserted non-negative).
        //   baseDecoder - optional decoder, cast to UTF32Decoder; when non-null its pending
        //                 byte count / partial value seed the scan and its fallback buffer is used.
        //
        // Returns the computed char count. Throws ArgumentOutOfRangeException when the running
        // total goes negative (i.e. the int wrapped). Nothing is written back to the decoder here.
        internal override unsafe int GetCharCount(byte *bytes, int count, DecoderNLS baseDecoder)
        {
            Debug.Assert(bytes != null, "[UTF32Encoding.GetCharCount]bytes!=null");
            Debug.Assert(count >= 0, "[UTF32Encoding.GetCharCount]count >=0");

            UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

            // Input window
            byte *inputStart = bytes;
            byte *inputLimit = bytes + count;

            // Running state: chars counted, bytes gathered toward the current unit, partial value
            int  total   = 0;
            int  pending = 0;
            uint scalar  = 0;

            // Pick up carried-over state and the fallback buffer
            DecoderFallbackBuffer fallbackBuffer;
            if (decoder == null)
            {
                // Stateless call: fresh buffer from this encoding's decoder fallback
                fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
            }
            else
            {
                pending        = decoder.readByteCount;
                scalar         = (uint)decoder.iChar;
                fallbackBuffer = decoder.FallbackBuffer;

                // Shouldn't have anything in fallback buffer for GetCharCount
                // (don't have to check m_throwOnOverflow for chars or count)
                Debug.Assert(fallbackBuffer.Remaining == 0,
                             "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
            }

            // Counting only, so there is no output limit to hand to the fallback buffer.
            fallbackBuffer.InternalInitialize(inputStart, null);

            // Consume one byte per iteration; a unit is complete after four of them.
            // A negative total means the counter wrapped, so stop scanning.
            while (bytes < inputLimit && total >= 0)
            {
                uint current = *bytes;
                bytes++;

                if (bigEndian)
                {
                    // New byte becomes the lowest byte
                    scalar = (scalar << 8) + current;
                }
                else
                {
                    // New byte becomes the highest byte
                    scalar = (scalar >> 8) + (current << 24);
                }

                pending++;
                if (pending < 4)
                {
                    // Unit not complete yet
                    continue;
                }

                pending = 0;

                bool isEncodable = scalar <= 0x10FFFF && (scalar < 0xD800 || scalar > 0xDFFF);
                if (isEncodable)
                {
                    // Values at or above 0x10000 need a surrogate pair (2 chars), everything else 1
                    total += (scalar >= 0x10000) ? 2 : 1;
                }
                else
                {
                    // Out of range or a lone surrogate value: hand the four bytes,
                    // in their original stream order, to the fallback.
                    byte[] invalidBytes = bigEndian
                        ? new byte[] {
                              unchecked ((byte)(scalar >> 24)), unchecked ((byte)(scalar >> 16)),
                              unchecked ((byte)(scalar >> 8)), unchecked ((byte)(scalar))
                          }
                        : new byte[] {
                              unchecked ((byte)(scalar)), unchecked ((byte)(scalar >> 8)),
                              unchecked ((byte)(scalar >> 16)), unchecked ((byte)(scalar >> 24))
                          };

                    total += fallbackBuffer.InternalFallback(invalidBytes, bytes);
                }

                // Start the next unit from scratch
                scalar = 0;
            }

            // A partial unit remains; it is only fallen back when stateless or when flushing.
            if (pending > 0 && (decoder == null || decoder.MustFlush))
            {
                byte[] trailing = new byte[pending];
                if (bigEndian)
                {
                    // Most recent byte sits in the low 8 bits; fill the array back to front
                    for (int slot = pending - 1; slot >= 0; slot--)
                    {
                        trailing[slot] = unchecked ((byte)scalar);
                        scalar >>= 8;
                    }
                }
                else
                {
                    // Most recent byte sits in the high 8 bits; fill the array back to front
                    for (int slot = pending - 1; slot >= 0; slot--)
                    {
                        trailing[slot] = unchecked ((byte)(scalar >> 24));
                        scalar <<= 8;
                    }
                }

                total += fallbackBuffer.InternalFallback(trailing, bytes);
            }

            // Check for overflows.
            // NOTE(review): the resource key mentions "GetByteCount" although this counts chars;
            // left untouched because the resource table is not visible from here.
            if (total < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(count), Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
            }

            // Shouldn't have anything in fallback buffer for GetCharCount
            // (don't have to check m_throwOnOverflow for chars or count)
            Debug.Assert(fallbackBuffer.Remaining == 0,
                         "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");

            return total;
        }
Example #51
0
        // Decodes `byteCount` ASCII bytes from `bytes` into at most `charCount` chars at `chars`.
        //
        //   decoder - optional; when non-null its Fallback / FallbackBuffer are used and
        //             m_bytesUsed receives the number of input bytes consumed.
        //
        // Bytes >= 0x80 are not ASCII: they are either replaced directly (single-char replacement
        // fallback, fast path) or routed through a fallback buffer (slow path).
        // Returns the number of chars written. ThrowCharsOverflow is consulted whenever the
        // output runs out of room; when it does not throw, decoding simply stops early.
        internal override unsafe int GetChars(byte *bytes, int byteCount,
                                              char *chars, int charCount, DecoderNLS decoder)
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            Debug.Assert(bytes != null, "[ASCIIEncoding.GetChars]bytes is null");
            Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetChars]byteCount is negative");
            Debug.Assert(chars != null, "[ASCIIEncoding.GetChars]chars is null");
            Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetChars]charCount is negative");

            // Remember where we began so consumed/produced counts can be computed at the end
            byte *inputStart  = bytes;
            byte *inputLimit  = bytes + byteCount;
            char *outputStart = chars;

            // ASCII has no best-fit handling, so only the replacement fallback matters here.
            DecoderReplacementFallback replacement;
            if (decoder != null)
            {
                replacement = decoder.Fallback as DecoderReplacementFallback;
                Debug.Assert(!decoder.m_throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
                             decoder.FallbackBuffer.Remaining == 0,
                             "[ASCIICodePageEncoding.GetChars]Expected empty fallback buffer");
            }
            else
            {
                replacement = this.DecoderFallback as DecoderReplacementFallback;
            }

            // ---- Fast path: every invalid byte maps to exactly one replacement char ----
            bool oneCharReplacement = replacement != null && replacement.MaxCharCount == 1;
            if (oneCharReplacement)
            {
                char substitute = replacement.DefaultString[0];

                // One char per byte, so the output needs room for byteCount chars
                if (charCount < byteCount)
                {
                    // Room for nothing at all forces a throw; otherwise it may or may not throw
                    ThrowCharsOverflow(decoder, charCount < 1);

                    // Didn't throw: decode only as many bytes as there are output slots
                    inputLimit = bytes + charCount;
                }

                for (; bytes < inputLimit; bytes++, chars++)
                {
                    byte value = *bytes;
                    *chars = (value >= 0x80) ? substitute : unchecked ((char)value);
                }

                // bytes & chars used are the same
                if (decoder != null)
                {
                    decoder.m_bytesUsed = (int)(bytes - inputStart);
                }
                return (int)(chars - outputStart);
            }

            // ---- Slow path: invalid bytes go through a (lazily obtained) fallback buffer ----
            DecoderFallbackBuffer fallbackBuffer = null;
            byte[] oneByte     = new byte[1];
            char * outputLimit = chars + charCount;

            while (bytes < inputLimit)
            {
                byte value = *bytes;
                bytes++;

                if (value < 0x80)
                {
                    // Plain ASCII: needs one output slot
                    if (chars >= outputLimit)
                    {
                        Debug.Assert(bytes > inputStart || chars == outputStart,
                                     "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (normal case)");
                        bytes--;                                              // byte was not consumed
                        ThrowCharsOverflow(decoder, chars == outputStart);    // may throw
                        break;                                                // didn't throw: stop here
                    }

                    *chars = unchecked ((char)value);
                    chars++;
                    continue;
                }

                // Invalid byte: make sure we have an initialized fallback buffer
                if (fallbackBuffer == null)
                {
                    fallbackBuffer = (decoder == null)
                        ? this.DecoderFallback.CreateFallbackBuffer()
                        : decoder.FallbackBuffer;

                    // inputLimit is untouched on this path, so inputStart == inputLimit - byteCount
                    fallbackBuffer.InternalInitialize(inputStart, outputLimit);
                }

                oneByte[0] = value;

                // Work on a copy of the output pointer so `chars` itself is never passed by reference;
                // the copy is written back unconditionally afterwards.
                char *fallbackTarget = chars;
                bool  fellBack       = fallbackBuffer.InternalFallback(oneByte, bytes, ref fallbackTarget);
                chars = fallbackTarget;

                if (!fellBack)
                {
                    // May or may not throw, but we didn't get this byte
                    Debug.Assert(bytes > inputStart || chars == outputStart,
                                 "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (fallback case)");
                    bytes--;                                              // byte was not consumed
                    fallbackBuffer.InternalReset();                       // drop the unfinished fallback
                    ThrowCharsOverflow(decoder, chars == outputStart);    // may throw
                    break;                                                // didn't throw: stop here
                }
            }

            // Report consumption back to the decoder, if any
            if (decoder != null)
            {
                decoder.m_bytesUsed = (int)(bytes - inputStart);
            }

            // Expect Empty fallback buffer for GetChars
            Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
                         "[ASCIIEncoding.GetChars]Expected Empty fallback buffer");

            return (int)(chars - outputStart);
        }
Example #52
0
        // Decodes `byteCount` UTF-32 bytes from `bytes` into at most `charCount` UTF-16 chars at `chars`.
        //
        //   baseDecoder - optional decoder, cast to UTF32Decoder. When non-null, its pending byte
        //                 count and partial value seed the decode, its fallback buffer is used, and
        //                 on exit iChar / readByteCount / m_bytesUsed are written back to it.
        //
        // Byte order follows the `bigEndian` field. Values above 0x10FFFF or inside 0xD800..0xDFFF
        // are sent to the decoder fallback; values >= 0x10000 are emitted as a surrogate pair.
        // Returns the number of chars written. When output space runs out, ThrowCharsOverflow is
        // consulted; if it does not throw, decoding stops with the input pointer backed up to the
        // start of the unit that did not fit.
        internal override unsafe int GetChars(byte *bytes, int byteCount,
                                              char *chars, int charCount, DecoderNLS baseDecoder)
        {
            Debug.Assert(chars != null, "[UTF32Encoding.GetChars]chars!=null");
            Debug.Assert(bytes != null, "[UTF32Encoding.GetChars]bytes!=null");
            Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetChars]byteCount >=0");
            Debug.Assert(charCount >= 0, "[UTF32Encoding.GetChars]charCount >=0");

            UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

            // None so far!
            char *charStart = chars;
            char *charEnd   = chars + charCount;

            byte *byteStart = bytes;
            byte *byteEnd   = bytes + byteCount;

            // See if there's anything in our decoder (but don't clear it yet)
            int  readCount = 0;
            uint iChar     = 0;

            // For fallback we may need a fallback buffer
            DecoderFallbackBuffer fallbackBuffer = null;
            char *charsForFallback;

            // See if there's anything in our decoder
            if (decoder != null)
            {
                readCount      = decoder.readByteCount;
                iChar          = (uint)decoder.iChar;
                fallbackBuffer = baseDecoder.FallbackBuffer;

                // Shouldn't have anything in fallback buffer for GetChars
                // (don't have to check m_throwOnOverflow for chars)
                Debug.Assert(fallbackBuffer.Remaining == 0,
                             "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
            }
            else
            {
                fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
            }

            // Set our internal fallback interesting things.
            fallbackBuffer.InternalInitialize(bytes, chars + charCount);

            // Loop through our input one byte at a time; a unit completes every 4 bytes
            while (bytes < byteEnd)
            {
                // Get our next character
                if (bigEndian)
                {
                    // Scoot left and add it to the bottom
                    iChar <<= 8;
                    iChar  += *(bytes++);
                }
                else
                {
                    // Scoot right and add it to the top
                    iChar >>= 8;
                    iChar  += (uint)(*(bytes++)) << 24;
                }

                readCount++;

                // See if we have all the bytes yet
                if (readCount < 4)
                {
                    continue;
                }

                // Have the bytes
                readCount = 0;

                // See if its valid to encode
                if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
                {
                    // Need to fall back these 4 bytes (rebuilt in their original stream order)
                    byte[] fallbackBytes;
                    if (bigEndian)
                    {
                        fallbackBytes = new byte[] {
                            unchecked ((byte)(iChar >> 24)), unchecked ((byte)(iChar >> 16)),
                            unchecked ((byte)(iChar >> 8)), unchecked ((byte)(iChar))
                        };
                    }
                    else
                    {
                        fallbackBytes = new byte[] {
                            unchecked ((byte)(iChar)), unchecked ((byte)(iChar >> 8)),
                            unchecked ((byte)(iChar >> 16)), unchecked ((byte)(iChar >> 24))
                        };
                    }

                    // Chars won't be updated unless this works.
                    // A copy is passed by reference so `chars` itself is never passed by ref.
                    charsForFallback = chars;
                    bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
                    chars = charsForFallback;

                    if (!fallbackResult)
                    {
                        // Couldn't fallback, throw or wait til next time
                        // We either read enough bytes for bytes-=4 to work, or we're
                        // going to throw in ThrowCharsOverflow because chars == charStart
                        Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
                                     "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
                        bytes -= 4;                                      // get back to where we were
                        iChar  = 0;                                      // Remembering nothing
                        fallbackBuffer.InternalReset();
                        ThrowCharsOverflow(decoder, chars == charStart); // Might throw, if no chars output
                        break;                                           // Stop here, didn't throw
                    }

                    // Ignore the illegal character
                    iChar = 0;
                    continue;
                }


                // Ok, we have something we can add to our output
                if (iChar >= 0x10000)
                {
                    // Surrogates take 2, so we need two free slots
                    if (chars >= charEnd - 1)
                    {
                        // Throwing or stopping
                        // We either read enough bytes for bytes-=4 to work, or we're
                        // going to throw in ThrowCharsOverflow because chars == charStart
                        Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
                                     "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
                        bytes -= 4;                                      // get back to where we were
                        iChar  = 0;                                      // Remembering nothing
                        ThrowCharsOverflow(decoder, chars == charStart); // Might throw, if no chars output
                        break;                                           // Stop here, didn't throw
                    }

                    // Emit the high half now; the low half is written by the common store below
                    *(chars++) = GetHighSurrogate(iChar);
                    iChar      = GetLowSurrogate(iChar);
                }
                // Bounds check for normal character
                else if (chars >= charEnd)
                {
                    // Throwing or stopping
                    // We either read enough bytes for bytes-=4 to work, or we're
                    // going to throw in ThrowCharsOverflow because chars == charStart
                    Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
                                 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
                    bytes -= 4;                                      // get back to where we were
                    iChar  = 0;                                      // Remembering nothing
                    ThrowCharsOverflow(decoder, chars == charStart); // Might throw, if no chars output
                    break;                                           // Stop here, didn't throw
                }

                // Add the rest of the surrogate or our normal character
                *(chars++) = (char)iChar;

                // iChar is back to 0
                iChar = 0;
            }

            // See if we have something left over that has to be decoded
            if (readCount > 0 && (decoder == null || decoder.MustFlush))
            {
                // Oops, there's something left over with no place to go.
                byte[] fallbackBytes = new byte[readCount];
                int    tempCount     = readCount;

                // Extract the pending bytes from a scratch copy. If the fallback below fails without
                // throwing, iChar and readCount are stored back into the decoder, so iChar must
                // still hold the real partial value that matches readCount.
                uint   tempChar      = iChar;
                if (bigEndian)
                {
                    // Most recent byte is in the low 8 bits; fill back to front
                    while (tempCount > 0)
                    {
                        fallbackBytes[--tempCount] = unchecked ((byte)tempChar);
                        tempChar >>= 8;
                    }
                }
                else
                {
                    // Most recent byte is in the high 8 bits; fill back to front
                    while (tempCount > 0)
                    {
                        fallbackBytes[--tempCount] = unchecked ((byte)(tempChar >> 24));
                        tempChar <<= 8;
                    }
                }

                charsForFallback = chars;
                bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
                chars = charsForFallback;

                if (!fallbackResult)
                {
                    // Couldn't fallback.
                    fallbackBuffer.InternalReset();
                    ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                    // Stop here, didn't throw; readCount and iChar are left intact so the
                    // pending bytes can be retried on the next call.
                }
                else
                {
                    // Don't clear our decoder unless we could fall it back.
                    // If we caught the if above, then we're a convert() and will catch this next time.
                    readCount = 0;
                    iChar     = 0;
                }
            }

            // Remember any left over stuff, clearing buffer as well for MustFlush
            if (decoder != null)
            {
                decoder.iChar         = (int)iChar;
                decoder.readByteCount = readCount;
                decoder.m_bytesUsed   = (int)(bytes - byteStart);
            }

            // Shouldn't have anything in fallback buffer for GetChars
            // (don't have to check m_throwOnOverflow for chars)
            Debug.Assert(fallbackBuffer.Remaining == 0,
                         "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");

            // Return our count
            return((int)(chars - charStart));
        }
Example #53
0
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            Contract.Assert(chars!=null, "[UTF8Encoding.GetChars]chars!=null");
            Contract.Assert(byteCount >=0, "[UTF8Encoding.GetChars]count >=0");
            Contract.Assert(charCount >=0, "[UTF8Encoding.GetChars]charCount >=0");
            Contract.Assert(bytes!=null, "[UTF8Encoding.GetChars]bytes!=null");

            byte *pSrc = bytes;
            char *pTarget = chars;

            byte *pEnd = pSrc+byteCount;
            char *pAllocatedBufferEnd = pTarget+charCount;

            int ch = 0;

            DecoderFallbackBuffer fallback = null;
            if (baseDecoder != null) {
                UTF8Decoder decoder = (UTF8Decoder)baseDecoder;
                ch = decoder.bits;

                // Shouldn't have anything in fallback buffer for GetChars
                // (don't have to check m_throwOnOverflow for chars, we always use all or none so always should be empty)
                Contract.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
                    "[UTF8Encoding.GetChars]Expected empty fallback buffer at start");
            }

            for (;;)
            {
                // SLOWLOOP: does all range checks, handles all special cases, but it is slow

                if (pSrc >= pEnd) {
                    break;
                }

                if (ch == 0) {
                    // no pending bits
                    goto ReadChar;
                }

                // read next byte. The JIT optimization seems to be getting confused when
                // compiling "ch = *pSrc++;", so rather use "ch = *pSrc; pSrc++;" instead
                int cha = *pSrc;
                pSrc++;

                // we are expecting to see trailing bytes like 10vvvvvv
                if ((cha & unchecked((sbyte)0xC0)) != 0x80) {
                    // This can be a valid starting byte for another UTF8 byte sequence, so let's put
                    // the current byte back, and try to see if this is a valid byte for another UTF8 byte sequence
                    pSrc--;
                    goto InvalidByteSequence;
                }

                // fold in the new byte
                ch = (ch << 6) | (cha & 0x3F);

                if ((ch & FinalByte) == 0) {
                    // Not at last byte yet
                    Contract.Assert( (ch & (SupplimentarySeq | ThreeByteSeq)) != 0,
                        "[UTF8Encoding.GetChars]Invariant volation");

                    if ((ch & SupplimentarySeq) != 0) {
                        // Its a 4-byte supplimentary sequence
                        if ((ch & (FinalByte >> 6)) != 0) {
                            // this is 3rd byte of 4 byte sequence - nothing to do
                            continue;
                        }

                        // 2nd byte of 4 bytes
                        // check for non-shortest form of surrogate and the valid surrogate
                        // range 0x000000 - 0x10FFFF at the same time
                        if (!InRange(ch & 0x1F0, 0x10, 0x100)) {
                            goto InvalidByteSequence;
                        }
                    }
                    else {
                        // Must be 2nd byte of a 3-byte sequence
                        // check for non-shortest form of 3 byte seq
                        if ((ch & (0x1F << 5)) == 0 ||                  // non-shortest form
                            (ch & (0xF800 >> 6) ) == (0xD800 >> 6))     // illegal individually encoded surrogate
                        {
                            goto InvalidByteSequence;
                        }
                    }
                    continue;
                }

                // ready to punch

                // surrogate in shortest form?
                // Might be possible to get rid of this?  Already did non-shortest check for 4-byte sequence when reading 2nd byte?
                if ((ch & (SupplimentarySeq | 0x1F0000)) > SupplimentarySeq) {
                    // let the range check for the second char throw the exception
                    if (pTarget < pAllocatedBufferEnd) {
                        *pTarget = (char)( ((ch >> 10) & 0x7FF) +
                            unchecked((short)((CharUnicodeInfo.HIGH_SURROGATE_START - (0x10000 >> 10)))) );
                        pTarget++;

                        ch = (ch & 0x3FF) +
                            unchecked((int)(CharUnicodeInfo.LOW_SURROGATE_START));
                    }
                }

                goto EncodeChar;

            InvalidByteSequence:
                // this code fragment should be close to the gotos referencing it
                // Have to do fallback for invalid bytes
                if (fallback == null)
                {
                    if (baseDecoder == null)
                        fallback = this.decoderFallback.CreateFallbackBuffer();
                    else
                        fallback = baseDecoder.FallbackBuffer;
                    fallback.InternalInitialize(bytes, pAllocatedBufferEnd);
                }
                // This'll back us up the appropriate # of bytes if we didn't get anywhere
                if (!FallbackInvalidByteSequence(ref pSrc, ch, fallback, ref pTarget))
                {
                    // Ran out of buffer space
                    // Need to throw an exception?
                    Contract.Assert(pSrc >= bytes || pTarget == chars,
                        "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer after fallback");
                    fallback.InternalReset();
                    ThrowCharsOverflow(baseDecoder, pTarget == chars);
                    ch = 0;
                    break;
                }
                Contract.Assert(pSrc >= bytes, 
                    "[UTF8Encoding.GetChars]Expected invalid byte sequence to have remained within the byte array");
                ch = 0;
                continue;

            ReadChar:
                ch = *pSrc;
                pSrc++;

            ProcessChar:
                if (ch > 0x7F) {
                    // If its > 0x7F, its start of a new multi-byte sequence

                    // bit 6 has to be non-zero
                    if ((ch & 0x40) == 0) {
                        goto InvalidByteSequence;
                    }

                    // start a new long code
                    if ((ch & 0x20) != 0) {
                        if ((ch & 0x10) != 0) {
                            // 4 byte encoding - supplimentary character (2 surrogates)

                            ch &= 0x0F;

                            // check that bit 4 is zero and the valid supplimentary character
                            // range 0x000000 - 0x10FFFF at the same time
                            if (ch > 0x04) {
                                ch |= 0xf0;
                                goto InvalidByteSequence;
                            }

                            ch |= (FinalByte >> 3*6) | (1 << 30) | (3 << (30-2*6)) |
                                (SupplimentarySeq) | (SupplimentarySeq >> 6) |
                                (SupplimentarySeq >> 2*6) | (SupplimentarySeq >> 3*6);
                        }
                        else {
                            // 3 byte encoding
                            ch = (ch & 0x0F) | ( (FinalByte >> 2*6) | (1 << 30) |
                                (ThreeByteSeq) | (ThreeByteSeq >> 6) | (ThreeByteSeq >> 2*6) );
                        }
                    }
                    else {
                        // 2 byte encoding

                        ch &= 0x1F;

                        // check for non-shortest form
                        if (ch <= 1) {
                            ch |= 0xc0;
                            goto InvalidByteSequence;
                        }

                        ch |= (FinalByte >> 6);
                    }
                    continue;
                }

            EncodeChar:
                // write the pending character
                if (pTarget >= pAllocatedBufferEnd)
                {
                    // Fix chars so we make sure to throw if we didn't output anything
                    ch &= 0x1fffff;
                    if (ch > 0x7f)
                    {
                        if (ch > 0x7ff)
                        {
                            if (ch >= CharUnicodeInfo.LOW_SURROGATE_START &&
                                ch <= CharUnicodeInfo.LOW_SURROGATE_END)
                            {
                                pSrc--;     // It was 4 bytes
                                pTarget--;  // 1 was stored already, but we can't remember 1/2, so back up
                            }
                            else if (ch > 0xffff)
                            {
                                pSrc--;     // It was 4 bytes, nothing was stored
                            }
                            pSrc--;         // It was at least 3 bytes
                        }
                        pSrc--;             // It was at least 2 bytes
                    }
                    pSrc--;

                    // Throw that we don't have enough room (pSrc could be < chars if we had started to process
                    // a 4 byte sequence alredy)
                    Contract.Assert(pSrc >= bytes || pTarget == chars,
                        "[UTF8Encoding.GetChars]Expected pSrc to be within input buffer or throw due to no output]");
                    ThrowCharsOverflow(baseDecoder, pTarget == chars);

                    // Don't store ch in decoder, we already backed up to its start
                    ch = 0;

                    // Didn't throw, just use this buffer size.
                    break;
                }
                *pTarget = (char)ch;
                pTarget++;

#if FASTLOOP
                int availableChars = PtrDiff(pAllocatedBufferEnd, pTarget);
                int availableBytes = PtrDiff(pEnd, pSrc);

                // don't fall into the fast decoding loop if we don't have enough bytes
                // Test for availableChars is done because pStop would be <= pTarget.
                if (availableBytes <= 13) {
                    // we may need as many as 1 character per byte
                    if (availableChars < availableBytes) {
                        // not enough output room.  no pending bits at this point
                        ch = 0;
                        continue;
                    }

                    // try to get over the remainder of the ascii characters fast though
                    byte* pLocalEnd = pEnd; // hint to get pLocalEnd enregistered
                    while (pSrc < pLocalEnd) {
                        ch = *pSrc;
                        pSrc++;

                        if (ch > 0x7F)
                            goto ProcessChar;

                        *pTarget = (char)ch;
                        pTarget++;
                    }
                    // we are done
                    ch = 0;
                    break;
                }

                // we may need as many as 1 character per byte, so reduce the byte count if necessary.
                // If availableChars is too small, pStop will be before pTarget and we won't do fast loop.
                if (availableChars < availableBytes) {
                    availableBytes = availableChars;
                }

                // To compute the upper bound, assume that all characters are ASCII characters at this point,
                //  the boundary will be decreased for every non-ASCII character we encounter
                // Also, we need 7 chars reserve for the unrolled ansi decoding loop and for decoding of multibyte sequences
                char *pStop = pTarget + availableBytes - 7;

                while (pTarget < pStop) {
                    ch = *pSrc;
                    pSrc++;

                    if (ch > 0x7F) {
                        goto LongCode;
                    }
                    *pTarget = (char)ch;
                    pTarget++;

                    // get pSrc to be 2-byte aligned
                    if ((unchecked((int)pSrc) & 0x1) != 0) {
                        ch = *pSrc;
                        pSrc++;
                        if (ch > 0x7F) {
                            goto LongCode;
                        }
                        *pTarget = (char)ch;
                        pTarget++;
                    }

                    // get pSrc to be 4-byte aligned
                    if ((unchecked((int)pSrc) & 0x2) != 0) {
                        ch = *(ushort*)pSrc;
                        if ((ch & 0x8080) != 0) {
                            goto LongCodeWithMask16;
                        }

                        // Unfortunately, this is endianess sensitive
#if BIGENDIAN
                        *pTarget = (char)((ch >> 8) & 0x7F);
                        pSrc += 2;
                        *(pTarget+1) = (char)(ch & 0x7F);
                        pTarget += 2;
#else // BIGENDIAN
                        *pTarget = (char)(ch & 0x7F);
                        pSrc += 2;
                        *(pTarget+1) = (char)((ch >> 8) & 0x7F);
                        pTarget += 2;
#endif // BIGENDIAN
                    }

                    // Run 8 characters at a time!
                    while (pTarget < pStop) {
                        ch = *(int*)pSrc;
                        int chb = *(int*)(pSrc+4);
                        if (((ch | chb) & unchecked((int)0x80808080)) != 0) {
                            goto LongCodeWithMask32;
                        }

                        // Unfortunately, this is endianess sensitive
#if BIGENDIAN
                        *pTarget = (char)((ch >> 24) & 0x7F);
                        *(pTarget+1) = (char)((ch >> 16) & 0x7F);
                        *(pTarget+2) = (char)((ch >> 8) & 0x7F);
                        *(pTarget+3) = (char)(ch & 0x7F);
                        pSrc += 8;
                        *(pTarget+4) = (char)((chb >> 24) & 0x7F);
                        *(pTarget+5) = (char)((chb >> 16) & 0x7F);
                        *(pTarget+6) = (char)((chb >> 8) & 0x7F);
                        *(pTarget+7) = (char)(chb & 0x7F);
                        pTarget += 8;
#else // BIGENDIAN
                        *pTarget = (char)(ch & 0x7F);
                        *(pTarget+1) = (char)((ch >> 8) & 0x7F);
                        *(pTarget+2) = (char)((ch >> 16) & 0x7F);
                        *(pTarget+3) = (char)((ch >> 24) & 0x7F);
                        pSrc += 8;
                        *(pTarget+4) = (char)(chb & 0x7F);
                        *(pTarget+5) = (char)((chb >> 8) & 0x7F);
                        *(pTarget+6) = (char)((chb >> 16) & 0x7F);
                        *(pTarget+7) = (char)((chb >> 24) & 0x7F);
                        pTarget += 8;
#endif // BIGENDIAN
                    }
                    break;

#if BIGENDIAN
                LongCodeWithMask32:
                    // be careful about the sign extension
                    ch = (int)(((uint)ch) >> 16);
                LongCodeWithMask16:
                    ch = (int)(((uint)ch) >> 8);
#else // BIGENDIAN
                LongCodeWithMask32:
                LongCodeWithMask16:
                    ch &= 0xFF;
#endif // BIGENDIAN
                    pSrc++;
                    if (ch <= 0x7F) {
                        *pTarget = (char)ch;
                        pTarget++;
                        continue;
                    }

                LongCode:
                    int chc = *pSrc;
                    pSrc++;

                    if (
                        // bit 6 has to be zero
                        (ch & 0x40) == 0 ||
                        // we are expecting to see trailing bytes like 10vvvvvv
                        (chc & unchecked((sbyte)0xC0)) != 0x80)
                    {
                        goto BadLongCode;
                    }

                    chc &= 0x3F;

                    // start a new long code
                    if ((ch & 0x20) != 0) {

                        // fold the first two bytes together
                        chc |= (ch & 0x0F) << 6;

                        if ((ch & 0x10) != 0) {
                            // 4 byte encoding - surrogate
                            ch = *pSrc;
                            if (
                                // check that bit 4 is zero, the non-shortest form of surrogate
                                // and the valid surrogate range 0x000000 - 0x10FFFF at the same time
                                !InRange(chc >> 4, 0x01, 0x10) ||
                                // we are expecting to see trailing bytes like 10vvvvvv
                                (ch & unchecked((sbyte)0xC0)) != 0x80 )
                            {
                                goto BadLongCode;
                            }

                            chc = (chc << 6) | (ch & 0x3F);

                            ch = *(pSrc+1);
                            // we are expecting to see trailing bytes like 10vvvvvv
                            if ((ch & unchecked((sbyte)0xC0)) != 0x80) {
                                goto BadLongCode;
                            }
                            pSrc += 2;

                            ch = (chc << 6) | (ch & 0x3F);

                            *pTarget = (char)( ((ch >> 10) & 0x7FF) +
                                unchecked((short)(CharUnicodeInfo.HIGH_SURROGATE_START - (0x10000 >> 10))) );
                            pTarget++;

                            ch = (ch & 0x3FF) +
                                unchecked((short)(CharUnicodeInfo.LOW_SURROGATE_START));

                            // extra byte, we're already planning 2 chars for 2 of these bytes,
                            // but the big loop is testing the target against pStop, so we need
                            // to subtract 2 more or we risk overrunning the input.  Subtract 
                            // one here and one below.
                            pStop--;
                        }
                        else {
                            // 3 byte encoding
                            ch = *pSrc;
                            if (
                                // check for non-shortest form of 3 byte seq
                                (chc & (0x1F << 5)) == 0 ||
                                // Can't have surrogates here.
                                (chc & (0xF800 >> 6) ) == (0xD800 >> 6) ||
                                // we are expecting to see trailing bytes like 10vvvvvv
                                (ch & unchecked((sbyte)0xC0)) != 0x80 )
                            {
                                goto BadLongCode;
                            }
                            pSrc++;

                            ch = (chc << 6) | (ch & 0x3F);

                            // extra byte, we're only expecting 1 char for each of these 3 bytes,
                            // but the loop is testing the target (not source) against pStop, so
                            // we need to subtract 2 more or we risk overrunning the input.
                            // Subtract 1 here and one more below
                            pStop--;
                        }
                    }
                    else {
                        // 2 byte encoding

                        ch &= 0x1F;

                        // check for non-shortest form
                        if (ch <= 1) {
                            goto BadLongCode;
                        }
                        ch = (ch << 6) | chc;
                    }

                    *pTarget = (char)ch;
                    pTarget++;

                    // extra byte, we're only expecting 1 char for each of these 2 bytes,
                    // but the loop is testing the target (not source) against pStop.
                    // subtract an extra count from pStop so that we don't overrun the input.
                    pStop--;
                }
#endif // FASTLOOP

                Contract.Assert(pTarget <= pAllocatedBufferEnd, "[UTF8Encoding.GetChars]pTarget <= pAllocatedBufferEnd");

                // no pending bits at this point
                ch = 0;
                continue;

            BadLongCode:
                pSrc -= 2;
                ch = 0;
                continue;
            }

            if (ch != 0 && (baseDecoder == null || baseDecoder.MustFlush))
            {
                // Have to do fallback for invalid bytes
                if (fallback == null)
                {
                    if (baseDecoder == null)
                        fallback = this.decoderFallback.CreateFallbackBuffer();
                    else
                        fallback = baseDecoder.FallbackBuffer;
                    fallback.InternalInitialize(bytes, pAllocatedBufferEnd);
                }

                // This'll back us up the appropriate # of bytes if we didn't get anywhere
                if (!FallbackInvalidByteSequence(ref pSrc, ch, fallback, ref pTarget))
                {
                    Contract.Assert(pSrc >= bytes || pTarget == chars,
                        "[UTF8Encoding.GetChars]Expected to throw or remain in byte buffer while flushing");

                    // Ran out of buffer space
                    // Need to throw an exception?
                    fallback.InternalReset();
                    ThrowCharsOverflow(baseDecoder, pTarget == chars);
                }
                Contract.Assert(pSrc >= bytes, 
                    "[UTF8Encoding.GetChars]Expected flushing invalid byte sequence to have remained within the byte array");                
                ch = 0;
            }

            if (baseDecoder != null)
            {
                UTF8Decoder decoder = (UTF8Decoder)baseDecoder;

                // If we're storing flush data we expect all bits to be used or else
                // we're stuck in the middle of a conversion
                Contract.Assert(!baseDecoder.MustFlush || ch == 0 || !baseDecoder.m_throwOnOverflow,
                    "[UTF8Encoding.GetChars]Expected no must flush or no left over bits or no throw on overflow.");

                // Remember our leftover bits.
                decoder.bits = ch;

                baseDecoder.m_bytesUsed = (int)(pSrc - bytes);
            }

            // Shouldn't have anything in fallback buffer for GetChars
            // (don't have to check m_throwOnOverflow for chars)
            Contract.Assert(fallback == null || fallback.Remaining == 0,
                "[UTF8Encoding.GetChars]Expected empty fallback buffer at end");

            return PtrDiff(pTarget, chars);
        }
        /// <summary>
        /// Counts how many UTF-16 chars decoding the given UTF-32 bytes would yield,
        /// without writing any output.
        /// </summary>
        /// <param name="bytes">Pointer to the first input byte; must not be null.</param>
        /// <param name="count">Number of input bytes available; must not be negative.</param>
        /// <param name="baseDecoder">
        /// Optional decoder (cast to UTF32Decoder). When supplied, its pending byte count,
        /// partially assembled code unit and fallback buffer seed the scan. This method
        /// only reads that state; it never stores anything back into the decoder.
        /// </param>
        /// <returns>
        /// One per BMP scalar value, two per supplementary scalar value, plus whatever the
        /// fallback buffer reports for each invalid or incomplete 4-byte unit.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The running total wrapped around to a negative value.
        /// </exception>
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
        {
            BCLDebug.Assert(bytes!=null, "[UTF32Encoding.GetCharCount]bytes!=null");
            BCLDebug.Assert(count >=0, "[UTF32Encoding.GetCharCount]count >=0");

            UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

            byte* pStart = bytes;
            byte* pEnd = bytes + count;

            // Running result, number of bytes gathered for the current unit (0..3),
            // and the unit assembled so far.
            int total = 0;
            int pendingBytes = 0;
            uint codePoint = 0;

            DecoderFallbackBuffer fallbackBuffer;

            if (decoder == null)
            {
                // Stateless call: start from scratch with a fresh fallback buffer.
                fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
            }
            else
            {
                // Resume from whatever an earlier call left in the decoder.
                pendingBytes = decoder.readByteCount;
                codePoint = (uint)decoder.iChar;
                fallbackBuffer = decoder.FallbackBuffer;

                // Counting never leaves fallback output pending between calls.
                BCLDebug.Assert(fallbackBuffer.Remaining == 0,
                    "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
            }

            // Tell the fallback buffer where the input starts; there is no output buffer when counting.
            fallbackBuffer.InternalInitialize(pStart, null);

            // Consume one byte per iteration; a unit is complete after four bytes.
            // A negative total means the int counter wrapped, so stop early.
            while (bytes < pEnd && total >= 0)
            {
                uint next = *bytes;
                bytes++;

                // Big endian: earlier bytes are more significant, so shift left and append low.
                // Little endian: later bytes are more significant, so shift right and insert high.
                codePoint = bigEndian
                    ? ((codePoint << 8) + next)
                    : ((codePoint >> 8) + (next << 24));

                if (++pendingBytes < 4)
                    continue;

                // Full 4-byte unit assembled.
                pendingBytes = 0;

                bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
                if (codePoint <= 0x10FFFF && !isSurrogate)
                {
                    // Supplementary values need a surrogate pair, everything else one char.
                    total += (codePoint >= 0x10000) ? 2 : 1;
                }
                else
                {
                    // Out of range or a lone surrogate value: rebuild the four bytes in
                    // stream order and hand them to the fallback.
                    byte[] badBytes = bigEndian
                        ? new byte[] {
                            unchecked((byte)(codePoint>>24)), unchecked((byte)(codePoint>>16)),
                            unchecked((byte)(codePoint>>8)), unchecked((byte)(codePoint)) }
                        : new byte[] {
                            unchecked((byte)(codePoint)), unchecked((byte)(codePoint>>8)),
                            unchecked((byte)(codePoint>>16)), unchecked((byte)(codePoint>>24)) };

                    total += fallbackBuffer.InternalFallback(badBytes, bytes);
                }

                // Either way the unit has been consumed.
                codePoint = 0;
            }

            // A trailing partial unit is only reported when nothing can follow it
            // (no decoder at all, or the decoder has been asked to flush).
            if (pendingBytes > 0 && (decoder == null || decoder.MustFlush))
            {
                byte[] tail = new byte[pendingBytes];

                // Peel the bytes off newest-first so the array ends up in stream order.
                for (int i = pendingBytes - 1; i >= 0; i--)
                {
                    if (bigEndian)
                    {
                        // Newest byte sits in the low 8 bits.
                        tail[i] = unchecked((byte)codePoint);
                        codePoint >>= 8;
                    }
                    else
                    {
                        // Newest byte sits in the high 8 bits.
                        tail[i] = unchecked((byte)(codePoint>>24));
                        codePoint <<= 8;
                    }
                }

                total += fallbackBuffer.InternalFallback(tail, bytes);
            }

            // The counter wrapped: the result does not fit in an int.
            if (total < 0)
                throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));

            // Counting must not leave anything queued in the fallback buffer.
            BCLDebug.Assert(fallbackBuffer.Remaining == 0,
                "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");

            return total;
        }
Example #55
0
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;

            // Need last vars
            int lastByte = -1;
            char lastChar = (char)0;

            // Get our decoder (but don't clear it yet)
            if (decoder != null)
            {
                lastByte = decoder.lastByte;
                lastChar = decoder.lastChar;
            }

            // For fallback we may need a fallback buffer
            DecoderFallbackBuffer fallbackBuffer = null;

            byte* byteEnd = bytes + byteCount;
            char* charEnd = chars + charCount;
            byte* byteStart = bytes;
            char* charStart = chars;

            while (bytes < byteEnd)
            {
                // If we're aligned then maybe we can do it fast
                // This'll hurt if we're unaligned because we'll always test but never be aligned
#if !NO_FAST_UNICODE_LOOP
#if BIGENDIAN
                if (bigEndian &&
#else // BIGENDIAN
                if (!bigEndian &&
#endif // BIGENDIAN
#if WIN64 // win64 has to be long aligned
                    (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
#else
 (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
#endif // WIN64
 lastByte == -1 && lastChar == 0)
                {
                    // Need -1 to check 2 at a time.  If we have an even #, longChars will go
                    // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
                    // will go from longEnd - 1 long to longEnd. (Might not get to use this)
                    // We can only go iCount units (limited by shorter of char or byte buffers.
                    ulong* longEnd = (ulong*)(bytes - 7 +
                                                (((byteEnd - bytes) >> 1 < charEnd - chars) ?
                                                  (byteEnd - bytes) : (charEnd - chars) << 1));

                    // Need new char* so we can check 4 at a time
                    ulong* longBytes = (ulong*)bytes;
                    ulong* longChars = (ulong*)chars;

                    while (longBytes < longEnd)
                    {
                        // See if we potentially have surrogates (0x8000 bit set)
                        // (We're either big endian on a big endian machine or little endian on 
                        // a little endian machine so this'll work)
                        if ((0x8000800080008000 & *longBytes) != 0)
                        {
                            // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
                            // 5 bits looks like 11011, then its a high or low surrogate.
                            // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
                            // Note that we expect BMP characters to be more common than surrogates
                            // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
                            ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;

                            // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
                            // but no clue if they're high or low.
                            // If each of the 4 characters are non-zero, then none are surrogates.
                            if ((uTemp & 0xFFFF000000000000) == 0 ||
                                (uTemp & 0x0000FFFF00000000) == 0 ||
                                (uTemp & 0x00000000FFFF0000) == 0 ||
                                (uTemp & 0x000000000000FFFF) == 0)
                            {
                                // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
                                // or if there's 1 or 4 surrogates

                                // If they happen to be high/low/high/low, we may as well continue.  Check the next
                                // bit to see if its set (low) or not (high) in the right pattern
#if BIGENDIAN
                                if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
#else
                                if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
#endif
                                {
                                    // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
                                    // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.

                                    // Drop out to the slow loop to resolve the surrogates
                                    break;
                                }
                                // else they are all surrogates in High/Low/High/Low order, so we can use them.
                            }
                            // else none are surrogates, so we can use them.
                        }
                        // else all < 0x8000 so we can use them

                        // We can use these 4 chars.
                        *longChars = *longBytes;
                        longBytes++;
                        longChars++;
                    }

                    chars = (char*)longChars;
                    bytes = (byte*)longBytes;

                    if (bytes >= byteEnd)
                        break;
                }
#endif // !NO_FAST_UNICODE_LOOP

                // Get 1st byte
                if (lastByte < 0)
                {
                    lastByte = *bytes++;
                    continue;
                }

                // Get full char
                char ch;
                if (bigEndian)
                {
                    ch = (char)(lastByte << 8 | *(bytes++));
                }
                else
                {
                    ch = (char)(*(bytes++) << 8 | lastByte);
                }
                lastByte = -1;

                // See if the char's valid
                if (ch >= 0xd800 && ch <= 0xdfff)
                {
                    // Was it a high surrogate?
                    if (ch <= 0xdbff)
                    {
                        // Its a high surrogate, if we had one then do fallback for previous one
                        if (lastChar > 0)
                        {
                            // Get fallback for previous high surrogate
                            // Note we have to reconstruct bytes because some may have been in decoder
                            byte[] byteBuffer = null;
                            if (bigEndian)
                            {
                                byteBuffer = new byte[] { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
                            }
                            else
                            {
                                byteBuffer = new byte[] { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };

                            }

                            if (fallbackBuffer == null)
                            {
                                if (decoder == null)
                                    fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                                else
                                    fallbackBuffer = decoder.FallbackBuffer;

                                // Set our internal fallback interesting things.
                                fallbackBuffer.InternalInitialize(byteStart, charEnd);
                            }

                            if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
                            {
                                bytes -= 2;                                       // didn't use these 2 bytes
                                fallbackBuffer.InternalReset();
                                ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                                break;                                          // couldn't fallback but didn't throw
                            }
                        }

                        // Ignore the previous high surrogate which fell back already,
                        // yet remember the current high surrogate for next time.
                        lastChar = ch;
                        continue;
                    }

                    // Its a low surrogate
                    if (lastChar == 0)
                    {
                        // Expected a previous high surrogate
                        // Get fallback for this low surrogate
                        // Note we have to reconstruct bytes because some may have been in decoder
                        byte[] byteBuffer = null;
                        if (bigEndian)
                        {
                            byteBuffer = new byte[] { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
                        }
                        else
                        {
                            byteBuffer = new byte[] { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };

                        }

                        if (fallbackBuffer == null)
                        {
                            if (decoder == null)
                                fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                            else
                                fallbackBuffer = decoder.FallbackBuffer;

                            // Set our internal fallback interesting things.
                            fallbackBuffer.InternalInitialize(byteStart, charEnd);
                        }

                        if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
                        {
                            bytes -= 2;                                       // didn't use these 2 bytes
                            fallbackBuffer.InternalReset();
                            ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                            break;                                          // couldn't fallback but didn't throw
                        }

                        // Didn't throw, ignore this one (we already did its fallback)
                        continue;
                    }

                    // Valid surrogate pair, add our lastChar (will need 2 chars)
                    if (chars >= charEnd - 1)
                    {
                        bytes -= 2;                                       // didn't use these 2 bytes
                        ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                        // Leave lastChar for next call to Convert()
                        break;                                          // couldn't fallback but didn't throw
                    }

                    *chars++ = lastChar;
                    lastChar = (char)0;
                }
                else if (lastChar > 0)
                {
                    // Had a high surrogate, expected a low surrogate, fall back the high surrogate.
                    byte[] byteBuffer = null;
                    if (bigEndian)
                    {
                        byteBuffer = new byte[] { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
                    }
                    else
                    {
                        byteBuffer = new byte[] { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };

                    }

                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                            fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = decoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(byteStart, charEnd);
                    }

                    if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
                    {
                        bytes -= 2;                                       // didn't use these 2 bytes
                        fallbackBuffer.InternalReset();
                        ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                        break;                                          // couldn't fallback but didn't throw
                    }

                    // Not left over now, clear previous high surrogate and continue to add current char
                    lastChar = (char)0;
                }

                // Valid char, room for it?
                if (chars >= charEnd)
                {
                    bytes -= 2;                                       // didn't use these bytes
                    ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                    break;                                          // couldn't fallback but didn't throw
                }

                // add it
                *chars++ = ch;
            }

            // Remember our decoder if we must
            if (decoder == null || decoder.MustFlush)
            {
                if (lastChar > 0)
                {
                    // No hanging high surrogates allowed, do fallback and remove count for it
                    byte[] byteBuffer = null;
                    if (bigEndian)
                    {
                        byteBuffer = new byte[] { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
                    }
                    else
                    {
                        byteBuffer = new byte[] { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };

                    }

                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                            fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = decoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(byteStart, charEnd);
                    }

                    if (!fallbackBuffer.InternalFallback(byteBuffer, bytes, ref chars))
                    {
                        // 2 bytes couldn't fall back
                        // We either advanced bytes or chars should == charStart and throw below
                        bytes -= 2;                                       // didn't use these bytes
                        if (lastByte >= 0)
                            bytes--;                                    // had an extra last byte hanging around
                        fallbackBuffer.InternalReset();
                        ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                        // We'll remember these in our decoder though
                        bytes += 2;
                        if (lastByte >= 0)
                            bytes++;
                        goto End;
                    }

                    // done with this one
                    lastChar = (char)0;
                }

                if (lastByte >= 0)
                {
                    if (fallbackBuffer == null)
                    {
                        if (decoder == null)
                            fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = decoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(byteStart, charEnd);
                    }

                    // No hanging odd bytes allowed if must flush
                    if (!fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref chars))
                    {
                        // odd byte couldn't fall back
                        bytes--;                                        // didn't use this byte
                        fallbackBuffer.InternalReset();
                        ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                        // didn't throw, but we'll remember it in the decoder
                        bytes++;
                        goto End;
                    }

                    // Didn't fail, clear buffer
                    lastByte = -1;
                }
            }

        End:

            // Remember our decoder if we must
            if (decoder != null)
            {
                decoder.m_bytesUsed = (int)(bytes - byteStart);
                decoder.lastChar = lastChar;
                decoder.lastByte = lastByte;
            }

            // Used to do this the old way
            // System.IO.__UnmanagedMemoryStream.memcpyimpl((byte*)chars, bytes, byteCount);

            return (int)(chars - charStart);
        }
        // Decodes UTF-32 bytes into UTF-16 chars.
        //
        //   bytes / byteCount - input buffer and its length in bytes.
        //   chars / charCount - output buffer and its capacity in chars.
        //   baseDecoder       - optional stateful decoder (cast to UTF32Decoder). When non-null,
        //                       its partially assembled code unit (iChar / readByteCount) is
        //                       resumed on entry and saved, together with m_bytesUsed, on exit.
        //
        // Returns the number of chars written to the output buffer.
        //
        // Values above 0x10FFFF or inside the surrogate range 0xD800-0xDFFF, and (when there is
        // no decoder or the decoder must flush) a trailing partial code unit, are handed to the
        // decoder fallback. When the output buffer is too small ThrowCharsOverflow is called; it
        // might throw, otherwise decoding simply stops at that point.
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            BCLDebug.Assert(chars!=null, "[UTF32Encoding.GetChars]chars!=null");
            BCLDebug.Assert(bytes!=null, "[UTF32Encoding.GetChars]bytes!=null");
            BCLDebug.Assert(byteCount >=0, "[UTF32Encoding.GetChars]byteCount >=0");
            BCLDebug.Assert(charCount >=0, "[UTF32Encoding.GetChars]charCount >=0");

            UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

            // Remember the buffer boundaries; bytes and chars are used as moving cursors.
            char* charStart = chars;
            char* charEnd = chars + charCount;

            byte* byteStart = bytes;
            byte* byteEnd = bytes + byteCount;

            // Partially assembled code unit: how many of its 4 bytes were read, and their value.
            int readCount = 0;
            uint iChar = 0;

            // For fallback we may need a fallback buffer
            DecoderFallbackBuffer fallbackBuffer = null;

            // Resume from the decoder if we have one (its state is only overwritten at the end)
            if (decoder != null)
            {
                readCount = decoder.readByteCount;
                iChar = (uint)decoder.iChar;
                fallbackBuffer = baseDecoder.FallbackBuffer;

                // Shouldn't have anything in fallback buffer for GetChars
                // (don't have to check m_throwOnOverflow for chars)
                BCLDebug.Assert(fallbackBuffer.Remaining == 0,
                    "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
            }
            else
            {
                fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
            }

            // Tell the fallback buffer where the input starts and where the output ends.
            fallbackBuffer.InternalInitialize(bytes, charEnd);

            // Consume the input one byte at a time, emitting output once 4 bytes are assembled.
            while (bytes < byteEnd)
            {
                // Merge the next byte into the code unit being assembled
                if (bigEndian)
                {
                    // Scoot left and add it to the bottom
                    iChar <<= 8;
                    iChar += *(bytes++);
                }
                else
                {
                    // Scoot right and add it to the top
                    iChar >>= 8;
                    iChar += (uint)(*(bytes++)) << 24;
                }

                readCount++;

                // See if we have all the bytes yet
                if (readCount < 4)
                    continue;

                // Have the bytes
                readCount = 0;

                // Reject values that are not Unicode scalar values
                if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
                {
                    // Need to fall back these 4 bytes, presented in their original input order
                    byte[] fallbackBytes;
                    if (bigEndian)
                    {
                        fallbackBytes = new byte[] {
                            unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
                            unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
                    }
                    else
                    {
                        fallbackBytes = new byte[] {
                            unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
                            unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
                    }

                    // Chars won't be updated unless this works.
                    if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars))
                    {
                        // Couldn't fallback, throw or wait til next time
                        // We either read enough bytes for bytes-=4 to work, or we're
                        // going to throw in ThrowCharsOverflow because chars == charStart
                        BCLDebug.Assert(bytes >= byteStart + 4 || chars == charStart,
                            "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
                        bytes-=4;                                       // get back to where we were
                        iChar=0;                                        // Remembering nothing
                        fallbackBuffer.InternalReset();
                        ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                        break;                                          // Stop here, didn't throw
                    }

                    // Ignore the illegal character
                    iChar = 0;
                    continue;
                }


                // Ok, we have something we can add to our output
                if (iChar >= 0x10000)
                {
                    // Supplementary code points need a surrogate pair, i.e. room for 2 chars
                    if (chars >= charEnd - 1)
                    {
                        // Throwing or stopping
                        // We either read enough bytes for bytes-=4 to work, or we're
                        // going to throw in ThrowCharsOverflow because chars == charStart
                        BCLDebug.Assert(bytes >= byteStart + 4 || chars == charStart,
                            "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
                        bytes-=4;                                       // get back to where we were
                        iChar=0;                                        // Remembering nothing
                        ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                        break;                                          // Stop here, didn't throw
                    }

                    // Emit the high half now; the low half is written by the common store below
                    *(chars++) = GetHighSurrogate(iChar);
                    iChar = GetLowSurrogate(iChar);
                }
                // Bounds check for normal character
                else if (chars >= charEnd)
                {
                    // Throwing or stopping
                    // We either read enough bytes for bytes-=4 to work, or we're
                    // going to throw in ThrowCharsOverflow because chars == charStart
                    BCLDebug.Assert(bytes >= byteStart + 4 || chars == charStart,
                        "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
                    bytes-=4;                                       // get back to where we were
                    iChar=0;                                        // Remembering nothing
                    ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                    break;                                          // Stop here, didn't throw
                }

                // Add the rest of the surrogate or our normal character
                *(chars++) = (char)iChar;

                // iChar is back to 0
                iChar = 0;
            }

            // See if we have something left over that has to be decoded
            if (readCount > 0 && (decoder == null || decoder.MustFlush))
            {
                // Oops, there's something left over with no place to go.
                // Rebuild the pending bytes in input order from a COPY of iChar: if the fallback
                // below fails without throwing, iChar and readCount are stored back into the
                // decoder and must still describe the pending bytes for the next call.
                byte[] fallbackBytes = new byte[readCount];
                uint pending = iChar;
                int tempCount = readCount;
                if (bigEndian)
                {
                    // Big endian accumulates at the bottom, so the last byte read is the low byte
                    while (tempCount > 0)
                    {
                        fallbackBytes[--tempCount] = unchecked((byte)pending);
                        pending >>= 8;
                    }
                }
                else
                {
                    // Little endian accumulates at the top, so the last byte read is the high byte
                    while (tempCount > 0)
                    {
                        fallbackBytes[--tempCount] = unchecked((byte)(pending>>24));
                        pending <<= 8;
                    }
                }

                if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars))
                {
                    // Couldn't fallback.
                    fallbackBuffer.InternalReset();
                    ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                    // Stop here, didn't throw; readCount and iChar are left intact for the decoder
                }
                else
                {
                    // Don't clear our decoder unless we could fall it back.
                    // If we caught the if above, then we're a convert() and will catch this next time.
                    readCount = 0;
                    iChar = 0;
                }
            }

            // Remember any left over stuff, clearing buffer as well for MustFlush
            if (decoder != null)
            {
                decoder.iChar = (int)iChar;
                decoder.readByteCount = readCount;
                decoder.m_bytesUsed = (int)(bytes - byteStart);
            }

            // Shouldn't have anything in fallback buffer for GetChars
            // (don't have to check m_throwOnOverflow for chars)
            BCLDebug.Assert(fallbackBuffer.Remaining == 0,
                "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");

            // Return our count
            return (int)(chars - charStart);
        }
Example #57
0
        [System.Security.SecurityCritical]  // auto-generated
        public override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            // Decodes single/double byte input into chars through the mapBytesToUnicode table.
            // When a decoder is supplied, a lead byte carried over from the previous call
            // (decoder.bLeftOver) is consumed first, and a trailing lead byte may be parked
            // there again. Returns the number of chars written.
            //
            // Arguments were already validated by the internal caller, so they are only asserted.
            Debug.Assert(bytes != null, "[DBCSCodePageEncoding.GetChars]bytes is null");
            Debug.Assert(byteCount >= 0, "[DBCSCodePageEncoding.GetChars]byteCount is negative");
            Debug.Assert(chars != null, "[DBCSCodePageEncoding.GetChars]chars is null");
            Debug.Assert(charCount >= 0, "[DBCSCodePageEncoding.GetChars]charCount is negative");

            CheckMemorySection();

            DBCSDecoder decoder = (DBCSDecoder)baseDecoder;

            // Fixed buffer boundaries; bytes and chars themselves serve as the moving cursors.
            char* outputStart = chars;
            char* outputEnd = chars + charCount;
            byte* inputStart = bytes;
            byte* inputEnd = bytes + byteCount;

            // Becomes true once a trailing lead byte has been parked in the decoder.
            bool leadByteParked = false;

            // The fallback buffer is only obtained when a fallback is actually needed.
            DecoderFallbackBuffer fallbackBuffer = null;

            // Shouldn't have anything in fallback buffer for GetChars
            Debug.Assert(decoder == null || !decoder.m_throwOnOverflow ||
                !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
                "[DBCSCodePageEncoding.GetChars]Expected empty fallback buffer at start");

            DecoderFallbackBufferHelper fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);

            // First deal with a lead byte carried over from a previous call.
            if (decoder != null && decoder.bLeftOver > 0)
            {
                if (byteCount == 0)
                {
                    // No new input: without a flush request the carried byte just keeps waiting.
                    if (!decoder.MustFlush)
                    {
                        return 0;
                    }

                    // Flushing: the lone carried byte goes through the fallback.
                    Debug.Assert(fallbackBuffer == null,
                        "[DBCSCodePageEncoding.GetChars]Expected empty fallback");
                    fallbackBuffer = decoder.FallbackBuffer;
                    fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);
                    fallbackHelper.InternalInitialize(bytes, outputEnd);

                    // This is the first output of the call, so lack of room is reported as "nothing decoded".
                    byte[] carriedByte = new byte[] { unchecked((byte)decoder.bLeftOver) };
                    if (!fallbackHelper.InternalFallback(carriedByte, bytes, ref chars))
                        ThrowCharsOverflow(decoder, true);

                    decoder.bLeftOver = 0;

                    return (int)(chars - outputStart);
                }

                // Pair the carried lead byte with the first new byte and look the pair up.
                int carriedPair = (decoder.bLeftOver << 8) | *bytes++;
                char carriedChar = mapBytesToUnicode[carriedPair];

                if (carriedChar != UNKNOWN_CHAR_FLAG || carriedPair == 0)
                {
                    // Known mapping; this is the first char, so no room is hopeless.
                    if (chars >= outputEnd)
                        ThrowCharsOverflow(decoder, true);

                    *(chars++) = carriedChar;
                }
                else
                {
                    // Unknown pair: fall back both bytes.
                    Debug.Assert(fallbackBuffer == null,
                        "[DBCSCodePageEncoding.GetChars]Expected empty fallback for two bytes");
                    fallbackBuffer = decoder.FallbackBuffer;
                    fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);
                    fallbackHelper.InternalInitialize(inputEnd - byteCount, outputEnd);

                    byte[] carriedBytes = new byte[] { unchecked((byte)(carriedPair >> 8)), unchecked((byte)carriedPair) };
                    if (!fallbackHelper.InternalFallback(carriedBytes, bytes, ref chars))
                        ThrowCharsOverflow(decoder, true);
                }
            }

            // Main loop over the remaining input.
            while (bytes < inputEnd)
            {
                int code = *bytes++;
                char mapped = mapBytesToUnicode[code];

                // A lead byte needs its trail byte before it can be mapped.
                if (mapped == LEAD_BYTE_CHAR)
                {
                    if (bytes >= inputEnd)
                    {
                        // Input ended on a lead byte.
                        if (decoder != null && !decoder.MustFlush)
                        {
                            // Park it in the decoder for the next call.
                            leadByteParked = true;
                            decoder.bLeftOver = (byte)code;
                            break;
                        }

                        // Have to flush anyway, so treat it as unknown and use the fallback.
                        mapped = UNKNOWN_CHAR_FLAG;
                    }
                    else
                    {
                        // Combine lead and trail byte into one table index.
                        code = (code << 8) | *bytes++;
                        mapped = mapBytesToUnicode[code];
                    }
                }

                bool isUnknown = mapped == UNKNOWN_CHAR_FLAG && code != 0;
                if (!isUnknown)
                {
                    // Regular character: make sure there is output room.
                    if (chars >= outputEnd)
                    {
                        // May or may not throw, but we didn't get these byte(s)
                        Debug.Assert(bytes > inputStart,
                            "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for lead byte");
                        bytes--;                                            // give back one byte
                        if (code >= 0x100)
                        {
                            Debug.Assert(bytes > inputStart,
                                "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for trail byte");
                            bytes--;                                        // and the second one
                        }
                        ThrowCharsOverflow(decoder, bytes == inputStart);   // throw?
                        break;                                              // didn't throw, stop decoding
                    }

                    *(chars++) = mapped;
                    continue;
                }

                // Unknown byte(s): obtain the fallback buffer on first use.
                if (fallbackBuffer == null)
                {
                    fallbackBuffer = decoder == null
                        ? DecoderFallback.CreateFallbackBuffer()
                        : decoder.FallbackBuffer;
                    fallbackHelper = new DecoderFallbackBufferHelper(fallbackBuffer);
                    fallbackHelper.InternalInitialize(inputEnd - byteCount, outputEnd);
                }

                // One byte for single-byte codes, lead + trail otherwise.
                byte[] unknownBytes = code < 0x100
                    ? new byte[] { unchecked((byte)code) }
                    : new byte[] { unchecked((byte)(code >> 8)), unchecked((byte)code) };

                if (fallbackHelper.InternalFallback(unknownBytes, bytes, ref chars))
                    continue;

                // May or may not throw, but we didn't get these byte(s)
                Debug.Assert(bytes >= inputStart + unknownBytes.Length,
                    "[DBCSCodePageEncoding.GetChars]Expected bytes to have advanced for fallback");
                bytes -= unknownBytes.Length;                       // give the byte(s) back
                fallbackHelper.InternalReset();                     // this fallback did not happen
                ThrowCharsOverflow(decoder, bytes == inputStart);   // throw?
                break;                                              // didn't throw, stop decoding
            }

            // Record decoder state. A parked lead byte was already stored inside the loop.
            if (decoder != null)
            {
                // Nothing newly parked: clear the carry (covers the MustFlush case).
                if (!leadByteParked)
                {
                    decoder.bLeftOver = 0;
                }

                decoder.m_bytesUsed = (int)(bytes - inputStart);
            }

            // Shouldn't have anything in fallback buffer for GetChars
            Debug.Assert(decoder == null || !decoder.m_throwOnOverflow ||
                !decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
                "[DBCSCodePageEncoding.GetChars]Expected empty fallback buffer at end");

            // Number of chars produced
            return (int)(chars - outputStart);
        }
Example #58
0
 [System.Security.SecurityCritical]  // auto-generated
 public override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
 {
     // Counting reuses the decoding routine: GetChars is invoked with a null
     // destination of zero capacity and its return value is reported as the count.
     char* noDestination = null;
     const int noCapacity = 0;

     return GetChars(bytes, count, noDestination, noCapacity, baseDecoder);
 }
        // For decoding, the following interesting rules apply:
        // Virama followed by another Virama or Nukta becomes Virama + ZWNJ or Virama + ZWJ
        // ATR is followed by a byte to switch code pages ("fonts")
        // Devanagari F0, B8 -> \u0952
        // Devanagari F0, BF -> \u0970
        // Some characters followed by E9 become a different character instead.
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            // Allow null chars for counting
            BCLDebug.Assert(bytes != null, "[ISCIIEncoding.GetChars]bytes is null");
            BCLDebug.Assert(byteCount >= 0, "[ISCIIEncoding.GetChars]byteCount is negative");
//            BCLDebug.Assert(chars != null, "[ISCIIEncoding.GetChars]chars is null");
            BCLDebug.Assert(charCount >= 0, "[ISCIIEncoding.GetChars]charCount is negative");

            // Need the ISCII Decoder
            ISCIIDecoder decoder = (ISCIIDecoder) baseDecoder;

            // Get our info.
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
                this, decoder, chars, charCount, bytes, byteCount);

            int  currentCodePage = this.defaultCodePage;
            bool bLastATR = false;
            bool bLastVirama = false;
            bool bLastDevenagariStressAbbr = false;
            char cLastCharForNextNukta = '\0';
            char cLastCharForNoNextNukta = '\0';

            // See if there's anything in our decoder
            if (decoder != null)
            {
                currentCodePage = decoder.currentCodePage;
                bLastATR = decoder.bLastATR;
                bLastVirama = decoder.bLastVirama;
                bLastDevenagariStressAbbr = decoder.bLastDevenagariStressAbbr;
                cLastCharForNextNukta = decoder.cLastCharForNextNukta;
                cLastCharForNoNextNukta = decoder.cLastCharForNoNextNukta;
            }

            bool bLastSpecial = bLastVirama | bLastATR | bLastDevenagariStressAbbr |
                (cLastCharForNextNukta != '\0');

            // Get our current code page index (some code pages are dups)
            int currentCodePageIndex = -1;
            BCLDebug.Assert(currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi,
                "[ISCIIEncoding.GetChars]Decoder code page must be >= Devanagari and <= Punjabi, not " + currentCodePage);

            if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
            {
                currentCodePageIndex = IndicMappingIndex[currentCodePage];
            }

            // Loop through our input
            while (buffer.MoreData)
            {
                byte b = buffer.GetNextByte();

                // See if last one was special
                if (bLastSpecial)
                {
                    // Now it won't be
                    bLastSpecial = false;

                    // One and only one of our flags should be set
                    BCLDebug.Assert(((bLastVirama ? 1 : 0) + (bLastATR ? 1 : 0) +
                               (bLastDevenagariStressAbbr ? 1 : 0) +
                               ((cLastCharForNextNukta > 0) ? 1 : 0)) == 1,
                        String.Format(CultureInfo.InvariantCulture,
                            "[ISCIIEncoding.GetChars]Special cases require 1 and only 1 special case flag: LastATR {0} Dev. {1} Nukta {2}",
                            bLastATR, bLastDevenagariStressAbbr, cLastCharForNextNukta));
                    // If the last one was an ATR, then we'll have to do ATR stuff
                    if (bLastATR)
                    {
                        // No longer last ATR, we know it wasn't bLastVirama
                        bLastATR = false;

                        // We only support Devanagari - Punjabi
                        if (b >= (0x40 | CodeDevanagari) && b <= (0x40 | CodePunjabi))
                        {
                            // Remember the code page
                            currentCodePage = b & 0xf;
                            currentCodePageIndex = IndicMappingIndex[currentCodePage];
                            continue;
                        }

                        // Change back to default?
                        if (b == 0x40)
                        {
                            currentCodePage = this.defaultCodePage;
                            currentCodePageIndex = -1;

                            if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
                            {
                                currentCodePageIndex = IndicMappingIndex[currentCodePage];
                            }
                            continue;
                        }

                        // We don't support Roman
                        if (b == 0x41)
                        {
                            currentCodePage = this.defaultCodePage;
                            currentCodePageIndex = -1;

                            if (currentCodePage >= CodeDevanagari && currentCodePage <= CodePunjabi)
                            {
                                currentCodePageIndex = IndicMappingIndex[currentCodePage];
                            }

                            // Even though we don't know how to support Roman, windows didn't add a ? so we don't either.
                            continue;
                        }

                        // Other code pages & ATR codes not supported, fallback the ATR
                        buffer.Fallback(ControlATR);

                        // turn off things
                        bLastVirama = false;
                        bLastATR = false;
                        bLastDevenagariStressAbbr = false;
                        cLastCharForNextNukta = (char)0;
                        cLastCharForNoNextNukta = (char)0;

                        // Keep processing this byte
                    }
                    else if (bLastVirama)
                    {
                        // If last was Virama, then we might need ZWNJ or ZWJ instead
                        if (b == Virama)
                        {
                            // If no room, then stop
                            if (!buffer.AddChar(ZWNJ))
                                break;
                            bLastVirama = false;
                            continue;
                        }
                        if (b == Nukta)
                        {
                            // If no room, then stop
                            if (!buffer.AddChar(ZWJ))
                                break;
                            bLastVirama = false;
                            continue;
                        }
                        bLastVirama = false;
                    }
                    else if (bLastDevenagariStressAbbr)
                    {
                        // Last byte was an f0 (ext).
                        // If current is b8 or bf, then we have 952 or 970.  Otherwise fallback
                        if (b == 0xb8)
                        {
                            // It was a b8
                            if (!buffer.AddChar('\x0952'))         // Devanagari stress sign anudatta
                                break;
                            bLastDevenagariStressAbbr = false;
                            continue;
                        }

                        if (b == 0xbf)
                        {
                            // It was a bf
                            if (!buffer.AddChar('\x0970'))         // Devanagari abbr. sign
                                break;
                            bLastDevenagariStressAbbr = false;
                            continue;
                        }

                        // Wasn't an expected pattern, do fallback for f0 (ext) and
                        bLastDevenagariStressAbbr = false;
                        buffer.Fallback(DevenagariExt);
                        // Keep processing this byte
                    }
                    else
                    {
                        // We were checking for next char being a nukta
                        BCLDebug.Assert(cLastCharForNextNukta > 0 && cLastCharForNoNextNukta > 0,
                            "[ISCIIEncoding.GetChars]No other special case found, but cLastCharFor(No)NextNukta variable(s) aren't set.");

                        // We'll either add combined char or last char
                        if (b == Nukta)
                        {
                            // We combine nukta with previous char
                            if (!buffer.AddChar(cLastCharForNextNukta))
                                break;

                            // Done already
                            cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
                            continue;
                        }

                        // No Nukta, just add last character and keep processing current byte
                        if (!buffer.AddChar(cLastCharForNoNextNukta))
                            break;
                        cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
                        // Keep processing this byte
                    }
                }

                // Now bLastSpecial should be false and all flags false.
                BCLDebug.Assert (!bLastSpecial && !bLastDevenagariStressAbbr && !bLastVirama && !bLastATR &&
                          cLastCharForNextNukta == '\0',
                          "[ISCIIEncoding.GetChars]No special state for last code point should exist at this point.");

                // If its a simple byte, just add it
                if (b < MultiByteBegin)
                {
                    if (!buffer.AddChar((char)b))
                        break;
                    continue;
                }

                // See if its an ATR marker
                if (b == ControlATR)
                {
                    bLastATR = bLastSpecial = true;
                    continue;
                }

                BCLDebug.Assert (currentCodePageIndex != -1, "[ISCIIEncoding.GetChars]Expected valid currentCodePageIndex != -1");
                char ch = IndicMapping[currentCodePageIndex, 0, b - MultiByteBegin];
                char cAlt = IndicMapping[currentCodePageIndex, 1, b - MultiByteBegin];

                // If no 2nd char, just add it, also lonely Nuktas get added as well.
                if (cAlt == 0 || b == Nukta)
                {
                    // If it was an unknown character do fallback

                    // ? if not known.
                    if (ch == 0)
                    {
                        if (!buffer.Fallback(b))
                            break;
                    }
                    else
                    {
                        if (!buffer.AddChar(ch))
                            break;
                    }
                    continue;
                }

                // if b == Virama set last Virama so we can do ZWJ or ZWNJ next time if needed.
                if (b == Virama)
                {
                    if (!buffer.AddChar(ch))
                        break;
                    bLastVirama = bLastSpecial = true;
                    continue;
                }

                // See if its one that changes with a Nukta
                if ((cAlt & 0xF000) == 0)
                {
                    // It could change if next char is a nukta
                    bLastSpecial = true;
                    cLastCharForNextNukta = cAlt;
                    cLastCharForNoNextNukta = ch;
                    continue;
                }

                // We must be the Devenagari special case for F0, B8 & F0, BF
                BCLDebug.Assert(currentCodePage == CodeDevanagari && b == DevenagariExt,
                    String.Format(CultureInfo.InvariantCulture,
                        "[ISCIIEncoding.GetChars] Devenagari special case must {0} not {1} or in Devanagari code page {2} not {3}.",
                        DevenagariExt, b, CodeDevanagari, currentCodePage));
                bLastDevenagariStressAbbr = bLastSpecial = true;

            }

            // If we don't have a decoder, or if we had to flush, then we need to get rid
            // of last ATR, LastNoNextNukta and LastDevenagariExt.
            if (decoder == null || decoder.MustFlush)
            {
                // If these fail (because of Convert with insufficient buffer), then they'll turn off MustFlush as well.
                if (bLastATR)
                {
                    // Have to add ATR fallback
                    if (buffer.Fallback(ControlATR))
                        bLastATR = false;
                    else
                        // If not successful, convert will maintain state for next time, also
                        // AddChar will have decremented our byte count, however we need it to remain the same
                        buffer.GetNextByte();
                }
                else if (bLastDevenagariStressAbbr)
                {
                    // Have to do fallback for DevenagariExt
                    if (buffer.Fallback(DevenagariExt))
                        bLastDevenagariStressAbbr = false;
                    else
                        // If not successful, convert will maintain state for next time, also
                        // AddChar will have decremented our byte count, however we need it to remain the same
                        buffer.GetNextByte();
                }
                else if (cLastCharForNoNextNukta != '\0')
                {
                    // Have to add our last char because there was no next nukta
                    if (buffer.AddChar(cLastCharForNoNextNukta))
                        cLastCharForNoNextNukta = cLastCharForNextNukta = '\0';
                    else
                        // If not successful, convert will maintain state for next time, also
                        // AddChar will have decremented our byte count, however we need it to remain the same
                        buffer.GetNextByte();
                }
                // LastVirama is unimportant for flushing decoder.
            }

            // Remember any left over stuff
            // (only remember if we aren't counting)
            if (decoder != null && chars != null)
            {
                // If not flushing or have state (from convert) then need to remember state
                if (!decoder.MustFlush ||
                    cLastCharForNoNextNukta != '\0' || bLastATR || bLastDevenagariStressAbbr)
                {
                    // Either not flushing or had state (from convert)
                    BCLDebug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                        "[ISCIIEncoding.GetChars]Expected no state or not converting or not flushing");
                    decoder.currentCodePage = currentCodePage;
                    decoder.bLastVirama = bLastVirama;
                    decoder.bLastATR = bLastATR;
                    decoder.bLastDevenagariStressAbbr = bLastDevenagariStressAbbr;
                    decoder.cLastCharForNextNukta = cLastCharForNextNukta;
                    decoder.cLastCharForNoNextNukta = cLastCharForNoNextNukta;
                }
                else
                {
                    decoder.currentCodePage = this.defaultCodePage;
                    decoder.bLastVirama = false;
                    decoder.bLastATR = false;
                    decoder.bLastDevenagariStressAbbr = false;
                    decoder.cLastCharForNextNukta = '\0';
                    decoder.cLastCharForNoNextNukta = '\0';
                }
                decoder.m_bytesUsed = buffer.BytesUsed;
            }
            // Otherwise we already did fallback and added extra things

            // Return the # of characters we found
            return buffer.Count;
        }
Example #60
0
        // Decodes GB18030 bytes into UTF-16 chars, or only counts them when "chars" is null.
        //
        // The work happens in two phases:
        //   1. Bytes left over in the GB18030Decoder from a previous call (bLeftOver1..4) are
        //      completed with new input and decoded first.
        //   2. The remaining input is decoded byte by byte: ASCII directly, two-byte sequences via
        //      mapBytesToUnicode, four-byte sequences via GetFourBytesOffset and either
        //      map4BytesToUnicode or a computed surrogate pair.  Anything else goes to buffer.Fallback.
        //
        // An incomplete trailing sequence is stored back into the decoder (unless flushing or
        // counting) so the next call can finish it.
        //
        // Parameters:
        //   bytes / byteCount  - input buffer (asserted non-null / non-negative).
        //   chars / charCount  - output buffer; chars may be null to request counting only.
        //   baseDecoder        - optional GB18030Decoder carrying left-over bytes; may be null.
        //
        // Returns buffer.Count, the number of chars produced (or counted).
        [System.Security.SecurityCritical]  // auto-generated
        public override unsafe int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            // Just need to ASSERT, this is called by something else internal that checked parameters already
            // We'll allow null chars as a count, so chars itself is intentionally not asserted.
            Debug.Assert(bytes != null, "[GB18030Encoding.GetChars]bytes is null");
            Debug.Assert(byteCount >= 0, "[GB18030Encoding.GetChars]byteCount is negative");
            //            Debug.Assert(chars != null, "[GB18030Encoding.GetChars]chars is null");
            Debug.Assert(charCount >= 0, "[GB18030Encoding.GetChars]charCount is negative");

            // Fix our decoder
            GB18030Decoder decoder = (GB18030Decoder)baseDecoder;

            // Get our info.
            EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

            // Working copy of up to four pending bytes; -1 marks an empty slot.
            // Need temp bytes because we can't muss up decoder (it must stay untouched when counting).
            short byte1 = -1;
            short byte2 = -1;
            short byte3 = -1;
            short byte4 = -1;

            // Phase 1: see if there was anything to get out of the decoder
            if (decoder != null && decoder.bLeftOver1 != -1)
            {
                // Need temp bytes because we can't muss up decoder
                byte1 = decoder.bLeftOver1;
                byte2 = decoder.bLeftOver2;
                byte3 = decoder.bLeftOver3;
                byte4 = decoder.bLeftOver4;

                // Loop because we might have too many in buffer
                // This could happen if we are working on a 4 byte sequence, but it isn't valid.
                while (byte1 != -1)
                {
                    // If its not a lead byte, use ? or its value, then scoot them down & try again
                    // This could happen if we previously had a bad 4 byte sequence and this is a trail byte
                    if (!IsGBLeadByte(byte1))
                    {
                        // This is either a ? or ASCII, need 1 char output
                        if (byte1 <= 0x7f)
                        {
                            if (!buffer.AddChar((char)byte1))      // Its ASCII
                                break;
                        }
                        else
                        {
                            if (!buffer.Fallback((byte)byte1))     // Not a valid byte
                                break;
                        }

                        // Shift the pending bytes down one slot and re-examine the new byte1
                        byte1 = byte2;
                        byte2 = byte3;
                        byte3 = byte4;
                        byte4 = -1;
                        continue;
                    }

                    // Read in more bytes as needed: always a 2nd byte, and a 3rd and 4th
                    // when the 2nd byte looks like a four-byte trailing byte.
                    while (byte2 == -1 ||
                           (IsGBFourByteTrailing(byte2) && byte4 == -1))
                    {
                        // Is there any input left?
                        if (!buffer.MoreData)
                        {
                            // No input left to read, do we have to flush?
                            if (!decoder.MustFlush)
                            {
                                // Don't stick stuff in decoder when counting
                                if (chars != null)
                                {
                                    // Don't have to flush, won't have any chars
                                    // Save the partial sequence in the decoder and return
                                    decoder.bLeftOver1 = byte1;
                                    decoder.bLeftOver2 = byte2;
                                    decoder.bLeftOver3 = byte3;
                                    decoder.bLeftOver4 = byte4;
                                }

                                decoder.m_bytesUsed = buffer.BytesUsed;
                                return buffer.Count;
                            }

                            // We'll have to flush, add a ? and scoot them down to try again
                            // We could be trying for a 4 byte sequence but byte 3 could be ascii and should be spit out
                            // Breaking will do this because the unread slots still hold -1
                            // NOTE(review): relies on the IsGB* helpers rejecting -1 - confirm.
                            break;
                        }

                        // Read the next input byte into the first empty slot
                        if (byte2 == -1) byte2 = buffer.GetNextByte();
                        else if (byte3 == -1) byte3 = buffer.GetNextByte();
                        else byte4 = buffer.GetNextByte();
                    }

                    // Now we have our 2 or 4 bytes
                    if (IsGBTwoByteTrailing(byte2))
                    {
                        //
                        // The trailing byte is a GB18030 two-byte sequence trailing byte.
                        //
                        // Table index is (lead << 8) | trail
                        int iTwoBytes = byte1 << 8;
                        iTwoBytes |= unchecked((byte)byte2);
                        if (!buffer.AddChar(mapBytesToUnicode[iTwoBytes], 2))
                            break;

                        // We're done with it
                        byte1 = -1;
                        byte2 = -1;
                    }
                    else if (IsGBFourByteTrailing(byte2) &&
                             IsGBLeadByte(byte3) &&
                             IsGBFourByteTrailing(byte4))
                    {
                        //
                        // Four-byte GB18030
                        //

                        int sFourBytesOffset = GetFourBytesOffset(
                            byte1, byte2, byte3, byte4);

                        // What kind is it?
                        if (sFourBytesOffset <= GBLast4ByteCode)
                        {
                            //
                            // The Unicode will be in the BMP range.
                            //
                            if (!buffer.AddChar(map4BytesToUnicode[sFourBytesOffset], 4))
                                break;
                        }
                        else if (sFourBytesOffset >= GBSurrogateOffset &&
                                 sFourBytesOffset <= GBLastSurrogateOffset)
                        {
                            //
                            // This will be converted to a surrogate pair, need another char
                            //

                            // Split the offset into high (0xd800 + n / 0x400) and low (0xdc00 + n % 0x400) surrogates
                            sFourBytesOffset -= GBSurrogateOffset;
                            if (!buffer.AddChar(unchecked((char)(0xd800 + (sFourBytesOffset / 0x400))),
                                                unchecked((char)(0xdc00 + (sFourBytesOffset % 0x400))), 4))
                                break;
                        }
                        else
                        {
                            // Well-formed four-byte sequence, but its offset is outside both mapped
                            // ranges above, so hand all four bytes to the decoder fallback.
                            if (!buffer.Fallback((byte)byte1, (byte)byte2, (byte)byte3, (byte)byte4))
                                break;
                        }

                        // We're done with this one
                        byte1 = -1;
                        byte2 = -1;
                        byte3 = -1;
                        byte4 = -1;
                    }
                    else
                    {
                        // Not a valid sequence, fall back the 1st byte & scoot them all down 1
                        if (!buffer.Fallback((byte)byte1))
                            break;

                        // Move all bytes down 1
                        byte1 = byte2;
                        byte2 = byte3;
                        byte3 = byte4;
                        byte4 = -1;
                    }
                }
            }

            // Phase 2: main loop over the remaining input.  Bytes that cannot be decoded are
            // handed to buffer.Fallback.
            while (buffer.MoreData)
            {
                byte ch = buffer.GetNextByte();

                // ASCII case is easy
                if (ch <= 0x7f)
                {
                    // ASCII, have room?
                    if (!buffer.AddChar((char)ch))
                        break;              // No room in convert buffer, so stop
                }
                // See if its a lead byte
                else if (IsGBLeadByte(ch))
                {
                    // ch is a lead byte, is there more input after it?
                    if (buffer.MoreData)
                    {
                        byte ch2 = buffer.GetNextByte();
                        if (IsGBTwoByteTrailing(ch2))
                        {
                            //
                            // The trailing byte is a GB18030 two-byte sequence trailing byte.
                            //

                            //
                            // Two-byte GB18030
                            //
                            // Table index is (lead << 8) | trail
                            int iTwoBytes = ch << 8;
                            iTwoBytes |= ch2;
                            if (!buffer.AddChar(mapBytesToUnicode[iTwoBytes], 2))
                                break;
                        }
                        else if (IsGBFourByteTrailing(ch2))
                        {
                            // Do we have input for a Four Byte Sequence? (already read 2, need 2 more)
                            if (buffer.EvenMoreData(2))
                            {
                                // Is it a valid 4 byte sequence?
                                byte ch3 = buffer.GetNextByte();
                                byte ch4 = buffer.GetNextByte();
                                if (IsGBLeadByte(ch3) &&
                                    IsGBFourByteTrailing(ch4))
                                {
                                    //
                                    // Four-byte GB18030
                                    //
                                    int sFourBytesOffset = GetFourBytesOffset(ch, ch2, ch3, ch4);

                                    // What kind is it?
                                    // We'll be at least 1 BMP char or a fallback.

                                    if (sFourBytesOffset <= GBLast4ByteCode)
                                    {
                                        //
                                        // The Unicode will be in the BMP range.
                                        //
                                        if (!buffer.AddChar(map4BytesToUnicode[sFourBytesOffset], 4))
                                            break;
                                    }
                                    else if (sFourBytesOffset >= GBSurrogateOffset &&
                                             sFourBytesOffset <= GBLastSurrogateOffset)
                                    {
                                        //
                                        // This will be converted to a surrogate pair, need another char
                                        //

                                        // Split the offset into high and low surrogates
                                        sFourBytesOffset -= GBSurrogateOffset;
                                        if (!buffer.AddChar(unchecked((char)(0xd800 + (sFourBytesOffset / 0x400))),
                                                             unchecked((char)(0xdc00 + (sFourBytesOffset % 0x400))), 4))
                                            break;
                                    }
                                    else
                                    {
                                        // Real GB18030 codepoint, but can't be mapped to unicode
                                        if (!buffer.Fallback(ch, ch2, ch3, ch4))
                                            break;
                                    }
                                }
                                else
                                {
                                    // Not a valid 2 or 4 byte sequence: un-read ch2..ch4, fall back ch alone,
                                    // and let the loop try the other 3 bytes again
                                    buffer.AdjustBytes(-3);
                                    if (!buffer.Fallback(ch))
                                        break;
                                }
                            }
                            else
                            {
                                // Not enough input for 4 bytes, have 2 already, may be one more
                                // Lead byte but no place to stick it
                                if (decoder != null && !decoder.MustFlush)
                                {
                                    // (make sure not to set decoder if counting, so check chars)
                                    if (chars != null)
                                    {
                                        // We'll be able to stick the remainder in the decoder
                                        byte1 = ch;
                                        byte2 = ch2;

                                        if (buffer.MoreData)
                                            byte3 = buffer.GetNextByte();
                                        else
                                            byte3 = -1;

                                        byte4 = -1;
                                    }
                                    break;
                                }

                                // Won't go in decoder, fall back the two bytes we have.
                                if (!buffer.Fallback(ch, ch2))
                                    break;
                            }
                        }
                        else
                        {
                            // Unknown byte sequence, fall back lead byte and try 2nd one again
                            buffer.AdjustBytes(-1);
                            if (!buffer.Fallback(ch))
                                break;
                        }
                    }
                    else
                    {
                        // Lead byte but don't know about trail byte
                        // (make sure not to set decoder if counting, so check chars)
                        if (decoder != null && !decoder.MustFlush)
                        {
                            // We'll be able to stick it in the decoder
                            // (don't actually do it when counting though)
                            if (chars != null)
                            {
                                byte1 = ch;
                                byte2 = -1;
                                byte3 = -1;
                                byte4 = -1;
                            }
                            break;
                        }

                        // No decoder to hold it (or flushing): fall back the lone lead byte
                        if (!buffer.Fallback(ch))
                            break;
                    }
                }
                else
                {
                    // Not ASCII and not a lead byte, fall it back if we have room
                    if (!buffer.Fallback(ch))
                        break;
                }
            }

            // Store whatever is still pending (possibly nothing, i.e. all -1) back in the decoder
            // (make sure not to set decoder if counting, so check chars)
            if (decoder != null)
            {
                if (chars != null)
                {
                    decoder.bLeftOver1 = byte1;
                    decoder.bLeftOver2 = byte2;
                    decoder.bLeftOver3 = byte3;
                    decoder.bLeftOver4 = byte4;
                }
                decoder.m_bytesUsed = buffer.BytesUsed;
            }

            // Return the # of characters we found
            return buffer.Count;
        }