Пример #1
0
 /// <summary>
 /// Decodes a range of bytes into a caller-supplied character array using the
 /// <c>UTF8Encoding</c> member of the enclosing type (declared outside this snippet).
 /// </summary>
 /// <param name="buffer">Source bytes.</param>
 /// <param name="offset">Index in <paramref name="buffer"/> of the first byte to decode.</param>
 /// <param name="count">Number of bytes to decode.</param>
 /// <param name="chars">Destination character array.</param>
 /// <param name="charOffset">Index in <paramref name="chars"/> at which writing starts.</param>
 /// <returns>The value returned by <c>GetChars</c>, i.e. the number of characters written.</returns>
 /// <remarks>
 /// A <see cref="DecoderFallbackException"/> raised while decoding is converted into the
 /// exception built by <c>XmlExceptionHelper.CreateEncodingException</c> and thrown through
 /// <c>DiagnosticUtility.ExceptionUtility.ThrowHelperError</c>.
 /// </remarks>
 public static int ToChars(byte[] buffer, int offset, int count, char[] chars, int charOffset)
 {
     int charsWritten;

     try
     {
         charsWritten = UTF8Encoding.GetChars(buffer, offset, count, chars, charOffset);
     }
     catch (DecoderFallbackException decodeFailure)
     {
         // Translate the low-level decoding failure into the XML-specific encoding exception.
         var encodingError = XmlExceptionHelper.CreateEncodingException(buffer, offset, count, decodeFailure);
         throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(encodingError);
     }

     return charsWritten;
 }
Пример #2
0
 /// <summary>
 /// Decodes a range of bytes into a string using the <c>UnicodeEncoding</c> member of the
 /// enclosing type (declared outside this snippet).
 /// </summary>
 /// <param name="buffer">Source bytes.</param>
 /// <param name="offset">Index in <paramref name="buffer"/> of the first byte to decode.</param>
 /// <param name="count">Number of bytes to decode.</param>
 /// <returns>The decoded string.</returns>
 /// <remarks>
 /// A <see cref="DecoderFallbackException"/> raised while decoding is converted into the
 /// exception built by <c>XmlExceptionHelper.CreateEncodingException</c> and thrown through
 /// <c>DiagnosticUtility.ExceptionUtility.ThrowHelperError</c>.
 /// </remarks>
 public static string ToStringUnicode(byte[] buffer, int offset, int count)
 {
     string decoded;

     try
     {
         decoded = UnicodeEncoding.GetString(buffer, offset, count);
     }
     catch (DecoderFallbackException decodeFailure)
     {
         // Translate the low-level decoding failure into the XML-specific encoding exception.
         var encodingError = XmlExceptionHelper.CreateEncodingException(buffer, offset, count, decodeFailure);
         throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(encodingError);
     }

     return decoded;
 }
Пример #3
0
 /// <summary>
 /// Encodes <paramref name="value"/> into bytes using the <c>UTF8Encoding</c> member of the
 /// enclosing type (declared outside this snippet).
 /// </summary>
 /// <param name="value">The string to encode.</param>
 /// <returns>The encoded bytes.</returns>
 /// <remarks>
 /// An <see cref="EncoderFallbackException"/> raised while encoding (for example for an
 /// unpaired surrogate, when the encoding is configured to throw on invalid input) is
 /// converted into the exception built by <c>XmlExceptionHelper.CreateEncodingException</c>
 /// and thrown through <c>DiagnosticUtility.ExceptionUtility.ThrowHelperError</c>.
 /// </remarks>
 public static byte[] ToBytes(string value)
 {
     try
     {
         return UTF8Encoding.GetBytes(value);
     }
     catch (EncoderFallbackException exception)
     {
         // GetBytes is an *encoding* operation: it reports invalid input through
         // EncoderFallbackException, never DecoderFallbackException. Catching the decoder
         // exception here meant encoding failures escaped without being translated.
         throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(XmlExceptionHelper.CreateEncodingException(value, exception));
     }
 }
Пример #4
0
        /// <summary>
        /// Attempts to decode up to <paramref name="count"/> characters from this value's bytes
        /// into <paramref name="chars"/>, consuming the bytes that were decoded.
        /// </summary>
        /// <param name="chars">Destination character array.</param>
        /// <param name="offset">Index in <paramref name="chars"/> at which writing starts.</param>
        /// <param name="count">Maximum number of characters to write.</param>
        /// <param name="actual">
        /// Receives the number of characters written (0 when the value type is not supported).
        /// </param>
        /// <returns>
        /// The result of <c>TryReadUnicodeChars</c> when the value type is Unicode;
        /// <c>false</c> when the value type is neither Unicode nor UTF8; otherwise <c>true</c>.
        /// </returns>
        /// <remarks>
        /// On the UTF8 path the method reads from <c>_bufferReader.Buffer</c> starting at
        /// <c>_offset</c> for <c>_length</c> bytes, and on exit stores the advanced position back
        /// into <c>_offset</c> / <c>_length</c>, so a subsequent call continues where this one stopped.
        /// A <see cref="FormatException"/> raised while decoding is converted into the exception built
        /// by <c>XmlExceptionHelper.CreateEncodingException</c>.
        /// </remarks>
        public bool TryReadChars(char[] chars, int offset, int count, out int actual)
        {
            DiagnosticUtility.DebugAssert(offset + count <= chars.Length, string.Format("offset '{0}' + count '{1}' MUST BE <= chars.Length '{2}'", offset, count, chars.Length));

            // Unicode values are handled by a dedicated routine.
            if (_type == ValueHandleType.Unicode)
            {
                return(TryReadUnicodeChars(chars, offset, count, out actual));
            }

            // Only UTF8 is handled below; any other value type cannot be read as chars here.
            if (_type != ValueHandleType.UTF8)
            {
                actual = 0;
                return(false);
            }

            // Output cursor and remaining capacity in the destination array.
            int charOffset = offset;
            int charCount  = count;

            // Input cursor and remaining bytes of this value.
            byte[] bytes      = _bufferReader.Buffer;
            int    byteOffset = _offset;
            int    byteCount  = _length;
            // Set when the next code point needs two chars (surrogate pair) but only one slot is left.
            bool   insufficientSpaceInCharsArray = false;

            while (true)
            {
                while (charCount > 0 && byteCount > 0)
                {
                    // fast path for codepoints U+0000 - U+007F
                    byte b = bytes[byteOffset];
                    if (b >= 0x80)
                    {
                        break;
                    }
                    chars[charOffset] = (char)b;
                    byteOffset++;
                    byteCount--;
                    charOffset++;
                    charCount--;
                }

                // Done when the output is full, the input is exhausted, or the previous pass
                // found that the next code point does not fit in the remaining output space.
                if (charCount == 0 || byteCount == 0 || insufficientSpaceInCharsArray)
                {
                    break;
                }

                int actualByteCount;
                int actualCharCount;

                // Second constructor argument (throwOnInvalidBytes) is true: invalid sequences raise an exception.
                UTF8Encoding encoding = new UTF8Encoding(false, true);
                try
                {
                    // If we're asking for more than are possibly available, or more than are truly available then we can return the entire thing
                    if (charCount >= encoding.GetMaxCharCount(byteCount) || charCount >= encoding.GetCharCount(bytes, byteOffset, byteCount))
                    {
                        actualCharCount = encoding.GetChars(bytes, byteOffset, byteCount, chars, charOffset);
                        actualByteCount = byteCount;
                    }
                    else
                    {
                        Decoder decoder = encoding.GetDecoder();

                        // Since x bytes can never generate more than x characters this is a safe estimate as to what will fit
                        actualByteCount = Math.Min(charCount, byteCount);

                        // We use a decoder so we don't error if we fall across a character boundary
                        actualCharCount = decoder.GetChars(bytes, byteOffset, actualByteCount, chars, charOffset);

                        // We might've gotten zero characters though if < 4 bytes were requested because
                        // codepoints from U+0000 - U+FFFF can be up to 3 bytes in UTF-8, and represented as ONE char
                        // codepoints from U+10000 - U+10FFFF (last Unicode codepoint representable in UTF-8) are represented by up to 4 bytes in UTF-8
                        //                                    and represented as TWO chars (high+low surrogate)
                        // (e.g. 1 char requested, 1 char in the buffer represented in 3 bytes)
                        while (actualCharCount == 0)
                        {
                            // Note that by the time we arrive here, if actualByteCount == 3, the next decoder.GetChars() call will read the 4th byte
                            // if we don't bail out, since the while loop advances actualByteCount only after reading the byte.
                            if (actualByteCount >= 3 && charCount < 2)
                            {
                                // If we reach here, it means that we're:
                                // - trying to decode more than 3 bytes and,
                                // - there is only one char left of charCount where we're stuffing decoded characters.
                                // In this case, we need to back off since decoding > 3 bytes in UTF-8 means that we will get 2 16-bit chars
                                // (a high surrogate and a low surrogate) - the Decoder will attempt to provide both at once
                                // and an ArgumentException will be thrown complaining that there's not enough space in the output char array.

                                // After this break actualCharCount is still 0, so the GetByteCount call below yields
                                // actualByteCount = 0 and nothing is consumed; decoder goes out of scope so its state no longer matters

                                insufficientSpaceInCharsArray = true;
                                break;
                            }
                            else
                            {
                                DiagnosticUtility.DebugAssert(byteOffset + actualByteCount < bytes.Length,
                                                              string.Format("byteOffset {0} + actualByteCount {1} MUST BE < bytes.Length {2}", byteOffset, actualByteCount, bytes.Length));

                                // Request one more byte at a time until at least one character is produced
                                actualCharCount = decoder.GetChars(bytes, byteOffset + actualByteCount, 1, chars, charOffset);
                                actualByteCount++;
                            }
                        }

                        // Now that we actually retrieved some characters, figure out how many bytes it actually was
                        // (re-encoding the produced chars gives the exact byte length consumed for them)
                        actualByteCount = encoding.GetByteCount(chars, charOffset, actualCharCount);
                    }
                }
                // NOTE(review): per the .NET documentation a UTF8Encoding constructed with throwOnInvalidBytes=true
                // reports invalid sequences as an ArgumentException (DecoderFallbackException), which this
                // FormatException handler would not catch - confirm whether that is intended.
                catch (FormatException exception)
                {
                    throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(XmlExceptionHelper.CreateEncodingException(bytes, byteOffset, byteCount, exception));
                }

                // Advance
                byteOffset += actualByteCount;
                byteCount  -= actualByteCount;

                charOffset += actualCharCount;
                charCount  -= actualCharCount;
            }

            // Persist the consumed position so the next read resumes after the decoded bytes.
            _offset = byteOffset;
            _length = byteCount;

            // Characters written = requested capacity minus what is left unused.
            actual = (count - charCount);
            return(true);
        }