コード例 #1
0
        /// <summary>
        /// Encodes <paramref name="charCount"/> UTF-16 code units read from <paramref name="chars"/>,
        /// beginning at <paramref name="charIndex"/>, as UTF-8 into <paramref name="bytes"/>,
        /// beginning at <paramref name="byteIndex"/>.
        /// </summary>
        /// <param name="chars">Source character buffer; asserted to be non-null.</param>
        /// <param name="charIndex">Index of the first character to encode.</param>
        /// <param name="charCount">Number of characters to encode.</param>
        /// <param name="bytes">Destination byte array.</param>
        /// <param name="byteIndex">Index in <paramref name="bytes"/> at which writing starts.</param>
        /// <param name="encoder">
        /// Optional encoder state. If it has <c>storedSurrogate</c> set, its <c>surrogateChar</c> is
        /// consumed as a pending high surrogate and the flag is cleared. If it is non-null and
        /// <c>mustFlush</c> is false, a trailing high surrogate is stored back into it.
        /// </param>
        /// <returns>The number of bytes written to <paramref name="bytes"/>.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown for an unpaired surrogate while <c>isThrowException</c> is set, or when an
        /// <see cref="IndexOutOfRangeException"/> is raised inside the encoding loop.
        /// </exception>
        private unsafe int GetBytes(char *chars, int charIndex, int charCount, byte[] bytes, int byteIndex, UTF8Encoder encoder)
        {
            BCLDebug.Assert(chars != null, "[UTF8Encoding.GetBytes]chars!=null");

            int stopIndex      = charIndex + charCount;
            int firstByteIndex = byteIndex;

            // A positive value is a high surrogate still waiting for its partner; -1 means "none".
            int pendingHigh = -1;
            if (encoder != null && encoder.storedSurrogate)
            {
                pendingHigh             = encoder.surrogateChar;
                encoder.storedSurrogate = false;
            }

            try
            {
                while (charIndex < stopIndex)
                {
                    char current = chars[charIndex];
                    charIndex++;

                    if (pendingHigh > 0)
                    {
                        if (StringInfo.IsLowSurrogate(current))
                        {
                            // Complete pair: combine both halves into one value and emit four bytes.
                            int scalar = ((pendingHigh - CharacterInfo.HIGH_SURROGATE_START) << 10)
                                         + (current - CharacterInfo.LOW_SURROGATE_START)
                                         + 0x10000;
                            bytes[byteIndex++] = (byte)(0xF0 | ((scalar >> 18) & 0x07));
                            bytes[byteIndex++] = (byte)(0x80 | ((scalar >> 12) & 0x3F));
                            bytes[byteIndex++] = (byte)(0x80 | ((scalar >> 6) & 0x3F));
                            bytes[byteIndex++] = (byte)(0x80 | (scalar & 0x3F));
                            pendingHigh        = -1;
                            continue;
                        }

                        // The pending high surrogate has no partner.
                        bool startsNewPair = StringInfo.IsHighSurrogate(current);
                        if (isThrowException)
                        {
                            throw new ArgumentException(Environment.GetResourceString("Argument_InvalidHighSurrogate", (charIndex - 1)),
                                                        "chars");
                        }

                        // Emit the orphaned high surrogate on its own.
                        EncodeThreeBytes(pendingHigh, bytes, ref byteIndex);

                        if (startsNewPair)
                        {
                            // The current char may itself open a valid pair; keep waiting.
                            pendingHigh = current;
                        }
                        else
                        {
                            // Ordinary char: step back so the next iteration encodes it normally.
                            pendingHigh = -1;
                            charIndex--;
                        }
                        continue;
                    }

                    if (current < 0x0080)
                    {
                        // One-byte form.
                        bytes[byteIndex++] = (byte)current;
                        continue;
                    }

                    if (current < 0x0800)
                    {
                        // Two-byte form.
                        bytes[byteIndex++] = (byte)(0xC0 | ((current >> 6) & 0x1F));
                        bytes[byteIndex++] = (byte)(0x80 | (current & 0x3F));
                        continue;
                    }

                    if (StringInfo.IsHighSurrogate(current))
                    {
                        // Start of a surrogate pair; nothing is written until the partner is seen.
                        pendingHigh = current;
                        continue;
                    }

                    if (StringInfo.IsLowSurrogate(current) && isThrowException)
                    {
                        throw new ArgumentException(
                                  String.Format(Environment.GetResourceString("Argument_InvalidLowSurrogate"), (charIndex - 1)), "chars");
                    }

                    // Three-byte form: the char is >= 0x0800 and is not a high surrogate.
                    bytes[byteIndex++] = (byte)(0xE0 | ((current >> 12) & 0x0F));
                    bytes[byteIndex++] = (byte)(0x80 | ((current >> 6) & 0x3F));
                    bytes[byteIndex++] = (byte)(0x80 | (current & 0x3F));
                }

                if (pendingHigh > 0)
                {
                    bool carryToNextCall = (encoder != null && !encoder.mustFlush);
                    if (carryToNextCall)
                    {
                        // Remember the dangling high surrogate for the next call.
                        encoder.surrogateChar   = pendingHigh;
                        encoder.storedSurrogate = true;
                    }
                    else
                    {
                        if (isThrowException)
                        {
                            throw new ArgumentException(
                                      String.Format(Environment.GetResourceString("Argument_InvalidHighSurrogate"), (charIndex - 1)), "chars");
                        }
                        EncodeThreeBytes(pendingHigh, bytes, ref byteIndex);
                    }
                }
            }
            catch (IndexOutOfRangeException)
            {
                throw new ArgumentException(Environment.GetResourceString("Argument_ConversionOverflow"));
            }

            return byteIndex - firstByteIndex;
        }
コード例 #2
0
        /// <summary>
        /// Counts the UTF-8 bytes needed to encode <paramref name="count"/> UTF-16 code units
        /// read from <paramref name="chars"/>, beginning at <paramref name="index"/>.
        /// </summary>
        /// <param name="chars">Source character buffer; asserted to be non-null.</param>
        /// <param name="index">Index of the first character to examine.</param>
        /// <param name="count">Number of characters to examine.</param>
        /// <param name="encoder">
        /// Optional encoder state. If it has <c>storedSurrogate</c> set, counting starts as if a
        /// high surrogate were already pending. The encoder is only read here, never modified.
        /// </param>
        /// <returns>The number of bytes the characters encode to.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown for an unpaired surrogate while <c>isThrowException</c> is set.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when the running byte count becomes negative, which is taken to mean it overflowed.
        /// </exception>
        internal unsafe int GetByteCount(char *chars, int index, int count, UTF8Encoder encoder)
        {
            BCLDebug.Assert(chars != null, "[UTF8Encoding.GetByteCount]chars!=null");

            int end       = index + count;
            int byteCount = 0;

            // True while a high surrogate is waiting for its low partner.
            bool inSurrogate = (encoder != null && encoder.storedSurrogate);

            // A negative byteCount is treated as a wrapped (overflowed) total, so stop scanning.
            // NOTE(review): this relies on unchecked integer arithmetic - confirm build settings.
            while (index < end && byteCount >= 0)
            {
                char ch = chars[index++];

                if (inSurrogate)
                {
                    //
                    // The previous char was a high surrogate, so a low surrogate is expected here.
                    //
                    if (StringInfo.IsLowSurrogate(ch))
                    {
                        inSurrogate = false;
                        //
                        // One surrogate pair is translated into 4 bytes of UTF-8.
                        //
                        byteCount += 4;
                    }
                    else if (StringInfo.IsHighSurrogate(ch))
                    {
                        // Two high surrogates in a row.
                        if (isThrowException)
                        {
                            throw new ArgumentException(Environment.GetResourceString("Argument_InvalidHighSurrogate", (index - 1)),
                                                        "chars");
                        }
                        // Count the previous high surrogate as a three-byte sequence.
                        byteCount += 3;
                        // inSurrogate stays true: this char could start another valid pair.
                    }
                    else
                    {
                        if (isThrowException)
                        {
                            throw new ArgumentException(Environment.GetResourceString("Argument_InvalidHighSurrogate", (index - 1)),
                                                        "chars");
                        }
                        // Count the previous high surrogate as a three-byte sequence.
                        byteCount += 3;
                        // Not a surrogate. Step back so the next iteration counts this char normally.
                        inSurrogate = false;
                        index--;
                    }
                }
                else if (ch < 0x0080)
                {
                    byteCount++;
                }
                else if (ch < 0x0800)
                {
                    byteCount += 2;
                }
                else
                {
                    if (StringInfo.IsHighSurrogate(ch))
                    {
                        //
                        // Start of a surrogate pair; counted once its partner (or lack of one) is known.
                        //
                        inSurrogate = true;
                    }
                    else if (StringInfo.IsLowSurrogate(ch) && isThrowException)
                    {
                        //
                        // Low surrogate without a preceding high surrogate.
                        //
                        throw new ArgumentException(
                                  String.Format(Environment.GetResourceString("Argument_InvalidLowSurrogate"), (index - 1)), "chars");
                    }
                    else
                    {
                        byteCount += 3;
                    }
                }
            }

            // Check for overflow accumulated inside the loop.
            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
            }

            if (inSurrogate)
            {
                // A high surrogate is still pending. It is counted only when there is no encoder
                // to carry it over, or when the encoder must flush.
                if (encoder == null || encoder.mustFlush)
                {
                    if (isThrowException)
                    {
                        throw new ArgumentException(
                                  String.Format(Environment.GetResourceString("Argument_InvalidHighSurrogate"), (index - 1)), "chars");
                    }
                    byteCount += 3;

                    // This last addition can wrap too; re-check so a negative count is never returned.
                    if (byteCount < 0)
                    {
                        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
                    }
                }
            }
            return(byteCount);
        }