//
// Encodes charCount UTF-16 code units, starting at chars[charIndex], as UTF-8 into
// bytes beginning at byteIndex, and returns the number of bytes written
// (final byteIndex minus the starting byteIndex).
//
// encoder may be null. When it is non-null and has a stored high surrogate, that
// surrogate is consumed (storedSurrogate is cleared) and treated as if it preceded
// the first char. A high surrogate still pending at the end of the input is stored
// back on the encoder when encoder.mustFlush is false; otherwise it is treated as
// an unpaired surrogate.
//
// Unpaired surrogates throw ArgumentException when isThrowException is set (a member
// declared outside this view). Otherwise an unpaired high surrogate is passed to
// EncodeThreeBytes (presumably emitting its three-byte form; helper not visible here)
// and an unpaired low surrogate is written as an ordinary three-byte sequence.
//
// An IndexOutOfRangeException raised inside the loop (e.g. running past the end of
// bytes) is rethrown as ArgumentException("Argument_ConversionOverflow").
//
private unsafe int GetBytes(char *chars, int charIndex, int charCount, byte[] bytes, int byteIndex, UTF8Encoder encoder) {
    BCLDebug.Assert(chars != null, "[UTF8Encoding.GetBytes]chars!=null");

    int limit = charIndex + charCount;
    int firstByteIndex = byteIndex;

    // A value <= 0 means "no high surrogate is waiting for its partner".
    int pendingHigh = -1;
    if (encoder != null && encoder.storedSurrogate) {
        pendingHigh = encoder.surrogateChar;
        encoder.storedSurrogate = false;
    }

    try {
        while (charIndex < limit) {
            char current = chars[charIndex++];

            if (pendingHigh > 0) {
                // The previous code unit was a high surrogate; a low surrogate should follow.
                if (StringInfo.IsLowSurrogate(current)) {
                    // Complete pair: combine both halves into one scalar value.
                    int scalar = (pendingHigh - CharacterInfo.HIGH_SURROGATE_START) << 10;
                    scalar += (current - CharacterInfo.LOW_SURROGATE_START);
                    scalar += 0x10000;

                    // Four-byte sequence: 3 + 6 + 6 + 6 payload bits.
                    bytes[byteIndex++] = (byte)(0xF0 | ((scalar >> 18) & 0x07));
                    bytes[byteIndex++] = (byte)(0x80 | ((scalar >> 12) & 0x3F));
                    bytes[byteIndex++] = (byte)(0x80 | ((scalar >> 6) & 0x3F));
                    bytes[byteIndex++] = (byte)(0x80 | (scalar & 0x3F));

                    pendingHigh = -1;
                    continue;
                }

                // The pair is broken. Either another high surrogate arrived, or a
                // char that is not a surrogate at all.
                bool startsNewPair = StringInfo.IsHighSurrogate(current);

                if (isThrowException) {
                    throw new ArgumentException(Environment.GetResourceString("Argument_InvalidHighSurrogate", (charIndex - 1)), "chars");
                }

                // Emit the dangling high surrogate on its own.
                EncodeThreeBytes(pendingHigh, bytes, ref byteIndex);

                if (startsNewPair) {
                    // The current char may be the first half of a valid pair.
                    pendingHigh = current;
                } else {
                    // Push the char back so the next iteration encodes it normally.
                    pendingHigh = -1;
                    charIndex--;
                }
                continue;
            }

            if (current < 0x0080) {
                // Single byte.
                bytes[byteIndex++] = (byte)current;
                continue;
            }

            if (current < 0x0800) {
                // Two-byte sequence: 5 + 6 payload bits.
                bytes[byteIndex++] = (byte)(0xC0 | ((current >> 6) & 0x1F));
                bytes[byteIndex++] = (byte)(0x80 | (current & 0x3F));
                continue;
            }

            if (StringInfo.IsHighSurrogate(current)) {
                // First half of a surrogate pair; wait for the next char.
                pendingHigh = current;
                continue;
            }

            if (StringInfo.IsLowSurrogate(current) && isThrowException) {
                // Low surrogate with no preceding high surrogate.
                throw new ArgumentException(
                    String.Format(Environment.GetResourceString("Argument_InvalidLowSurrogate"), (charIndex - 1)), "chars");
            }

            // At this point the char is >= 0x0800 and is not a high surrogate.
            // Three-byte sequence: 4 + 6 + 6 payload bits.
            bytes[byteIndex++] = (byte)(0xE0 | ((current >> 12) & 0x0F));
            bytes[byteIndex++] = (byte)(0x80 | ((current >> 6) & 0x3F));
            bytes[byteIndex++] = (byte)(0x80 | (current & 0x3F));
        }

        if (pendingHigh > 0) {
            bool canDefer = (encoder != null && !encoder.mustFlush);
            if (canDefer) {
                // Keep the high surrogate on the encoder for the next call.
                encoder.surrogateChar = pendingHigh;
                encoder.storedSurrogate = true;
            } else {
                if (isThrowException) {
                    throw new ArgumentException(
                        String.Format(Environment.GetResourceString("Argument_InvalidHighSurrogate"), (charIndex - 1)), "chars");
                }
                EncodeThreeBytes(pendingHigh, bytes, ref byteIndex);
            }
        }
    } catch (IndexOutOfRangeException) {
        throw new ArgumentException(Environment.GetResourceString("Argument_ConversionOverflow"));
    }

    return (byteIndex - firstByteIndex);
}
//
// Counts the UTF-8 bytes needed for count UTF-16 code units starting at chars[index].
//
// encoder may be null. When it is non-null and encoder.storedSurrogate is set, counting
// starts as if a high surrogate preceded the first char. The encoder is only read here,
// never modified.
//
// Byte costs: 1 for chars below 0x0080, 2 for chars below 0x0800, 4 for a high/low
// surrogate pair, 3 for everything else (including unpaired surrogates when
// isThrowException is not set).
//
// Throws ArgumentException for an unpaired surrogate when isThrowException is set (a
// member declared outside this view). Throws ArgumentOutOfRangeException("count") when
// the total does not fit in an Int32.
//
// A trailing high surrogate contributes 3 bytes only when encoder is null or
// encoder.mustFlush is set; otherwise nothing is added for it here.
//
internal unsafe int GetByteCount(char *chars, int index, int count, UTF8Encoder encoder) {
    BCLDebug.Assert(chars != null, "[UTF8Encoding.GetByteCount]chars!=null");

    int end = index + count;
    int byteCount = 0;

    // True while a high surrogate is waiting for its low partner.
    bool inSurrogate = (encoder != null && encoder.storedSurrogate);

    // The loop also stops once byteCount has gone negative, i.e. the running
    // total has wrapped past Int32.MaxValue.
    while (index < end && byteCount >= 0) {
        char ch = chars[index++];

        if (inSurrogate) {
            //
            // The previous char was a high surrogate, so a low surrogate is expected here.
            //
            if (StringInfo.IsLowSurrogate(ch)) {
                inSurrogate = false;
                // One surrogate pair is translated into 4 bytes of UTF-8.
                byteCount += 4;
            } else if (StringInfo.IsHighSurrogate(ch)) {
                // Two high surrogates in a row.
                if (isThrowException) {
                    throw new ArgumentException(Environment.GetResourceString("Argument_InvalidHighSurrogate", (index - 1)), "chars");
                }
                // Count the previous high surrogate on its own.
                byteCount += 3;
                // inSurrogate stays true: this char could start another valid pair.
            } else {
                if (isThrowException) {
                    throw new ArgumentException(Environment.GetResourceString("Argument_InvalidHighSurrogate", (index - 1)), "chars");
                }
                // Count the previous high surrogate on its own.
                byteCount += 3;
                // Not a surrogate. Put the char back so it is counted normally next iteration.
                inSurrogate = false;
                index--;
            }
        } else if (ch < 0x0080) {
            byteCount++;
        } else if (ch < 0x0800) {
            byteCount += 2;
        } else if (StringInfo.IsHighSurrogate(ch)) {
            // Found the start of a surrogate pair; its cost is added once it is resolved.
            inSurrogate = true;
        } else if (StringInfo.IsLowSurrogate(ch) && isThrowException) {
            // Found a low surrogate without encountering a high surrogate first.
            throw new ArgumentException(
                String.Format(Environment.GetResourceString("Argument_InvalidLowSurrogate"), (index - 1)), "chars");
        } else {
            byteCount += 3;
        }
    }

    // Check for overflow accumulated inside the loop.
    if (byteCount < 0) {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
    }

    if (inSurrogate && (encoder == null || encoder.mustFlush)) {
        // The input ended on a high surrogate that cannot be carried to a later call.
        if (isThrowException) {
            throw new ArgumentException(
                String.Format(Environment.GetResourceString("Argument_InvalidHighSurrogate"), (index - 1)), "chars");
        }

        // This addition happens after the overflow check above, so guard it
        // separately; otherwise a total within 3 of Int32.MaxValue would wrap
        // and be returned as a negative count.
        if (byteCount > int.MaxValue - 3) {
            throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
        }
        byteCount += 3;
    }

    return (byteCount);
}