//
// Encodes charCount characters of chars, starting at charIndex, into bytes starting at
// byteIndex by delegating to m_encoding.GetBytes.
//
// Surrogate handling across calls:
//   - If a high surrogate was held back by a previous call (m_highSurrogate is non-zero),
//     it is prepended to the input in a temporary array before encoding.
//   - If the last character of the (possibly extended) input is a high surrogate, it is
//     not encoded now; it is saved in m_highSurrogate instead.
//
// Returns 0 when charCount is 0; otherwise the value returned by m_encoding.GetBytes.
//
// Throws:
//   ArgumentNullException       - chars or bytes is null.
//   ArgumentOutOfRangeException - charIndex or charCount is negative, the range
//                                 charIndex/charCount does not fit in chars, or byteIndex
//                                 is outside [0, bytes.Length].
//   ArgumentException           - charCount is non-zero and byteIndex == bytes.Length.
//
// NOTE(review): the flush argument is not read by this method, so a trailing high
// surrogate is held back even when flush is true - confirm this is intended.
//
public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool flush)
{
    if (chars == null || bytes == null)
    {
        throw new ArgumentNullException((chars == null ? "chars" : "bytes"), Environment.GetResourceString("ArgumentNull_Array"));
    }
    if (charIndex < 0 || charCount < 0)
    {
        throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    if (chars.Length - charIndex < charCount)
    {
        throw new ArgumentOutOfRangeException("chars", Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    }
    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException("byteIndex", Environment.GetResourceString("ArgumentOutOfRange_Index"));
    }
    if (charCount == 0)
    {
        return(0);
    }
    if (byteIndex == bytes.Length)
    {
        throw new ArgumentException(Environment.GetResourceString("Argument_ConversionOverflow"));
    }

    char[] sourceChars = chars;
    if (m_highSurrogate != '\x0000')
    {
        // Build [stored high surrogate] + [requested slice of chars]. From here on
        // charIndex/charCount describe sourceChars, NOT the caller's chars array.
        sourceChars = new char[charCount + 1];
        sourceChars[0] = m_highSurrogate;
        Array.Copy(chars, charIndex, sourceChars, 1, charCount);
        charIndex = 0;
        charCount++;    // Add the high surrogate
    }

    // Must be read from sourceChars: after the copy above, charIndex/charCount no longer
    // address the original chars array (using chars here reads the wrong element or
    // runs past its end).
    char lastChar = sourceChars[charIndex + charCount - 1];

    int result;
    if (StringInfo.IsHighSurrogate(lastChar))
    {
        // Hold the dangling high surrogate back and encode everything before it.
        m_highSurrogate = lastChar;
        result = m_encoding.GetBytes(sourceChars, charIndex, charCount - 1, bytes, byteIndex);
    }
    else
    {
        m_highSurrogate = '\x0000';
        result = m_encoding.GetBytes(sourceChars, charIndex, charCount, bytes, byteIndex);
    }
    return(result);
}
//
// Computes how many bytes m_encoding reports for count characters of chars starting at
// index, taking the held-back high surrogate (m_highSurrogate) into account.
//
// A stored high surrogate is prepended to the input in a temporary array. If the last
// character of the resulting input is a high surrogate, it is left out of the count.
// This method does not modify m_highSurrogate.
//
// Returns 0 when count is 0; otherwise the value returned by m_encoding.GetByteCount.
//
// Throws:
//   ArgumentNullException       - chars is null.
//   ArgumentOutOfRangeException - index or count is negative, or the range index/count
//                                 does not fit in chars.
//
// NOTE(review): the flush argument is not read by this method.
//
public override int GetByteCount(char[] chars, int index, int count, bool flush)
{
    if (chars == null)
    {
        throw new ArgumentNullException("chars", Environment.GetResourceString("ArgumentNull_Array"));
    }
    if (index < 0)
    {
        throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    if (chars.Length - index < count)
    {
        throw new ArgumentOutOfRangeException("chars", Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    }
    if (count == 0)
    {
        return 0;
    }

    // Describe the text to measure as (buffer, start, length).
    char[] buffer;
    int start;
    int length;
    if (m_highSurrogate == '\x0000')
    {
        // Nothing pending: measure the caller's slice directly.
        buffer = chars;
        start = index;
        length = count;
    }
    else
    {
        // Pending high surrogate goes in front of a copy of the caller's slice.
        buffer = new char[count + 1];
        buffer[0] = m_highSurrogate;
        Array.Copy(chars, index, buffer, 1, count);
        start = 0;
        length = count + 1;
    }

    // A high surrogate in the final position is excluded from the count.
    bool endsWithHighSurrogate = StringInfo.IsHighSurrogate(buffer[start + length - 1]);
    int measured = endsWithHighSurrogate ? length - 1 : length;
    return m_encoding.GetByteCount(buffer, start, measured);
}
//
// Writes the UTF-8 encoding of chars[charIndex .. charIndex + charCount) into bytes,
// starting at byteIndex, and returns the number of bytes written.
//
// encoder may be null. When it is non-null and holds a stored surrogate, that surrogate
// is treated as if it preceded the first input character. A high surrogate that is still
// pending at the end of the input is stored back into encoder when encoder is non-null
// and encoder.mustFlush is false; otherwise it is handled as an unpaired surrogate.
//
// Unpaired surrogates raise ArgumentException when isThrowException is set; otherwise
// they are written out (via EncodeThreeBytes for a high surrogate, or through the regular
// three-byte path for a low surrogate).
//
// An IndexOutOfRangeException raised while writing is reported as an ArgumentException
// carrying the Argument_ConversionOverflow message.
//
private unsafe int GetBytes(char *chars, int charIndex, int charCount, byte[] bytes, int byteIndex, UTF8Encoder encoder)
{
    BCLDebug.Assert(chars != null, "[UTF8Encoding.GetBytes]chars!=null");

    int limit = charIndex + charCount;
    int firstByteIndex = byteIndex;

    // Pending high surrogate, or -1 when there is none.
    int pendingHigh = -1;
    if (encoder != null && encoder.storedSurrogate)
    {
        pendingHigh = encoder.surrogateChar;
        encoder.storedSurrogate = false;
    }

    try
    {
        while (charIndex < limit)
        {
            char ch = chars[charIndex++];

            if (pendingHigh > 0)
            {
                // The previous character was a high surrogate; a low surrogate should follow.
                if (StringInfo.IsLowSurrogate(ch))
                {
                    // Complete pair: combine both halves into one scalar value above 0xFFFF.
                    int scalar = ((pendingHigh - CharacterInfo.HIGH_SURROGATE_START) << 10)
                                 + (ch - CharacterInfo.LOW_SURROGATE_START)
                                 + 0x10000;
                    bytes[byteIndex++] = (byte)(0xF0 | ((scalar >> 18) & 0x07));
                    bytes[byteIndex++] = (byte)(0x80 | ((scalar >> 12) & 0x3F));
                    bytes[byteIndex++] = (byte)(0x80 | ((scalar >> 6) & 0x3F));
                    bytes[byteIndex++] = (byte)(0x80 | (scalar & 0x3F));
                    pendingHigh = -1;
                    continue;
                }

                // The pending high surrogate has no partner.
                bool followedByHigh = StringInfo.IsHighSurrogate(ch);
                if (isThrowException)
                {
                    throw new ArgumentException(Environment.GetResourceString("Argument_InvalidHighSurrogate", (charIndex - 1)), "chars");
                }

                // Emit the orphaned high surrogate on its own.
                EncodeThreeBytes(pendingHigh, bytes, ref byteIndex);

                if (followedByHigh)
                {
                    // The current char may itself start a valid pair.
                    pendingHigh = ch;
                }
                else
                {
                    // Ordinary char: step back so the next iteration encodes it normally.
                    pendingHigh = -1;
                    charIndex--;
                }
                continue;
            }

            if (ch < 0x0080)
            {
                // One-byte form.
                bytes[byteIndex++] = (byte)ch;
                continue;
            }

            if (ch < 0x0800)
            {
                // Two-byte form.
                bytes[byteIndex++] = (byte)(0xC0 | ((ch >> 6) & 0x1F));
                bytes[byteIndex++] = (byte)(0x80 | (ch & 0x3F));
                continue;
            }

            if (StringInfo.IsHighSurrogate(ch))
            {
                // Start of a surrogate pair; wait for the next char.
                pendingHigh = ch;
                continue;
            }

            if (StringInfo.IsLowSurrogate(ch) && isThrowException)
            {
                // Low surrogate with no preceding high surrogate.
                throw new ArgumentException(
                    String.Format(Environment.GetResourceString("Argument_InvalidLowSurrogate"), (charIndex - 1)), "chars");
            }

            // Three-byte form: ch >= 0x0800 and not a high surrogate.
            bytes[byteIndex++] = (byte)(0xE0 | ((ch >> 12) & 0x0F));
            bytes[byteIndex++] = (byte)(0x80 | ((ch >> 6) & 0x3F));
            bytes[byteIndex++] = (byte)(0x80 | (ch & 0x3F));
        }

        if (pendingHigh > 0)
        {
            if (encoder == null || encoder.mustFlush)
            {
                // Nowhere to carry the surrogate over to: treat it as unpaired.
                if (isThrowException)
                {
                    throw new ArgumentException(
                        String.Format(Environment.GetResourceString("Argument_InvalidHighSurrogate"), (charIndex - 1)), "chars");
                }
                EncodeThreeBytes(pendingHigh, bytes, ref byteIndex);
            }
            else
            {
                // Keep the surrogate in the encoder for the next call.
                encoder.surrogateChar = pendingHigh;
                encoder.storedSurrogate = true;
            }
        }
    }
    catch (IndexOutOfRangeException)
    {
        throw new ArgumentException(Environment.GetResourceString("Argument_ConversionOverflow"));
    }

    return byteIndex - firstByteIndex;
}
//
// Counts the bytes needed to encode chars[index .. index + count) as UTF-8.
//
// encoder may be null. When it is non-null and holds a stored surrogate, counting starts
// as if a high surrogate preceded the first input character. A high surrogate still
// pending at the end of the input adds 3 bytes only when encoder is null or
// encoder.mustFlush is set; otherwise it contributes nothing to this call's count.
//
// Per-character cost: 1 byte below 0x0080, 2 bytes below 0x0800, 4 bytes for a complete
// surrogate pair, 3 bytes for everything else (including unpaired surrogates when
// isThrowException is not set).
//
// Throws:
//   ArgumentException           - an unpaired surrogate is found and isThrowException is set.
//   ArgumentOutOfRangeException - the running byte count overflows Int32.
//
internal unsafe int GetByteCount(char *chars, int index, int count, UTF8Encoder encoder)
{
    BCLDebug.Assert(chars != null, "[UTF8Encoding.GetByteCount]chars!=null");

    int end = index + count;
    int byteCount = 0;

    // True while a high surrogate is waiting for its low surrogate.
    bool inSurrogate = (encoder != null && encoder.storedSurrogate);

    // byteCount going negative means the running total wrapped; stop immediately.
    while (index < end && byteCount >= 0)
    {
        char ch = chars[index++];

        if (inSurrogate)
        {
            //
            // The previous char was a high surrogate, so a low surrogate is expected here.
            //
            if (StringInfo.IsLowSurrogate(ch))
            {
                inSurrogate = false;
                //
                // One surrogate pair is translated into 4 bytes of UTF8.
                //
                byteCount += 4;
            }
            else if (StringInfo.IsHighSurrogate(ch))
            {
                // Two high surrogates in a row.
                if (isThrowException)
                {
                    throw new ArgumentException(Environment.GetResourceString("Argument_InvalidHighSurrogate", (index - 1)), "chars");
                }
                // Count the previous high surrogate on its own.
                byteCount += 3;
                // inSurrogate stays true: this char could be the start of another valid pair.
            }
            else
            {
                if (isThrowException)
                {
                    throw new ArgumentException(Environment.GetResourceString("Argument_InvalidHighSurrogate", (index - 1)), "chars");
                }
                // Count the previous high surrogate on its own.
                byteCount += 3;
                // Not a surrogate. Put the char back so that it is counted normally next iteration.
                inSurrogate = false;
                index--;
            }
        }
        else if (ch < 0x0080)
        {
            byteCount++;
        }
        else if (ch < 0x0800)
        {
            byteCount += 2;
        }
        else
        {
            if (StringInfo.IsHighSurrogate(ch))
            {
                //
                // Found the start of a surrogate pair.
                //
                inSurrogate = true;
            }
            else if (StringInfo.IsLowSurrogate(ch) && isThrowException)
            {
                //
                // Found a low surrogate without encountering a high surrogate first.
                //
                throw new ArgumentException(
                    String.Format(Environment.GetResourceString("Argument_InvalidLowSurrogate"), (index - 1)), "chars");
            }
            else
            {
                byteCount += 3;
            }
        }
    }

    // Check for overflow of the running total accumulated in the loop.
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
    }

    if (inSurrogate)
    {
        if (encoder == null || encoder.mustFlush)
        {
            if (isThrowException)
            {
                throw new ArgumentException(
                    String.Format(Environment.GetResourceString("Argument_InvalidHighSurrogate"), (index - 1)), "chars");
            }
            byteCount += 3;

            // This addition happens after the overflow check above, so it needs its own
            // check; otherwise a wrapped (negative) count would be returned to the caller.
            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
            }
        }
    }

    return(byteCount);
}