// Validates the (chars, index, count) range and then asks the pointer-based
// GetByteCount overload how many bytes the range encodes to.
//
// chars   - source characters; must not be null.
// index   - position of the first character to examine; must not be negative.
// count   - number of characters to examine; must not be negative and must fit
//           inside chars starting at index.
// encoder - handed unchanged to the pointer-based overload.
//
// Returns 0 for an empty array (the overload is not called in that case);
// otherwise whatever the pointer-based overload reports.
//
// Throws ArgumentNullException when chars is null and ArgumentOutOfRangeException
// when index/count are negative or describe a range outside the array.
internal unsafe int GetByteCount(char[] chars, int index, int count, UTF8Encoder encoder)
{
    if (chars == null)
    {
        throw new ArgumentNullException("chars", Environment.GetResourceString("ArgumentNull_Array"));
    }

    // A negative index is reported in preference to a negative count.
    if (index < 0)
    {
        throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    // Written as a subtraction so that index + count can never overflow.
    if (count > chars.Length - index)
    {
        throw new ArgumentOutOfRangeException("chars", Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    }

    // Nothing to pin or count for an empty array.
    if (chars.Length == 0)
    {
        return 0;
    }

    // -1 is a sentinel: the assert below verifies the overload replaced it.
    int byteCount = -1;
    fixed (char* pinnedChars = chars)
    {
        byteCount = GetByteCount(pinnedChars, index, count, encoder);
    }

    BCLDebug.Assert(byteCount != -1, "[UTF8Encoding.GetByteCount]retVal!=-1");
    return byteCount;
}
// Encodes charCount characters of chars, starting at charIndex, as UTF-8 into
// bytes starting at byteIndex, and returns the number of bytes written.
//
// Arguments are expected to have been validated by the caller; only the
// non-null state of chars is asserted here.
//
// Surrogate handling:
//   * A high surrogate followed by a low surrogate is written as one 4-byte sequence.
//   * An unpaired surrogate raises ArgumentException when isThrowException is set;
//     otherwise it is written as a 3-byte sequence.
//   * A high surrogate left over at the end of the input is stored in the encoder
//     when one is supplied and it is not being flushed, so that the next call can
//     complete the pair. A surrogate stored by a previous call is picked up (and
//     cleared from the encoder) at the start of this call.
//
// Running past the end of bytes surfaces as
// ArgumentException("Argument_ConversionOverflow").
private int _GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, UTF8Encoder encoder)
{
    Debug.Assert(chars != null, "[UTF8Encoding.GetBytes]chars!=null");

    int charLimit = charIndex + charCount;
    int firstByteIndex = byteIndex;

    // Pending high surrogate waiting for its low half, or -1 when there is none.
    int pendingHigh = -1;
    if (encoder != null && encoder.storedSurrogate)
    {
        pendingHigh = encoder.surrogateChar;
        encoder.storedSurrogate = false;
    }

    try
    {
        while (charIndex < charLimit)
        {
            char ch = chars[charIndex++];

            if (pendingHigh > 0)
            {
                // The previous character was a high surrogate, so a low surrogate is expected.
                if (CharacterInfo.IsLowSurrogate(ch))
                {
                    // Complete pair: combine both halves into the supplementary code point.
                    int codePoint = ((pendingHigh - CharacterInfo.HIGH_SURROGATE_START) << 10)
                                    + (ch - CharacterInfo.LOW_SURROGATE_START)
                                    + 0x10000;

                    bytes[byteIndex++] = (byte)(0xF0 | ((codePoint >> 18) & 0x07));
                    bytes[byteIndex++] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
                    bytes[byteIndex++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
                    bytes[byteIndex++] = (byte)(0x80 | (codePoint & 0x3F));

                    pendingHigh = -1;
                    continue;
                }

                // The pending high surrogate is unpaired.
                bool nextIsHigh = CharacterInfo.IsHighSurrogate(ch);
                if (isThrowException)
                {
                    throw new ArgumentException("Argument_InvalidHighSurrogate");
                }

                // Encode the previous high-surrogate char on its own.
                EncodeThreeBytes(pendingHigh, bytes, ref byteIndex);

                if (nextIsHigh)
                {
                    // Two high surrogates in a row: the new one may start a valid pair.
                    pendingHigh = ch;
                }
                else
                {
                    // Not a surrogate. Put the char back so it is encoded on the next pass.
                    pendingHigh = -1;
                    charIndex--;
                }
                continue;
            }

            if (ch < 0x0080)
            {
                // Single-byte form.
                bytes[byteIndex++] = (byte)ch;
                continue;
            }

            if (ch < 0x0800)
            {
                // Two-byte form.
                bytes[byteIndex++] = (byte)(0xC0 | ((ch >> 6) & 0x1F));
                bytes[byteIndex++] = (byte)(0x80 | (ch & 0x3F));
                continue;
            }

            if (CharacterInfo.IsHighSurrogate(ch))
            {
                // Start of a surrogate pair; wait for the next character.
                pendingHigh = ch;
                continue;
            }

            if (CharacterInfo.IsLowSurrogate(ch) && isThrowException)
            {
                throw new ArgumentException("Argument_InvalidLowSurrogate");
            }

            // ch >= 0x0800 and is not a high surrogate: three-byte form.
            bytes[byteIndex++] = (byte)(0xE0 | ((ch >> 12) & 0x0F));
            bytes[byteIndex++] = (byte)(0x80 | ((ch >> 6) & 0x3F));
            bytes[byteIndex++] = (byte)(0x80 | (ch & 0x3F));
        }

        if (pendingHigh > 0)
        {
            if (encoder != null && !encoder.mustFlush)
            {
                // Keep the dangling high surrogate for the next call.
                encoder.surrogateChar = pendingHigh;
                encoder.storedSurrogate = true;
            }
            else
            {
                if (isThrowException)
                {
                    throw new ArgumentException("Argument_InvalidHighSurrogate");
                }
                EncodeThreeBytes(pendingHigh, bytes, ref byteIndex);
            }
        }
    }
    catch (IndexOutOfRangeException)
    {
        // The destination buffer was too small for the encoded output.
        throw new ArgumentException("Argument_ConversionOverflow");
    }

    return byteIndex - firstByteIndex;
}
// Validates the source and destination arguments and then encodes the requested
// character range into bytes by delegating to _GetBytes.
//
// chars     - source characters; must not be null.
// charIndex - position of the first character to encode; must not be negative.
// charCount - number of characters to encode; must not be negative and must fit
//             inside chars starting at charIndex.
// bytes     - destination buffer; must not be null.
// byteIndex - position in bytes at which writing starts; must lie in
//             [0, bytes.Length].
// encoder   - handed unchanged to _GetBytes.
//
// Returns 0 for an empty chars array (without calling _GetBytes); otherwise the
// value returned by _GetBytes.
//
// Throws ArgumentNullException when chars or bytes is null and
// ArgumentOutOfRangeException when an index or count is out of range.
private int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, UTF8Encoder encoder)
{
    if (chars == null || bytes == null)
    {
        // chars is reported in preference to bytes when both are null.
        throw new ArgumentNullException((chars == null ? "chars" : "bytes"), "ArgumentNull_Array");
    }

    if (charIndex < 0 || charCount < 0)
    {
        throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), "ArgumentOutOfRange_NeedNonNegNum");
    }

    // Written as a subtraction so that charIndex + charCount can never overflow.
    if (chars.Length - charIndex < charCount)
    {
        throw new ArgumentOutOfRangeException("chars", "ArgumentOutOfRange_IndexCountBuffer");
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException("byteIndex", "ArgumentOutOfRange_Index");
    }

    if (chars.Length == 0)
    {
        return 0;
    }

    int retVal = _GetBytes(chars, charIndex, charCount, bytes, byteIndex, encoder);

    // The assertion label now names this method; it previously said GetByteCount.
    Debug.Assert(retVal != -1, "[UTF8Encoding.GetBytes]retVal!=-1");
    return retVal;
}
// Counts the bytes needed to encode count characters of chars, starting at index,
// as UTF-8, applying the same surrogate rules as _GetBytes.
//
// Arguments are expected to have been validated by the caller; only the
// non-null state of chars is asserted here.
//
// chars   - source characters.
// index   - position of the first character to examine.
// count   - number of characters to examine.
// encoder - optional encoder; when it holds a stored surrogate, counting starts as
//           if a high surrogate had just been read. The encoder is not modified.
//
// Returns the number of bytes required.
//
// Throws ArgumentException for an unpaired surrogate when isThrowException is set,
// and ArgumentOutOfRangeException when the byte count does not fit in an int.
internal int _GetByteCount(char[] chars, int index, int count, UTF8Encoder encoder)
{
    Debug.Assert(chars != null, "[UTF8Encoding.GetByteCount]chars!=null");

    int end = index + count;
    int byteCount = 0;

    // True while a high surrogate is waiting for its low half.
    bool inSurrogate = (encoder != null && encoder.storedSurrogate);

    // byteCount going negative means the running total wrapped; stop and report below.
    while (index < end && byteCount >= 0)
    {
        char ch = chars[index++];

        if (inSurrogate)
        {
            // The previous char was a high surrogate, so a low surrogate is expected here.
            if (CharacterInfo.IsLowSurrogate(ch))
            {
                inSurrogate = false;
                // One surrogate pair is translated into 4 bytes of UTF-8.
                byteCount += 4;
            }
            else if (CharacterInfo.IsHighSurrogate(ch))
            {
                // Two high surrogates in a row.
                if (isThrowException)
                {
                    throw new ArgumentException("Argument_InvalidHighSurrogate");
                }
                // Count the previous high-surrogate char on its own.
                byteCount += 3;
                // inSurrogate stays true, because this could be the start of another valid pair.
            }
            else
            {
                if (isThrowException)
                {
                    throw new ArgumentException("Argument_InvalidHighSurrogate");
                }
                // Count the previous high-surrogate char on its own.
                byteCount += 3;
                // Not a surrogate. Put the char back so it is counted on the next pass.
                inSurrogate = false;
                index--;
            }
        }
        else if (ch < 0x0080)
        {
            byteCount++;
        }
        else if (ch < 0x0800)
        {
            byteCount += 2;
        }
        else
        {
            if (CharacterInfo.IsHighSurrogate(ch))
            {
                // Found the start of a surrogate pair.
                inSurrogate = true;
            }
            else if (CharacterInfo.IsLowSurrogate(ch) && isThrowException)
            {
                // Found a low surrogate without encountering a high surrogate first.
                throw new ArgumentException("Argument_InvalidLowSurrogate");
            }
            else
            {
                byteCount += 3;
            }
        }
    }

    // Check for overflow accumulated inside the loop.
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException("count", "ArgumentOutOfRange_GetByteCountOverflow");
    }

    // A dangling high surrogate is only counted when nothing can carry it over
    // to a later call (no encoder, or the encoder is being flushed).
    if (inSurrogate && (encoder == null || encoder.mustFlush))
    {
        if (isThrowException)
        {
            throw new ArgumentException("Argument_InvalidHighSurrogate");
        }

        // This final addition happens after the loop's overflow check, so guard it
        // explicitly; otherwise the total could wrap and be returned as a negative count.
        if (byteCount > int.MaxValue - 3)
        {
            throw new ArgumentOutOfRangeException("count", "ArgumentOutOfRange_GetByteCountOverflow");
        }
        byteCount += 3;
    }

    return byteCount;
}
// Creates a UTF8Encoding and populates its encoder and decoder members with
// fresh UTF8Encoder and UTF8Decoder instances.
public UTF8Encoding()
{
    encoder = new UTF8Encoder();
    decoder = new UTF8Decoder();
}