// Decodes (or, when chars is null, only counts) input for the CP5022x Japanese
// code pages. Escape sequences select the character set, SHIFT_OUT / SHIFT_IN
// switch to and from halfwidth katakana. When a decoder is supplied, the modes
// and any partial escape / lead bytes are carried across calls through it.
// Returns buffer.Count, the number of characters found.
[System.Security.SecurityCritical]  // auto-generated
private unsafe int GetCharsCP5022xJP(byte* bytes, int byteCount,
                                     char* chars, int charCount, ISO2022Decoder decoder)
{
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    // Start in ASCII unless the decoder remembers something else.
    ISO2022Modes activeMode = ISO2022Modes.ModeASCII;   // mode used for the next byte
    ISO2022Modes resumeMode = ISO2022Modes.ModeASCII;   // mode restored by SHIFT_IN
    byte[] pending = new byte[4];                       // buffered escape / lead bytes
    int pendingCount = 0;

    if (decoder != null)
    {
        activeMode = decoder.currentMode;
        resumeMode = decoder.shiftInOutMode;

        // Work on a private copy so the decoder is not disturbed while counting
        // or if an exception is thrown.
        pendingCount = decoder.bytesLeftOverCount;
        for (int idx = 0; idx < pendingCount; idx++)
        {
            pending[idx] = decoder.bytesLeftOver[idx];
        }
    }

    while (buffer.MoreData || pendingCount > 0)
    {
        byte current;

        if (pendingCount <= 0)
        {
            // Nothing buffered: take the next input byte.
            current = buffer.GetNextByte();

            if (current == ESCAPE)
            {
                if (pendingCount == 0)
                {
                    // Start collecting a new escape sequence.
                    pending[0] = current;
                    pendingCount = 1;
                    continue;
                }

                // Flush the previous escape sequence, then reuse this escape byte.
                buffer.AdjustBytes(-1);
            }
        }
        else
        {
            if (pending[0] == ESCAPE)
            {
                if (buffer.MoreData)
                {
                    // Extend the buffered sequence and see whether it is recognised yet.
                    pending[pendingCount++] = buffer.GetNextByte();
                    ISO2022Modes escapeMode = CheckEscapeSequenceJP(pending, pendingCount);

                    if (escapeMode != ISO2022Modes.ModeInvalidEscape)
                    {
                        if (escapeMode != ISO2022Modes.ModeIncompleteEscape)
                        {
                            // Complete escape: consume it and switch modes.
                            pendingCount = 0;
                            activeMode = resumeMode = escapeMode;
                        }

                        // Complete or still incomplete: go fetch the next byte.
                        continue;
                    }
                }
                else if (decoder != null && !decoder.MustFlush)
                {
                    // Out of input and not flushing: keep the partial escape for later.
                    break;
                }

                // Invalid escape, or no input while flushing: emit the bytes as data.
            }

            // Take the oldest buffered byte; the rest move down one slot.
            current = DecrementEscapeBytes(ref pending, ref pendingCount);
        }

        if (current == SHIFT_OUT)
        {
            resumeMode = activeMode;
            activeMode = ISO2022Modes.ModeHalfwidthKatakana;
            continue;
        }

        if (current == SHIFT_IN)
        {
            activeMode = resumeMode;
            continue;
        }

        // Assemble the lookup code for this character.
        ushort code = current;
        bool isDoubleByte = false;

        if (activeMode == ISO2022Modes.ModeJIS0208)
        {
            // A trail byte is needed; it may come from the pending buffer or from the input.
            if (pendingCount > 0)
            {
                // A buffered escape is left alone so it can be processed on the next pass.
                if (pending[0] != ESCAPE)
                {
                    byte trail = DecrementEscapeBytes(ref pending, ref pendingCount);
                    code = (ushort)((code << 8) | trail);
                    isDoubleByte = true;
                }
            }
            else if (buffer.MoreData)
            {
                byte trail = buffer.GetNextByte();
                code = (ushort)((code << 8) | trail);
                isDoubleByte = true;
            }
            else
            {
                if (decoder == null || decoder.MustFlush)
                {
                    // No way to carry the lone lead byte forward: fall it back.
                    buffer.Fallback(current);
                    break;
                }

                // Remember the lead byte for the next call, unless we are only counting.
                if (chars != null)
                {
                    pending[0] = current;
                    pendingCount = 1;
                }
                break;
            }

            // MLang treated a JIS 0208 '*' (0x2a) lead byte as a halfwidth katakana
            // escape, so remap it to the katakana lead byte and keep the trail byte.
            if (isDoubleByte && ((code & 0xff00) == 0x2a00))
            {
                code = (ushort)(code & 0xff);
                code |= (LEADBYTE_HALFWIDTH << 8);
            }
        }
        else if (code >= 0xA1 && code <= 0xDF)
        {
            // Everett mapped these bytes like shift-jis katakana even though this is
            // a 7 bit code page; that mapping is kept.
            code |= (LEADBYTE_HALFWIDTH << 8);
            code &= 0xff7f;     // drop the extra 0x80
        }
        else if (activeMode == ISO2022Modes.ModeHalfwidthKatakana)
        {
            // Add the katakana lead byte the mapping table expects.
            code |= (LEADBYTE_HALFWIDTH << 8);
        }

        char mapped = mapBytesToUnicode[code];

        bool stored;
        if (mapped == UNKNOWN_CHAR_FLAG && code != 0)
        {
            // Unknown character: fall back whatever bytes made it up.
            stored = isDoubleByte
                ? buffer.Fallback((byte)(code >> 8), (byte)code)
                : buffer.Fallback(current);
        }
        else
        {
            // A JIS 0208 character consumed two bytes.
            stored = buffer.AddChar(mapped, isDoubleByte ? 2 : 1);
        }

        if (!stored)
        {
            break;
        }
    }

    // Update the decoder only when really converting (chars is null while counting).
    if (chars != null && decoder != null)
    {
        bool resetState = decoder.MustFlush && pendingCount == 0;
        if (resetState)
        {
            // Flushing with nothing left over: return to the initial state.
            decoder.currentMode = ISO2022Modes.ModeASCII;
            decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
            decoder.bytesLeftOverCount = 0;
        }
        else
        {
            // Either not flushing, or state is left over from a convert call.
            Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISO2022Encoding.GetCharsCP5022xJP]Expected no state or not converting or not flushing");

            decoder.currentMode = activeMode;
            decoder.shiftInOutMode = resumeMode;
            decoder.bytesLeftOverCount = pendingCount;
            decoder.bytesLeftOver = pending;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes (or, when chars is null, only counts) UTF-7 input. '+' opens a modified
// base 64 run, '-' (or any byte outside the base 64 table) closes it, and "+-"
// yields a literal '+'. When a decoder is supplied, unfinished bits are carried
// across calls through it. Returns buffer.Count.
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetChars(byte* bytes, int byteCount,
                                        char* chars, int charCount, DecoderNLS baseDecoder)
{
    Contract.Assert(byteCount >=0, "[UTF7Encoding.GetChars]byteCount >=0");
    Contract.Assert(bytes!=null, "[UTF7Encoding.GetChars]bytes!=null");
    Contract.Assert(charCount >=0, "[UTF7Encoding.GetChars]charCount >=0");

    // The decoder is optional.
    UTF7Encoding.Decoder decoder = (UTF7Encoding.Decoder) baseDecoder;

    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    int accumulator = 0;        // base 64 bits gathered so far
    int pendingBits = -1;       // number of valid bits; negative when not in a base 64 run
    bool justShifted = false;   // true right after the '+' that opened the run

    if (decoder != null)
    {
        accumulator = decoder.bits;
        pendingBits = decoder.bitCount;
        justShifted = decoder.firstByte;

        Contract.Assert(justShifted == false || decoder.bitCount <= 0,
            "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
    }

    // A whole character may be left in the bits from the previous call; emit it first.
    if (pendingBits >= 16)
    {
        char carried = (char)((accumulator >> (pendingBits - 16)) & 0xFFFF);
        if (!buffer.AddChar(carried))
        {
            // Always throw: at least one char of room is required, even in Convert.
            ThrowCharsOverflow(decoder, true);
        }

        pendingBits -= 16;
    }

    while (buffer.MoreData)
    {
        byte input = buffer.GetNextByte();
        int decoded;

        if (pendingBits < 0)
        {
            // Outside a base 64 run.
            if (input == '+')
            {
                // Either the start of a base 64 run or the first half of "+-".
                pendingBits = 0;
                justShifted = true;
                continue;
            }

            if (input >= 0x80)
            {
                // Not valid here: try the fallback, stop if it declines without throwing.
                if (!buffer.Fallback(input))
                {
                    break;
                }
                continue;
            }

            decoded = input;
        }
        else
        {
            // Inside a base 64 run. The table only covers bytes below 0x80.
            sbyte sextet;
            if (input < 0x80 && ((sextet = base64Values[input]) >= 0))
            {
                justShifted = false;
                accumulator = (accumulator << 6) | ((byte)sextet);
                pendingBits += 6;

                if (pendingBits < 16)
                {
                    // Not enough bits for a character yet.
                    continue;
                }

                decoded = (accumulator >> (pendingBits - 16)) & 0xFFFF;
                pendingBits -= 16;
            }
            else
            {
                // Any non base 64 byte ends the run.
                pendingBits = -1;

                if (input != '-')
                {
                    // Not a valid base 64 byte and not the terminator: fall it back.
                    if (!buffer.Fallback(input))
                    {
                        break;
                    }
                    continue;
                }

                if (!justShifted)
                {
                    // A plain terminator produces no output.
                    continue;
                }

                // The encoding for '+' is "+-".
                decoded = '+';
            }
        }

        if (decoded >= 0)
        {
            if (!buffer.AddChar((char)decoded))
            {
                // No room. Inside a run the byte is still consumed and the character
                // stays in the bits so it can be emitted on the next call.
                if (pendingBits >= 0)
                {
                    buffer.AdjustBytes(+1);     // re-add the byte AddChar subtracted when it failed
                    pendingBits += 16;
                }
                break;
            }
        }
    }

    // Save state only when really converting (chars is null while counting).
    if (chars != null && decoder != null)
    {
        if (decoder.MustFlush)
        {
            // Any leftover bits are simply dropped on flush.
            decoder.bits = 0;
            decoder.bitCount = -1;
            decoder.firstByte = false;
        }
        else
        {
            decoder.bits = accumulator;
            decoder.bitCount = pendingBits;
            decoder.firstByte = justShifted;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes (or, when chars is null, only counts) input for code page 50225 (Korean).
// Escape sequences are validated and then dropped, SHIFT_OUT selects the double
// byte KR mode and SHIFT_IN returns to ASCII. When a decoder is supplied, the mode
// and any partial escape / lead bytes are carried across calls through it.
// Returns buffer.Count.
[System.Security.SecurityCritical]  // auto-generated
private unsafe int GetCharsCP50225KR(byte* bytes, int byteCount,
                                     char* chars, int charCount, ISO2022Decoder decoder)
{
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes mode = ISO2022Modes.ModeASCII;
    byte[] held = new byte[4];      // buffered escape / lead bytes
    int heldCount = 0;

    if (decoder != null)
    {
        mode = decoder.currentMode;

        // Copy the leftovers so the decoder is not disturbed while counting or on a throw.
        heldCount = decoder.bytesLeftOverCount;
        for (int slot = 0; slot < heldCount; slot++)
        {
            held[slot] = decoder.bytesLeftOver[slot];
        }
    }

    while (buffer.MoreData || heldCount > 0)
    {
        byte ch;

        if (heldCount <= 0)
        {
            ch = buffer.GetNextByte();

            if (ch == ESCAPE)
            {
                if (heldCount == 0)
                {
                    // Start buffering a new escape sequence.
                    held[0] = ch;
                    heldCount = 1;
                    continue;
                }

                // Flush the previous escape sequence, then reuse this escape byte.
                buffer.AdjustBytes(-1);
            }
        }
        else
        {
            if (held[0] == ESCAPE)
            {
                if (buffer.MoreData)
                {
                    // Grow the buffered sequence and test it.
                    held[heldCount++] = buffer.GetNextByte();
                    ISO2022Modes verdict = CheckEscapeSequenceKR(held, heldCount);

                    if (verdict != ISO2022Modes.ModeInvalidEscape)
                    {
                        if (verdict != ISO2022Modes.ModeIncompleteEscape)
                        {
                            // Valid escape: nothing to change, just discard it.
                            heldCount = 0;
                        }

                        // Valid or still incomplete: fetch the next byte.
                        continue;
                    }
                }
                else if (decoder != null && !decoder.MustFlush)
                {
                    // Out of input and not flushing: keep the partial escape.
                    break;
                }

                // Invalid escape, or no input while flushing: emit the bytes as data.
            }

            // Take the oldest buffered byte; the rest move down one slot.
            ch = DecrementEscapeBytes(ref held, ref heldCount);
        }

        if (ch == SHIFT_OUT)
        {
            mode = ISO2022Modes.ModeKR;
            continue;
        }

        if (ch == SHIFT_IN)
        {
            mode = ISO2022Modes.ModeASCII;
            continue;
        }

        ushort code = ch;
        bool isDoubleByte = false;

        // MLANG passed ' ', '\t' and '\n' through untouched in KR mode, so they stay single byte.
        bool wantsTrailByte = mode == ISO2022Modes.ModeKR && ch != ' ' && ch != '\t' && ch != '\n';
        if (wantsTrailByte)
        {
            if (heldCount > 0)
            {
                // A buffered escape is left alone so it can be processed on the next pass.
                if (held[0] != ESCAPE)
                {
                    byte trail = DecrementEscapeBytes(ref held, ref heldCount);
                    code = (ushort)((code << 8) | trail);
                    isDoubleByte = true;
                }
            }
            else if (buffer.MoreData)
            {
                byte trail = buffer.GetNextByte();
                code = (ushort)((code << 8) | trail);
                isDoubleByte = true;
            }
            else
            {
                if (decoder == null || decoder.MustFlush)
                {
                    // Lonely lead byte that cannot be carried forward: fall it back.
                    buffer.Fallback(ch);
                    break;
                }

                // Keep the lead byte for the next call, unless we are only counting.
                if (chars != null)
                {
                    held[0] = ch;
                    heldCount = 1;
                }
                break;
            }
        }

        char mapped = mapBytesToUnicode[code];

        bool stored;
        if (mapped == UNKNOWN_CHAR_FLAG && code != 0)
        {
            stored = isDoubleByte
                ? buffer.Fallback((byte)(code >> 8), (byte)code)
                : buffer.Fallback(ch);
        }
        else
        {
            stored = buffer.AddChar(mapped, isDoubleByte ? 2 : 1);
        }

        if (!stored)
        {
            break;
        }
    }

    // Update the decoder only when really converting (chars is null while counting).
    if (chars != null && decoder != null)
    {
        bool resetState = decoder.MustFlush && heldCount == 0;
        if (resetState)
        {
            // Flushing with nothing left over: return to the initial state.
            decoder.currentMode = ISO2022Modes.ModeASCII;
            decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
            decoder.bytesLeftOverCount = 0;
        }
        else
        {
            // Either not flushing, or state is left over from a convert call.
            Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISO2022Encoding.GetCharsCP50225KR]Expected no state or not converting or not flushing");

            decoder.currentMode = mode;
            decoder.bytesLeftOverCount = heldCount;
            decoder.bytesLeftOver = held;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes (or, when chars is null, only counts) input for code page 52936.
// '~' introduces a two byte control: "~{" enters double byte (HZ) mode, "~}"
// returns to ASCII, "~~" is a literal '~' while in ASCII mode and "~\n" is
// skipped. When a decoder is supplied, the mode and at most one leftover byte are
// carried across calls through it. Returns buffer.Count.
[System.Security.SecurityCritical]  // auto-generated
private unsafe int GetCharsCP52936(byte* bytes, int byteCount,
                                    char* chars, int charCount, ISO2022Decoder decoder)
{
    Contract.Assert(byteCount >=0, "[ISO2022Encoding.GetCharsCP52936]count >=0");
    Contract.Assert(bytes!=null, "[ISO2022Encoding.GetCharsCP52936]bytes!=null");

    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes mode = ISO2022Modes.ModeASCII;
    int carriedByte = -1;           // byte left over from the previous call, -1 if none
    bool storedInDecoder = false;   // set once this call parks a byte in the decoder

    if (decoder != null)
    {
        mode = decoder.currentMode;

        // Only read the leftover here; the decoder is not modified while counting.
        if (decoder.bytesLeftOverCount != 0)
        {
            carriedByte = decoder.bytesLeftOver[0];
        }
    }

    while (buffer.MoreData || carriedByte >= 0)
    {
        byte ch;

        if (carriedByte < 0)
        {
            ch = buffer.GetNextByte();
        }
        else
        {
            // Use up the leftover byte first.
            ch = (byte)carriedByte;
            carriedByte = -1;
        }

        if (ch == '~')
        {
            // The byte after '~' says what kind of switch this is.
            if (!buffer.MoreData)
            {
                if (decoder == null || decoder.MustFlush)
                {
                    // Nowhere to keep the '~': fall it back. Either way we are out of data.
                    buffer.Fallback(ch);
                }
                else
                {
                    // decoder is known to be non-null on this path.
                    decoder.ClearMustFlush();

                    // Park the '~' in the decoder, unless we are only counting.
                    if (chars != null)
                    {
                        decoder.bytesLeftOverCount = 1;
                        decoder.bytesLeftOver[0] = (byte)'~';
                        storedInDecoder = true;
                    }
                }
                break;
            }

            ch = buffer.GetNextByte();

            if (ch == '~' && mode == ISO2022Modes.ModeASCII)
            {
                // "~~" stands for a single '~'.
                if (!buffer.AddChar((char)ch, 2))
                {
                    break;
                }
                continue;
            }

            if (ch == '{')
            {
                mode = ISO2022Modes.ModeHZ;
                continue;
            }

            if (ch == '}')
            {
                mode = ISO2022Modes.ModeASCII;
                continue;
            }

            if (ch == '\n')
            {
                // "~\n" is ignored.
                continue;
            }

            // Unknown escape: back up and treat the '~' as a normal byte or lead byte.
            buffer.AdjustBytes(-1);
            ch = (byte)'~';
        }

        if (mode != ISO2022Modes.ModeASCII)
        {
            Contract.Assert(mode == ISO2022Modes.ModeHZ, "[ISO2022Encoding.GetCharsCP52936]Expected ModeHZ");

            // Everett let bytes below 0x20 through as if they were ASCII; those skip
            // this block and use the single byte path below.
            if (ch >= 0x20)
            {
                // Double byte character: a second byte is required.
                if (!buffer.MoreData)
                {
                    if (decoder == null || decoder.MustFlush)
                    {
                        // Not enough bytes: fall back the lead byte.
                        buffer.Fallback(ch);
                    }
                    else
                    {
                        // decoder is known to be non-null on this path.
                        decoder.ClearMustFlush();

                        // Park the lead byte in the decoder, unless we are only counting.
                        if (chars != null)
                        {
                            decoder.bytesLeftOverCount = 1;
                            decoder.bytesLeftOver[0] = ch;
                            storedInDecoder = true;
                        }
                    }
                    break;
                }

                byte trail = buffer.GetNextByte();
                ushort code = (ushort)(ch << 8 | trail);
                char wide;

                if (ch == ' ' && trail != 0)
                {
                    // Everett used space as an escape: the following byte is taken as is.
                    wide = (char)trail;
                }
                else
                {
                    // Accepted ranges: lead 0x21-0x77 with trail 0x21-0x7e, or the same
                    // characters with both high bits set (lead 0xa1-0xf7, trail 0xa1-0xfe).
                    bool inLowRange = ch >= 0x21 && ch <= 0x77 && trail >= 0x21 && trail <= 0x7e;
                    bool inHighRange = ch >= 0xa1 && ch <= 0xf7 && trail >= 0xa1 && trail <= 0xfe;

                    if (!inLowRange && !inHighRange)
                    {
                        if (trail == 0x20 && 0x21 <= ch && ch <= 0x7d)
                        {
                            // Everett mapped XX20 to unicode 3000 (ideographic space).
                            code = 0x2121;
                        }
                        else
                        {
                            // Illegal pair: fall it back and move on.
                            if (!buffer.Fallback((byte)(code >> 8), (byte)(code)))
                            {
                                break;
                            }
                            continue;
                        }
                    }

                    // The mapping table is indexed with both high bits set.
                    code |= 0x8080;
                    wide = mapBytesToUnicode[code];
                }

                if (wide == UNKNOWN_CHAR_FLAG && code != 0)
                {
                    // Unknown character: fall back both bytes.
                    if (!buffer.Fallback((byte)(code >> 8), (byte)(code)))
                    {
                        break;
                    }
                    continue;
                }

                if (!buffer.AddChar(wide, 2))
                {
                    break;      // convert ran out of buffer
                }
                continue;
            }
        }

        // Single byte path. Some bytes above 0x7f are allowed because Everett
        // allowed them, so everything goes through the table.
        char narrow = mapBytesToUnicode[ch];

        if ((narrow == UNKNOWN_CHAR_FLAG || narrow == 0) && (ch != 0))
        {
            if (!buffer.Fallback((byte)ch))
            {
                break;
            }
            continue;
        }

        if (!buffer.AddChar(narrow))
        {
            break;      // convert ran out of buffer
        }
    }

    // Remember state only when really converting (chars is null while counting).
    if (chars != null && decoder != null)
    {
        if (!storedInDecoder)
        {
            // Nothing was parked this call, so the old leftover byte is spent.
            decoder.bytesLeftOverCount = 0;
        }

        if (decoder.MustFlush && decoder.bytesLeftOverCount == 0)
        {
            decoder.currentMode = ISO2022Modes.ModeASCII;
        }
        else
        {
            // Either not flushing, or state is left over from a convert call.
            Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISO2022Encoding.GetCharsCP52936]Expected no state or not converting or not flushing");

            decoder.currentMode = mode;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes (or, when chars is null, only counts) input in which '+' opens a run of
// base64Values-encoded bits and '-' (or any byte not in that table) closes it;
// "+-" yields a literal '+'. When a decoder is supplied, unfinished bits are
// carried across calls through it. Returns buffer.Count.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    Decoder decoder = (Decoder) baseDecoder;
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    int bitBuffer = 0;          // bits gathered so far
    int bitsHeld = -1;          // number of valid bits; negative when not inside a run
    bool atRunStart = false;    // true right after the '+' that opened the run

    if (decoder != null)
    {
        bitBuffer = decoder.bits;
        bitsHeld = decoder.bitCount;
        atRunStart = decoder.firstByte;
    }

    // A whole character may already be waiting in the carried-over bits.
    if (bitsHeld >= 16)
    {
        char carried = (char) ((bitBuffer >> (bitsHeld - 16)) & 0xffff);
        if (!buffer.AddChar(carried))
        {
            base.ThrowCharsOverflow(decoder, true);
        }
        bitsHeld -= 16;
    }

    while (buffer.MoreData)
    {
        byte input = buffer.GetNextByte();
        int produced;

        if (bitsHeld >= 0)
        {
            // Inside a run; the table is only consulted for bytes below 0x80.
            sbyte sextet;
            if ((input < 0x80) && ((sextet = this.base64Values[input]) >= 0))
            {
                atRunStart = false;
                bitBuffer = (bitBuffer << 6) | ((byte) sextet);
                bitsHeld += 6;

                if (bitsHeld < 16)
                {
                    // Not enough bits for a character yet.
                    continue;
                }

                produced = (bitBuffer >> (bitsHeld - 16)) & 0xffff;
                bitsHeld -= 16;
            }
            else
            {
                // Anything else ends the run.
                bitsHeld = -1;

                if (input != '-')
                {
                    if (buffer.Fallback(input))
                    {
                        continue;
                    }
                    break;
                }

                if (!atRunStart)
                {
                    // A plain terminator produces nothing.
                    continue;
                }

                // '-' immediately after '+' stands for a literal '+'.
                produced = '+';
            }
        }
        else if (input == '+')
        {
            // Open a run.
            bitsHeld = 0;
            atRunStart = true;
            continue;
        }
        else if (input >= 0x80)
        {
            if (buffer.Fallback(input))
            {
                continue;
            }
            break;
        }
        else
        {
            produced = input;
        }

        if ((produced >= 0) && !buffer.AddChar((char) produced))
        {
            // No room. Inside a run, keep the character in the bits and count the
            // byte as consumed so it can be emitted on the next call.
            if (bitsHeld >= 0)
            {
                buffer.AdjustBytes(1);
                bitsHeld += 16;
            }
            break;
        }
    }

    // Save state only when really converting (chars is null while counting).
    if ((chars != null) && (decoder != null))
    {
        if (decoder.MustFlush)
        {
            decoder.bits = 0;
            decoder.bitCount = -1;
            decoder.firstByte = false;
        }
        else
        {
            decoder.bits = bitBuffer;
            decoder.bitCount = bitsHeld;
            decoder.firstByte = atRunStart;
        }
        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes (or, when chars is null, only counts) GB18030 input made of single
// ASCII bytes, two byte sequences and four byte sequences. Bytes of a sequence
// that was cut off at the end of the previous call are taken from the decoder
// first; an unfinished sequence at the end of this call is stored back into it
// unless the decoder must flush. Returns buffer.Count.
internal override unsafe int GetChars(byte* bytes, int byteCount,
                                        char* chars, int charCount, DecoderNLS baseDecoder)
{
    // Called by internal code that has already validated the parameters, so only
    // assert. chars may be null (count-only call) and is deliberately not asserted.
    BCLDebug.Assert(bytes != null, "[GB18030Encoding.GetChars]bytes is null");
    BCLDebug.Assert(byteCount >= 0, "[GB18030Encoding.GetChars]byteCount is negative");
    BCLDebug.Assert(charCount >= 0, "[GB18030Encoding.GetChars]charCount is negative");

    GB18030Decoder decoder = (GB18030Decoder)baseDecoder;

    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    // Working copies of up to four carried-over bytes (-1 marks an empty slot);
    // the decoder itself is only written when really converting.
    short held1 = -1;
    short held2 = -1;
    short held3 = -1;
    short held4 = -1;

    // First deal with anything the decoder kept from the previous call.
    if (decoder != null && decoder.bLeftOver1 != -1)
    {
        held1 = decoder.bLeftOver1;
        held2 = decoder.bLeftOver2;
        held3 = decoder.bLeftOver3;
        held4 = decoder.bLeftOver4;

        // Loop, because a four byte attempt that turns out invalid leaves several
        // bytes that each need handling.
        while (held1 != -1)
        {
            if (!IsGBLeadByte(held1))
            {
                // Not a lead byte (e.g. the trail of an earlier bad sequence):
                // emit it as ASCII or fall it back, then slide the rest down.
                bool emitted = (held1 <= 0x7f)
                    ? buffer.AddChar((char)held1)
                    : buffer.Fallback((byte)held1);
                if (!emitted)
                {
                    break;
                }

                held1 = held2;
                held2 = held3;
                held3 = held4;
                held4 = -1;
                continue;
            }

            // Pull in more bytes until we hold a second byte and, for a four byte
            // candidate, a fourth byte.
            while (held2 == -1 || (IsGBFourByteTrailing(held2) && held4 == -1))
            {
                if (buffer.MoreData)
                {
                    if (held2 == -1)
                    {
                        held2 = buffer.GetNextByte();
                    }
                    else if (held3 == -1)
                    {
                        held3 = buffer.GetNextByte();
                    }
                    else
                    {
                        held4 = buffer.GetNextByte();
                    }
                    continue;
                }

                if (decoder.MustFlush)
                {
                    // Must flush with an incomplete sequence: leave this loop so the
                    // code below falls back the first byte and retries the rest.
                    break;
                }

                // Input exhausted and no flush required: keep the partial sequence
                // in the decoder (not while counting) and return.
                if (chars != null)
                {
                    decoder.bLeftOver1 = held1;
                    decoder.bLeftOver2 = held2;
                    decoder.bLeftOver3 = held3;
                    decoder.bLeftOver4 = held4;
                }

                decoder.m_bytesUsed = buffer.BytesUsed;
                return buffer.Count;
            }

            if (IsGBTwoByteTrailing(held2))
            {
                // Two byte sequence.
                int pair = (held1 << 8) | unchecked((byte)held2);
                if (!buffer.AddChar(this.mapBytesToUnicode[pair], 2))
                {
                    break;
                }

                held1 = -1;
                held2 = -1;
            }
            else if (IsGBFourByteTrailing(held2) && IsGBLeadByte(held3) && IsGBFourByteTrailing(held4))
            {
                // Four byte sequence.
                int offset = GetFourBytesOffset(held1, held2, held3, held4);

                if (offset <= GBLast4ByteCode)
                {
                    // Maps to a single BMP character.
                    if (!buffer.AddChar(map4BytesToUnicode[offset], 4))
                    {
                        break;
                    }
                }
                else if (offset >= GBSurrogateOffset && offset <= GBLastSurrogateOffset)
                {
                    // Maps to a surrogate pair, which needs two output chars.
                    offset -= GBSurrogateOffset;
                    char high = unchecked((char)(0xd800 + (offset / 0x400)));
                    char low = unchecked((char)(0xdc00 + (offset % 0x400)));
                    if (!buffer.AddChar(high, low, 4))
                    {
                        break;
                    }
                }
                else
                {
                    // Well formed four byte code with no unicode mapping.
                    if (!buffer.Fallback((byte)held1, (byte)held2, (byte)held3, (byte)held4))
                    {
                        break;
                    }
                }

                held1 = -1;
                held2 = -1;
                held3 = -1;
                held4 = -1;
            }
            else
            {
                // Not a valid sequence: fall back the first byte and retry from the second.
                if (!buffer.Fallback((byte)held1))
                {
                    break;
                }

                held1 = held2;
                held2 = held3;
                held3 = held4;
                held4 = -1;
            }
        }
    }

    // Main loop over the remaining input.
    while (buffer.MoreData)
    {
        byte lead = buffer.GetNextByte();

        if (lead <= 0x7f)
        {
            // ASCII passes straight through.
            if (!buffer.AddChar((char)lead))
            {
                break;      // no room in the convert buffer
            }
            continue;
        }

        if (!IsGBLeadByte(lead))
        {
            // Neither ASCII nor a lead byte.
            if (!buffer.Fallback(lead))
            {
                break;
            }
            continue;
        }

        if (!buffer.MoreData)
        {
            // Lead byte with nothing after it.
            if (decoder != null && !decoder.MustFlush)
            {
                // Keep it for the next call (but do not record it while counting).
                if (chars != null)
                {
                    held1 = lead;
                    held2 = -1;
                    held3 = -1;
                    held4 = -1;
                }
                break;
            }

            if (!buffer.Fallback(lead))
            {
                break;
            }
            continue;
        }

        byte second = buffer.GetNextByte();

        if (IsGBTwoByteTrailing(second))
        {
            // Two byte sequence.
            int pair = (lead << 8) | second;
            if (!buffer.AddChar(this.mapBytesToUnicode[pair], 2))
            {
                break;
            }
            continue;
        }

        if (!IsGBFourByteTrailing(second))
        {
            // Unknown sequence: fall back the lead byte and retry the second byte.
            buffer.AdjustBytes(-1);
            if (!buffer.Fallback(lead))
            {
                break;
            }
            continue;
        }

        if (!buffer.EvenMoreData(2))
        {
            // Four byte candidate, but fewer than two more bytes are available.
            if (decoder != null && !decoder.MustFlush)
            {
                // Keep what we have for the next call (but not while counting).
                if (chars != null)
                {
                    held1 = lead;
                    held2 = second;

                    if (buffer.MoreData)
                    {
                        held3 = buffer.GetNextByte();
                    }
                    else
                    {
                        held3 = -1;
                    }

                    held4 = -1;
                }
                break;
            }

            // Cannot be kept: fall back the two bytes read so far.
            if (!buffer.Fallback(lead, second))
            {
                break;
            }
            continue;
        }

        byte third = buffer.GetNextByte();
        byte fourth = buffer.GetNextByte();

        if (!(IsGBLeadByte(third) && IsGBFourByteTrailing(fourth)))
        {
            // Not a valid four byte sequence: fall back the lead byte and retry the other three.
            buffer.AdjustBytes(-3);
            if (!buffer.Fallback(lead))
            {
                break;
            }
            continue;
        }

        int fourByteOffset = GetFourBytesOffset(lead, second, third, fourth);

        if (fourByteOffset <= GBLast4ByteCode)
        {
            // Maps to a single BMP character.
            if (!buffer.AddChar(map4BytesToUnicode[fourByteOffset], 4))
            {
                break;
            }
        }
        else if (fourByteOffset >= GBSurrogateOffset && fourByteOffset <= GBLastSurrogateOffset)
        {
            // Maps to a surrogate pair, which needs two output chars.
            fourByteOffset -= GBSurrogateOffset;
            char high = unchecked((char)(0xd800 + (fourByteOffset / 0x400)));
            char low = unchecked((char)(0xdc00 + (fourByteOffset % 0x400)));
            if (!buffer.AddChar(high, low, 4))
            {
                break;
            }
        }
        else
        {
            // Well formed four byte code with no unicode mapping.
            if (!buffer.Fallback(lead, second, third, fourth))
            {
                break;
            }
        }
    }

    // Record leftovers and bytes used; leftovers are not written while counting.
    if (decoder != null)
    {
        if (chars != null)
        {
            decoder.bLeftOver1 = held1;
            decoder.bLeftOver2 = held2;
            decoder.bLeftOver3 = held3;
            decoder.bLeftOver4 = held4;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes (or, when chars is null, only counts) input for code page 52936.
// '~' introduces a two byte control: "~{" enters double byte (HZ) mode, "~}"
// returns to ASCII, "~~" is a literal '~' while in ASCII mode and "~\n" is
// skipped. When a decoder is supplied, the mode and at most one leftover byte are
// carried across calls through it. Returns buffer.Count.
private unsafe int GetCharsCP52936(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)
{
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes currentMode = ISO2022Modes.ModeASCII;
    int byteLeftOver = -1;          // byte carried over from the previous call, -1 if none
    bool usedDecoder = false;       // set once this call parks a byte in the decoder

    if (decoder != null)
    {
        currentMode = decoder.currentMode;
        if (decoder.bytesLeftOverCount != 0)
        {
            byteLeftOver = decoder.bytesLeftOver[0];
        }
    }

    while (buffer.MoreData || (byteLeftOver >= 0))
    {
        byte current;
        if (byteLeftOver >= 0)
        {
            // Use up the carried-over byte first.
            current = (byte) byteLeftOver;
            byteLeftOver = -1;
        }
        else
        {
            current = buffer.GetNextByte();
        }

        if (current == '~')
        {
            if (!buffer.MoreData)
            {
                if ((decoder == null) || decoder.MustFlush)
                {
                    // Nowhere to keep the '~': fall it back.
                    buffer.Fallback(current);
                }
                else
                {
                    // decoder is non-null here, so no second null check is needed.
                    decoder.ClearMustFlush();

                    // Park the '~' in the decoder, unless we are only counting.
                    if (chars != null)
                    {
                        decoder.bytesLeftOverCount = 1;
                        decoder.bytesLeftOver[0] = (byte) '~';
                        usedDecoder = true;
                    }
                }

                // Out of data either way.
                break;
            }

            current = buffer.GetNextByte();

            if ((current == '~') && (currentMode == ISO2022Modes.ModeASCII))
            {
                // "~~" stands for a single '~'.
                if (buffer.AddChar((char) current, 2))
                {
                    continue;
                }
                break;
            }

            if (current == '{')
            {
                currentMode = ISO2022Modes.ModeHZ;
                continue;
            }

            if (current == '}')
            {
                currentMode = ISO2022Modes.ModeASCII;
                continue;
            }

            if (current == '\n')
            {
                // "~\n" is ignored.
                continue;
            }

            // Unknown escape: back up and treat the '~' as a normal byte or lead byte.
            buffer.AdjustBytes(-1);
            current = (byte) '~';
        }

        // Outside ASCII mode, bytes of 0x20 and above start a double byte character;
        // smaller values fall through to the single byte path below.
        if ((currentMode != ISO2022Modes.ModeASCII) && (current >= 0x20))
        {
            if (!buffer.MoreData)
            {
                if ((decoder == null) || decoder.MustFlush)
                {
                    // Not enough bytes: fall back the lead byte.
                    buffer.Fallback(current);
                }
                else
                {
                    // decoder is non-null here, so no second null check is needed.
                    decoder.ClearMustFlush();

                    // Park the lead byte in the decoder, unless we are only counting.
                    if (chars != null)
                    {
                        decoder.bytesLeftOverCount = 1;
                        decoder.bytesLeftOver[0] = current;
                        usedDecoder = true;
                    }
                }

                // Out of data either way.
                break;
            }

            byte trail = buffer.GetNextByte();
            ushort code = (ushort) ((current << 8) | trail);
            char wide;

            if ((current == ' ') && (trail != 0))
            {
                // Space acts as an escape: the following byte is taken as is.
                wide = (char) trail;
            }
            else
            {
                // Accepted ranges: lead 0x21-0x77 with trail 0x21-0x7e, or the same
                // with both high bits set (lead 0xa1-0xf7, trail 0xa1-0xfe).
                bool inLowRange = (current >= 0x21) && (current <= 0x77) && (trail >= 0x21) && (trail <= 0x7e);
                bool inHighRange = (current >= 0xa1) && (current <= 0xf7) && (trail >= 0xa1) && (trail <= 0xfe);

                if (!inLowRange && !inHighRange)
                {
                    if ((trail == 0x20) && (0x21 <= current) && (current <= 0x7d))
                    {
                        // A 0x20 trail after a 0x21-0x7d lead is looked up as 0x2121.
                        code = 0x2121;
                    }
                    else
                    {
                        // Illegal pair: fall it back and move on, or stop.
                        if (buffer.Fallback((byte) (code >> 8), (byte) code))
                        {
                            continue;
                        }
                        break;
                    }
                }

                // The mapping table is indexed with both high bits set.
                code = (ushort) (code | 0x8080);
                wide = base.mapBytesToUnicode[code];
            }

            bool stored = ((wide == '\0') && (code != 0))
                ? buffer.Fallback((byte) (code >> 8), (byte) code)     // unknown character
                : buffer.AddChar(wide, 2);
            if (stored)
            {
                continue;
            }
            break;
        }

        // Single byte path.
        char narrow = base.mapBytesToUnicode[current];

        // A '\0' table entry for a non-zero byte means "unknown" (the original
        // tested this same condition twice).
        if ((narrow == '\0') && (current != 0))
        {
            if (buffer.Fallback(current))
            {
                continue;
            }
            break;
        }

        if (!buffer.AddChar(narrow))
        {
            break;
        }
    }

    // Remember state only when really converting (chars is null while counting).
    if ((chars != null) && (decoder != null))
    {
        if (!usedDecoder)
        {
            // Nothing was parked this call, so the old leftover byte is spent.
            decoder.bytesLeftOverCount = 0;
        }

        bool resetMode = decoder.MustFlush && (decoder.bytesLeftOverCount == 0);
        decoder.currentMode = resetMode ? ISO2022Modes.ModeASCII : currentMode;
        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
/// <summary>
/// Decodes bytes whose mode is switched by ESC (0x1b) sequences and by the
/// shift-out (14) / shift-in (15) control bytes, looking characters up in
/// <c>mapBytesToUnicode</c>.
/// </summary>
/// <param name="bytes">Input bytes, handed to the char buffer.</param>
/// <param name="byteCount">Number of input bytes.</param>
/// <param name="chars">Output characters; decoder state is only written back when this is non-null.</param>
/// <param name="charCount">Capacity of <paramref name="chars"/>.</param>
/// <param name="decoder">Optional decoder carrying modes and up to four pending bytes between calls.</param>
/// <returns>The char buffer's <c>Count</c> after processing.</returns>
private unsafe int GetCharsCP5022xJP(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)
{
    const byte Escape = 0x1b;
    const byte ShiftOut = 14;
    const byte ShiftIn = 15;
    const ushort HalfwidthLead = 0x1000;

    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes activeMode = ISO2022Modes.ModeASCII;
    ISO2022Modes modeBeforeShiftOut = ISO2022Modes.ModeASCII;
    byte[] pending = new byte[4];
    int pendingCount = 0;

    if (decoder != null)
    {
        activeMode = decoder.currentMode;
        modeBeforeShiftOut = decoder.shiftInOutMode;
        pendingCount = decoder.bytesLeftOverCount;
        for (int k = 0; k < pendingCount; k++)
        {
            pending[k] = decoder.bytesLeftOver[k];
        }
    }

    while (buffer.MoreData || pendingCount > 0)
    {
        byte current;

        if (pendingCount > 0)
        {
            if (pending[0] == Escape)
            {
                if (buffer.MoreData)
                {
                    // Extend the pending escape sequence by one byte and re-check it.
                    pending[pendingCount++] = buffer.GetNextByte();
                    ISO2022Modes escapeResult = this.CheckEscapeSequenceJP(pending, pendingCount);

                    if (escapeResult != ISO2022Modes.ModeInvalidEscape)
                    {
                        if (escapeResult != ISO2022Modes.ModeIncompleteEscape)
                        {
                            // Complete sequence: consume it and switch both modes.
                            pendingCount = 0;
                            activeMode = modeBeforeShiftOut = escapeResult;
                        }

                        continue;
                    }
                }
                else if (decoder != null && !decoder.MustFlush)
                {
                    // No more input and no flush: leave the partial escape for the next call.
                    break;
                }

                // Invalid escape, or out of input while flushing: fall through and
                // emit the pending bytes one at a time.
            }

            current = this.DecrementEscapeBytes(ref pending, ref pendingCount);
        }
        else
        {
            current = buffer.GetNextByte();

            if (current == Escape)
            {
                if (pendingCount == 0)
                {
                    // Begin collecting a new escape sequence.
                    pending[0] = current;
                    pendingCount = 1;
                    continue;
                }

                // Only reachable when pendingCount is negative, since this is the
                // branch taken when pendingCount > 0 is false.
                buffer.AdjustBytes(-1);
            }
        }

        if (current == ShiftOut)
        {
            modeBeforeShiftOut = activeMode;
            activeMode = ISO2022Modes.ModeHalfwidthKatakana;
            continue;
        }

        if (current == ShiftIn)
        {
            activeMode = modeBeforeShiftOut;
            continue;
        }

        ushort code = current;
        bool isDoubleByte = false;

        if (activeMode == ISO2022Modes.ModeJIS0208)
        {
            if (pendingCount > 0)
            {
                // Take the second byte from the pending bytes unless they start an escape.
                if (pending[0] != Escape)
                {
                    code = (ushort)((code << 8) | this.DecrementEscapeBytes(ref pending, ref pendingCount));
                    isDoubleByte = true;
                }
            }
            else if (buffer.MoreData)
            {
                code = (ushort)((code << 8) | buffer.GetNextByte());
                isDoubleByte = true;
            }
            else
            {
                // First byte of a pair with nothing after it.
                if (decoder == null || decoder.MustFlush)
                {
                    buffer.Fallback(current);
                }
                else if (chars != null)
                {
                    pending[0] = current;
                    pendingCount = 1;
                }

                break;
            }

            if (isDoubleByte && (code & 0xff00) == 0x2a00)
            {
                // A 0x2a first byte is remapped: keep the second byte, swap in the 0x10 lead.
                code = (ushort)((code & 0xff) | HalfwidthLead);
            }
        }
        else if (code >= 0xa1 && code <= 0xdf)
        {
            // Bytes 0xa1..0xdf get the 0x10 lead and have bit 7 cleared.
            code = (ushort)((code | HalfwidthLead) & 0xff7f);
        }
        else if (activeMode == ISO2022Modes.ModeHalfwidthKatakana)
        {
            code = (ushort)(code | HalfwidthLead);
        }

        char decoded = base.mapBytesToUnicode[code];

        if (decoded == '\0' && code != 0)
        {
            // Unmapped: fall back with as many bytes as were consumed for this code.
            bool recovered = isDoubleByte
                ? buffer.Fallback((byte)(code >> 8), (byte)code)
                : buffer.Fallback(current);

            if (recovered)
            {
                continue;
            }

            break;
        }

        if (!buffer.AddChar(decoded, isDoubleByte ? 2 : 1))
        {
            break;
        }
    }

    if (chars != null && decoder != null)
    {
        bool keepState = !decoder.MustFlush || pendingCount != 0;
        if (keepState)
        {
            decoder.currentMode = activeMode;
            decoder.shiftInOutMode = modeBeforeShiftOut;
            decoder.bytesLeftOverCount = pendingCount;
            decoder.bytesLeftOver = pending;
        }
        else
        {
            // Flushed with nothing pending: return the decoder to its initial state.
            decoder.currentMode = ISO2022Modes.ModeASCII;
            decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
            decoder.bytesLeftOverCount = 0;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
/// <summary>
/// Decodes a mix of single-byte (&lt;= 0x7f), two-byte and four-byte sequences,
/// first finishing any bytes the decoder carried over from a previous call.
/// </summary>
/// <param name="bytes">Input bytes, handed to the char buffer.</param>
/// <param name="byteCount">Number of input bytes.</param>
/// <param name="chars">Output characters; leftover bytes are only stored in the decoder when this is non-null.</param>
/// <param name="charCount">Capacity of <paramref name="chars"/>.</param>
/// <param name="baseDecoder">Optional decoder; cast to <c>GB18030Decoder</c>.</param>
/// <returns>The char buffer's <c>Count</c> after processing.</returns>
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    GB18030Decoder decoder = (GB18030Decoder)baseDecoder;
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    // Up to four bytes of a partially read sequence; -1 marks an empty slot.
    short b1 = -1;
    short b2 = -1;
    short b3 = -1;
    short b4 = -1;

    if (decoder != null && decoder.bLeftOver1 != -1)
    {
        b1 = decoder.bLeftOver1;
        b2 = decoder.bLeftOver2;
        b3 = decoder.bLeftOver3;
        b4 = decoder.bLeftOver4;

        // Finish whatever the previous call left behind.
        while (b1 != -1)
        {
            if (!this.IsGBLeadByte(b1))
            {
                // Not a lead byte: emit it (or fall back) and slide the rest down one slot.
                if (b1 <= 0x7f)
                {
                    if (!buffer.AddChar((char)((ushort)b1)))
                    {
                        break;
                    }
                }
                else if (!buffer.Fallback((byte)b1))
                {
                    break;
                }

                b1 = b2;
                b2 = b3;
                b3 = b4;
                b4 = -1;
                continue;
            }

            // Pull in bytes until there is a second byte, and a fourth one when the
            // second byte is a four-byte trailing byte.
            while (b2 == -1 || (this.IsGBFourByteTrailing(b2) && b4 == -1))
            {
                if (!buffer.MoreData)
                {
                    if (!decoder.MustFlush)
                    {
                        // Still incomplete and not flushing: save state and return.
                        if (chars != null)
                        {
                            decoder.bLeftOver1 = b1;
                            decoder.bLeftOver2 = b2;
                            decoder.bLeftOver3 = b3;
                            decoder.bLeftOver4 = b4;
                        }

                        decoder.m_bytesUsed = buffer.BytesUsed;
                        return buffer.Count;
                    }

                    // Flushing: stop reading and resolve what we have below.
                    break;
                }

                if (b2 == -1)
                {
                    b2 = buffer.GetNextByte();
                }
                else if (b3 == -1)
                {
                    b3 = buffer.GetNextByte();
                }
                else
                {
                    b4 = buffer.GetNextByte();
                }
            }

            if (this.IsGBTwoByteTrailing(b2))
            {
                int twoByteCode = (b1 << 8) | (byte)b2;
                if (!buffer.AddChar(base.mapBytesToUnicode[twoByteCode], 2))
                {
                    break;
                }

                b1 = -1;
                b2 = -1;
                continue;
            }

            if (this.IsGBFourByteTrailing(b2) && this.IsGBLeadByte(b3) && this.IsGBFourByteTrailing(b4))
            {
                int offset = this.GetFourBytesOffset(b1, b2, b3, b4);
                bool emitted;

                if (offset <= 0x99fb)
                {
                    emitted = buffer.AddChar(this.map4BytesToUnicode[offset], 4);
                }
                else if (offset >= 0x2e248 && offset <= 0x12e247)
                {
                    // This offset range is emitted as a surrogate pair.
                    offset -= 0x2e248;
                    emitted = buffer.AddChar((char)(0xd800 + (offset / 0x400)), (char)(0xdc00 + (offset % 0x400)), 4);
                }
                else
                {
                    emitted = buffer.Fallback((byte)b1, (byte)b2, (byte)b3, (byte)b4);
                }

                if (!emitted)
                {
                    break;
                }

                b1 = -1;
                b2 = -1;
                b3 = -1;
                b4 = -1;
                continue;
            }

            // Lead byte without a usable continuation: fall back on it alone and retry
            // from the following byte.
            if (!buffer.Fallback((byte)b1))
            {
                break;
            }

            b1 = b2;
            b2 = b3;
            b3 = b4;
            b4 = -1;
        }
    }

    while (buffer.MoreData)
    {
        byte first = buffer.GetNextByte();

        if (first <= 0x7f)
        {
            if (!buffer.AddChar((char)first))
            {
                break;
            }

            continue;
        }

        if (!this.IsGBLeadByte(first))
        {
            if (!buffer.Fallback(first))
            {
                break;
            }

            continue;
        }

        if (!buffer.MoreData)
        {
            // Lead byte at the end of input.
            if (decoder != null && !decoder.MustFlush)
            {
                if (chars != null)
                {
                    b1 = first;
                    b2 = -1;
                    b3 = -1;
                    b4 = -1;
                }

                break;
            }

            if (!buffer.Fallback(first))
            {
                break;
            }

            continue;
        }

        byte second = buffer.GetNextByte();

        if (this.IsGBTwoByteTrailing(second))
        {
            int twoByteCode = (first << 8) | second;
            if (!buffer.AddChar(base.mapBytesToUnicode[twoByteCode], 2))
            {
                break;
            }

            continue;
        }

        if (!this.IsGBFourByteTrailing(second))
        {
            // The second byte fits neither form: step back one byte and fall back on the lead alone.
            buffer.AdjustBytes(-1);
            if (!buffer.Fallback(first))
            {
                break;
            }

            continue;
        }

        if (!buffer.EvenMoreData(2))
        {
            // A four-byte sequence is cut short by the end of input.
            if (decoder != null && !decoder.MustFlush)
            {
                if (chars != null)
                {
                    b1 = first;
                    b2 = second;
                    if (buffer.MoreData)
                    {
                        b3 = buffer.GetNextByte();
                    }
                    else
                    {
                        b3 = -1;
                    }

                    b4 = -1;
                }

                break;
            }

            if (!buffer.Fallback(first, second))
            {
                break;
            }

            continue;
        }

        byte third = buffer.GetNextByte();
        byte fourth = buffer.GetNextByte();

        if (!(this.IsGBLeadByte(third) && this.IsGBFourByteTrailing(fourth)))
        {
            // Malformed tail: step back three bytes and fall back on the lead alone.
            buffer.AdjustBytes(-3);
            if (!buffer.Fallback(first))
            {
                break;
            }

            continue;
        }

        int fourByteOffset = this.GetFourBytesOffset(first, second, third, fourth);
        bool accepted;

        if (fourByteOffset <= 0x99fb)
        {
            accepted = buffer.AddChar(this.map4BytesToUnicode[fourByteOffset], 4);
        }
        else if (fourByteOffset >= 0x2e248 && fourByteOffset <= 0x12e247)
        {
            // This offset range is emitted as a surrogate pair.
            fourByteOffset -= 0x2e248;
            accepted = buffer.AddChar((char)(0xd800 + (fourByteOffset / 0x400)), (char)(0xdc00 + (fourByteOffset % 0x400)), 4);
        }
        else
        {
            accepted = buffer.Fallback(first, second, third, fourth);
        }

        if (!accepted)
        {
            break;
        }
    }

    if (decoder != null)
    {
        if (chars != null)
        {
            decoder.bLeftOver1 = b1;
            decoder.bLeftOver2 = b2;
            decoder.bLeftOver3 = b3;
            decoder.bLeftOver4 = b4;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}