// Decodes ISCII bytes into chars through an EncodingCharBuffer, or only counts
// the chars when 'chars' is null.
//
//   bytes, byteCount - input; only asserted here because the internal caller validated them.
//   chars, charCount - output; 'chars' may be null when counting, in which case no
//                      decoder state is written back.
//   baseDecoder      - cast to ISCIIDecoder; may be null. When present it supplies the state
//                      left by the previous call and receives the state left by this one.
//
// Returns buffer.Count after the input is exhausted or an AddChar/Fallback call fails.
[System.Security.SecurityCritical] // auto-generated
public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    // 'chars' is intentionally not asserted: null means "count only".
    Debug.Assert(bytes != null, "[ISCIIEncoding.GetChars]bytes is null");
    Debug.Assert(byteCount >= 0, "[ISCIIEncoding.GetChars]byteCount is negative");
    Debug.Assert(charCount >= 0, "[ISCIIEncoding.GetChars]charCount is negative");

    ISCIIDecoder decoder = (ISCIIDecoder)baseDecoder;
    EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    // Begin with the defaults and overlay whatever the decoder remembered.
    int codePage = _defaultCodePage;
    bool lastWasAtr = false;
    bool lastWasVirama = false;
    bool lastWasDevExt = false;
    char charIfNukta = '\0';     // emitted when the next byte is a Nukta
    char charIfNoNukta = '\0';   // emitted when it is not

    if (decoder != null)
    {
        codePage = decoder.currentCodePage;
        lastWasAtr = decoder.bLastATR;
        lastWasVirama = decoder.bLastVirama;
        lastWasDevExt = decoder.bLastDevenagariStressAbbr;
        charIfNukta = decoder.cLastCharForNextNukta;
        charIfNoNukta = decoder.cLastCharForNoNextNukta;
    }

    // True when the previous byte left one of the special states pending.
    bool pendingSpecial = lastWasVirama || lastWasAtr || lastWasDevExt || charIfNukta != '\0';

    // Index into the mapping tables for this code page (some code pages share a table); -1 if out of range.
    Debug.Assert(codePage >= CodeDevanagari && codePage <= CodePunjabi,
        "[ISCIIEncoding.GetChars]Decoder code page must be >= Devanagari and <= Punjabi, not " + codePage);
    int mappingIndex = (codePage >= CodeDevanagari && codePage <= CodePunjabi)
        ? s_IndicMappingIndex[codePage]
        : -1;

    while (buffer.MoreData)
    {
        byte b = buffer.GetNextByte();

        if (pendingSpecial)
        {
            pendingSpecial = false;

            // Exactly one of the special flags is expected to be set.
            Debug.Assert(((lastWasVirama ? 1 : 0) + (lastWasAtr ? 1 : 0) + (lastWasDevExt ? 1 : 0) + ((charIfNukta > 0) ? 1 : 0)) == 1,
                String.Format(CultureInfo.InvariantCulture,
                    "[ISCIIEncoding.GetChars]Special cases require 1 and only 1 special case flag: LastATR {0} Dev. {1} Nukta {2}",
                    lastWasAtr, lastWasDevExt, charIfNukta));

            if (lastWasAtr)
            {
                // Previous byte was an ATR marker: this byte selects a code page.
                if (b >= (0x40 | CodeDevanagari) && b <= (0x40 | CodePunjabi))
                {
                    codePage = b & 0xf;
                    mappingIndex = s_IndicMappingIndex[codePage];
                    lastWasAtr = false;
                    continue;
                }

                // 0x40 switches back to the default code page. 0x41 (Roman) is not supported
                // and is handled identically: default code page, no fallback character.
                if (b == 0x40 || b == 0x41)
                {
                    codePage = _defaultCodePage;
                    mappingIndex = (codePage >= CodeDevanagari && codePage <= CodePunjabi)
                        ? s_IndicMappingIndex[codePage]
                        : -1;
                    lastWasAtr = false;
                    continue;
                }

                // Any other ATR code is unsupported: fall back the ATR byte itself.
                // On failure we stop with lastWasAtr still set so the state is remembered.
                if (!buffer.Fallback(ControlATR))
                {
                    break;
                }

                lastWasAtr = false;

                Debug.Assert(lastWasVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in bLastATR mode");
                Debug.Assert(lastWasDevExt == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in bLastATR mode");
                Debug.Assert(charIfNukta == '\0', "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastATR mode");
                Debug.Assert(charIfNoNukta == '\0', "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastATR mode");

                // Fall out of the special handling and process this byte normally.
            }
            else if (lastWasVirama)
            {
                // A Virama followed by Virama yields ZWNJ; followed by Nukta yields ZWJ.
                // (The Virama itself was already emitted when the flag was set.)
                if (b == Virama || b == Nukta)
                {
                    if (!buffer.AddChar(b == Virama ? ZWNJ : ZWJ))
                    {
                        break;
                    }

                    lastWasVirama = false;
                    continue;
                }

                lastWasVirama = false;

                Debug.Assert(lastWasAtr == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in bLastVirama mode");
                Debug.Assert(lastWasDevExt == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in bLastVirama mode");
                Debug.Assert(charIfNukta == '\0', "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastVirama mode");
                Debug.Assert(charIfNoNukta == '\0', "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastVirama mode");
            }
            else if (lastWasDevExt)
            {
                // Previous byte was the 0xf0 extension byte; only 0xb8 and 0xbf complete it.
                if (b == 0xb8)
                {
                    // Devanagari stress sign anudatta
                    if (!buffer.AddChar('\x0952'))
                    {
                        break;
                    }

                    lastWasDevExt = false;
                    continue;
                }

                if (b == 0xbf)
                {
                    // Devanagari abbreviation sign
                    if (!buffer.AddChar('\x0970'))
                    {
                        break;
                    }

                    lastWasDevExt = false;
                    continue;
                }

                // Unexpected follower: fall back the extension byte, then process this byte normally.
                if (!buffer.Fallback(DevenagariExt))
                {
                    break;
                }

                lastWasDevExt = false;

                Debug.Assert(lastWasAtr == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in bLastDevenagariStressAbbr mode");
                Debug.Assert(lastWasVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in bLastDevenagariStressAbbr mode");
                Debug.Assert(charIfNukta == '\0', "[ISCIIEncoding.GetChars] Expected no cLastCharForNextNukta in bLastDevenagariStressAbbr mode");
                Debug.Assert(charIfNoNukta == '\0', "[ISCIIEncoding.GetChars] Expected no cLastCharForNoNextNukta in bLastDevenagariStressAbbr mode");
            }
            else
            {
                // The previous character was held back to see whether a Nukta follows it.
                Debug.Assert(charIfNukta > 0 && charIfNoNukta > 0,
                    "[ISCIIEncoding.GetChars]No other special case found, but cLastCharFor(No)NextNukta variable(s) aren't set.");

                // Emit the combined form for a Nukta, otherwise the plain held-back character.
                char held = (b == Nukta) ? charIfNukta : charIfNoNukta;
                if (!buffer.AddChar(held))
                {
                    break;
                }

                charIfNukta = charIfNoNukta = '\0';

                if (b == Nukta)
                {
                    // The Nukta was consumed by the combination.
                    continue;
                }

                Debug.Assert(lastWasAtr == false, "[ISCIIEncoding.GetChars] Expected no bLastATR in cLastCharForNextNukta mode");
                Debug.Assert(lastWasVirama == false, "[ISCIIEncoding.GetChars] Expected no bLastVirama in cLastCharForNextNukta mode");
                Debug.Assert(lastWasDevExt == false, "[ISCIIEncoding.GetChars] Expected no bLastDevenagariStressAbbr in cLastCharForNextNukta mode");
            }
        }

        // From here on no special state may be pending.
        Debug.Assert(!pendingSpecial && !lastWasDevExt && !lastWasVirama && !lastWasAtr && charIfNukta == '\0',
            "[ISCIIEncoding.GetChars]No special state for last code point should exist at this point.");

        // Bytes below the multi-byte range map straight to the same char value.
        if (b < MultiByteBegin)
        {
            if (!buffer.AddChar((char)b))
            {
                break;
            }

            continue;
        }

        // ATR marker: the next byte selects the code page.
        if (b == ControlATR)
        {
            lastWasAtr = pendingSpecial = true;
            continue;
        }

        Debug.Assert(mappingIndex != -1, "[ISCIIEncoding.GetChars]Expected valid currentCodePageIndex != -1");
        char mapped = s_IndicMapping[mappingIndex, 0, b - MultiByteBegin];
        char alternate = s_IndicMapping[mappingIndex, 1, b - MultiByteBegin];

        // No alternate form (or a lonely Nukta): emit the mapped char, or fall back if unmapped.
        if (alternate == 0 || b == Nukta)
        {
            bool emitted = (mapped == 0) ? buffer.Fallback(b) : buffer.AddChar(mapped);
            if (!emitted)
            {
                break;
            }

            continue;
        }

        // Virama is emitted now; remember it so the next byte can become ZWNJ/ZWJ.
        if (b == Virama)
        {
            if (!buffer.AddChar(mapped))
            {
                break;
            }

            lastWasVirama = pendingSpecial = true;
            continue;
        }

        // Character whose form depends on a following Nukta: hold it back.
        if ((alternate & 0xF000) == 0)
        {
            pendingSpecial = true;
            charIfNukta = alternate;
            charIfNoNukta = mapped;
            continue;
        }

        // Only remaining possibility: the Devanagari extension byte (F0 B8 / F0 BF sequences).
        Debug.Assert(codePage == CodeDevanagari && b == DevenagariExt,
            String.Format(CultureInfo.InvariantCulture,
                "[ISCIIEncoding.GetChars] Devenagari special case must {0} not {1} or in Devanagari code page {2} not {3}.",
                DevenagariExt, b, CodeDevanagari, codePage));
        lastWasDevExt = pendingSpecial = true;
    }

    // Without a decoder, or when flushing, pending ATR / extension / held-back state must be emitted.
    // If emitting fails (per the original comments: Convert with insufficient buffer), the state is
    // kept and the byte the failed call backed out is re-consumed so the byte count stays the same.
    // MustFlush is re-read below on purpose; the original comments say a failure turns it off.
    if (decoder == null || decoder.MustFlush)
    {
        if (lastWasAtr)
        {
            if (buffer.Fallback(ControlATR))
            {
                lastWasAtr = false;
            }
            else
            {
                buffer.GetNextByte();
            }
        }
        else if (lastWasDevExt)
        {
            if (buffer.Fallback(DevenagariExt))
            {
                lastWasDevExt = false;
            }
            else
            {
                buffer.GetNextByte();
            }
        }
        else if (charIfNoNukta != '\0')
        {
            if (buffer.AddChar(charIfNoNukta))
            {
                charIfNoNukta = charIfNukta = '\0';
            }
            else
            {
                buffer.GetNextByte();
            }
        }

        // A pending Virama needs no flushing: it was already emitted.
    }

    // Persist state only when really converting (not counting).
    if (decoder != null && chars != null)
    {
        if (!decoder.MustFlush || charIfNoNukta != '\0' || lastWasAtr || lastWasDevExt)
        {
            // Either not flushing, or state survived a failed flush.
            Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISCIIEncoding.GetChars]Expected no state or not converting or not flushing");
            decoder.currentCodePage = codePage;
            decoder.bLastVirama = lastWasVirama;
            decoder.bLastATR = lastWasAtr;
            decoder.bLastDevenagariStressAbbr = lastWasDevExt;
            decoder.cLastCharForNextNukta = charIfNukta;
            decoder.cLastCharForNoNextNukta = charIfNoNukta;
        }
        else
        {
            // Fully flushed: reset the decoder to its initial state.
            decoder.currentCodePage = _defaultCodePage;
            decoder.bLastVirama = false;
            decoder.bLastATR = false;
            decoder.bLastDevenagariStressAbbr = false;
            decoder.cLastCharForNextNukta = '\0';
            decoder.cLastCharForNoNextNukta = '\0';
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes GB18030 bytes into chars through an EncodingCharBuffer, or only counts
// the chars when 'chars' is null.
//
//   bytes, byteCount - input; only asserted here because the internal caller validated them.
//   chars, charCount - output; 'chars' may be null when counting, in which case the
//                      decoder's leftover bytes are not overwritten.
//   baseDecoder      - cast to GB18030Decoder; may be null. Holds up to four leftover bytes
//                      (bLeftOver1..4, -1 meaning "empty") from an incomplete sequence.
//
// Returns buffer.Count.
[System.Security.SecurityCritical] // auto-generated
public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    // 'chars' is intentionally not asserted: null means "count only".
    Debug.Assert(bytes != null, "[GB18030Encoding.GetChars]bytes is null");
    Debug.Assert(byteCount >= 0, "[GB18030Encoding.GetChars]byteCount is negative");
    Debug.Assert(charCount >= 0, "[GB18030Encoding.GetChars]charCount is negative");

    GB18030Decoder decoder = (GB18030Decoder)baseDecoder;
    EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    // Working copies of the leftover bytes; the decoder itself is only updated at the end.
    short pending1 = -1;
    short pending2 = -1;
    short pending3 = -1;
    short pending4 = -1;

    if (decoder != null && decoder.bLeftOver1 != -1)
    {
        pending1 = decoder.bLeftOver1;
        pending2 = decoder.bLeftOver2;
        pending3 = decoder.bLeftOver3;
        pending4 = decoder.bLeftOver4;

        // Loop because an invalid 4-byte attempt may leave several bytes to re-examine.
        while (pending1 != -1)
        {
            // Not a lead byte (e.g. a trail byte from a bad 4-byte sequence):
            // emit it as ASCII or fall it back, shift the rest down, and retry.
            if (!IsGBLeadByte(pending1))
            {
                bool emitted = (pending1 <= 0x7f)
                    ? buffer.AddChar((char)pending1)
                    : buffer.Fallback((byte)pending1);
                if (!emitted)
                {
                    break;
                }

                pending1 = pending2;
                pending2 = pending3;
                pending3 = pending4;
                pending4 = -1;
                continue;
            }

            // Pull more input until we have 2 bytes, or 4 when the second is a four-byte trail.
            while (pending2 == -1 || (IsGBFourByteTrailing(pending2) && pending4 == -1))
            {
                if (!buffer.MoreData)
                {
                    if (!decoder.MustFlush)
                    {
                        // Not flushing: keep the partial sequence for the next call
                        // (but leave the decoder alone when only counting).
                        if (chars != null)
                        {
                            decoder.bLeftOver1 = pending1;
                            decoder.bLeftOver2 = pending2;
                            decoder.bLeftOver3 = pending3;
                            decoder.bLeftOver4 = pending4;
                        }

                        decoder.m_bytesUsed = buffer.BytesUsed;
                        return buffer.Count;
                    }

                    // Flushing: stop reading; the missing bytes stay -1 and the
                    // classification below treats the sequence as invalid.
                    break;
                }

                if (pending2 == -1)
                {
                    pending2 = buffer.GetNextByte();
                }
                else if (pending3 == -1)
                {
                    pending3 = buffer.GetNextByte();
                }
                else
                {
                    pending4 = buffer.GetNextByte();
                }
            }

            if (IsGBTwoByteTrailing(pending2))
            {
                // Two-byte sequence.
                int twoByteCode = (pending1 << 8) | unchecked((byte)pending2);
                if (!buffer.AddChar(mapBytesToUnicode[twoByteCode], 2))
                {
                    break;
                }

                pending1 = -1;
                pending2 = -1;
            }
            else if (IsGBFourByteTrailing(pending2) && IsGBLeadByte(pending3) && IsGBFourByteTrailing(pending4))
            {
                // Four-byte sequence.
                int offset = GetFourBytesOffset(pending1, pending2, pending3, pending4);

                if (offset <= GBLast4ByteCode)
                {
                    // Maps to a single BMP char.
                    if (!buffer.AddChar(map4BytesToUnicode[offset], 4))
                    {
                        break;
                    }
                }
                else if (offset >= GBSurrogateOffset && offset <= GBLastSurrogateOffset)
                {
                    // Maps to a surrogate pair.
                    offset -= GBSurrogateOffset;
                    if (!buffer.AddChar(unchecked((char)(0xd800 + (offset / 0x400))),
                                        unchecked((char)(0xdc00 + (offset % 0x400))), 4))
                    {
                        break;
                    }
                }
                else
                {
                    // Well-formed sequence with no Unicode mapping.
                    if (!buffer.Fallback((byte)pending1, (byte)pending2, (byte)pending3, (byte)pending4))
                    {
                        break;
                    }
                }

                pending1 = -1;
                pending2 = -1;
                pending3 = -1;
                pending4 = -1;
            }
            else
            {
                // Invalid sequence: fall back the first byte and shift the rest down.
                if (!buffer.Fallback((byte)pending1))
                {
                    break;
                }

                pending1 = pending2;
                pending2 = pending3;
                pending3 = pending4;
                pending4 = -1;
            }
        }
    }

    // Main loop over the remaining input.
    while (buffer.MoreData)
    {
        byte lead = buffer.GetNextByte();

        // ASCII passes through.
        if (lead <= 0x7f)
        {
            if (!buffer.AddChar((char)lead))
            {
                break;
            }

            continue;
        }

        // Neither ASCII nor a lead byte: fall it back.
        if (!IsGBLeadByte(lead))
        {
            if (!buffer.Fallback(lead))
            {
                break;
            }

            continue;
        }

        // Lead byte with nothing after it.
        if (!buffer.MoreData)
        {
            if (decoder != null && !decoder.MustFlush)
            {
                // Keep it for the next call (not when counting).
                if (chars != null)
                {
                    pending1 = lead;
                    pending2 = -1;
                    pending3 = -1;
                    pending4 = -1;
                }

                break;
            }

            if (!buffer.Fallback(lead))
            {
                break;
            }

            continue;
        }

        byte trail = buffer.GetNextByte();

        // Two-byte sequence.
        if (IsGBTwoByteTrailing(trail))
        {
            int twoByteCode = (lead << 8) | trail;
            if (!buffer.AddChar(mapBytesToUnicode[twoByteCode], 2))
            {
                break;
            }

            continue;
        }

        // Unknown second byte: fall back the lead byte and re-read the second one.
        if (!IsGBFourByteTrailing(trail))
        {
            buffer.AdjustBytes(-1);
            if (!buffer.Fallback(lead))
            {
                break;
            }

            continue;
        }

        // Four-byte candidate, but fewer than two more bytes are available.
        if (!buffer.EvenMoreData(2))
        {
            if (decoder != null && !decoder.MustFlush)
            {
                // Keep what we have (including one more byte if present) for the next call.
                if (chars != null)
                {
                    pending1 = lead;
                    pending2 = trail;
                    if (buffer.MoreData)
                    {
                        pending3 = buffer.GetNextByte();
                    }
                    else
                    {
                        pending3 = -1;
                    }

                    pending4 = -1;
                }

                break;
            }

            // Cannot be stored: fall back the two bytes we have.
            if (!buffer.Fallback(lead, trail))
            {
                break;
            }

            continue;
        }

        byte third = buffer.GetNextByte();
        byte fourth = buffer.GetNextByte();

        // Not a valid four-byte sequence: fall back the lead byte and re-read the other three.
        if (!(IsGBLeadByte(third) && IsGBFourByteTrailing(fourth)))
        {
            buffer.AdjustBytes(-3);
            if (!buffer.Fallback(lead))
            {
                break;
            }

            continue;
        }

        int fourByteOffset = GetFourBytesOffset(lead, trail, third, fourth);

        if (fourByteOffset <= GBLast4ByteCode)
        {
            // Maps to a single BMP char.
            if (!buffer.AddChar(map4BytesToUnicode[fourByteOffset], 4))
            {
                break;
            }
        }
        else if (fourByteOffset >= GBSurrogateOffset && fourByteOffset <= GBLastSurrogateOffset)
        {
            // Maps to a surrogate pair.
            fourByteOffset -= GBSurrogateOffset;
            if (!buffer.AddChar(unchecked((char)(0xd800 + (fourByteOffset / 0x400))),
                                unchecked((char)(0xdc00 + (fourByteOffset % 0x400))), 4))
            {
                break;
            }
        }
        else
        {
            // Well-formed sequence with no Unicode mapping.
            if (!buffer.Fallback(lead, trail, third, fourth))
            {
                break;
            }
        }
    }

    // Store leftovers (only when really converting) and report bytes consumed.
    if (decoder != null)
    {
        if (chars != null)
        {
            decoder.bLeftOver1 = pending1;
            decoder.bLeftOver2 = pending2;
            decoder.bLeftOver3 = pending3;
            decoder.bLeftOver4 = pending4;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes code page 52936 (HZ, '~'-escaped) bytes into chars through an
// EncodingCharBuffer, or only counts the chars when 'chars' is null.
//
//   bytes, byteCount - input; asserted non-null / non-negative.
//   chars, charCount - output; 'chars' may be null when counting, in which case the
//                      decoder's leftover byte and mode are not updated.
//   decoder          - may be null. Carries the current mode and at most one leftover byte.
//
// Returns buffer.Count.
[System.Security.SecurityCritical] // auto-generated
private unsafe int GetCharsCP52936(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)
{
    Debug.Assert(byteCount >= 0, "[ISO2022Encoding.GetCharsCP52936]count >=0");
    Debug.Assert(bytes != null, "[ISO2022Encoding.GetCharsCP52936]bytes!=null");

    EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes mode = ISO2022Modes.ModeASCII;
    int carriedByte = -1;          // leftover byte from the decoder, -1 when none
    bool storedInDecoder = false;  // set when this call parks a byte in the decoder

    if (decoder != null)
    {
        mode = decoder.currentMode;
        if (decoder.bytesLeftOverCount != 0)
        {
            carriedByte = decoder.bytesLeftOver[0];
        }
    }

    while (buffer.MoreData || carriedByte >= 0)
    {
        // The carried-over byte, if any, is consumed before fresh input.
        byte ch;
        if (carriedByte >= 0)
        {
            ch = (byte)carriedByte;
            carriedByte = -1;
        }
        else
        {
            ch = buffer.GetNextByte();
        }

        // '~' introduces an escape; the following byte says which one.
        if (ch == '~')
        {
            if (!buffer.MoreData)
            {
                // Nothing follows: fall back when there is no decoder or we are flushing.
                // We stop either way, so the Fallback result is not inspected.
                if (decoder == null || decoder.MustFlush)
                {
                    buffer.Fallback(ch);
                    break;
                }

                // Otherwise park the '~' in the decoder (not when counting).
                decoder.ClearMustFlush();
                if (chars != null)
                {
                    decoder.bytesLeftOverCount = 1;
                    decoder.bytesLeftOver[0] = (byte)'~';
                    storedInDecoder = true;
                }

                break;
            }

            ch = buffer.GetNextByte();

            if (ch == '~' && mode == ISO2022Modes.ModeASCII)
            {
                // "~~" stands for a literal '~' (two bytes consumed).
                if (!buffer.AddChar((char)ch, 2))
                {
                    break;
                }

                continue;
            }

            if (ch == '{')
            {
                // Switch to double-byte mode.
                mode = ISO2022Modes.ModeHZ;
                continue;
            }

            if (ch == '}')
            {
                // Switch back to ASCII mode.
                mode = ISO2022Modes.ModeASCII;
                continue;
            }

            if (ch == '\n')
            {
                // "~\n" is ignored.
                continue;
            }

            // Unknown escape: un-read the second byte and treat '~' as an ordinary (or lead) byte.
            buffer.AdjustBytes(-1);
            ch = (byte)'~';
        }

        if (mode != ISO2022Modes.ModeASCII)
        {
            Debug.Assert(mode == ISO2022Modes.ModeHZ, "[ISO2022Encoding.GetCharsCP52936]Expected ModeHZ");

            // Bytes below 0x20 are passed through the single-byte path below
            // (compatibility with Everett, per the original comments).
            if (ch >= 0x20)
            {
                // Double-byte character: a second byte is required.
                if (!buffer.MoreData)
                {
                    if (decoder == null || decoder.MustFlush)
                    {
                        // Fall back the lone lead byte; we stop regardless of the result.
                        buffer.Fallback(ch);
                        break;
                    }

                    // Park the lead byte in the decoder (not when counting).
                    decoder.ClearMustFlush();
                    if (chars != null)
                    {
                        decoder.bytesLeftOverCount = 1;
                        decoder.bytesLeftOver[0] = ch;
                        storedInDecoder = true;
                    }

                    break;
                }

                byte ch2 = buffer.GetNextByte();
                ushort iBytes = (ushort)(ch << 8 | ch2);
                char cm;

                if (ch == ' ' && ch2 != 0)
                {
                    // Everett treated space as an escape for a single-byte char: emit ch2 as-is.
                    cm = (char)ch2;
                }
                else
                {
                    // Accepted ranges: lead 0x21-0x77 / trail 0x21-0x7e, or the same
                    // with the high bit set on both bytes (lead 0xa1-0xf7 / trail 0xa1-0xfe).
                    bool in7BitRange = ch >= 0x21 && ch <= 0x77 && ch2 >= 0x21 && ch2 <= 0x7e;
                    bool in8BitRange = ch >= 0xa1 && ch <= 0xf7 && ch2 >= 0xa1 && ch2 <= 0xfe;

                    if (!in7BitRange && !in8BitRange)
                    {
                        if (ch2 == 0x20 && 0x21 <= ch && ch <= 0x7d)
                        {
                            // Everett mapped XX20 to U+3000 (ideographic space); look up 0x2121 instead.
                            iBytes = 0x2121;
                        }
                        else
                        {
                            // Illegal pair: fall back both bytes.
                            if (!buffer.Fallback((byte)(iBytes >> 8), (byte)(iBytes)))
                            {
                                break;
                            }

                            continue;
                        }
                    }

                    // Set the high bits before the table lookup.
                    iBytes |= 0x8080;
                    cm = mapBytesToUnicode[iBytes];
                }

                // Unmapped pair: fall back both bytes.
                if (cm == UNKNOWN_CHAR_FLAG && iBytes != 0)
                {
                    if (!buffer.Fallback((byte)(iBytes >> 8), (byte)(iBytes)))
                    {
                        break;
                    }

                    continue;
                }

                if (!buffer.AddChar(cm, 2))
                {
                    break;
                }

                continue;
            }
        }

        // Single-byte path. Some values above 0x7f are allowed (Everett did), so use the table.
        char c = mapBytesToUnicode[ch];
        bool unmapped = c == UNKNOWN_CHAR_FLAG || c == 0;
        if (unmapped && ch != 0)
        {
            if (!buffer.Fallback((byte)ch))
            {
                break;
            }

            continue;
        }

        if (!buffer.AddChar(c))
        {
            break;
        }
    }

    // Persist state only when really converting (not counting).
    if (chars != null && decoder != null)
    {
        // No byte parked by this call: clear any stale leftover.
        if (!storedInDecoder)
        {
            decoder.bytesLeftOverCount = 0;
        }

        if (decoder.MustFlush && decoder.bytesLeftOverCount == 0)
        {
            // Flushed with nothing pending: back to the initial mode.
            decoder.currentMode = ISO2022Modes.ModeASCII;
        }
        else
        {
            // Either not flushing, or state remains.
            Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISO2022Encoding.GetCharsCP52936]Expected no state or not converting or not flushing");
            decoder.currentMode = mode;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes ISO-2022 Japanese (code pages 5022x) bytes into chars through an
// EncodingCharBuffer, or only counts the chars when 'chars' is null.
//
//   bytes, byteCount - input bytes.
//   chars, charCount - output; 'chars' may be null when counting, in which case no
//                      decoder state is written back.
//   decoder          - may be null. Carries the current mode, the shift-in mode and up to
//                      four buffered bytes (a partial escape sequence or a lone lead byte).
//
// Returns buffer.Count.
[System.Security.SecurityCritical] // auto-generated
private unsafe int GetCharsCP5022xJP(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)
{
    EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes currentMode = ISO2022Modes.ModeASCII;   // mode in effect now
    ISO2022Modes shiftInMode = ISO2022Modes.ModeASCII;   // mode restored by SHIFT_IN
    byte[] pending = new byte[4];                        // buffered escape / leftover bytes
    int pendingCount = 0;

    if (decoder != null)
    {
        currentMode = decoder.currentMode;
        shiftInMode = decoder.shiftInOutMode;

        // Work on a copy so the decoder is untouched when counting or when an exception is thrown.
        pendingCount = decoder.bytesLeftOverCount;
        for (int idx = 0; idx < pendingCount; idx++)
        {
            pending[idx] = decoder.bytesLeftOver[idx];
        }
    }

    while (buffer.MoreData || pendingCount > 0)
    {
        byte ch;

        if (pendingCount > 0)
        {
            // A buffered escape sequence gets extended with the next input byte.
            if (pending[0] == ESCAPE)
            {
                if (buffer.MoreData)
                {
                    pending[pendingCount++] = buffer.GetNextByte();

                    ISO2022Modes escapeMode = CheckEscapeSequenceJP(pending, pendingCount);
                    if (escapeMode != ISO2022Modes.ModeInvalidEscape)
                    {
                        if (escapeMode != ISO2022Modes.ModeIncompleteEscape)
                        {
                            // Complete, recognised escape: switch modes and drop the buffer.
                            pendingCount = 0;
                            currentMode = shiftInMode = escapeMode;
                        }

                        // Complete or still incomplete: go get the next byte.
                        continue;
                    }
                }
                else if (decoder != null && !decoder.MustFlush)
                {
                    // Out of input and not flushing: keep the partial escape for next time.
                    break;
                }

                // Invalid escape, or out of input while flushing: emit the buffered bytes as data.
            }

            // Take the first buffered byte and shift the rest down.
            ch = DecrementEscapeBytes(ref pending, ref pendingCount);
        }
        else
        {
            ch = buffer.GetNextByte();

            if (ch == ESCAPE)
            {
                // Begin a new escape sequence if none is buffered.
                if (pendingCount == 0)
                {
                    pending[0] = ch;
                    pendingCount = 1;
                    continue;
                }

                // Otherwise flush the previous sequence first and re-read this escape byte.
                buffer.AdjustBytes(-1);
            }
        }

        if (ch == SHIFT_OUT)
        {
            // Remember where to return to, then switch to halfwidth katakana.
            shiftInMode = currentMode;
            currentMode = ISO2022Modes.ModeHalfwidthKatakana;
            continue;
        }

        if (ch == SHIFT_IN)
        {
            currentMode = shiftInMode;
            continue;
        }

        // Build the (one- or two-byte) code to look up.
        ushort code = ch;
        bool isDoubleByte = false;

        if (currentMode == ISO2022Modes.ModeJIS0208)
        {
            // JIS 0208 needs a trail byte; it may come from the pending buffer or from input.
            if (pendingCount > 0)
            {
                // A buffered escape is left alone so it is processed on the next iteration.
                if (pending[0] != ESCAPE)
                {
                    byte trail = DecrementEscapeBytes(ref pending, ref pendingCount);
                    code = (ushort)((ch << 8) | trail);
                    isDoubleByte = true;
                }
            }
            else if (buffer.MoreData)
            {
                byte trail = buffer.GetNextByte();
                code = (ushort)((ch << 8) | trail);
                isDoubleByte = true;
            }
            else
            {
                // No trail byte available.
                if (decoder == null || decoder.MustFlush)
                {
                    // Cannot be stored: fall back the lead byte (we stop regardless of the result).
                    buffer.Fallback(ch);
                }
                else if (chars != null)
                {
                    // Park the lead byte for the next call (not when counting).
                    pending[0] = ch;
                    pendingCount = 1;
                }

                break;
            }

            // MLang treated a JIS 0208 '*' (0x2a) lead byte as a single halfwidth katakana
            // escape: move the trail byte into the halfwidth katakana range.
            if (isDoubleByte && (code & 0xff00) == 0x2a00)
            {
                code = (ushort)(code & 0xff);
                code |= (LEADBYTE_HALFWIDTH << 8);
            }
        }
        else if (code >= 0xA1 && code <= 0xDF)
        {
            // Everett mapped these like shift-jis katakana even though this is a 7-bit
            // code page; that mapping is kept.
            code |= (LEADBYTE_HALFWIDTH << 8);
            code &= 0xff7f;     // drop the extra 0x80
        }
        else if (currentMode == ISO2022Modes.ModeHalfwidthKatakana)
        {
            // Add the lead byte the mapping table expects for katakana.
            code |= (LEADBYTE_HALFWIDTH << 8);
        }

        char c = mapBytesToUnicode[code];

        // Fall back unmapped codes; otherwise emit the char with the number of bytes it consumed.
        bool emitted;
        if (c == UNKNOWN_CHAR_FLAG && code != 0)
        {
            emitted = isDoubleByte
                ? buffer.Fallback((byte)(code >> 8), (byte)code)
                : buffer.Fallback(ch);
        }
        else
        {
            emitted = buffer.AddChar(c, isDoubleByte ? 2 : 1);
        }

        if (!emitted)
        {
            break;
        }
    }

    // Persist state only when really converting (not counting).
    if (chars != null && decoder != null)
    {
        if (!decoder.MustFlush || pendingCount != 0)
        {
            // Either not flushing, or state remains.
            Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISO2022Encoding.GetCharsCP5022xJP]Expected no state or not converting or not flushing");

            decoder.currentMode = currentMode;
            decoder.shiftInOutMode = shiftInMode;
            decoder.bytesLeftOverCount = pendingCount;
            decoder.bytesLeftOver = pending;
        }
        else
        {
            // Flushed with nothing pending: reset the decoder.
            decoder.currentMode = ISO2022Modes.ModeASCII;
            decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
            decoder.bytesLeftOverCount = 0;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes ISO-2022 Korean (code page 50225) bytes into chars through an
// EncodingCharBuffer, or only counts the chars when 'chars' is null.
//
//   bytes, byteCount - input bytes.
//   chars, charCount - output; 'chars' may be null when counting, in which case no
//                      decoder state is written back.
//   decoder          - may be null. Carries the current mode and up to four buffered bytes
//                      (a partial escape sequence or a lone lead byte).
//
// Returns buffer.Count.
[System.Security.SecurityCritical] // auto-generated
private unsafe int GetCharsCP50225KR(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)
{
    EncodingCharBuffer buffer = new EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes currentMode = ISO2022Modes.ModeASCII;   // mode in effect now
    byte[] pending = new byte[4];                        // buffered escape / leftover bytes
    int pendingCount = 0;

    if (decoder != null)
    {
        currentMode = decoder.currentMode;

        // Work on a copy so the decoder is untouched when counting or when an exception is thrown.
        pendingCount = decoder.bytesLeftOverCount;
        for (int idx = 0; idx < pendingCount; idx++)
        {
            pending[idx] = decoder.bytesLeftOver[idx];
        }
    }

    while (buffer.MoreData || pendingCount > 0)
    {
        byte ch;

        if (pendingCount > 0)
        {
            // A buffered escape sequence gets extended with the next input byte.
            if (pending[0] == ESCAPE)
            {
                if (buffer.MoreData)
                {
                    pending[pendingCount++] = buffer.GetNextByte();

                    ISO2022Modes escapeMode = CheckEscapeSequenceKR(pending, pendingCount);
                    if (escapeMode != ISO2022Modes.ModeInvalidEscape)
                    {
                        if (escapeMode != ISO2022Modes.ModeIncompleteEscape)
                        {
                            // Complete, recognised escape: it changes nothing here
                            // (KR mode is already known), so just drop the buffer.
                            pendingCount = 0;
                        }

                        // Complete or still incomplete: go get the next byte.
                        continue;
                    }
                }
                else if (decoder != null && !decoder.MustFlush)
                {
                    // Out of input and not flushing: keep the partial escape for next time.
                    break;
                }

                // Invalid escape, or out of input while flushing: emit the buffered bytes as data.
            }

            // Take the first buffered byte and shift the rest down.
            ch = DecrementEscapeBytes(ref pending, ref pendingCount);
        }
        else
        {
            ch = buffer.GetNextByte();

            if (ch == ESCAPE)
            {
                // Begin a new escape sequence if none is buffered.
                if (pendingCount == 0)
                {
                    pending[0] = ch;
                    pendingCount = 1;
                    continue;
                }

                // Otherwise flush the previous sequence first and re-read this escape byte.
                buffer.AdjustBytes(-1);
            }
        }

        if (ch == SHIFT_OUT)
        {
            currentMode = ISO2022Modes.ModeKR;
            continue;
        }

        if (ch == SHIFT_IN)
        {
            currentMode = ISO2022Modes.ModeASCII;
            continue;
        }

        // Build the (one- or two-byte) code to look up.
        ushort code = ch;
        bool isDoubleByte = false;

        // In KR mode every byte except ' ', '\t' and '\n' starts a two-byte character
        // (MLANG passed those three through, so this does as well).
        if (currentMode == ISO2022Modes.ModeKR && ch != ' ' && ch != '\t' && ch != '\n')
        {
            if (pendingCount > 0)
            {
                // A buffered escape is left alone so it is processed on the next iteration.
                if (pending[0] != ESCAPE)
                {
                    byte trail = DecrementEscapeBytes(ref pending, ref pendingCount);
                    code = (ushort)((ch << 8) | trail);
                    isDoubleByte = true;
                }
            }
            else if (buffer.MoreData)
            {
                byte trail = buffer.GetNextByte();
                code = (ushort)((ch << 8) | trail);
                isDoubleByte = true;
            }
            else
            {
                // No trail byte available.
                if (decoder == null || decoder.MustFlush)
                {
                    // Cannot be stored: fall back the lone lead byte (we stop regardless of the result).
                    buffer.Fallback(ch);
                }
                else if (chars != null)
                {
                    // Park the lead byte for the next call (not when counting).
                    pending[0] = ch;
                    pendingCount = 1;
                }

                break;
            }
        }

        char c = mapBytesToUnicode[code];

        // Fall back unmapped codes; otherwise emit the char with the number of bytes it consumed.
        bool emitted;
        if (c == UNKNOWN_CHAR_FLAG && code != 0)
        {
            emitted = isDoubleByte
                ? buffer.Fallback((byte)(code >> 8), (byte)code)
                : buffer.Fallback(ch);
        }
        else
        {
            emitted = buffer.AddChar(c, isDoubleByte ? 2 : 1);
        }

        if (!emitted)
        {
            break;
        }
    }

    // Persist state only when really converting (not counting).
    if (chars != null && decoder != null)
    {
        if (!decoder.MustFlush || pendingCount != 0)
        {
            // Either not flushing, or state remains.
            Debug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISO2022Encoding.GetCharsCP50225KR]Expected no state or not converting or not flushing");

            decoder.currentMode = currentMode;
            decoder.bytesLeftOverCount = pendingCount;
            decoder.bytesLeftOver = pending;
        }
        else
        {
            // Flushed with nothing pending: reset the decoder.
            decoder.currentMode = ISO2022Modes.ModeASCII;
            decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
            decoder.bytesLeftOverCount = 0;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}