// Decodes UTF-7 input into UTF-16 output.
//
//   bytes / byteCount : input bytes (asserted non-null / non-negative).
//   chars / charCount : output buffer. When chars is null (the counting case) the
//                       decoder state is left untouched.
//   baseDecoder       : optional UTF7Encoding.Decoder that carries bits, bitCount and
//                       firstByte from one call to the next.
//
// Returns buffer.Count.
//
// Local state: pendingBitCount < 0 means plain (direct) characters are being read;
// pendingBitCount >= 0 means we are inside a modified base 64 run started by '+'.
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    Contract.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
    Contract.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
    Contract.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");

    // The decoder is optional.
    UTF7Encoding.Decoder decoder = (UTF7Encoding.Decoder)baseDecoder;

    // Wraps both the input bytes and the output chars.
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    // Resume from whatever the decoder remembered.
    int accumulator = 0;
    int pendingBitCount = -1;
    bool atShiftStart = false;
    if (decoder != null)
    {
        accumulator = decoder.bits;
        pendingBitCount = decoder.bitCount;
        atShiftStart = decoder.firstByte;

        Contract.Assert(!atShiftStart || decoder.bitCount <= 0,
            "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
    }

    // A whole char may be left over from a previous call that had no room for it.
    if (pendingBitCount >= 16)
    {
        char carried = (char)((accumulator >> (pendingBitCount - 16)) & 0xFFFF);
        if (!buffer.AddChar(carried))
        {
            // Always throw: at least one char of room is required, even in Convert.
            ThrowCharsOverflow(decoder, true);
        }

        pendingBitCount -= 16;
    }

    while (buffer.MoreData)
    {
        byte input = buffer.GetNextByte();
        int decoded;

        if (pendingBitCount < 0)
        {
            // Direct mode.
            if (input == '+')
            {
                // Either the start of a base 64 run or the "+-" spelling of a plus sign.
                pendingBitCount = 0;
                atShiftStart = true;
                continue;
            }

            if (input >= 0x80)
            {
                // Not valid here; fall back. A false result means stop without throwing.
                if (!buffer.Fallback(input))
                {
                    break;
                }

                continue;
            }

            decoded = input;
        }
        else
        {
            // Modified base 64 mode. The table may only be indexed with bytes below 0x80.
            sbyte sextet;
            if (input < 0x80 && (sextet = base64Values[input]) >= 0)
            {
                atShiftStart = false;
                accumulator = (accumulator << 6) | ((byte)sextet);
                pendingBitCount += 6;

                if (pendingBitCount < 16)
                {
                    // Not enough bits for a char yet.
                    continue;
                }

                decoded = (accumulator >> (pendingBitCount - 16)) & 0xFFFF;
                pendingBitCount -= 16;
            }
            else
            {
                // Any non base 64 byte ends the shifted sequence.
                pendingBitCount = -1;

                if (input != '-')
                {
                    // Invalid byte inside the run: fall back and move on.
                    if (!buffer.Fallback(input))
                    {
                        break;
                    }

                    continue;
                }

                if (!atShiftStart)
                {
                    // A '-' that merely closes the run produces nothing.
                    continue;
                }

                // "+-" encodes a literal plus sign.
                decoded = '+';
            }
        }

        if (decoded >= 0 && !buffer.AddChar((char)decoded))
        {
            // No room. If the char came from base 64 bits, keep it in the bit
            // accumulator so that the next call can emit it.
            if (pendingBitCount >= 0)
            {
                buffer.AdjustBytes(+1);     // re-add the byte that AddChar subtracted when it failed
                pendingBitCount += 16;      // the char is still sitting in the accumulator
            }

            break;
        }
    }

    // Store state only when really converting (chars != null) and a decoder exists.
    if (chars != null && decoder != null)
    {
        if (decoder.MustFlush)
        {
            // Leftover bits are simply dropped on flush.
            decoder.bits = 0;
            decoder.bitCount = -1;
            decoder.firstByte = false;
        }
        else
        {
            decoder.bits = accumulator;
            decoder.bitCount = pendingBitCount;
            decoder.firstByte = atShiftStart;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// ISCII decoding. The interesting rules implemented below:
//   * Virama followed by Virama becomes Virama + ZWNJ; Virama followed by Nukta becomes Virama + ZWJ.
//   * ATR is followed by a byte that switches code pages ("fonts").
//   * Devanagari F0, B8 -> \u0952 and Devanagari F0, BF -> \u0970.
//   * Some characters followed by E9 become a different character instead.
//
// The arguments were already validated by the internal caller, so they are only asserted.
// chars may be null when counting; in that case the decoder state is not written back.
// Returns buffer.Count.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    BCLDebug.Assert(bytes != null, "[ISCIIEncoding.GetChars]bytes is null");
    BCLDebug.Assert(byteCount >= 0, "[ISCIIEncoding.GetChars]byteCount is negative");
    // chars is intentionally not asserted: null is allowed for counting.
    BCLDebug.Assert(charCount >= 0, "[ISCIIEncoding.GetChars]charCount is negative");

    ISCIIDecoder decoder = (ISCIIDecoder)baseDecoder;

    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    // State carried between bytes (and, through the decoder, between calls).
    int activeCodePage = this.defaultCodePage;
    bool pendingAtr = false;        // previous byte was the ATR marker
    bool pendingVirama = false;     // previous byte was a Virama
    bool pendingDevExt = false;     // previous byte was the Devanagari ext byte
    char charIfNukta = '\0';        // char to emit if the next byte is a Nukta
    char charIfNoNukta = '\0';      // char to emit if it is not

    if (decoder != null)
    {
        activeCodePage = decoder.currentCodePage;
        pendingAtr = decoder.bLastATR;
        pendingVirama = decoder.bLastVirama;
        pendingDevExt = decoder.bLastDevenagariStressAbbr;
        charIfNukta = decoder.cLastCharForNextNukta;
        charIfNoNukta = decoder.cLastCharForNoNextNukta;
    }

    bool hasPendingState = pendingVirama | pendingAtr | pendingDevExt | (charIfNukta != '\0');

    // Index of the mapping table for the active code page (some code pages share a table).
    BCLDebug.Assert(activeCodePage >= CodeDevanagari && activeCodePage <= CodePunjabi,
        "[ISCIIEncoding.GetChars]Decoder code page must be >= Devanagari and <= Punjabi, not " + activeCodePage);
    int mappingIndex = (activeCodePage >= CodeDevanagari && activeCodePage <= CodePunjabi)
        ? IndicMappingIndex[activeCodePage]
        : -1;

    while (buffer.MoreData)
    {
        byte current = buffer.GetNextByte();

        if (hasPendingState)
        {
            hasPendingState = false;

            // Exactly one of the pending flags is expected to be set.
            BCLDebug.Assert(((pendingVirama ? 1 : 0) + (pendingAtr ? 1 : 0) +
                             (pendingDevExt ? 1 : 0) + ((charIfNukta > 0) ? 1 : 0)) == 1,
                String.Format(CultureInfo.InvariantCulture,
                    "[ISCIIEncoding.GetChars]Special cases require 1 and only 1 special case flag: LastATR {0} Dev. {1} Nukta {2}",
                    pendingAtr, pendingDevExt, charIfNukta));

            if (pendingAtr)
            {
                pendingAtr = false;

                // Only Devanagari through Punjabi are supported.
                if (current >= (0x40 | CodeDevanagari) && current <= (0x40 | CodePunjabi))
                {
                    activeCodePage = current & 0xf;
                    mappingIndex = IndicMappingIndex[activeCodePage];
                    continue;
                }

                // 0x40 selects the default code page. 0x41 (Roman) is unsupported and is
                // handled the same way, without emitting a '?'.
                if (current == 0x40 || current == 0x41)
                {
                    activeCodePage = this.defaultCodePage;
                    mappingIndex = (activeCodePage >= CodeDevanagari && activeCodePage <= CodePunjabi)
                        ? IndicMappingIndex[activeCodePage]
                        : -1;
                    continue;
                }

                // Any other ATR code is unsupported: fall back the ATR itself, reset all
                // state, and keep processing the current byte below.
                buffer.Fallback(ControlATR);
                pendingVirama = false;
                pendingAtr = false;
                pendingDevExt = false;
                charIfNukta = '\0';
                charIfNoNukta = '\0';
            }
            else if (pendingVirama)
            {
                // Virama + Virama -> ZWNJ, Virama + Nukta -> ZWJ.
                if (current == Virama)
                {
                    if (!buffer.AddChar(ZWNJ))
                    {
                        break;      // no room
                    }

                    pendingVirama = false;
                    continue;
                }

                if (current == Nukta)
                {
                    if (!buffer.AddChar(ZWJ))
                    {
                        break;      // no room
                    }

                    pendingVirama = false;
                    continue;
                }

                // Anything else: just forget the Virama and process this byte normally.
                pendingVirama = false;
            }
            else if (pendingDevExt)
            {
                // Previous byte was f0 (ext): b8 -> \u0952 (stress sign anudatta),
                // bf -> \u0970 (abbreviation sign).
                if (current == 0xb8 || current == 0xbf)
                {
                    char sign = (current == 0xb8) ? '\u0952' : '\u0970';
                    if (!buffer.AddChar(sign))
                    {
                        break;
                    }

                    pendingDevExt = false;
                    continue;
                }

                // Unexpected pattern: fall back the f0 and keep processing this byte.
                pendingDevExt = false;
                buffer.Fallback(DevenagariExt);
            }
            else
            {
                // Waiting to see whether a Nukta follows the previous character.
                BCLDebug.Assert(charIfNukta > 0 && charIfNoNukta > 0,
                    "[ISCIIEncoding.GetChars]No other special case found, but cLastCharFor(No)NextNukta variable(s) aren't set.");

                bool combinesWithNukta = (current == Nukta);
                if (!buffer.AddChar(combinesWithNukta ? charIfNukta : charIfNoNukta))
                {
                    break;
                }

                charIfNukta = charIfNoNukta = '\0';

                if (combinesWithNukta)
                {
                    // The Nukta was consumed by the combined character.
                    continue;
                }

                // Otherwise keep processing the current byte.
            }
        }

        BCLDebug.Assert(!hasPendingState && !pendingDevExt && !pendingVirama && !pendingAtr && charIfNukta == '\0',
            "[ISCIIEncoding.GetChars]No special state for last code point should exist at this point.");

        // Simple single-byte value: copy through.
        if (current < MultiByteBegin)
        {
            if (!buffer.AddChar((char)current))
            {
                break;
            }

            continue;
        }

        // ATR marker: the next byte selects a code page.
        if (current == ControlATR)
        {
            pendingAtr = hasPendingState = true;
            continue;
        }

        BCLDebug.Assert(mappingIndex != -1,
            "[ISCIIEncoding.GetChars]Expected valid currentCodePageIndex != -1");
        char primary = IndicMapping[mappingIndex, 0, current - MultiByteBegin];
        char alternate = IndicMapping[mappingIndex, 1, current - MultiByteBegin];

        // No alternate form (or a lonely Nukta): emit directly, falling back when unmapped.
        if (alternate == 0 || current == Nukta)
        {
            bool stored = (primary == 0) ? buffer.Fallback(current) : buffer.AddChar(primary);
            if (!stored)
            {
                break;
            }

            continue;
        }

        // Virama: emit it and remember it so ZWJ/ZWNJ can be produced next time.
        if (current == Virama)
        {
            if (!buffer.AddChar(primary))
            {
                break;
            }

            pendingVirama = hasPendingState = true;
            continue;
        }

        // Character that changes when followed by a Nukta: hold it until the next byte.
        if ((alternate & 0xF000) == 0)
        {
            hasPendingState = true;
            charIfNukta = alternate;
            charIfNoNukta = primary;
            continue;
        }

        // Remaining case: the Devanagari F0, B8 / F0, BF special sequences.
        BCLDebug.Assert(activeCodePage == CodeDevanagari && current == DevenagariExt,
            String.Format(CultureInfo.InvariantCulture,
                "[ISCIIEncoding.GetChars] Devenagari special case must {0} not {1} or in Devanagari code page {2} not {3}.",
                DevenagariExt, current, CodeDevanagari, activeCodePage));
        pendingDevExt = hasPendingState = true;
    }

    // Without a decoder, or when flushing, pending ATR / ext / held characters must be
    // emitted now. When emitting fails, state is kept for the next call and the byte count
    // that the failed add decremented is restored with GetNextByte().
    // A pending Virama does not matter for flushing.
    if (decoder == null || decoder.MustFlush)
    {
        if (pendingAtr)
        {
            if (buffer.Fallback(ControlATR))
            {
                pendingAtr = false;
            }
            else
            {
                buffer.GetNextByte();
            }
        }
        else if (pendingDevExt)
        {
            if (buffer.Fallback(DevenagariExt))
            {
                pendingDevExt = false;
            }
            else
            {
                buffer.GetNextByte();
            }
        }
        else if (charIfNoNukta != '\0')
        {
            if (buffer.AddChar(charIfNoNukta))
            {
                charIfNoNukta = charIfNukta = '\0';
            }
            else
            {
                buffer.GetNextByte();
            }
        }
    }

    // Remember leftover state, but only when not counting.
    if (decoder != null && chars != null)
    {
        bool keepState = !decoder.MustFlush || charIfNoNukta != '\0' || pendingAtr || pendingDevExt;
        if (keepState)
        {
            // Either not flushing, or state survived because of Convert.
            BCLDebug.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISCIIEncoding.GetChars]Expected no state or not converting or not flushing");
            decoder.currentCodePage = activeCodePage;
            decoder.bLastVirama = pendingVirama;
            decoder.bLastATR = pendingAtr;
            decoder.bLastDevenagariStressAbbr = pendingDevExt;
            decoder.cLastCharForNextNukta = charIfNukta;
            decoder.cLastCharForNoNextNukta = charIfNoNukta;
        }
        else
        {
            decoder.currentCodePage = this.defaultCodePage;
            decoder.bLastVirama = false;
            decoder.bLastATR = false;
            decoder.bLastDevenagariStressAbbr = false;
            decoder.cLastCharForNextNukta = '\0';
            decoder.cLastCharForNoNextNukta = '\0';
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes code page 52936 (HZ) bytes into chars.
//
// '~' introduces an escape: "~~" is a literal '~' (in ASCII mode), "~{" switches to
// double-byte HZ mode, "~}" switches back to ASCII and "~\n" is ignored. Any other
// escape is backed up and the '~' is treated as an ordinary byte or lead byte.
//
//   bytes / byteCount : input bytes (asserted non-null / non-negative).
//   chars / charCount : output buffer; chars == null means counting, so no decoder
//                       state is stored.
//   decoder           : optional ISO2022Decoder holding the mode and at most one
//                       leftover byte between calls.
//
// Returns buffer.Count.
[System.Security.SecurityCritical]  // auto-generated
private unsafe int GetCharsCP52936(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)
{
    Contract.Assert(byteCount >= 0, "[ISO2022Encoding.GetCharsCP52936]count >=0");
    Contract.Assert(bytes != null, "[ISO2022Encoding.GetCharsCP52936]bytes!=null");

    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes mode = ISO2022Modes.ModeASCII;
    int carriedByte = -1;               // -1: nothing carried over from the decoder
    bool storedLeftover = false;        // true once this call stores a leftover byte

    if (decoder != null)
    {
        mode = decoder.currentMode;

        if (decoder.bytesLeftOverCount != 0)
        {
            carriedByte = decoder.bytesLeftOver[0];
        }
    }

    while (buffer.MoreData || carriedByte >= 0)
    {
        // The carried byte, if any, is consumed before new input.
        byte ch;
        if (carriedByte >= 0)
        {
            ch = (byte)carriedByte;
            carriedByte = -1;
        }
        else
        {
            ch = buffer.GetNextByte();
        }

        if (ch == '~')
        {
            if (!buffer.MoreData)
            {
                // The escape is cut off at the end of the input.
                if (decoder == null || decoder.MustFlush)
                {
                    // Nothing can be remembered: fall back the '~'. We stop either way,
                    // since there is no more data.
                    buffer.Fallback(ch);
                    break;
                }

                // A decoder exists and is not flushing: remember the '~' for next time
                // (unless we are only counting).
                decoder.ClearMustFlush();
                if (chars != null)
                {
                    decoder.bytesLeftOverCount = 1;
                    decoder.bytesLeftOver[0] = (byte)'~';
                    storedLeftover = true;
                }

                break;
            }

            byte escapeKind = buffer.GetNextByte();

            if (escapeKind == '~' && mode == ISO2022Modes.ModeASCII)
            {
                // "~~" stands for a single '~'; two bytes were consumed.
                if (!buffer.AddChar((char)escapeKind, 2))
                {
                    break;
                }

                continue;
            }

            if (escapeKind == '{')
            {
                mode = ISO2022Modes.ModeHZ;
                continue;
            }

            if (escapeKind == '}')
            {
                mode = ISO2022Modes.ModeASCII;
                continue;
            }

            if (escapeKind == '\n')
            {
                // "~\n" is ignored.
                continue;
            }

            // Unknown escape: un-read the second byte. ch is still '~' and is handled
            // below as a normal byte or lead byte.
            buffer.AdjustBytes(-1);
        }

        if (mode != ISO2022Modes.ModeASCII)
        {
            Contract.Assert(mode == ISO2022Modes.ModeHZ, "[ISO2022Encoding.GetCharsCP52936]Expected ModeHZ");

            // Everett let bytes below 0x20 through as ASCII; those skip this block and
            // are handled by the single-byte path at the bottom of the loop.
            if (ch >= 0x20)
            {
                if (!buffer.MoreData)
                {
                    // Lead byte without a trail byte.
                    if (decoder == null || decoder.MustFlush)
                    {
                        // Fall back the lead byte; stop either way, there is no more data.
                        buffer.Fallback(ch);
                        break;
                    }

                    decoder.ClearMustFlush();
                    if (chars != null)
                    {
                        decoder.bytesLeftOverCount = 1;
                        decoder.bytesLeftOver[0] = ch;
                        storedLeftover = true;
                    }

                    break;
                }

                byte trail = buffer.GetNextByte();
                ushort pair = (ushort)(ch << 8 | trail);
                char mapped;

                if (ch == ' ' && trail != 0)
                {
                    // Everett treated space as an escape that lets the next byte
                    // through unchanged.
                    mapped = (char)trail;
                }
                else
                {
                    // Valid ranges: lead 0x21-0x77 with trail 0x21-0x7e, or the same
                    // with the high bit set on both bytes (also an Everett allowance).
                    bool inSevenBitRange = ch >= 0x21 && ch <= 0x77 && trail >= 0x21 && trail <= 0x7e;
                    bool inEightBitRange = ch >= 0xa1 && ch <= 0xf7 && trail >= 0xa1 && trail <= 0xfe;

                    if (!inSevenBitRange && !inEightBitRange)
                    {
                        if (trail == 0x20 && ch >= 0x21 && ch <= 0x7d)
                        {
                            // Everett mapped XX20 to U+3000 (ideographic space).
                            pair = 0x2121;
                        }
                        else
                        {
                            // Illegal pair: fall back both bytes.
                            if (!buffer.Fallback((byte)(pair >> 8), (byte)pair))
                            {
                                break;
                            }

                            continue;
                        }
                    }

                    pair |= 0x8080;
                    mapped = mapBytesToUnicode[pair];
                }

                if (mapped == UNKNOWN_CHAR_FLAG && pair != 0)
                {
                    // No mapping for this pair.
                    if (!buffer.Fallback((byte)(pair >> 8), (byte)pair))
                    {
                        break;
                    }

                    continue;
                }

                if (!buffer.AddChar(mapped, 2))
                {
                    break;      // convert ran out of buffer
                }

                continue;
            }
        }

        // Single-byte path. Some values above 0x7f are accepted (Everett did), so the
        // byte is always looked up in the table.
        char single = mapBytesToUnicode[ch];

        if ((single == UNKNOWN_CHAR_FLAG || single == 0) && ch != 0)
        {
            if (!buffer.Fallback(ch))
            {
                break;
            }

            continue;
        }

        if (!buffer.AddChar(single))
        {
            break;      // convert ran out of buffer
        }
    }

    // Remember state, but only when not counting.
    if (chars != null && decoder != null)
    {
        if (!storedLeftover)
        {
            // Nothing was stored during this call, so clear any old leftover byte.
            decoder.bytesLeftOverCount = 0;
        }

        if (decoder.MustFlush && decoder.bytesLeftOverCount == 0)
        {
            decoder.currentMode = ISO2022Modes.ModeASCII;
        }
        else
        {
            // Either not flushing, or state survived because of Convert.
            Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISO2022Encoding.GetCharsCP52936]Expected no state or not converting or not flushing");
            decoder.currentMode = mode;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes ISO-2022-JP style input (code pages 5022x) into chars.
//
// Escape sequences are collected in a small buffer and classified by
// CheckEscapeSequenceJP; SHIFT_OUT / SHIFT_IN toggle halfwidth katakana mode.
//
//   bytes / byteCount : input bytes.
//   chars / charCount : output buffer; chars == null means counting, so no decoder
//                       state is stored.
//   decoder           : optional ISO2022Decoder holding the current mode, the shift-in
//                       mode and up to four leftover bytes between calls.
//
// Returns buffer.Count.
[System.Security.SecurityCritical]  // auto-generated
private unsafe int GetCharsCP5022xJP(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)
{
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes mode = ISO2022Modes.ModeASCII;                 // mode in effect now
    ISO2022Modes modeAfterShiftIn = ISO2022Modes.ModeASCII;     // mode restored by SHIFT_IN
    byte[] pending = new byte[4];                               // buffered escape / leftover bytes
    int pendingCount = 0;

    if (decoder != null)
    {
        mode = decoder.currentMode;
        modeAfterShiftIn = decoder.shiftInOutMode;

        // Work on a copy so the decoder is not disturbed when counting or throwing.
        pendingCount = decoder.bytesLeftOverCount;
        for (int index = 0; index < pendingCount; index++)
        {
            pending[index] = decoder.bytesLeftOver[index];
        }
    }

    while (buffer.MoreData || pendingCount > 0)
    {
        byte ch;

        if (pendingCount > 0)
        {
            if (pending[0] == ESCAPE)
            {
                if (buffer.MoreData)
                {
                    // Extend the buffered sequence by one byte and re-check it.
                    pending[pendingCount++] = buffer.GetNextByte();

                    ISO2022Modes escapeResult = CheckEscapeSequenceJP(pending, pendingCount);
                    if (escapeResult != ISO2022Modes.ModeInvalidEscape)
                    {
                        if (escapeResult != ISO2022Modes.ModeIncompleteEscape)
                        {
                            // Complete, valid escape: switch modes.
                            pendingCount = 0;
                            mode = modeAfterShiftIn = escapeResult;
                        }

                        // Complete or still incomplete: go get the next byte.
                        continue;
                    }
                }
                else if (decoder != null && !decoder.MustFlush)
                {
                    // Out of input with an unfinished escape; the decoder will keep it.
                    break;
                }

                // Invalid escape, or out of input while flushing: emit the buffered bytes.
            }

            // Take the first buffered byte and shift the rest down.
            ch = DecrementEscapeBytes(ref pending, ref pendingCount);
        }
        else
        {
            ch = buffer.GetNextByte();

            if (ch == ESCAPE)
            {
                if (pendingCount == 0)
                {
                    // Start collecting a new escape sequence.
                    pending[0] = ch;
                    pendingCount = 1;
                    continue;
                }

                // A sequence is already buffered: flush it first, then reuse this byte.
                buffer.AdjustBytes(-1);
            }
        }

        if (ch == SHIFT_OUT)
        {
            modeAfterShiftIn = mode;
            mode = ISO2022Modes.ModeHalfwidthKatakana;
            continue;
        }

        if (ch == SHIFT_IN)
        {
            mode = modeAfterShiftIn;
            continue;
        }

        // Build the (possibly two-byte) code to look up.
        ushort code = ch;
        bool isDoubleByte = false;

        if (mode == ISO2022Modes.ModeJIS0208)
        {
            // A trail byte is needed; it may come from the pending buffer or the input.
            if (pendingCount > 0)
            {
                // A buffered ESCAPE is not a trail byte; let it be handled next time round.
                if (pending[0] != ESCAPE)
                {
                    code = (ushort)((code << 8) | DecrementEscapeBytes(ref pending, ref pendingCount));
                    isDoubleByte = true;
                }
            }
            else if (buffer.MoreData)
            {
                code = (ushort)((code << 8) | buffer.GetNextByte());
                isDoubleByte = true;
            }
            else
            {
                // Lead byte with no trail byte available.
                if (decoder == null || decoder.MustFlush)
                {
                    // Nothing can be remembered: fall back the lone byte.
                    buffer.Fallback(ch);
                    break;
                }

                // Keep the lead byte for the next call, unless only counting.
                if (chars != null)
                {
                    pending[0] = ch;
                    pendingCount = 1;
                }

                break;
            }

            // MLang treated a JIS 0208 '*' (0x2a) lead byte as a single halfwidth katakana
            // escape: keep the trail byte and use the halfwidth katakana lead byte.
            if (isDoubleByte && (code & 0xff00) == 0x2a00)
            {
                code = (ushort)(code & 0xff);
                code |= (LEADBYTE_HALFWIDTH << 8);
            }
        }
        else if (code >= 0xA1 && code <= 0xDF)
        {
            // Everett mapped these like shift-jis (932) even though this is a 7 bit code
            // page; that mapping is kept. Move into the halfwidth katakana range and
            // drop the extra 0x80.
            code |= (LEADBYTE_HALFWIDTH << 8);
            code &= 0xff7f;
        }
        else if (mode == ISO2022Modes.ModeHalfwidthKatakana)
        {
            // Add the lead byte that the mapping table expects for katakana.
            code |= (LEADBYTE_HALFWIDTH << 8);
        }

        char mapped = mapBytesToUnicode[code];

        bool stored;
        if (mapped == UNKNOWN_CHAR_FLAG && code != 0)
        {
            // Unknown: fall back the one or two bytes that made up the code.
            stored = isDoubleByte
                ? buffer.Fallback((byte)(code >> 8), (byte)code)
                : buffer.Fallback(ch);
        }
        else
        {
            // A JIS 0208 character consumed an extra byte.
            stored = buffer.AddChar(mapped, isDoubleByte ? 2 : 1);
        }

        if (!stored)
        {
            break;
        }
    }

    // Bring the decoder in line with our state, but only when not counting.
    if (chars != null && decoder != null)
    {
        if (!decoder.MustFlush || pendingCount != 0)
        {
            // Either not flushing, or state survived because of Convert.
            Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISO2022Encoding.GetCharsCP5022xJP]Expected no state or not converting or not flushing");

            decoder.currentMode = mode;
            decoder.shiftInOutMode = modeAfterShiftIn;
            decoder.bytesLeftOverCount = pendingCount;
            decoder.bytesLeftOver = pending;
        }
        else
        {
            // Flushed with nothing pending: reset.
            decoder.currentMode = ISO2022Modes.ModeASCII;
            decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
            decoder.bytesLeftOverCount = 0;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes ISCII bytes into chars, tracking multi-byte state across bytes and, through the
// ISCIIDecoder, across calls.
//
// Byte values used below (as they appear in this method):
//   0xef : ATR marker; the following byte selects a code page (0x42-0x4b), or resets
//          to the default code page (0x40 / 0x41).
//   0xe8 / 0xe9 : the two bytes that, after a 0xe8, produce ZWNJ (U+200C) and
//          ZWJ (U+200D) respectively. 0xe9 also selects the alternate mapping of the
//          preceding character.
//   0xf0 (240) : extension byte; followed by 0xb8 it yields U+0952, by 0xbf U+0970.
//   160 (0xa0) : first byte value that is looked up in IndicMapping; lower values are
//          copied through unchanged.
//
//   bytes / byteCount : input bytes.
//   chars / charCount : output buffer; when chars is null the decoder state is not stored.
//   baseDecoder       : optional ISCIIDecoder.
//
// Returns buffer.Count. Whenever buffer.AddChar or buffer.Fallback reports failure inside
// the loop, the loop stops and the pending flags are left set so they can be saved.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    ISCIIDecoder decoder = (ISCIIDecoder)baseDecoder;
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    // Named currentCodePage (not defaultCodePage) so it cannot be confused with the field.
    int currentCodePage = this.defaultCodePage;
    bool bLastATR = false;
    bool bLastVirama = false;
    bool bLastDevenagariStressAbbr = false;
    char cLastCharForNextNukta = '\0';
    char cLastCharForNoNextNukta = '\0';

    if (decoder != null)
    {
        currentCodePage = decoder.currentCodePage;
        bLastATR = decoder.bLastATR;
        bLastVirama = decoder.bLastVirama;
        bLastDevenagariStressAbbr = decoder.bLastDevenagariStressAbbr;
        cLastCharForNextNukta = decoder.cLastCharForNextNukta;
        cLastCharForNoNextNukta = decoder.cLastCharForNoNextNukta;
    }

    // True while the previous byte left state that the next byte must resolve.
    bool bLastSpecial = bLastVirama | bLastATR | bLastDevenagariStressAbbr | (cLastCharForNextNukta != '\0');

    // Index into IndicMapping for the current code page; -1 when the page is outside 2..11.
    int currentCodePageIndex = -1;
    if ((currentCodePage >= 2) && (currentCodePage <= 11))
    {
        currentCodePageIndex = IndicMappingIndex[currentCodePage];
    }

    while (buffer.MoreData)
    {
        byte b = buffer.GetNextByte();

        if (bLastSpecial)
        {
            bLastSpecial = false;

            if (bLastATR)
            {
                // Byte after ATR: 0x42-0x4b select a code page by their low nibble.
                if ((b >= 0x42) && (b <= 0x4b))
                {
                    currentCodePage = b & 15;
                    currentCodePageIndex = IndicMappingIndex[currentCodePage];
                    bLastATR = false;
                    continue;
                }

                // 0x40 and 0x41 both switch back to the default code page.
                if ((b == 0x40) || (b == 0x41))
                {
                    currentCodePage = this.defaultCodePage;
                    currentCodePageIndex = -1;
                    if ((currentCodePage >= 2) && (currentCodePage <= 11))
                    {
                        currentCodePageIndex = IndicMappingIndex[currentCodePage];
                    }

                    bLastATR = false;
                    continue;
                }

                // Unsupported ATR code: fall back the ATR byte itself. On failure stop
                // with bLastATR still set so the state can be remembered.
                if (!buffer.Fallback((byte)0xef))
                {
                    break;
                }

                bLastATR = false;
                // Keep processing the current byte below.
            }
            else if (bLastVirama)
            {
                // 0xe8 after 0xe8 -> ZERO WIDTH NON-JOINER. Written as an escape: the
                // character is invisible and is easily lost from source text.
                if (b == 0xe8)
                {
                    if (!buffer.AddChar('\u200C'))
                    {
                        break;
                    }

                    bLastVirama = false;
                    continue;
                }

                // 0xe9 after 0xe8 -> ZERO WIDTH JOINER (also written as an escape).
                if (b == 0xe9)
                {
                    if (!buffer.AddChar('\u200D'))
                    {
                        break;
                    }

                    bLastVirama = false;
                    continue;
                }

                bLastVirama = false;
                // Keep processing the current byte below.
            }
            else if (bLastDevenagariStressAbbr)
            {
                // 0xf0 followed by 0xb8 -> U+0952, followed by 0xbf -> U+0970.
                if (b == 0xb8)
                {
                    if (!buffer.AddChar('\u0952'))
                    {
                        break;
                    }

                    bLastDevenagariStressAbbr = false;
                    continue;
                }

                if (b == 0xbf)
                {
                    if (!buffer.AddChar('\u0970'))
                    {
                        break;
                    }

                    bLastDevenagariStressAbbr = false;
                    continue;
                }

                // Not a recognised pair: fall back the 0xf0 and keep processing this byte.
                if (!buffer.Fallback((byte)240))
                {
                    break;
                }

                bLastDevenagariStressAbbr = false;
            }
            else
            {
                // A character is being held until we know whether 0xe9 follows it.
                if (b == 0xe9)
                {
                    if (!buffer.AddChar(cLastCharForNextNukta))
                    {
                        break;
                    }

                    cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
                    continue;
                }

                if (!buffer.AddChar(cLastCharForNoNextNukta))
                {
                    break;
                }

                cLastCharForNextNukta = cLastCharForNoNextNukta = '\0';
                // Keep processing the current byte below.
            }
        }

        // Values below 160 are copied through as-is.
        if (b < 160)
        {
            if (!buffer.AddChar((char)b))
            {
                break;
            }

            continue;
        }

        // ATR marker: the next byte decides the code page.
        if (b == 0xef)
        {
            bLastATR = bLastSpecial = true;
            continue;
        }

        char ch = IndicMapping[currentCodePageIndex, 0, b - 160];
        char cAlt = IndicMapping[currentCodePageIndex, 1, b - 160];

        // No alternate mapping (or the byte is 0xe9 itself): emit directly, falling back
        // when the primary mapping is empty.
        if ((cAlt == '\0') || (b == 0xe9))
        {
            bool stored = (ch == '\0') ? buffer.Fallback(b) : buffer.AddChar(ch);
            if (!stored)
            {
                break;
            }

            continue;
        }

        // 0xe8: emit its mapping and remember it for the ZWNJ / ZWJ rule above.
        if (b == 0xe8)
        {
            if (!buffer.AddChar(ch))
            {
                break;
            }

            bLastVirama = bLastSpecial = true;
            continue;
        }

        // Alternate mapping with a clear top nibble: hold both candidates until the next byte.
        if ((cAlt & 0xf000) == 0)
        {
            bLastSpecial = true;
            cLastCharForNextNukta = cAlt;
            cLastCharForNoNextNukta = ch;
            continue;
        }

        // Otherwise this is the extension byte handled by the 0xb8 / 0xbf rule above.
        bLastDevenagariStressAbbr = bLastSpecial = true;
    }

    // With no decoder, or when flushing, emit whatever is still pending. If that fails,
    // GetNextByte() is called on the buffer and the flag stays set.
    if ((decoder == null) || decoder.MustFlush)
    {
        if (bLastATR)
        {
            if (buffer.Fallback((byte)0xef))
            {
                bLastATR = false;
            }
            else
            {
                buffer.GetNextByte();
            }
        }
        else if (bLastDevenagariStressAbbr)
        {
            if (buffer.Fallback((byte)240))
            {
                bLastDevenagariStressAbbr = false;
            }
            else
            {
                buffer.GetNextByte();
            }
        }
        else if (cLastCharForNoNextNukta != '\0')
        {
            if (buffer.AddChar(cLastCharForNoNextNukta))
            {
                cLastCharForNoNextNukta = cLastCharForNextNukta = '\0';
            }
            else
            {
                buffer.GetNextByte();
            }
        }
    }

    // Store state only when a decoder exists and an output buffer was supplied.
    if ((decoder != null) && (chars != null))
    {
        bool keepState = !decoder.MustFlush
            || (cLastCharForNoNextNukta != '\0')
            || bLastATR
            || bLastDevenagariStressAbbr;

        if (keepState)
        {
            decoder.currentCodePage = currentCodePage;
            decoder.bLastVirama = bLastVirama;
            decoder.bLastATR = bLastATR;
            decoder.bLastDevenagariStressAbbr = bLastDevenagariStressAbbr;
            decoder.cLastCharForNextNukta = cLastCharForNextNukta;
            decoder.cLastCharForNoNextNukta = cLastCharForNoNextNukta;
        }
        else
        {
            decoder.currentCodePage = this.defaultCodePage;
            decoder.bLastVirama = false;
            decoder.bLastATR = false;
            decoder.bLastDevenagariStressAbbr = false;
            decoder.cLastCharForNextNukta = '\0';
            decoder.cLastCharForNoNextNukta = '\0';
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes ISO-2022-KR style input (code page 50225) into chars.
//
// Escape sequences are collected in a small buffer and classified by
// CheckEscapeSequenceKR; SHIFT_OUT enters KR (double-byte) mode and SHIFT_IN returns
// to ASCII.
//
//   bytes / byteCount : input bytes.
//   chars / charCount : output buffer; chars == null means counting, so no decoder
//                       state is stored.
//   decoder           : optional ISO2022Decoder holding the current mode and up to four
//                       leftover bytes between calls.
//
// Returns buffer.Count.
[System.Security.SecurityCritical]  // auto-generated
private unsafe int GetCharsCP50225KR(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)
{
    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes mode = ISO2022Modes.ModeASCII;
    byte[] pending = new byte[4];       // buffered escape / leftover bytes
    int pendingCount = 0;

    if (decoder != null)
    {
        mode = decoder.currentMode;

        // Work on a copy so the decoder is not disturbed when counting or throwing.
        pendingCount = decoder.bytesLeftOverCount;
        for (int index = 0; index < pendingCount; index++)
        {
            pending[index] = decoder.bytesLeftOver[index];
        }
    }

    while (buffer.MoreData || pendingCount > 0)
    {
        byte ch;

        if (pendingCount > 0)
        {
            if (pending[0] == ESCAPE)
            {
                if (buffer.MoreData)
                {
                    // Extend the buffered sequence by one byte and re-check it.
                    pending[pendingCount++] = buffer.GetNextByte();

                    ISO2022Modes escapeResult = CheckEscapeSequenceKR(pending, pendingCount);
                    if (escapeResult != ISO2022Modes.ModeInvalidEscape)
                    {
                        if (escapeResult != ISO2022Modes.ModeIncompleteEscape)
                        {
                            // Complete, valid escape. It has no effect on the mode
                            // (KR mode is already known); just discard it.
                            pendingCount = 0;
                        }

                        // Complete or still incomplete: go get the next byte.
                        continue;
                    }
                }
                else if (decoder != null && !decoder.MustFlush)
                {
                    // Out of input with an unfinished escape; the decoder will keep it.
                    break;
                }

                // Invalid escape, or out of input while flushing: emit the buffered bytes.
            }

            // Take the first buffered byte and shift the rest down.
            ch = DecrementEscapeBytes(ref pending, ref pendingCount);
        }
        else
        {
            ch = buffer.GetNextByte();

            if (ch == ESCAPE)
            {
                if (pendingCount == 0)
                {
                    // Start collecting a new escape sequence.
                    pending[0] = ch;
                    pendingCount = 1;
                    continue;
                }

                // A sequence is already buffered: flush it first, then reuse this byte.
                buffer.AdjustBytes(-1);
            }
        }

        if (ch == SHIFT_OUT)
        {
            mode = ISO2022Modes.ModeKR;
            continue;
        }

        if (ch == SHIFT_IN)
        {
            mode = ISO2022Modes.ModeASCII;
            continue;
        }

        // Build the (possibly two-byte) code to look up.
        ushort code = ch;
        bool isDoubleByte = false;

        // MLANG passed ' ', '\t' and '\n' straight through in KR mode, so they are
        // treated as single bytes here as well.
        bool passThrough = (ch == ' ' || ch == '\t' || ch == '\n');

        if (mode == ISO2022Modes.ModeKR && !passThrough)
        {
            // A trail byte is needed; it may come from the pending buffer or the input.
            if (pendingCount > 0)
            {
                // A buffered ESCAPE is not a trail byte; let it be handled next time round.
                if (pending[0] != ESCAPE)
                {
                    code = (ushort)((code << 8) | DecrementEscapeBytes(ref pending, ref pendingCount));
                    isDoubleByte = true;
                }
            }
            else if (buffer.MoreData)
            {
                code = (ushort)((code << 8) | buffer.GetNextByte());
                isDoubleByte = true;
            }
            else
            {
                // Lead byte with no trail byte available.
                if (decoder == null || decoder.MustFlush)
                {
                    // Nothing can be remembered: fall back the lonely first byte.
                    buffer.Fallback(ch);
                    break;
                }

                // Keep the lead byte for the next call, unless only counting.
                if (chars != null)
                {
                    pending[0] = ch;
                    pendingCount = 1;
                }

                break;
            }
        }

        char mapped = mapBytesToUnicode[code];

        bool stored;
        if (mapped == UNKNOWN_CHAR_FLAG && code != 0)
        {
            // Unknown: fall back the one or two bytes that made up the code.
            stored = isDoubleByte
                ? buffer.Fallback((byte)(code >> 8), (byte)code)
                : buffer.Fallback(ch);
        }
        else
        {
            stored = buffer.AddChar(mapped, isDoubleByte ? 2 : 1);
        }

        if (!stored)
        {
            break;
        }
    }

    // Bring the decoder in line with our state, but only when not counting.
    if (chars != null && decoder != null)
    {
        if (!decoder.MustFlush || pendingCount != 0)
        {
            // Either not flushing, or state survived because of Convert.
            Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
                "[ISO2022Encoding.GetCharsCP50225KR]Expected no state or not converting or not flushing");

            decoder.currentMode = mode;
            decoder.bytesLeftOverCount = pendingCount;
            decoder.bytesLeftOver = pending;
        }
        else
        {
            // Flushed with nothing pending: reset.
            decoder.currentMode = ISO2022Modes.ModeASCII;
            decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
            decoder.bytesLeftOverCount = 0;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}
// Decodes UTF-7 style input (plain bytes plus '+'...'-' modified base64 runs) into chars.
//
// bytes / byteCount : input bytes and how many of them there are.
// chars / charCount : output buffer and its capacity. The decoder is only updated when
//                     chars is non-null (NOTE(review): null looks like a count-only call - confirm).
// baseDecoder       : optional decoder carrying leftover bits between calls; may be null.
//
// Returns the char count reported by the working EncodingCharBuffer.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    Decoder decoder = (Decoder)baseDecoder;
    Encoding.EncodingCharBuffer output = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    // Local copy of the base64 state. A negative bit count means "not inside a base64 run".
    int accumulator = 0;
    int pendingBits = -1;
    bool justShifted = false;

    if (decoder != null)
    {
        accumulator = decoder.bits;
        pendingBits = decoder.bitCount;
        justShifted = decoder.firstByte;
    }

    // A whole char may already be sitting in the carried-over bits; emit it first.
    if (pendingBits >= 16)
    {
        char carried = (char)((accumulator >> (pendingBits - 16)) & 0xffff);
        if (!output.AddChar(carried))
        {
            base.ThrowCharsOverflow(decoder, true);
        }

        pendingBits -= 16;
    }

    while (output.MoreData)
    {
        byte current = output.GetNextByte();
        int decoded;

        if (pendingBits >= 0)
        {
            // Inside a base64 run.
            sbyte sextet;
            if (current < 0x80 && (sextet = this.base64Values[current]) >= 0)
            {
                justShifted = false;
                accumulator = (accumulator << 6) | ((byte)sextet);
                pendingBits += 6;

                // Need at least 16 bits before a char can be produced.
                if (pendingBits < 16)
                {
                    continue;
                }

                decoded = (accumulator >> (pendingBits - 16)) & 0xffff;
                pendingBits -= 16;
            }
            else
            {
                // Any non-base64 byte ends the run.
                pendingBits = -1;

                if (current != '-')
                {
                    // Not the terminator either: hand the byte to the fallback.
                    if (!output.Fallback(current))
                    {
                        break;
                    }

                    continue;
                }

                // '-' directly after '+' stands for a literal '+'; otherwise it just closes the run.
                if (!justShifted)
                {
                    continue;
                }

                decoded = '+';
            }
        }
        else if (current == '+')
        {
            // Start of a base64 run (or of the "+-" escape).
            pendingBits = 0;
            justShifted = true;
            continue;
        }
        else
        {
            if (current >= 0x80)
            {
                // Bytes above 0x7f are not valid outside a run.
                if (!output.Fallback(current))
                {
                    break;
                }

                continue;
            }

            decoded = current;
        }

        if (decoded >= 0 && !output.AddChar((char)decoded))
        {
            // Out of room. If we are inside a run, keep the char in the bit buffer
            // and re-count the byte so it is treated as consumed.
            if (pendingBits >= 0)
            {
                output.AdjustBytes(1);
                pendingBits += 16;
            }

            break;
        }
    }

    if (chars != null && decoder != null)
    {
        // Flushing resets the state; otherwise remember where we stopped.
        bool flushing = decoder.MustFlush;
        decoder.bits = flushing ? 0 : accumulator;
        decoder.bitCount = flushing ? -1 : pendingBits;
        decoder.firstByte = !flushing && justShifted;
        decoder.m_bytesUsed = output.BytesUsed;
    }

    return output.Count;
}
// Decodes GB18030 bytes (ASCII, two-byte and four-byte sequences) into chars.
//
// bytes / byteCount : input bytes and how many of them there are.
// chars / charCount : output buffer and its capacity. chars may be null, in which case the
//                     call only counts and the decoder's leftover bytes are not changed.
// baseDecoder       : optional GB18030Decoder holding up to four leftover bytes; may be null.
//
// Returns the char count reported by the working EncodingCharBuffer.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    // Parameters were validated by the internal caller, so only assert here.
    // chars is deliberately not asserted: null is allowed for counting.
    BCLDebug.Assert(bytes != null, "[GB18030Encoding.GetChars]bytes is null");
    BCLDebug.Assert(byteCount >= 0, "[GB18030Encoding.GetChars]byteCount is negative");
    BCLDebug.Assert(charCount >= 0, "[GB18030Encoding.GetChars]charCount is negative");

    GB18030Decoder decoder = (GB18030Decoder)baseDecoder;

    Encoding.EncodingCharBuffer output = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    // Local copies of the leftover bytes (-1 == empty slot) so the decoder is not disturbed early.
    short left1 = -1;
    short left2 = -1;
    short left3 = -1;
    short left4 = -1;

    // Phase 1: drain whatever the decoder carried over from the previous call.
    if (decoder != null && decoder.bLeftOver1 != -1)
    {
        left1 = decoder.bLeftOver1;
        left2 = decoder.bLeftOver2;
        left3 = decoder.bLeftOver3;
        left4 = decoder.bLeftOver4;

        // May take several passes, e.g. when a would-be four-byte sequence turns out invalid.
        while (left1 != -1)
        {
            if (!IsGBLeadByte(left1))
            {
                // Not a lead byte: ASCII is copied, anything else goes to the fallback.
                bool emitted = left1 <= 0x7f
                    ? output.AddChar((char)left1)
                    : output.Fallback((byte)left1);
                if (!emitted)
                {
                    break;
                }

                // Slide the remaining bytes down one slot and retry.
                left1 = left2;
                left2 = left3;
                left3 = left4;
                left4 = -1;
                continue;
            }

            // Top up from the input until we hold a full two- or four-byte candidate.
            while (left2 == -1 || (IsGBFourByteTrailing(left2) && left4 == -1))
            {
                if (!output.MoreData)
                {
                    if (!decoder.MustFlush)
                    {
                        // Still incomplete and allowed to wait: store the bytes (unless counting) and return.
                        if (chars != null)
                        {
                            decoder.bLeftOver1 = left1;
                            decoder.bLeftOver2 = left2;
                            decoder.bLeftOver3 = left3;
                            decoder.bLeftOver4 = left4;
                        }

                        decoder.m_bytesUsed = output.BytesUsed;
                        return output.Count;
                    }

                    // Must flush: stop reading and let the classification below deal with what we have.
                    break;
                }

                if (left2 == -1)
                {
                    left2 = output.GetNextByte();
                }
                else if (left3 == -1)
                {
                    left3 = output.GetNextByte();
                }
                else
                {
                    left4 = output.GetNextByte();
                }
            }

            if (IsGBTwoByteTrailing(left2))
            {
                // Two-byte sequence.
                int twoByteIndex = (left1 << 8) | unchecked((byte)left2);
                if (!output.AddChar(this.mapBytesToUnicode[twoByteIndex], 2))
                {
                    break;
                }

                left1 = -1;
                left2 = -1;
            }
            else if (IsGBFourByteTrailing(left2) && IsGBLeadByte(left3) && IsGBFourByteTrailing(left4))
            {
                // Four-byte sequence.
                int offset = GetFourBytesOffset(left1, left2, left3, left4);
                bool stored;

                if (offset <= GBLast4ByteCode)
                {
                    // Maps to a single BMP char.
                    stored = output.AddChar(map4BytesToUnicode[offset], 4);
                }
                else if (offset >= GBSurrogateOffset && offset <= GBLastSurrogateOffset)
                {
                    // Maps to a surrogate pair.
                    offset -= GBSurrogateOffset;
                    stored = output.AddChar(
                        unchecked((char)(0xd800 + (offset / 0x400))),
                        unchecked((char)(0xdc00 + (offset % 0x400))),
                        4);
                }
                else
                {
                    // Well-formed GB18030 code with no Unicode mapping.
                    stored = output.Fallback((byte)left1, (byte)left2, (byte)left3, (byte)left4);
                }

                if (!stored)
                {
                    break;
                }

                left1 = -1;
                left2 = -1;
                left3 = -1;
                left4 = -1;
            }
            else
            {
                // Invalid sequence: fall back for the first byte and retry with the rest.
                if (!output.Fallback((byte)left1))
                {
                    break;
                }

                left1 = left2;
                left2 = left3;
                left3 = left4;
                left4 = -1;
            }
        }
    }

    // Phase 2: decode the fresh input.
    while (output.MoreData)
    {
        byte lead = output.GetNextByte();

        // ASCII passes straight through.
        if (lead <= 0x7f)
        {
            if (!output.AddChar((char)lead))
            {
                break;
            }

            continue;
        }

        // Neither ASCII nor a lead byte.
        if (!IsGBLeadByte(lead))
        {
            if (!output.Fallback(lead))
            {
                break;
            }

            continue;
        }

        // Lead byte at the very end of the input.
        if (!output.MoreData)
        {
            if (decoder != null && !decoder.MustFlush)
            {
                // Keep it for the next call (not recorded when counting).
                if (chars != null)
                {
                    left1 = lead;
                    left2 = -1;
                    left3 = -1;
                    left4 = -1;
                }

                break;
            }

            if (!output.Fallback(lead))
            {
                break;
            }

            continue;
        }

        byte second = output.GetNextByte();

        // Two-byte sequence.
        if (IsGBTwoByteTrailing(second))
        {
            int twoByteIndex = (lead << 8) | second;
            if (!output.AddChar(this.mapBytesToUnicode[twoByteIndex], 2))
            {
                break;
            }

            continue;
        }

        // Unknown second byte: fall back for the lead and re-read the second byte.
        if (!IsGBFourByteTrailing(second))
        {
            output.AdjustBytes(-1);
            if (!output.Fallback(lead))
            {
                break;
            }

            continue;
        }

        // Looks like a four-byte sequence but fewer than two more bytes remain.
        if (!output.EvenMoreData(2))
        {
            if (decoder != null && !decoder.MustFlush)
            {
                // Park the two (possibly three) bytes we have (not recorded when counting).
                if (chars != null)
                {
                    left1 = lead;
                    left2 = second;

                    if (output.MoreData)
                    {
                        left3 = output.GetNextByte();
                    }
                    else
                    {
                        left3 = -1;
                    }

                    left4 = -1;
                }

                break;
            }

            if (!output.Fallback(lead, second))
            {
                break;
            }

            continue;
        }

        byte third = output.GetNextByte();
        byte fourth = output.GetNextByte();

        // Bytes three and four must also fit the four-byte pattern.
        if (!(IsGBLeadByte(third) && IsGBFourByteTrailing(fourth)))
        {
            // Fall back for the lead only and re-read the other three bytes.
            output.AdjustBytes(-3);
            if (!output.Fallback(lead))
            {
                break;
            }

            continue;
        }

        int fourByteOffset = GetFourBytesOffset(lead, second, third, fourth);
        bool accepted;

        if (fourByteOffset <= GBLast4ByteCode)
        {
            // Maps to a single BMP char.
            accepted = output.AddChar(map4BytesToUnicode[fourByteOffset], 4);
        }
        else if (fourByteOffset >= GBSurrogateOffset && fourByteOffset <= GBLastSurrogateOffset)
        {
            // Maps to a surrogate pair.
            fourByteOffset -= GBSurrogateOffset;
            accepted = output.AddChar(
                unchecked((char)(0xd800 + (fourByteOffset / 0x400))),
                unchecked((char)(0xdc00 + (fourByteOffset % 0x400))),
                4);
        }
        else
        {
            // Well-formed GB18030 code with no Unicode mapping.
            accepted = output.Fallback(lead, second, third, fourth);
        }

        if (!accepted)
        {
            break;
        }
    }

    // Record leftovers (only for a real conversion) and the number of bytes consumed.
    if (decoder != null)
    {
        if (chars != null)
        {
            decoder.bLeftOver1 = left1;
            decoder.bLeftOver2 = left2;
            decoder.bLeftOver3 = left3;
            decoder.bLeftOver4 = left4;
        }

        decoder.m_bytesUsed = output.BytesUsed;
    }

    return output.Count;
}
// Decodes code page 52936 input into chars. '~' introduces an escape: "~~" is a literal
// tilde in ASCII mode, "~{" enters ModeHZ, "~}" returns to ModeASCII and "~\n" is skipped.
//
// bytes / byteCount : input bytes and how many of them there are.
// chars / charCount : output buffer and its capacity. Leftover bytes and the final mode are
//                     only written to the decoder when chars is non-null
//                     (NOTE(review): null looks like a count-only call - confirm).
// decoder           : optional carrier of the current mode and one leftover byte; may be null.
//
// Returns the char count reported by the working EncodingCharBuffer.
private unsafe int GetCharsCP52936(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)
{
    Encoding.EncodingCharBuffer output = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes activeMode = ISO2022Modes.ModeASCII;
    int carriedByte = -1;            // -1 means nothing carried over
    bool leftoverStored = false;     // set once a byte has been parked in the decoder

    if (decoder != null)
    {
        activeMode = decoder.currentMode;
        if (decoder.bytesLeftOverCount != 0)
        {
            carriedByte = decoder.bytesLeftOver[0];
        }
    }

    while (output.MoreData || carriedByte >= 0)
    {
        // The carried byte, if any, is consumed before fresh input.
        byte current;
        if (carriedByte >= 0)
        {
            current = (byte)carriedByte;
            carriedByte = -1;
        }
        else
        {
            current = output.GetNextByte();
        }

        if (current == '~')
        {
            if (!output.MoreData)
            {
                // A trailing '~' with nothing after it.
                if (decoder == null || decoder.MustFlush)
                {
                    output.Fallback(current);
                }
                else
                {
                    decoder.ClearMustFlush();
                    if (chars != null)
                    {
                        decoder.bytesLeftOverCount = 1;
                        decoder.bytesLeftOver[0] = (byte)'~';
                        leftoverStored = true;
                    }
                }

                break;
            }

            // The byte after '~' selects the action.
            current = output.GetNextByte();

            if (current == '~' && activeMode == ISO2022Modes.ModeASCII)
            {
                // "~~" is a literal tilde that consumed two bytes.
                if (!output.AddChar((char)current, 2))
                {
                    break;
                }

                continue;
            }

            if (current == '{')
            {
                activeMode = ISO2022Modes.ModeHZ;
                continue;
            }

            if (current == '}')
            {
                activeMode = ISO2022Modes.ModeASCII;
                continue;
            }

            if (current == '\n')
            {
                continue;
            }

            // Not a recognised escape: un-read the second byte and treat the '~' as data.
            output.AdjustBytes(-1);
            current = (byte)'~';
        }

        if (activeMode != ISO2022Modes.ModeASCII && current >= 0x20)
        {
            // Outside ASCII mode, bytes from 0x20 up are the first half of a two-byte code.
            if (!output.MoreData)
            {
                // Lead byte without its partner.
                if (decoder == null || decoder.MustFlush)
                {
                    output.Fallback(current);
                }
                else
                {
                    decoder.ClearMustFlush();
                    if (chars != null)
                    {
                        decoder.bytesLeftOverCount = 1;
                        decoder.bytesLeftOver[0] = current;
                        leftoverStored = true;
                    }
                }

                break;
            }

            byte trail = output.GetNextByte();
            ushort code = (ushort)((current << 8) | trail);
            char mapped;

            if (current == 0x20 && trail != 0)
            {
                // A 0x20 lead passes the trail byte through as-is.
                mapped = (char)trail;
            }
            else
            {
                bool inLowRange = current >= 0x21 && current <= 0x77 && trail >= 0x21 && trail <= 0x7e;
                bool inHighRange = current >= 0xa1 && current <= 0xf7 && trail >= 0xa1 && trail <= 0xfe;

                if (!inLowRange && !inHighRange)
                {
                    if (trail == 0x20 && 0x21 <= current && current <= 0x7d)
                    {
                        // Lead in 0x21..0x7d followed by 0x20 is remapped to code 0x2121.
                        code = 0x2121;
                    }
                    else
                    {
                        if (!output.Fallback((byte)(code >> 8), (byte)code))
                        {
                            break;
                        }

                        continue;
                    }
                }

                // The lookup table is indexed with the high bit set on both bytes.
                code = (ushort)(code | 0x8080);
                mapped = base.mapBytesToUnicode[code];
            }

            bool pairEmitted = (mapped == '\0' && code != 0)
                ? output.Fallback((byte)(code >> 8), (byte)code)
                : output.AddChar(mapped, 2);
            if (!pairEmitted)
            {
                break;
            }

            continue;
        }

        // Single-byte path.
        char single = base.mapBytesToUnicode[current];
        bool singleEmitted = (single == '\0' && current != 0)
            ? output.Fallback(current)
            : output.AddChar(single);
        if (!singleEmitted)
        {
            break;
        }
    }

    if (chars != null && decoder != null)
    {
        if (!leftoverStored)
        {
            decoder.bytesLeftOverCount = 0;
        }

        // A flush with nothing left over resets the mode; otherwise keep the current one.
        decoder.currentMode = (decoder.MustFlush && decoder.bytesLeftOverCount == 0)
            ? ISO2022Modes.ModeASCII
            : activeMode;

        decoder.m_bytesUsed = output.BytesUsed;
    }

    return output.Count;
}
// Decodes code page 5022x (ISO-2022 JP family) bytes into chars, tracking escape
// sequences (0x1b ...), shift-out (0x0e) and shift-in (0x0f).
//
// bytes / byteCount : input bytes and how many of them there are.
// chars / charCount : output buffer and its capacity. Decoder state is only written back
//                     when chars is non-null
//                     (NOTE(review): null looks like a count-only call - confirm).
// decoder           : optional carrier of both modes and of buffered bytes; may be null.
//
// Returns the char count reported by the working EncodingCharBuffer.
private unsafe int GetCharsCP5022xJP(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)
{
    Encoding.EncodingCharBuffer output = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    ISO2022Modes activeMode = ISO2022Modes.ModeASCII;
    ISO2022Modes savedMode = ISO2022Modes.ModeASCII;   // mode to restore on shift-in
    byte[] pending = new byte[4];
    int pendingCount = 0;

    if (decoder != null)
    {
        activeMode = decoder.currentMode;
        savedMode = decoder.shiftInOutMode;
        pendingCount = decoder.bytesLeftOverCount;
        for (int slot = 0; slot < pendingCount; slot++)
        {
            pending[slot] = decoder.bytesLeftOver[slot];
        }
    }

    while (output.MoreData || pendingCount > 0)
    {
        byte current;

        if (pendingCount > 0)
        {
            // 0x1b (ESC) at the front means an escape sequence is being collected.
            if (pending[0] == 0x1b)
            {
                if (output.MoreData)
                {
                    pending[pendingCount++] = output.GetNextByte();
                    ISO2022Modes escapeResult = this.CheckEscapeSequenceJP(pending, pendingCount);

                    if (escapeResult != ISO2022Modes.ModeInvalidEscape)
                    {
                        if (escapeResult != ISO2022Modes.ModeIncompleteEscape)
                        {
                            // Complete sequence: it selects the new mode for both trackers.
                            pendingCount = 0;
                            savedMode = escapeResult;
                            activeMode = escapeResult;
                        }

                        continue;
                    }
                }
                else if (decoder != null && !decoder.MustFlush)
                {
                    // No more input and allowed to keep state: stop with the partial sequence buffered.
                    break;
                }
            }

            // Invalid escape or plain buffered data: take the oldest pending byte.
            current = this.DecrementEscapeBytes(ref pending, ref pendingCount);
        }
        else
        {
            current = output.GetNextByte();

            if (current == 0x1b)
            {
                if (pendingCount == 0)
                {
                    // Start buffering a new escape sequence.
                    pending[0] = current;
                    pendingCount = 1;
                    continue;
                }

                output.AdjustBytes(-1);
            }
        }

        // 0x0e (shift-out): remember the current mode and switch to halfwidth katakana.
        if (current == 0x0e)
        {
            savedMode = activeMode;
            activeMode = ISO2022Modes.ModeHalfwidthKatakana;
            continue;
        }

        // 0x0f (shift-in): restore the remembered mode.
        if (current == 0x0f)
        {
            activeMode = savedMode;
            continue;
        }

        ushort code = current;
        bool isTwoByte = false;

        if (activeMode == ISO2022Modes.ModeJIS0208)
        {
            // JIS0208 mode is double-byte: find the second byte.
            if (pendingCount > 0)
            {
                if (pending[0] != 0x1b)
                {
                    code = (ushort)((code << 8) | this.DecrementEscapeBytes(ref pending, ref pendingCount));
                    isTwoByte = true;
                }
            }
            else if (output.MoreData)
            {
                code = (ushort)((code << 8) | output.GetNextByte());
                isTwoByte = true;
            }
            else
            {
                // First byte with no second byte available.
                if (decoder == null || decoder.MustFlush)
                {
                    output.Fallback(current);
                }
                else if (chars != null)
                {
                    pending[0] = current;
                    pendingCount = 1;
                }

                break;
            }

            // A 0x2a lead byte is remapped into the 0x10xx range, keeping the trail byte.
            if (isTwoByte && (code & 0xff00) == 0x2a00)
            {
                code = (ushort)((code & 0xff) | 0x1000);
            }
        }
        else if (code >= 0xa1 && code <= 0xdf)
        {
            // Bytes 0xa1..0xdf: add the 0x10 lead and clear the 0x80 bit.
            code = (ushort)((code | 0x1000) & 0xff7f);
        }
        else if (activeMode == ISO2022Modes.ModeHalfwidthKatakana)
        {
            code = (ushort)(code | 0x1000);
        }

        char mapped = base.mapBytesToUnicode[code];

        bool emitted;
        if (mapped == '\0' && code != 0)
        {
            // No mapping: fall back with the byte(s) that formed the code.
            emitted = isTwoByte
                ? output.Fallback((byte)(code >> 8), (byte)code)
                : output.Fallback(current);
        }
        else
        {
            emitted = output.AddChar(mapped, isTwoByte ? 2 : 1);
        }

        if (!emitted)
        {
            break;
        }
    }

    if (chars != null && decoder != null)
    {
        if (decoder.MustFlush && pendingCount == 0)
        {
            // Flushing with nothing buffered: back to the initial state.
            decoder.currentMode = ISO2022Modes.ModeASCII;
            decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
            decoder.bytesLeftOverCount = 0;
        }
        else
        {
            decoder.currentMode = activeMode;
            decoder.shiftInOutMode = savedMode;
            decoder.bytesLeftOverCount = pendingCount;
            decoder.bytesLeftOver = pending;
        }

        decoder.m_bytesUsed = output.BytesUsed;
    }

    return output.Count;
}
// Decodes GB18030 bytes (ASCII, two-byte and four-byte sequences) into chars.
//
// bytes / byteCount : input bytes and how many of them there are.
// chars / charCount : output buffer and its capacity. Leftover bytes are only written to
//                     the decoder when chars is non-null
//                     (NOTE(review): null looks like a count-only call - confirm).
// baseDecoder       : optional GB18030Decoder holding up to four leftover bytes; may be null.
//
// Returns the char count reported by the working EncodingCharBuffer.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    GB18030Decoder decoder = (GB18030Decoder)baseDecoder;
    Encoding.EncodingCharBuffer output = new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    // Carried-over bytes; -1 marks an empty slot.
    short carry1 = -1;
    short carry2 = -1;
    short carry3 = -1;
    short carry4 = -1;

    // First deal with bytes the decoder kept from the previous call.
    if (decoder != null && decoder.bLeftOver1 != -1)
    {
        carry1 = decoder.bLeftOver1;
        carry2 = decoder.bLeftOver2;
        carry3 = decoder.bLeftOver3;
        carry4 = decoder.bLeftOver4;

        while (carry1 != -1)
        {
            if (!this.IsGBLeadByte(carry1))
            {
                // Not a lead byte: copy ASCII, fall back for anything else, then shift down.
                if (carry1 <= 0x7f)
                {
                    if (!output.AddChar((char)((ushort)carry1)))
                    {
                        break;
                    }
                }
                else if (!output.Fallback((byte)carry1))
                {
                    break;
                }

                carry1 = carry2;
                carry2 = carry3;
                carry3 = carry4;
                carry4 = -1;
                continue;
            }

            // Read more input until a two-byte or four-byte candidate is complete.
            while (carry2 == -1 || (this.IsGBFourByteTrailing(carry2) && carry4 == -1))
            {
                if (!output.MoreData)
                {
                    if (!decoder.MustFlush)
                    {
                        // Incomplete and not flushing: save the bytes and return right away.
                        if (chars != null)
                        {
                            decoder.bLeftOver1 = carry1;
                            decoder.bLeftOver2 = carry2;
                            decoder.bLeftOver3 = carry3;
                            decoder.bLeftOver4 = carry4;
                        }

                        decoder.m_bytesUsed = output.BytesUsed;
                        return output.Count;
                    }

                    // Flushing: classify whatever we have.
                    break;
                }

                if (carry2 == -1)
                {
                    carry2 = output.GetNextByte();
                }
                else if (carry3 == -1)
                {
                    carry3 = output.GetNextByte();
                }
                else
                {
                    carry4 = output.GetNextByte();
                }
            }

            if (this.IsGBTwoByteTrailing(carry2))
            {
                // Two-byte sequence.
                int tableIndex = (carry1 << 8) | (byte)carry2;
                if (!output.AddChar(base.mapBytesToUnicode[tableIndex], 2))
                {
                    break;
                }

                carry1 = -1;
                carry2 = -1;
            }
            else if (this.IsGBFourByteTrailing(carry2) && this.IsGBLeadByte(carry3) && this.IsGBFourByteTrailing(carry4))
            {
                // Four-byte sequence.
                int offset = this.GetFourBytesOffset(carry1, carry2, carry3, carry4);

                if (offset <= 0x99fb)
                {
                    // Offsets up to 0x99fb index map4BytesToUnicode directly.
                    if (!output.AddChar(this.map4BytesToUnicode[offset], 4))
                    {
                        break;
                    }
                }
                else if (offset >= 0x2e248 && offset <= 0x12e247)
                {
                    // This offset range becomes a UTF-16 surrogate pair.
                    offset -= 0x2e248;
                    if (!output.AddChar((char)(0xd800 + (offset / 0x400)), (char)(0xdc00 + (offset % 0x400)), 4))
                    {
                        break;
                    }
                }
                else if (!output.Fallback((byte)carry1, (byte)carry2, (byte)carry3, (byte)carry4))
                {
                    break;
                }

                carry1 = -1;
                carry2 = -1;
                carry3 = -1;
                carry4 = -1;
            }
            else
            {
                // Invalid sequence: fall back for the first byte and shift the rest down.
                if (!output.Fallback((byte)carry1))
                {
                    break;
                }

                carry1 = carry2;
                carry2 = carry3;
                carry3 = carry4;
                carry4 = -1;
            }
        }
    }

    // Then decode the new input.
    while (output.MoreData)
    {
        byte first = output.GetNextByte();

        if (first <= 0x7f)
        {
            // ASCII.
            if (!output.AddChar((char)first))
            {
                break;
            }
        }
        else if (!this.IsGBLeadByte(first))
        {
            // Neither ASCII nor a lead byte.
            if (!output.Fallback(first))
            {
                break;
            }
        }
        else if (!output.MoreData)
        {
            // Lead byte at the end of the input.
            if (decoder != null && !decoder.MustFlush)
            {
                if (chars != null)
                {
                    carry1 = first;
                    carry2 = -1;
                    carry3 = -1;
                    carry4 = -1;
                }

                break;
            }

            if (!output.Fallback(first))
            {
                break;
            }
        }
        else
        {
            byte second = output.GetNextByte();

            if (this.IsGBTwoByteTrailing(second))
            {
                // Two-byte sequence.
                int tableIndex = (first << 8) | second;
                if (!output.AddChar(base.mapBytesToUnicode[tableIndex], 2))
                {
                    break;
                }
            }
            else if (!this.IsGBFourByteTrailing(second))
            {
                // Unknown second byte: fall back for the lead and re-read the second byte.
                output.AdjustBytes(-1);
                if (!output.Fallback(first))
                {
                    break;
                }
            }
            else if (!output.EvenMoreData(2))
            {
                // Four-byte candidate cut short by the end of the input.
                if (decoder != null && !decoder.MustFlush)
                {
                    if (chars != null)
                    {
                        carry1 = first;
                        carry2 = second;

                        if (output.MoreData)
                        {
                            carry3 = output.GetNextByte();
                        }
                        else
                        {
                            carry3 = -1;
                        }

                        carry4 = -1;
                    }

                    break;
                }

                if (!output.Fallback(first, second))
                {
                    break;
                }
            }
            else
            {
                byte third = output.GetNextByte();
                byte fourth = output.GetNextByte();

                if (this.IsGBLeadByte(third) && this.IsGBFourByteTrailing(fourth))
                {
                    int offset = this.GetFourBytesOffset(first, second, third, fourth);

                    if (offset <= 0x99fb)
                    {
                        // Offsets up to 0x99fb index map4BytesToUnicode directly.
                        if (!output.AddChar(this.map4BytesToUnicode[offset], 4))
                        {
                            break;
                        }
                    }
                    else if (offset >= 0x2e248 && offset <= 0x12e247)
                    {
                        // This offset range becomes a UTF-16 surrogate pair.
                        offset -= 0x2e248;
                        if (!output.AddChar((char)(0xd800 + (offset / 0x400)), (char)(0xdc00 + (offset % 0x400)), 4))
                        {
                            break;
                        }
                    }
                    else if (!output.Fallback(first, second, third, fourth))
                    {
                        break;
                    }
                }
                else
                {
                    // Not a valid four-byte sequence: fall back for the lead, re-read the other three.
                    output.AdjustBytes(-3);
                    if (!output.Fallback(first))
                    {
                        break;
                    }
                }
            }
        }
    }

    // Store leftovers (only when chars is non-null) and the number of bytes consumed.
    if (decoder != null)
    {
        if (chars != null)
        {
            decoder.bLeftOver1 = carry1;
            decoder.bLeftOver2 = carry2;
            decoder.bLeftOver3 = carry3;
            decoder.bLeftOver4 = carry4;
        }

        decoder.m_bytesUsed = output.BytesUsed;
    }

    return output.Count;
}