// Encodes charCount characters starting at chars into ISCII bytes.
//
//   chars       - input characters (asserted non-null).
//   charCount   - number of input characters (asserted non-negative).
//   bytes       - output buffer; may be null, in which case the method only counts and the
//                 encoder's code page / virama state is not written back.
//   byteCount   - capacity of bytes (asserted non-negative).
//   baseEncoder - optional stateful encoder, cast to ISCIIEncoder; may be null.
//
// Returns the Count reported by the EncodingByteBuffer that performed the conversion.
[System.Security.SecurityCritical]  // auto-generated
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder)
{
    // bytes is deliberately not asserted: null means "count only".
    Debug.Assert(chars != null, "[ISCIIEncoding.GetBytes]chars!=null");
    Debug.Assert(charCount >= 0, "[ISCIIEncoding.GetBytes]charCount >=0");
    Debug.Assert(byteCount >= 0, "[ISCIIEncoding.GetBytes]byteCount >=0");

    ISCIIEncoder encoder = (ISCIIEncoder)baseEncoder;

    // All reads, writes and fallbacks go through this helper.
    EncodingByteBuffer output = new EncodingByteBuffer(this, encoder, bytes, byteCount, chars, charCount);

    int activeCodePage;
    bool afterVirama;

    if (encoder == null)
    {
        // Stateless call: start from the encoding's default code page.
        activeCodePage = _defaultCodePage;
        afterVirama = false;
    }
    else
    {
        // Resume from whatever the previous call left behind.
        activeCodePage = encoder.currentCodePage;
        afterVirama = encoder.bLastVirama;

        // A character left over from the previous call is handed to the fallback.
        if (encoder.charLeftOver > 0)
        {
            output.Fallback(encoder.charLeftOver);
            afterVirama = false;
        }
    }

    while (output.MoreData)
    {
        char c = output.GetNextChar();

        // Characters below MultiByteBegin are copied through as a single byte.
        if (c < MultiByteBegin)
        {
            if (!output.AddByte((byte)c))
            {
                break;
            }

            afterVirama = false;
            continue;
        }

        bool inIndicRange = (c >= IndicBegin) && (c <= IndicEnd);
        if (!inIndicRange)
        {
            // ZWNJ / ZWJ directly after a virama map to a Virama / Nukta byte respectively.
            bool isJoiner = (c == ZWNJ) || (c == ZWJ);
            if (afterVirama && isJoiner)
            {
                if (c != ZWNJ)
                {
                    // ZWJ
                    if (!output.AddByte(Nukta))
                    {
                        break;
                    }
                }
                else
                {
                    if (!output.AddByte(Virama))
                    {
                        break;
                    }
                }

                afterVirama = false;
                continue;
            }

            // Anything else outside the Indic range goes to the fallback.
            output.Fallback(c);
            afterVirama = false;
            continue;
        }

        // Look the character up in the Unicode -> ISCII table; a zero entry means "no mapping".
        int entry = s_UnicodeToIndicChar[c - IndicBegin];
        if (entry == 0)
        {
            output.Fallback(c);
            afterVirama = false;
            continue;
        }

        // Low 8 bits: the ISCII byte. Bits 8-11: the script. Bits 12-15: selector for a second byte (0 = none).
        byte firstByte = (byte)entry;
        int script = (entry >> 8) & 0x000f;
        int secondByteSelector = (entry >> 12) & 0x000f;

        // A script change emits an ATR control pair before the character itself.
        Debug.Assert(script != 0, "[ISCIIEncoding.GetBytes]expected an indic script value");
        if (script != activeCodePage)
        {
            if (!output.AddByte(ControlATR, (byte)(script | ControlCodePageStart)))
            {
                break;
            }

            // Only remember the new code page once the ATR was actually written.
            activeCodePage = script;

            Debug.Assert(activeCodePage >= CodeDevanagari && activeCodePage <= CodePunjabi,
                "[ISCIIEncoding.GetBytes]Code page (" + activeCodePage + " shouldn't appear in ISCII from Unicode table!");
        }

        // Tell the buffer up front whether one more byte will follow this one.
        int bytesToFollow = 0;
        if (secondByteSelector != 0)
        {
            bytesToFollow = 1;
        }

        if (!output.AddByte(firstByte, bytesToFollow))
        {
            break;
        }

        afterVirama = (firstByte == Virama);

        if (secondByteSelector != 0)
        {
            Debug.Assert(secondByteSelector > 0 && secondByteSelector <= 3,
                "[ISCIIEncoding.GetBytes]Expected indicTwoBytes from 1-3, not " + secondByteSelector);

            if (!output.AddByte(s_SecondIndicByte[secondByteSelector]))
            {
                break;
            }
        }
    }

    // When flushing (or running without an encoder) return to the default code page.
    if (activeCodePage != _defaultCodePage)
    {
        if (encoder == null || encoder.MustFlush)
        {
            if (!output.AddByte(ControlATR, (byte)(_defaultCodePage | ControlCodePageStart)))
            {
                // The failed AddByte stepped the char position back; step forward again so the
                // consumed-character count is unchanged. State is kept for the next call.
                output.GetNextChar();
            }
            else
            {
                activeCodePage = _defaultCodePage;
            }

            afterVirama = false;
        }
    }

    // Persist state only for a real conversion (not when merely counting).
    if (encoder != null && bytes != null)
    {
        if (!output.fallbackBufferHelper.bUsedEncoder)
        {
            encoder.charLeftOver = (char)0;
        }

        encoder.currentCodePage = activeCodePage;
        encoder.bLastVirama = afterVirama;
        encoder.m_charsUsed = output.CharsUsed;
    }

    return output.Count;
}
// Encodes charCount characters starting at chars into GB18030 bytes.
//
//   chars     - input characters (asserted non-null; callers have already validated arguments).
//   charCount - number of input characters (asserted non-negative).
//   bytes     - output buffer; may be null, in which case the method only counts and the
//               encoder's charLeftOver is not overwritten.
//   byteCount - capacity of bytes (asserted non-negative).
//   encoder   - optional stateful encoder carrying a pending high surrogate; may be null.
//
// Returns the Count reported by the EncodingByteBuffer that performed the conversion.
[System.Security.SecurityCritical]  // auto-generated
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
{
    // Internal callers validated the parameters; bytes == null is allowed (count-only).
    Debug.Assert(byteCount >= 0, "[GB18030Encoding.GetBytes]byteCount is negative");
    Debug.Assert(chars != null, "[GB18030Encoding.GetBytes]chars is null");
    Debug.Assert(charCount >= 0, "[GB18030Encoding.GetBytes]charCount is negative");

    // The encoder fallback itself must exist.
    Debug.Assert(EncoderFallback != null, "[GB18030Encoding.GetBytes]Attempting to use null encoder fallback");

    // High surrogate carried over from a previous call, if any.
    char pendingHigh = (encoder != null) ? encoder.charLeftOver : (char)0;

    EncodingByteBuffer output = new EncodingByteBuffer(this, encoder, bytes, byteCount, chars, charCount);

    // Outer loop: after flushing a dangling high surrogate through the fallback,
    // go around again to consume whatever the fallback produced.
    while (true)
    {
        while (output.MoreData)
        {
            char c = output.GetNextChar();

            // A high surrogate is waiting for its partner.
            if (pendingHigh != 0)
            {
                Debug.Assert(char.IsHighSurrogate(pendingHigh),
                    "[GB18030Encoding.GetBytes] leftover character should be high surrogate, not 0x" + ((int)pendingHigh).ToString("X4", CultureInfo.InvariantCulture));

                if (!char.IsLowSurrogate(c))
                {
                    // Not a pair: un-read this character (without throwing), fall back the
                    // lone high surrogate, and let the loop look at this character again.
                    output.MovePrevious(false);
                    bool fallbackSucceeded = output.Fallback(pendingHigh);
                    pendingHigh = (char)0;
                    if (!fallbackSucceeded)
                    {
                        break;
                    }

                    continue;
                }

                // Valid pair: split the linear offset into the four GB18030 bytes,
                // least significant digit first.
                int pairOffset = ((pendingHigh - 0xd800) << 10) + (c - 0xdc00);

                byte fourth = (byte)((pairOffset % 0x0a) + 0x30);
                pairOffset /= 0x0a;
                byte third = (byte)((pairOffset % 0x7e) + 0x81);
                pairOffset /= 0x7e;
                byte second = (byte)((pairOffset % 0x0a) + 0x30);
                pairOffset /= 0x0a;
                Debug.Assert(pairOffset < 0x6f,
                    "[GB18030Encoding.GetBytes](1) Expected offset < 0x6f, not 0x" + pairOffset.ToString("X2", CultureInfo.InvariantCulture));

                pendingHigh = (char)0;
                if (!output.AddByte((byte)(pairOffset + 0x90), second, third, fourth))
                {
                    // AddByte already backed up one char; back up once more (without throwing)
                    // so both halves of the pair are un-read.
                    output.MovePrevious(false);
                    break;
                }

                continue;
            }

            // ASCII goes out as a single byte.
            if (c <= 0x7f)
            {
                if (!output.AddByte((byte)c))
                {
                    break;
                }

                continue;
            }

            // Hold a high surrogate until the next character arrives.
            if (char.IsHighSurrogate(c))
            {
                pendingHigh = c;
                continue;
            }

            // A low surrogate with no preceding high surrogate can only be fallen back.
            if (char.IsLowSurrogate(c))
            {
                if (!output.Fallback(c))
                {
                    break;
                }

                continue;
            }

            // Everything else is looked up in the mapping table.
            ushort mapped = mapUnicodeToBytes[c];

            if (!Is4Byte(c))
            {
                // Two-byte form: high byte then low byte of the table value.
                if (!output.AddByte((byte)(mapped >> 8), (byte)(mapped & 0xff)))
                {
                    break;
                }

                continue;
            }

            // Four-byte form: the table value is a linear offset split the same way as above.
            byte b4 = (byte)((mapped % 0x0a) + 0x30);
            mapped /= 0x0a;
            byte b3 = (byte)((mapped % 0x7e) + 0x81);
            mapped /= 0x7e;
            byte b2 = (byte)((mapped % 0x0a) + 0x30);
            mapped /= 0x0a;
            Debug.Assert(mapped < 0x7e,
                "[GB18030Encoding.GetBytes]Expected iBytes < 0x7e, not 0x" + mapped.ToString("X2", CultureInfo.InvariantCulture));

            if (!output.AddByte((byte)(mapped + 0x81), b2, b3, b4))
            {
                break;
            }
        }

        // A high surrogate still pending at a flush point (or with no encoder to carry it)
        // must be fallen back now; then loop to process the fallback output.
        bool mustFlushPending = (encoder == null || encoder.MustFlush) && (pendingHigh > 0);
        if (!mustFlushPending)
        {
            break;
        }

        output.Fallback(pendingHigh);
        pendingHigh = (char)0;
    }

    if (encoder != null)
    {
        // Do not disturb the stored leftover when only counting.
        if (bytes != null)
        {
            encoder.charLeftOver = pendingHigh;
        }

        encoder.m_charsUsed = output.CharsUsed;
    }

    return output.Count;
}
// Encodes charCount characters starting at chars for code page 50225 (per the method name).
//
// Double-byte characters are preceded, once, by the designator ESCAPE '$' ')' 'C' and are
// written in ModeKR (entered with SHIFT_OUT); single-byte characters are written in
// ModeASCII (entered with SHIFT_IN). Unmappable characters go to the encoder fallback.
//
//   bytes   - may be null, in which case nothing is written back to the encoder.
//   encoder - optional stateful encoder; may be null.
//
// Returns the Count reported by the EncodingByteBuffer that performed the conversion.
[System.Security.SecurityCritical]  // auto-generated
private unsafe int GetBytesCP50225KR(char* chars, int charCount, byte* bytes, int byteCount, ISO2022Encoder encoder)
{
    EncodingByteBuffer output = new EncodingByteBuffer(this, encoder, bytes, byteCount, chars, charCount);

    // mode:           the shift state the output stream is currently in.
    // designatorMode: becomes ModeKR once the designator sequence has been emitted.
    ISO2022Modes mode = ISO2022Modes.ModeASCII;
    ISO2022Modes designatorMode = ISO2022Modes.ModeASCII;

    if (encoder != null)
    {
        mode = encoder.currentMode;
        designatorMode = encoder.shiftInOutMode;

        // A leftover char can only be a high surrogate, which is unsupported here: fall it back.
        char leftover = encoder.charLeftOver;
        if (leftover > 0)
        {
            Debug.Assert(char.IsHighSurrogate(leftover), "[ISO2022Encoding.GetBytesCP50225KR]leftover character should be high surrogate");
            output.Fallback(leftover);
        }
    }

    while (output.MoreData)
    {
        char c = output.GetNextChar();
        ushort mapped = mapUnicodeToBytes[c];

        // A zero table value means "no mapping", except for U+0000 itself.
        if (mapped == 0 && c != 0)
        {
            output.Fallback(c);
            continue;
        }

        byte lead = (byte)(mapped >> 8);
        byte trail = (byte)(mapped & 0xff);

        if (lead == 0)
        {
            // Single byte: make sure we are shifted in to ASCII first.
            if (mode != ISO2022Modes.ModeASCII)
            {
                if (!output.AddByte(SHIFT_IN))
                {
                    break;
                }

                mode = ISO2022Modes.ModeASCII;
            }

            if (!output.AddByte(trail))
            {
                break;
            }

            continue;
        }

        // Double byte: emit the designator once...
        if (designatorMode != ISO2022Modes.ModeKR)
        {
            if (!output.AddByte(ESCAPE, (byte)'$', (byte)')', (byte)'C'))
            {
                break;
            }

            designatorMode = ISO2022Modes.ModeKR;
        }

        // ...shift out if necessary...
        if (mode != ISO2022Modes.ModeKR)
        {
            if (!output.AddByte(SHIFT_OUT))
            {
                break;
            }

            mode = ISO2022Modes.ModeKR;
        }

        // ...then write the pair.
        if (!output.AddByte(lead, trail))
        {
            break;
        }
    }

    // When flushing (or running without an encoder) finish in ASCII mode.
    if (mode != ISO2022Modes.ModeASCII && (encoder == null || encoder.MustFlush))
    {
        if (!output.AddByte(SHIFT_IN))
        {
            // The failed AddByte stepped the char position back; step forward again so the
            // consumed-character count is unchanged. State is kept for the next call.
            output.GetNextChar();
        }
        else
        {
            mode = ISO2022Modes.ModeASCII;
        }
    }

    // Persist state only for a real conversion.
    if (bytes != null && encoder != null)
    {
        if (!output.fallbackBufferHelper.bUsedEncoder)
        {
            encoder.charLeftOver = (char)0;
        }

        encoder.currentMode = mode;

        // After a complete flush the designator must be forgotten so it is emitted again
        // next time; otherwise remember whether it has been written.
        if (encoder.MustFlush && encoder.charLeftOver == (char)0)
        {
            encoder.shiftInOutMode = ISO2022Modes.ModeASCII;
        }
        else
        {
            Debug.Assert(!encoder.MustFlush || !encoder.m_throwOnOverflow, "[ISO2022Encoding.GetBytesCP50225KR]Expected no left over data or not flushing or not converting");
            encoder.shiftInOutMode = designatorMode;
        }

        encoder.m_charsUsed = output.CharsUsed;
    }

    return output.Count;
}
// Encodes charCount characters starting at chars for code page 52936 (per the method name).
//
// Double-byte characters are written in ModeHZ, entered with "~{", with the high bit of both
// bytes cleared. Single-byte characters are written in ModeASCII, entered with "~}", and a
// literal '~' is doubled. Characters with no mapping, or whose mapping falls outside the
// accepted ranges, go to the encoder fallback.
//
//   bytes   - may be null, in which case nothing is written back to the encoder.
//   encoder - optional stateful encoder; may be null.
//
// Returns the Count reported by the EncodingByteBuffer that performed the conversion.
[System.Security.SecurityCritical]  // auto-generated
private unsafe int GetBytesCP52936(char* chars, int charCount, byte* bytes, int byteCount, ISO2022Encoder encoder)
{
    const byte Tilde = (byte)'~';

    EncodingByteBuffer output = new EncodingByteBuffer(this, encoder, bytes, byteCount, chars, charCount);

    ISO2022Modes mode = ISO2022Modes.ModeASCII;

    if (encoder != null)
    {
        mode = encoder.currentMode;

        // A leftover char can only be a high surrogate, which is unsupported here: fall it back.
        char leftover = encoder.charLeftOver;
        if (leftover > 0)
        {
            Debug.Assert(char.IsHighSurrogate(leftover), "[ISO2022Encoding.GetBytesCP52936]leftover character should be high surrogate");
            output.Fallback(leftover);
        }
    }

    while (output.MoreData)
    {
        char c = output.GetNextChar();
        ushort mapped = mapUnicodeToBytes[c];

        // A zero table value means "no mapping", except for U+0000 itself.
        if (mapped == 0 && c != 0)
        {
            output.Fallback(c);
            continue;
        }

        byte lead = (byte)(mapped >> 8);
        byte trail = (byte)(mapped & 0xff);
        bool isDoubleByte = lead != 0;

        // Reject mappings outside the accepted subset:
        //   double byte - lead must be 0xa1..0xf7 and trail 0xa1..0xfe;
        //   single byte - trail must not exceed 0x80, except for 0xff.
        bool outsideSubset;
        if (isDoubleByte)
        {
            outsideSubset = lead < 0xa1 || lead > 0xf7 || trail < 0xa1 || trail > 0xfe;
        }
        else
        {
            outsideSubset = trail > 0x80 && trail != 0xff;
        }

        if (outsideSubset)
        {
            output.Fallback(c);
            continue;
        }

        if (isDoubleByte)
        {
            if (mode != ISO2022Modes.ModeHZ)
            {
                // Mode marker, announcing that two more bytes will follow.
                if (!output.AddByte(Tilde, (byte)'{', 2))
                {
                    break;
                }

                mode = ISO2022Modes.ModeHZ;
            }

            // The table values carry an 0x8080 mask; strip it.
            if (!output.AddByte((byte)(lead & 0x7f), (byte)(trail & 0x7f)))
            {
                break;
            }

            continue;
        }

        // Single byte from here on.
        bool isTilde = trail == Tilde;

        if (mode != ISO2022Modes.ModeASCII)
        {
            // Mode marker; one more byte follows, or two when the character is '~'.
            int bytesToFollow = isTilde ? 2 : 1;
            if (!output.AddByte(Tilde, (byte)'}', bytesToFollow))
            {
                break;
            }

            mode = ISO2022Modes.ModeASCII;
        }

        // A literal '~' is escaped by an extra '~' in front of it.
        if (isTilde)
        {
            if (!output.AddByte(Tilde, 1))
            {
                break;
            }
        }

        if (!output.AddByte(trail))
        {
            break;
        }
    }

    // When flushing (or running without an encoder) finish in ASCII mode.
    if (mode != ISO2022Modes.ModeASCII && (encoder == null || encoder.MustFlush))
    {
        if (!output.AddByte(Tilde, (byte)'}'))
        {
            // The failed AddByte stepped the char position back; step forward again so the
            // consumed-character count is unchanged. State is kept for the next call.
            output.GetNextChar();
        }
        else
        {
            mode = ISO2022Modes.ModeASCII;
        }
    }

    // Persist state only for a real conversion.
    if (encoder != null && bytes != null)
    {
        encoder.currentMode = mode;

        if (!output.fallbackBufferHelper.bUsedEncoder)
        {
            encoder.charLeftOver = (char)0;
        }

        encoder.m_charsUsed = output.CharsUsed;
    }

    return output.Count;
}
// Encodes charCount characters starting at chars for code pages 50220, 50221 and 50222
// (the three values this method tests CodePage against).
//
// Halfwidth katakana (lead byte == LEADBYTE_HALFWIDTH):
//   50220 - converted to fullwidth through s_HalfToFullWidthKanaTable and re-dispatched;
//   50221 - written after ESCAPE '(' 'I';
//   50222 - written after SHIFT_OUT; the mode to return to is remembered.
// Double-byte characters are written after ESCAPE '$' 'B' (ModeJIS0208).
// Single-byte characters are written after ESCAPE '(' 'B' (ModeASCII).
// Unmappable characters go to the encoder fallback.
//
//   bytes   - may be null, in which case nothing is written back to the encoder.
//   encoder - optional stateful encoder; may be null.
//
// Returns the Count reported by the EncodingByteBuffer that performed the conversion.
[System.Security.SecurityCritical]  // auto-generated
private unsafe int GetBytesCP5022xJP(char* chars, int charCount, byte* bytes, int byteCount, ISO2022Encoder encoder)
{
    EncodingByteBuffer output = new EncodingByteBuffer(this, encoder, bytes, byteCount, chars, charCount);

    // mode:       the mode the output stream is currently in.
    // returnMode: the mode SHIFT_IN goes back to (only meaningful for code page 50222).
    ISO2022Modes mode = ISO2022Modes.ModeASCII;
    ISO2022Modes returnMode = ISO2022Modes.ModeASCII;

    if (encoder != null)
    {
        mode = encoder.currentMode;
        returnMode = encoder.shiftInOutMode;

        // A leftover char can only be a high surrogate, which is unsupported here: fall it back.
        char leftover = encoder.charLeftOver;
        if (leftover > 0)
        {
            Debug.Assert(char.IsHighSurrogate(leftover), "[ISO2022Encoding.GetBytesCP5022xJP]leftover character should be high surrogate");
            output.Fallback(leftover);
        }
    }

    while (output.MoreData)
    {
        char c = output.GetNextChar();
        ushort mapped = mapUnicodeToBytes[c];

    StartConvert:
        byte lead = (byte)(mapped >> 8);
        byte trail = (byte)(mapped & 0xff);

        if (lead == LEADBYTE_HALFWIDTH)
        {
            if (CodePage == 50220)
            {
                // 50220 has no halfwidth katakana: substitute the fullwidth form, or fall
                // back when the trail byte has no slot in the conversion table.
                int kanaIndex = trail - 0x21;
                if (kanaIndex < 0 || kanaIndex >= s_HalfToFullWidthKanaTable.Length)
                {
                    output.Fallback(c);
                    continue;
                }

                mapped = (ushort)(s_HalfToFullWidthKanaTable[kanaIndex] & 0x7F7F);

                // Classify the substituted value from scratch.
                goto StartConvert;
            }

            if (mode != ISO2022Modes.ModeHalfwidthKatakana)
            {
                if (CodePage != 50222)
                {
                    // 50221 selects halfwidth katakana with an escape sequence.
                    Debug.Assert(CodePage == 50221, "[ISO2022Encoding.GetBytesCP5022xJP]Expected Code Page 50221");

                    if (!output.AddByte(ESCAPE, (byte)'(', (byte)'I'))
                    {
                        break;
                    }

                    mode = ISO2022Modes.ModeHalfwidthKatakana;
                }
                else
                {
                    // 50222 shifts out instead; modes change only after the byte was accepted.
                    if (!output.AddByte(SHIFT_OUT))
                    {
                        break;
                    }

                    returnMode = mode;
                    mode = ISO2022Modes.ModeHalfwidthKatakana;
                }
            }

            // Table trail bytes are 0x80 too large for the wire format.
            if (!output.AddByte((byte)(trail & 0x7F)))
            {
                break;
            }

            continue;
        }

        // A zero table value means "no mapping", except for U+0000 itself.
        if (mapped == 0 && c != 0)
        {
            output.Fallback(c);
            continue;
        }

        // Both remaining cases must first leave 50222's shifted-out katakana mode.
        if (CodePage == 50222 && mode == ISO2022Modes.ModeHalfwidthKatakana)
        {
            if (!output.AddByte(SHIFT_IN))
            {
                break;
            }

            mode = returnMode;
        }

        if (lead != 0)
        {
            // Double byte: needs ModeJIS0208.
            if (mode != ISO2022Modes.ModeJIS0208)
            {
                if (!output.AddByte(ESCAPE, (byte)'$', (byte)'B'))
                {
                    break;
                }

                mode = ISO2022Modes.ModeJIS0208;
            }

            if (!output.AddByte(lead, trail))
            {
                break;
            }
        }
        else
        {
            // Single byte: needs ModeASCII.
            if (mode != ISO2022Modes.ModeASCII)
            {
                if (!output.AddByte(ESCAPE, (byte)'(', (byte)'B'))
                {
                    break;
                }

                mode = ISO2022Modes.ModeASCII;
            }

            if (!output.AddByte(trail))
            {
                break;
            }
        }
    }

    // When flushing (or running without an encoder) finish in ASCII mode.
    if (mode != ISO2022Modes.ModeASCII && (encoder == null || encoder.MustFlush))
    {
        // 50222: undo the shift-out first.
        if (CodePage == 50222 && mode == ISO2022Modes.ModeHalfwidthKatakana)
        {
            if (!output.AddByte(SHIFT_IN))
            {
                // The failed AddByte stepped the char position back; step forward again so the
                // consumed-character count is unchanged. State is kept for the next call.
                output.GetNextChar();
            }
            else
            {
                mode = returnMode;
            }
        }

        // Then escape back to ASCII, unless 50222 is still stuck in katakana mode.
        if (mode != ISO2022Modes.ModeASCII &&
            !(CodePage == 50222 && mode == ISO2022Modes.ModeHalfwidthKatakana))
        {
            if (!output.AddByte(ESCAPE, (byte)'(', (byte)'B'))
            {
                // Same char-position compensation as above.
                output.GetNextChar();
            }
            else
            {
                mode = ISO2022Modes.ModeASCII;
            }
        }
    }

    // Persist state only for a real conversion.
    if (bytes != null && encoder != null)
    {
        encoder.currentMode = mode;
        encoder.shiftInOutMode = returnMode;

        if (!output.fallbackBufferHelper.bUsedEncoder)
        {
            encoder.charLeftOver = (char)0;
        }

        encoder.m_charsUsed = output.CharsUsed;
    }

    return output.Count;
}