// Decodes GB18030 bytes into UTF-16 chars, or just counts them when chars is null.
//
//   bytes / byteCount  - input bytes to decode.
//   chars / charCount  - output buffer; chars may be null when the caller only wants a count.
//   baseDecoder        - optional GB18030Decoder carrying up to four bytes left over from
//                        a previous call (a slot holding -1 is empty).
//
// Returns buffer.Count.  When a decoder is supplied its m_bytesUsed is updated, and its
// left-over bytes are rewritten only when chars is not null (never while counting).
internal override unsafe int GetChars(byte* bytes, int byteCount,
                                      char* chars, int charCount, DecoderNLS baseDecoder)
{
    // The internal caller has already validated the arguments, so only assert here.
    // chars is deliberately not asserted: null chars means "count only".
    BCLDebug.Assert(bytes != null, "[GB18030Encoding.GetChars]bytes is null");
    BCLDebug.Assert(byteCount >= 0, "[GB18030Encoding.GetChars]byteCount is negative");
    BCLDebug.Assert(charCount >= 0, "[GB18030Encoding.GetChars]charCount is negative");

    GB18030Decoder decoder = (GB18030Decoder)baseDecoder;

    Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
        this, decoder, chars, charCount, bytes, byteCount);

    // Local copies of the pending bytes, so the decoder is not disturbed until we
    // know we are really converting.
    short pending1 = -1;
    short pending2 = -1;
    short pending3 = -1;
    short pending4 = -1;

    // ---- Phase 1: finish whatever the decoder was holding from the previous call ----
    if (decoder != null && decoder.bLeftOver1 != -1)
    {
        pending1 = decoder.bLeftOver1;
        pending2 = decoder.bLeftOver2;
        pending3 = decoder.bLeftOver3;
        pending4 = decoder.bLeftOver4;

        // More than one pass may be needed: a four-byte attempt that turns out to be
        // invalid gets re-examined one byte at a time.
        while (pending1 != -1)
        {
            if (!IsGBLeadByte(pending1))
            {
                // Not a lead byte (e.g. the tail of an earlier bad four-byte sequence):
                // ASCII is emitted as is, anything else goes to the fallback.
                bool emitted = (pending1 <= 0x7f)
                    ? buffer.AddChar((char)pending1)
                    : buffer.Fallback((byte)pending1);
                if (!emitted)
                    break;

                // Scoot the remaining bytes down one slot and look again.
                pending1 = pending2;
                pending2 = pending3;
                pending3 = pending4;
                pending4 = -1;
                continue;
            }

            // Pull input until we hold either a two-byte pair or all four bytes.
            while (pending2 == -1 || (IsGBFourByteTrailing(pending2) && pending4 == -1))
            {
                if (buffer.MoreData)
                {
                    if (pending2 == -1)
                        pending2 = buffer.GetNextByte();
                    else if (pending3 == -1)
                        pending3 = buffer.GetNextByte();
                    else
                        pending4 = buffer.GetNextByte();
                    continue;
                }

                // Input exhausted.  If we must flush, fall out and let the
                // classification below deal with the incomplete sequence.
                if (decoder.MustFlush)
                    break;

                // No flush needed: remember what we have (unless only counting) and return.
                if (chars != null)
                {
                    decoder.bLeftOver1 = pending1;
                    decoder.bLeftOver2 = pending2;
                    decoder.bLeftOver3 = pending3;
                    decoder.bLeftOver4 = pending4;
                }

                decoder.m_bytesUsed = buffer.BytesUsed;
                return buffer.Count;
            }

            if (IsGBTwoByteTrailing(pending2))
            {
                // Two-byte sequence: the table index is (lead << 8) | trail.
                int tableIndex = (pending1 << 8) | unchecked((byte)pending2);
                if (!buffer.AddChar(this.mapBytesToUnicode[tableIndex], 2))
                    break;

                pending1 = -1;
                pending2 = -1;
            }
            else if (IsGBFourByteTrailing(pending2) && IsGBLeadByte(pending3) && IsGBFourByteTrailing(pending4))
            {
                // Well-formed four-byte sequence.
                int linearOffset = GetFourBytesOffset(pending1, pending2, pending3, pending4);
                bool stored;

                if (linearOffset <= GBLast4ByteCode)
                {
                    // Maps to a single BMP char.
                    stored = buffer.AddChar(map4BytesToUnicode[linearOffset], 4);
                }
                else if (linearOffset >= GBSurrogateOffset && linearOffset <= GBLastSurrogateOffset)
                {
                    // Maps to a surrogate pair, so two chars are needed.
                    linearOffset -= GBSurrogateOffset;
                    stored = buffer.AddChar(
                        unchecked((char)(0xd800 + (linearOffset / 0x400))),
                        unchecked((char)(0xdc00 + (linearOffset % 0x400))),
                        4);
                }
                else
                {
                    // A real GB18030 code point that has no Unicode mapping.
                    stored = buffer.Fallback((byte)pending1, (byte)pending2, (byte)pending3, (byte)pending4);
                }

                if (!stored)
                    break;

                pending1 = -1;
                pending2 = -1;
                pending3 = -1;
                pending4 = -1;
            }
            else
            {
                // Not a valid sequence: fall back the first byte and retry from the next one.
                if (!buffer.Fallback((byte)pending1))
                    break;

                pending1 = pending2;
                pending2 = pending3;
                pending3 = pending4;
                pending4 = -1;
            }
        }
    }

    // ---- Phase 2: decode the rest of the input ----
    while (buffer.MoreData)
    {
        byte lead = buffer.GetNextByte();

        // ASCII is the easy case.
        if (lead <= 0x7f)
        {
            if (!buffer.AddChar((char)lead))
                break;      // no room in the output buffer
            continue;
        }

        // Neither ASCII nor a lead byte.
        if (!IsGBLeadByte(lead))
        {
            if (!buffer.Fallback(lead))
                break;
            continue;
        }

        // Lead byte with nothing after it.
        if (!buffer.MoreData)
        {
            if (decoder != null && !decoder.MustFlush)
            {
                // Keep it for the next call (but never touch the state while counting).
                if (chars != null)
                {
                    pending1 = lead;
                    pending2 = -1;
                    pending3 = -1;
                    pending4 = -1;
                }
                break;
            }

            if (!buffer.Fallback(lead))
                break;
            continue;
        }

        byte trail = buffer.GetNextByte();

        // Two-byte sequence.
        if (IsGBTwoByteTrailing(trail))
        {
            int tableIndex = (lead << 8) | trail;
            if (!buffer.AddChar(this.mapBytesToUnicode[tableIndex], 2))
                break;
            continue;
        }

        // Second byte fits neither form: fall back the lead byte and re-read the second one.
        if (!IsGBFourByteTrailing(trail))
        {
            buffer.AdjustBytes(-1);
            if (!buffer.Fallback(lead))
                break;
            continue;
        }

        // Start of a four-byte sequence, but fewer than two more bytes are available.
        if (!buffer.EvenMoreData(2))
        {
            if (decoder != null && !decoder.MustFlush)
            {
                // Park what we have, including a possible third byte, in the decoder
                // (again, not while counting).
                if (chars != null)
                {
                    pending1 = lead;
                    pending2 = trail;

                    if (buffer.MoreData)
                        pending3 = buffer.GetNextByte();
                    else
                        pending3 = -1;

                    pending4 = -1;
                }
                break;
            }

            // Cannot be kept for later, so fall back the two bytes we have.
            if (!buffer.Fallback(lead, trail))
                break;
            continue;
        }

        byte lead2 = buffer.GetNextByte();
        byte trail2 = buffer.GetNextByte();

        // Bytes three and four do not complete a four-byte sequence:
        // fall back the first byte and retry the other three.
        if (!(IsGBLeadByte(lead2) && IsGBFourByteTrailing(trail2)))
        {
            buffer.AdjustBytes(-3);
            if (!buffer.Fallback(lead))
                break;
            continue;
        }

        int offset = GetFourBytesOffset(lead, trail, lead2, trail2);
        bool added;

        if (offset <= GBLast4ByteCode)
        {
            // Single BMP char.
            added = buffer.AddChar(map4BytesToUnicode[offset], 4);
        }
        else if (offset >= GBSurrogateOffset && offset <= GBLastSurrogateOffset)
        {
            // Surrogate pair.
            offset -= GBSurrogateOffset;
            added = buffer.AddChar(
                unchecked((char)(0xd800 + (offset / 0x400))),
                unchecked((char)(0xdc00 + (offset % 0x400))),
                4);
        }
        else
        {
            // Real GB18030 code point without a Unicode mapping.
            added = buffer.Fallback(lead, trail, lead2, trail2);
        }

        if (!added)
            break;
    }

    // ---- Phase 3: publish state back to the decoder ----
    if (decoder != null)
    {
        // Left-over bytes are only stored when converting (chars != null), not when counting.
        if (chars != null)
        {
            decoder.bLeftOver1 = pending1;
            decoder.bLeftOver2 = pending2;
            decoder.bLeftOver3 = pending3;
            decoder.bLeftOver4 = pending4;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    // Number of chars produced (or counted).
    return buffer.Count;
}
// Decodes GB18030 bytes into chars through an EncodingCharBuffer and returns buffer.Count.
//
// A supplied decoder may carry up to four bytes from an earlier call (bLeftOver1..4,
// where -1 means "empty"); those are processed first, then the remaining input.
// The decoder's left-over slots are rewritten only when chars is not null, while
// m_bytesUsed is updated whenever a decoder is present.
internal override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
{
    // Four-byte offsets up to this value are looked up in map4BytesToUnicode.
    const int LastTableOffset = 0x99fb;
    // Four-byte offsets in this inclusive range are emitted as a 0xd800/0xdc00 based pair.
    const int FirstPairOffset = 0x2e248;
    const int LastPairOffset = 0x12e247;

    GB18030Decoder decoder = (GB18030Decoder)baseDecoder;
    Encoding.EncodingCharBuffer buffer =
        new Encoding.EncodingCharBuffer(this, decoder, chars, charCount, bytes, byteCount);

    // Working copies of the bytes carried between calls.
    short held1 = -1;
    short held2 = -1;
    short held3 = -1;
    short held4 = -1;

    if (decoder != null && decoder.bLeftOver1 != -1)
    {
        held1 = decoder.bLeftOver1;
        held2 = decoder.bLeftOver2;
        held3 = decoder.bLeftOver3;
        held4 = decoder.bLeftOver4;

        while (held1 != -1)
        {
            if (!IsGBLeadByte(held1))
            {
                // Values up to 0x7f are added directly; others are passed to buffer.Fallback.
                if (held1 <= 0x7f)
                {
                    if (!buffer.AddChar((char)((ushort)held1)))
                    {
                        break;
                    }
                }
                else if (!buffer.Fallback((byte)held1))
                {
                    break;
                }

                // Shift the held bytes down one slot and re-examine.
                held1 = held2;
                held2 = held3;
                held3 = held4;
                held4 = -1;
                continue;
            }

            // Keep reading while the second byte is missing, or while it is a
            // four-byte trailing byte and the fourth byte is still missing.
            while (held2 == -1 || (IsGBFourByteTrailing(held2) && held4 == -1))
            {
                if (!buffer.MoreData)
                {
                    if (!decoder.MustFlush)
                    {
                        // Out of input and no flush requested: save state and return now.
                        if (chars != null)
                        {
                            decoder.bLeftOver1 = held1;
                            decoder.bLeftOver2 = held2;
                            decoder.bLeftOver3 = held3;
                            decoder.bLeftOver4 = held4;
                        }

                        decoder.m_bytesUsed = buffer.BytesUsed;
                        return buffer.Count;
                    }

                    // Flushing: classify whatever we have below.
                    break;
                }

                // Fill the first empty slot.
                if (held2 == -1)
                {
                    held2 = buffer.GetNextByte();
                }
                else if (held3 == -1)
                {
                    held3 = buffer.GetNextByte();
                }
                else
                {
                    held4 = buffer.GetNextByte();
                }
            }

            if (IsGBTwoByteTrailing(held2))
            {
                // Two-byte form: index is (first << 8) | second.
                int pairIndex = (held1 << 8) | (byte)held2;
                if (!buffer.AddChar(base.mapBytesToUnicode[pairIndex], 2))
                {
                    break;
                }

                held1 = -1;
                held2 = -1;
            }
            else if (IsGBFourByteTrailing(held2) && IsGBLeadByte(held3) && IsGBFourByteTrailing(held4))
            {
                // Four-byte form.
                int quadOffset = GetFourBytesOffset(held1, held2, held3, held4);

                if (quadOffset <= LastTableOffset)
                {
                    if (!buffer.AddChar(this.map4BytesToUnicode[quadOffset], 4))
                    {
                        break;
                    }
                }
                else if (quadOffset >= FirstPairOffset && quadOffset <= LastPairOffset)
                {
                    quadOffset -= FirstPairOffset;
                    if (!buffer.AddChar((char)(0xd800 + (quadOffset / 0x400)), (char)(0xdc00 + (quadOffset % 0x400)), 4))
                    {
                        break;
                    }
                }
                else if (!buffer.Fallback((byte)held1, (byte)held2, (byte)held3, (byte)held4))
                {
                    break;
                }

                held1 = -1;
                held2 = -1;
                held3 = -1;
                held4 = -1;
            }
            else
            {
                // Neither form matched: fall back the first byte, shift down and retry.
                if (!buffer.Fallback((byte)held1))
                {
                    break;
                }

                held1 = held2;
                held2 = held3;
                held3 = held4;
                held4 = -1;
            }
        }
    }

    // Main pass over the remaining input.  Each iteration either records a success
    // flag in 'ok', or breaks out directly after parking an incomplete sequence.
    while (buffer.MoreData)
    {
        byte b1 = buffer.GetNextByte();
        bool ok;

        if (b1 <= 0x7f)
        {
            ok = buffer.AddChar((char)b1);
        }
        else if (!IsGBLeadByte(b1))
        {
            ok = buffer.Fallback(b1);
        }
        else if (!buffer.MoreData)
        {
            // Lead byte is the last input byte.
            if (decoder != null && !decoder.MustFlush)
            {
                // Held over for a later call; slots are only written when chars != null.
                if (chars != null)
                {
                    held1 = b1;
                    held2 = -1;
                    held3 = -1;
                    held4 = -1;
                }
                break;
            }

            ok = buffer.Fallback(b1);
        }
        else
        {
            byte b2 = buffer.GetNextByte();

            if (IsGBTwoByteTrailing(b2))
            {
                int pairIndex = (b1 << 8) | b2;
                ok = buffer.AddChar(base.mapBytesToUnicode[pairIndex], 2);
            }
            else if (!IsGBFourByteTrailing(b2))
            {
                // Second byte fits neither form.
                // NOTE(review): AdjustBytes(-1) presumably rewinds so b2 is read again - confirm.
                buffer.AdjustBytes(-1);
                ok = buffer.Fallback(b1);
            }
            else if (!buffer.EvenMoreData(2))
            {
                // Four-byte form started but the input cannot supply two more bytes.
                if (decoder != null && !decoder.MustFlush)
                {
                    if (chars != null)
                    {
                        held1 = b1;
                        held2 = b2;

                        // Take a third byte too if one is available.
                        if (buffer.MoreData)
                        {
                            held3 = buffer.GetNextByte();
                        }
                        else
                        {
                            held3 = -1;
                        }

                        held4 = -1;
                    }
                    break;
                }

                ok = buffer.Fallback(b1, b2);
            }
            else
            {
                byte b3 = buffer.GetNextByte();
                byte b4 = buffer.GetNextByte();

                if (IsGBLeadByte(b3) && IsGBFourByteTrailing(b4))
                {
                    int quadOffset = GetFourBytesOffset(b1, b2, b3, b4);

                    if (quadOffset <= LastTableOffset)
                    {
                        ok = buffer.AddChar(this.map4BytesToUnicode[quadOffset], 4);
                    }
                    else if (quadOffset >= FirstPairOffset && quadOffset <= LastPairOffset)
                    {
                        quadOffset -= FirstPairOffset;
                        ok = buffer.AddChar((char)(0xd800 + (quadOffset / 0x400)), (char)(0xdc00 + (quadOffset % 0x400)), 4);
                    }
                    else
                    {
                        ok = buffer.Fallback(b1, b2, b3, b4);
                    }
                }
                else
                {
                    // Bytes three and four do not complete the sequence.
                    // NOTE(review): AdjustBytes(-3) presumably rewinds so b2..b4 are read again - confirm.
                    buffer.AdjustBytes(-3);
                    ok = buffer.Fallback(b1);
                }
            }
        }

        if (!ok)
        {
            break;
        }
    }

    // Hand state back to the decoder, if there is one.
    if (decoder != null)
    {
        if (chars != null)
        {
            decoder.bLeftOver1 = held1;
            decoder.bLeftOver2 = held2;
            decoder.bLeftOver3 = held3;
            decoder.bLeftOver4 = held4;
        }

        decoder.m_bytesUsed = buffer.BytesUsed;
    }

    return buffer.Count;
}