/// <summary>
/// Decodes GB18030-encoded bytes into UTF-16 characters.
/// </summary>
/// <param name="bytes">Buffer holding the encoded input.</param>
/// <param name="byteIndex">Index of the first byte to decode.</param>
/// <param name="byteCount">Number of bytes to decode.</param>
/// <param name="chars">Buffer receiving the decoded characters.</param>
/// <param name="charIndex">Index in <paramref name="chars"/> at which writing starts.</param>
/// <returns>The number of characters written to <paramref name="chars"/>.</returns>
/// <remarks>
/// Arguments are handed to <c>CheckRange</c> (defined elsewhere) before decoding.
/// Invalid sequences are replaced by '?'. An incomplete sequence at the end of
/// the input is not decoded and produces no output; the trailing bytes are not
/// carried over to a later call.
/// </remarks>
public override int GetChars(byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
{
    CheckRange(bytes, byteIndex, byteCount, chars, charIndex);

    int byteEnd = byteIndex + byteCount;
    int charStart = charIndex;

    while (byteIndex < byteEnd)
    {
        byte first = bytes [byteIndex];

        if (first < 0x80)
        {
            // Single-byte ASCII range maps straight through.
            chars [charIndex++] = (char)first;
            byteIndex++;
            continue;
        }

        if (first == 0x80)
        {
            // Euro sign - actually it is obsolete,
            // now it's just reserved but not used.
            chars [charIndex++] = '\u20AC';
            byteIndex++;
            continue;
        }

        if (first == 0xFF)
        {
            // Invalid lead byte - fill '?'.
            chars [charIndex++] = '?';
            byteIndex++;
            continue;
        }

        if (byteIndex + 1 >= byteEnd)
        {
            // Incomplete tail: a lead byte with no trail byte. Nothing is emitted.
            break;
        }

        byte second = bytes [byteIndex + 1];

        if (second == 0x7F || second == 0xFF)
        {
            // Invalid trail byte.
            chars [charIndex++] = '?';
            byteIndex += 2;
        }
        else if (0x30 <= second && second <= 0x39)
        {
            // Four-byte form (UCS mapping): a digit in the second position.
            if (byteIndex + 3 >= byteEnd)
            {
                // Incomplete tail: fewer than four bytes remain. Nothing is emitted.
                break;
            }

            long value = GB18030Source.FromGBX(bytes, byteIndex);
            if (value < 0)
            {
                // Invalid data: the magnitude of the negative result is used
                // as the number of bytes to skip.
                chars [charIndex++] = '?';
                byteIndex -= (int)value;
            }
            else if (value >= 0x10000)
            {
                // Supplementary plane: emit a UTF-16 surrogate pair.
                value -= 0x10000;
                chars [charIndex++] = (char)(value / 0x400 + 0xD800);
                chars [charIndex++] = (char)(value % 0x400 + 0xDC00);
                byteIndex += 4;
            }
            else
            {
                // UTF-16 BMP.
                chars [charIndex++] = (char)value;
                byteIndex += 4;
            }
        }
        else
        {
            // Two-byte form. The table holds two bytes (low, high) per entry and
            // each lead byte owns a row of 191 entries starting at trail byte 0x40.
            int ord = ((first - 0x81) * 191 + second - 0x40) * 2;

            // A trail byte below 0x40 would yield a negative column offset and
            // silently index into the previous lead byte's row, decoding to an
            // unrelated character. Treat it as invalid data instead.
            bool mappable = second >= 0x40
                && ord >= 0
                && ord + 1 < gb2312.n2u.Length;

            char c1 = mappable
                ? (char)(gb2312.n2u [ord] + gb2312.n2u [ord + 1] * 256)
                : '\0';

            // A zero entry means "no mapping".
            chars [charIndex++] = c1 == 0 ? '?' : c1;
            byteIndex += 2;
        }
    }

    return charIndex - charStart;
}
/// <summary>
/// Counts the UTF-16 characters that decoding the given GB18030 bytes produces.
/// </summary>
/// <param name="bytes">Buffer holding the encoded input.</param>
/// <param name="start">Index of the first byte to examine.</param>
/// <param name="len">Number of bytes to examine.</param>
/// <returns>The number of characters the byte range decodes to.</returns>
/// <remarks>
/// Arguments are handed to <c>CheckRange</c> (defined elsewhere) before counting.
/// Invalid sequences count as one character (the '?' replacement). An incomplete
/// sequence at the end of the input counts as zero characters, matching
/// <c>GetChars</c>, which stops at such a tail without emitting anything.
/// </remarks>
public override int GetCharCount(byte [] bytes, int start, int len)
{
    CheckRange(bytes, start, len);

    int end = start + len;
    int ret = 0;

    while (start < end)
    {
        byte first = bytes [start];

        if (first <= 0x80 || first == 0xFF)
        {
            // One byte, one character: ASCII, the reserved 0x80 (Euro sign),
            // or the invalid lead byte 0xFF (replaced by '?').
            ret++;
            start++;
            continue;
        }

        if (start + 1 >= end)
        {
            // Incomplete tail: a lead byte with no trail byte. It decodes to
            // nothing, so it must not be counted.
            break;
        }

        byte second = bytes [start + 1];

        if (0x30 <= second && second <= 0x39)
        {
            // Four-byte form (UCS mapping): a digit in the second position.
            if (start + 3 >= end)
            {
                // Incomplete tail: fewer than four bytes remain. It decodes to
                // nothing, so it must not be counted.
                break;
            }

            long value = GB18030Source.FromGBX(bytes, start);
            if (value < 0)
            {
                // Invalid data: one '?' is produced and the magnitude of the
                // negative result is used as the number of bytes to skip.
                ret++;
                start -= (int)value;
            }
            else
            {
                // Values at or above 0x10000 need a UTF-16 surrogate pair.
                ret += value >= 0x10000 ? 2 : 1;
                start += 4;
            }
        }
        else
        {
            // Two-byte form, valid or not (an invalid trail byte such as 0x7F
            // or 0xFF yields '?'): two bytes in, one character out.
            ret++;
            start += 2;
        }
    }

    return ret;
}