示例#1
0
        /// <summary>
        /// Decodes a range of GB18030 bytes into UTF-16 chars.
        /// </summary>
        /// <param name="bytes">Buffer holding the encoded bytes.</param>
        /// <param name="byteIndex">Index of the first byte to decode.</param>
        /// <param name="byteCount">Number of bytes to decode.</param>
        /// <param name="chars">Buffer that receives the decoded chars.</param>
        /// <param name="charIndex">Index in <paramref name="chars"/> at which writing starts.</param>
        /// <returns>The number of chars written to <paramref name="chars"/>.</returns>
        /// <remarks>
        /// Invalid input is replaced by '?'. A sequence that is cut off by the end of
        /// the range (a lone lead byte, or a four-byte sequence with fewer than four
        /// bytes available) ends decoding: nothing is written for it.
        /// </remarks>
        public override int GetChars(byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex)
        {
            CheckRange(bytes, byteIndex, byteCount, chars, charIndex);

            int byteEnd   = byteIndex + byteCount;
            int charStart = charIndex;

            while (byteIndex < byteEnd)
            {
                byte first = bytes [byteIndex];

                if (first < 0x80)
                {
                    // ASCII maps straight through.
                    chars [charIndex++] = (char)first;
                    byteIndex++;
                    continue;
                }
                if (first == 0x80)
                {
                    // Euro sign - actually it is obsolete,
                    // now it's just reserved but not used
                    chars [charIndex++] = '\u20AC';
                    byteIndex++;
                    continue;
                }
                if (first == 0xFF)
                {
                    // invalid data - fill '?'
                    chars [charIndex++] = '?';
                    byteIndex++;
                    continue;
                }
                if (byteIndex + 1 >= byteEnd)
                {
                    // Lead byte with no trail byte left in the range: incomplete tail.
                    break;
                }

                byte second = bytes [byteIndex + 1];
                if (0x30 <= second && second <= 0x39)
                {
                    // Four-byte form (UCS mapping): needs all four bytes inside the range.
                    if (byteIndex + 3 >= byteEnd)
                    {
                        // incomplete tail.
                        break;
                    }

                    long value = GB18030Source.FromGBX(bytes, byteIndex);
                    if (value < 0)
                    {
                        // Invalid data. The negated result is used as the number of
                        // bytes to step over (FromGBX is defined elsewhere).
                        chars [charIndex++] = '?';
                        byteIndex          -= (int)value;
                    }
                    else if (value >= 0x10000)
                    {
                        // Supplementary plane: emit a UTF-16 surrogate pair.
                        value -= 0x10000;
                        chars [charIndex++] = (char)(value / 0x400 + 0xD800);
                        chars [charIndex++] = (char)(value % 0x400 + 0xDC00);
                        byteIndex          += 4;
                    }
                    else
                    {
                        // UTF16 BMP
                        chars [charIndex++] = (char)value;
                        byteIndex          += 4;
                    }
                }
                else if (second < 0x40 || second == 0x7F || second == 0xFF)
                {
                    // Not a legal trail byte. Values below 0x40 must be rejected here:
                    // the table index below subtracts 0x40, so for any lead byte above
                    // 0x81 they would land in the previous row and decode to an
                    // unrelated character.
                    chars [charIndex++] = '?';
                    byteIndex          += 2;
                }
                else
                {
                    // Two-byte form: the table has 191 columns (trail bytes 0x40..0xFE)
                    // per lead byte and two bytes (low, high) per entry.
                    int  ord = ((first - 0x81) * 191 + second - 0x40) * 2;
                    char c1  = ord + 1 >= gb2312.n2u.Length ?
                               '\0' : (char)(gb2312.n2u [ord] + gb2312.n2u [ord + 1] * 256);

                    // A zero entry (or an index past the table) means "no mapping".
                    chars [charIndex++] = c1 == 0 ? '?' : c1;
                    byteIndex          += 2;
                }
            }

            return charIndex - charStart;
        }
示例#2
0
        /// <summary>
        /// Counts the UTF-16 chars that decoding a range of GB18030 bytes produces.
        /// </summary>
        /// <param name="bytes">Buffer holding the encoded bytes.</param>
        /// <param name="start">Index of the first byte to examine.</param>
        /// <param name="len">Number of bytes to examine.</param>
        /// <returns>The number of chars the range decodes to.</returns>
        /// <remarks>
        /// The walk mirrors GetChars step for step so the two always agree. In
        /// particular, a sequence cut off by the end of the range contributes nothing,
        /// because GetChars stops at such a tail without writing any char. Counting
        /// those bytes would make callers allocate buffers with unused trailing slots.
        /// </remarks>
        public override int GetCharCount(byte [] bytes, int start, int len)
        {
            CheckRange(bytes, start, len);

            int end = start + len;
            int ret = 0;

            while (start < end)
            {
                byte first = bytes [start];

                if (first <= 0x80 || first == 0xFF)
                {
                    // ASCII, the reserved 0x80 (euro sign) and the invalid 0xFF
                    // each consume one byte and yield one char.
                    ret++;
                    start++;
                    continue;
                }
                if (start + 1 >= end)
                {
                    // Lead byte with no trail byte left: incomplete tail, no char.
                    break;
                }

                byte second = bytes [start + 1];
                if (0x30 <= second && second <= 0x39)
                {
                    // Four-byte form (UCS mapping): needs all four bytes inside the range.
                    if (start + 3 >= end)
                    {
                        // incomplete tail, no char.
                        break;
                    }

                    long value = GB18030Source.FromGBX(bytes, start);
                    if (value < 0)
                    {
                        // Invalid data becomes a single '?'. The negated result is used
                        // as the number of bytes to step over (FromGBX is defined elsewhere).
                        ret++;
                        start -= (int)value;
                    }
                    else
                    {
                        // Code points from 0x10000 up need a surrogate pair, others one char.
                        ret   += value >= 0x10000 ? 2 : 1;
                        start += 4;
                    }
                }
                else
                {
                    // Two-byte form, valid (GB2312 mapping) or invalid (0x7F / 0xFF
                    // trail byte): always two bytes in, one char out.
                    ret++;
                    start += 2;
                }
            }

            return ret;
        }