Example #1
0
        /// <summary>
        /// Reads <paramref name="utfSize"/> bytes from <paramref name="in"/> and
        /// passes them to <c>ConvertUTF8WithBuf</c> for conversion to a string.
        /// </summary>
        /// <param name="utfSize">Number of encoded bytes to read; also used as the
        /// length of the scratch char buffer handed to the converter.</param>
        /// <param name="in">Source the encoded bytes are read from.</param>
        /// <returns>The value returned by <c>ConvertUTF8WithBuf</c>.</returns>
        private static string DecodeUTF(int utfSize, IDataInput @in)
        {
            byte[] encoded = new byte[utfSize];
            // One char slot per encoded byte is handed to the converter as its output buffer.
            char[] scratch = new char[utfSize];

            @in.ReadFully(encoded, 0, encoded.Length);

            string decoded = ConvertUTF8WithBuf(encoded, scratch, 0, encoded.Length);
            return decoded;
        }
Example #2
0
        /// <summary>
        /// Reads a length-prefixed string from <paramref name="in"/>: an unsigned
        /// 16-bit byte count followed by that many bytes, decoded as one-, two- and
        /// three-byte UTF-8 style sequences.
        /// </summary>
        /// <param name="in">Input to read from. When it is a
        /// <c>DataInputStream</c>, its <c>bytearr</c>/<c>chararr</c> buffers are
        /// reused (and replaced with buffers of twice the needed size when too small).</param>
        /// <returns>The decoded string.</returns>
        /// <exception cref="FormatException">A multi-byte sequence is cut off by the
        /// end of the data, a continuation byte is not of the form 10xxxxxx, or a
        /// lead byte is of the form 10xxxxxx or 1111xxxx.</exception>
        public static string ReadUTF(IDataInput @in)
        {
            int byteLength = @in.ReadUnsignedShort();

            byte[] encoded;
            char[] decoded;
            if (@in is DataInputStream)
            {
                DataInputStream stream = (DataInputStream)@in;
                if (stream.bytearr.Length < byteLength)
                {
                    // Replace both cached buffers together, with headroom for later reads.
                    stream.bytearr = new byte[byteLength * 2];
                    stream.chararr = new char[byteLength * 2];
                }
                decoded = stream.chararr;
                encoded = stream.bytearr;
            }
            else
            {
                encoded = new byte[byteLength];
                decoded = new char[byteLength];
            }

            @in.ReadFully(encoded, 0, byteLength);

            int pos = 0;
            int produced = 0;

            // Fast path: copy the leading run of single-byte (<= 127) characters.
            for (; pos < byteLength; pos++)
            {
                int ascii = encoded[pos];
                if (ascii > 127)
                {
                    break;
                }
                decoded[produced++] = (char)ascii;
            }

            // General path: decode whatever remains, one sequence per iteration.
            while (pos < byteLength)
            {
                int lead = encoded[pos];

                if (lead < 0x80)
                {
                    // 0xxxxxxx
                    pos++;
                    decoded[produced++] = (char)lead;
                }
                else if ((lead & 0xE0) == 0xC0)
                {
                    // 110x xxxx   10xx xxxx
                    pos += 2;
                    if (pos > byteLength)
                    {
                        throw new FormatException(
                                  "malformed input: partial character at end");
                    }

                    int second = encoded[pos - 1];
                    if ((second & 0xC0) != 0x80)
                    {
                        throw new FormatException(
                                  "malformed input around byte " + pos);
                    }

                    int codeUnit = ((lead & 0x1F) << 6) | (second & 0x3F);
                    decoded[produced++] = (char)codeUnit;
                }
                else if ((lead & 0xF0) == 0xE0)
                {
                    // 1110 xxxx  10xx xxxx  10xx xxxx
                    pos += 3;
                    if (pos > byteLength)
                    {
                        throw new FormatException(
                                  "malformed input: partial character at end");
                    }

                    int second = encoded[pos - 2];
                    int third = encoded[pos - 1];
                    bool secondOk = (second & 0xC0) == 0x80;
                    bool thirdOk = (third & 0xC0) == 0x80;
                    if (!(secondOk && thirdOk))
                    {
                        throw new FormatException(
                                  "malformed input around byte " + (pos - 1));
                    }

                    int codeUnit = ((lead & 0x0F) << 12)
                                   | ((second & 0x3F) << 6)
                                   | (third & 0x3F);
                    decoded[produced++] = (char)codeUnit;
                }
                else
                {
                    // 10xx xxxx,  1111 xxxx
                    throw new FormatException(
                              "malformed input around byte " + pos);
                }
            }

            // Multi-byte sequences collapse to one char, so produced may be below byteLength.
            return new string(decoded, 0, produced);
        }
        /// <summary>
        /// Decodes a string stored as an unsigned 16-bit byte count followed by
        /// that many bytes of one-, two- or three-byte UTF-8 style sequences.
        /// </summary>
        /// <param name="in">Input supplying the length prefix and the encoded
        /// bytes. For a <c>DataInputStream</c> the stream's own <c>bytearr</c> and
        /// <c>chararr</c> buffers are used, being swapped for buffers of double the
        /// required length when the byte buffer is too short.</param>
        /// <returns>A string built from the decoded characters.</returns>
        /// <exception cref="FormatException">Thrown when a sequence runs past the
        /// declared length, when a continuation byte does not match 10xxxxxx, or
        /// when a lead byte matches 10xxxxxx or 1111xxxx.</exception>
        public static string ReadUTF(IDataInput @in)
        {
            int total = @in.ReadUnsignedShort();

            byte[] raw;
            char[] text;
            if (!(@in is DataInputStream))
            {
                // No cached buffers available: allocate exactly what this call needs.
                raw = new byte[total];
                text = new char[total];
            }
            else
            {
                DataInputStream owner = (DataInputStream)@in;
                if (total > owner.bytearr.Length)
                {
                    owner.bytearr = new byte[total * 2];
                    owner.chararr = new char[total * 2];
                }
                text = owner.chararr;
                raw = owner.bytearr;
            }

            @in.ReadFully(raw, 0, total);

            int index = 0;
            int written = 0;

            // Leading bytes <= 127 map straight to chars without entering the switch.
            while (index < total && raw[index] <= 127)
            {
                text[written++] = (char)raw[index++];
            }

            while (index < total)
            {
                int lead = raw[index];

                // Dispatch on the high nibble of the lead byte.
                switch (lead & 0xF0)
                {
                    case 0xC0:
                    case 0xD0:
                    {
                        /* 110x xxxx   10xx xxxx */
                        int end = index + 2;
                        if (end > total)
                        {
                            throw new FormatException(
                                "malformed input: partial character at end");
                        }

                        int trail = raw[end - 1];
                        if ((trail & 0xC0) != 0x80)
                        {
                            throw new FormatException(
                                "malformed input around byte " + end);
                        }

                        text[written++] = (char)(((lead & 0x1F) << 6) | (trail & 0x3F));
                        index = end;
                        break;
                    }

                    case 0xE0:
                    {
                        /* 1110 xxxx  10xx xxxx  10xx xxxx */
                        int end = index + 3;
                        if (end > total)
                        {
                            throw new FormatException(
                                "malformed input: partial character at end");
                        }

                        int mid = raw[end - 2];
                        int last = raw[end - 1];
                        if ((mid & 0xC0) != 0x80 || (last & 0xC0) != 0x80)
                        {
                            throw new FormatException(
                                "malformed input around byte " + (end - 1));
                        }

                        text[written++] = (char)(((lead & 0x0F) << 12) |
                                                 ((mid & 0x3F) << 6) |
                                                 (last & 0x3F));
                        index = end;
                        break;
                    }

                    case 0x80:
                    case 0x90:
                    case 0xA0:
                    case 0xB0:
                    case 0xF0:
                        /* 10xx xxxx,  1111 xxxx */
                        throw new FormatException(
                            "malformed input around byte " + index);

                    default:
                        /* 0xxxxxxx */
                        text[written++] = (char)lead;
                        index++;
                        break;
                }
            }

            // written can be smaller than total because multi-byte sequences yield one char.
            return new string(text, 0, written);
        }