Example #1
0
            /*
             * Fetch the next character from the input and store its raw bytes,
             * packed into an int, in it.charValue.
             *
             * A lead byte of 0x00-0x7f or 0xff is a complete one-byte character.
             * Any other lead byte is combined with exactly one trail byte, and
             * it.error is set when that trail byte is below 0x40 or is 0x7f/0xff.
             *
             * Returns false when it.NextByte(det) yields a negative value for
             * either byte (presumably "no more input" - confirm against NextByte),
             * true otherwise.
             */
            internal override bool NextChar(CharsetRecog_mbcs.iteratedChar it, CharsetDetector det)
            {
                // Start a fresh character at the current read position.
                it.index = it.nextIndex;
                it.error = false;

                int lead = it.charValue = it.NextByte(det);
                if (lead < 0)
                {
                    return false;
                }

                bool isSingleByte = lead <= 0x7f || lead == 0xff;
                if (isSingleByte)
                {
                    return true;
                }

                int trail = it.NextByte(det);
                if (trail < 0)
                {
                    return false;
                }

                // Two-byte character: lead byte in the high bits, trail byte in the low bits.
                it.charValue = (it.charValue << 8) | trail;

                bool isBadTrail = trail < 0x40 || trail == 0x7f || trail == 0xff;
                if (isBadTrail)
                {
                    it.error = true;
                }

                return true;
            }
Example #2
0
            /*
             * Fetch the next character from the input and store its raw bytes,
             * packed into an int, in it.charValue.
             *
             * A lead byte of 0x00-0x7f or 0xa1-0xdf is a complete one-byte
             * character. Any other lead byte is combined with exactly one trail
             * byte, and it.error is set when that trail byte lies outside
             * 0x40-0xff.
             *
             * Returns false when it.NextByte(det) yields a negative value for
             * either byte (presumably "no more input" - confirm against NextByte),
             * true otherwise.
             */
            internal override bool NextChar(CharsetRecog_mbcs.iteratedChar it, CharsetDetector det)
            {
                // Start a fresh character at the current read position.
                it.index = it.nextIndex;
                it.error = false;

                int lead = it.charValue = it.NextByte(det);
                if (lead < 0)
                {
                    return false;
                }

                bool isLowSingle  = lead <= 0x7f;
                bool isHighSingle = lead > 0xa0 && lead <= 0xdf;
                if (isLowSingle || isHighSingle)
                {
                    return true;
                }

                int trail = it.NextByte(det);
                if (trail < 0)
                {
                    return false;
                }

                // Two-byte character: lead byte in the high bits, trail byte in the low bits.
                it.charValue = (lead << 8) | trail;

                // Legal trail bytes are 0x40-0x7f and 0x80-0xff, i.e. the
                // contiguous range 0x40-0xff.
                if (trail < 0x40 || trail > 0xff)
                {
                    // Illegal second byte value.
                    it.error = true;
                }

                return true;
            }
Example #3
0
            /*
             * Get the next character value for an encoding whose characters are
             * one, two or four bytes long. Character "value" is simply the raw
             * bytes that make up the character packed into an int, stored in
             * it.charValue.
             *
             *   - lead byte <= 0x80: one-byte character.
             *   - lead byte 0x81-0xFE followed by a trail byte in 0x40-0x7E or
             *     0x80-0xFE: two-byte character.
             *   - lead byte 0x81-0xFE followed by 0x30-0x39, 0x81-0xFE, 0x30-0x39:
             *     four-byte character.
             *   - lead byte 0x81-0xFE followed by anything else: it.error is set.
             *
             * NOTE(review): this byte layout matches GB18030, although the
             * original header comment mentioned EUC - confirm against the
             * enclosing recognizer class.
             *
             * Returns true while it.done is false. it.done is set here when the
             * lead byte could not be read (NextByte returned a negative value).
             */
            internal override bool NextChar(CharsetRecog_mbcs.iteratedChar it, CharsetDetector det)
            {
                // Start a fresh character at the current read position.
                it.index = it.nextIndex;
                it.error = false;

                int firstByte = it.charValue = it.NextByte(det);
                if (firstByte < 0)
                {
                    // Ran off the end of the input data
                    it.done = true;
                    return false;
                }

                if (firstByte <= 0x80)
                {
                    // Single byte char
                    return !it.done;
                }

                int secondByte = it.NextByte(det);
                it.charValue = (it.charValue << 8) | secondByte;

                if (firstByte >= 0x81 && firstByte <= 0xFE)
                {
                    // Two byte char. The upper trail range starts at hex 0x80:
                    // 0x7F is not a legal trail byte and must fall through to
                    // the error path below.
                    if ((secondByte >= 0x40 && secondByte <= 0x7E) ||
                        (secondByte >= 0x80 && secondByte <= 0xFE))
                    {
                        return !it.done;
                    }

                    // Four byte char
                    if (secondByte >= 0x30 && secondByte <= 0x39)
                    {
                        int thirdByte = it.NextByte(det);

                        if (thirdByte >= 0x81 && thirdByte <= 0xFE)
                        {
                            int fourthByte = it.NextByte(det);

                            if (fourthByte >= 0x30 && fourthByte <= 0x39)
                            {
                                it.charValue = (it.charValue << 16)
                                               | (thirdByte << 8) | fourthByte;
                                return !it.done;
                            }
                        }
                    }

                    // Neither a well-formed two-byte nor four-byte sequence.
                    it.error = true;
                }

                // A lead byte of 0xFF reaches this point with one extra byte
                // consumed and no error flagged.
                return !it.done;
            }
Example #4
0
            /*
             * Get the next character value for EUC based encodings.
             * Character "value" is simply the raw bytes that make up the character
             * packed into an int, stored in it.charValue.
             *
             *   - lead byte <= 0x8d: one-byte character.
             *   - lead byte 0xa1-0xfe or 0x8e: two bytes total; it.error is set
             *     when the second byte is below 0xa1.
             *   - lead byte 0x8f: three bytes total; it.error is set when the third
             *     byte is below 0xa1 (the second byte is not checked).
             *   - any other lead byte: a second byte is consumed and packed, with
             *     no error flagged.
             *
             * Returns true while it.done is false. it.done is set here when the
             * lead byte could not be read (NextByte returned a negative value).
             */
            internal override bool NextChar(CharsetRecog_mbcs.iteratedChar it, CharsetDetector det)
            {
                // Start a fresh character at the current read position.
                it.index = it.nextIndex;
                it.error = false;

                int lead = it.charValue = it.NextByte(det);

                if (lead < 0)
                {
                    // Ran off the end of the input data
                    it.done = true;
                }
                else if (lead > 0x8d)
                {
                    // Every lead byte above 0x8d is followed by at least one more byte.
                    int second = it.NextByte(det);
                    it.charValue = (it.charValue << 8) | second;

                    if (lead >= 0xA1 && lead <= 0xfe)
                    {
                        // Two byte Char
                        if (second < 0xa1)
                        {
                            it.error = true;
                        }
                    }
                    else if (lead == 0x8e)
                    {
                        // Code Set 2.
                        // In EUC-JP, total char size is 2 bytes, only one byte of
                        // actual char value.
                        // In EUC-TW, total char size is 4 bytes, three bytes
                        // contribute to char value.
                        // We don't know which we've got, so treat it like EUC-JP.
                        // If the data really was EUC-TW, the following two bytes
                        // will look like a well formed 2 byte char.
                        if (second < 0xa1)
                        {
                            it.error = true;
                        }
                    }
                    else if (lead == 0x8f)
                    {
                        // Code set 3.
                        // Three byte total char size, two bytes of actual char
                        // value.
                        int third = it.NextByte(det);
                        it.charValue = (it.charValue << 8) | third;
                        if (third < 0xa1)
                        {
                            it.error = true;
                        }
                    }
                }

                return !it.done;
            }