예제 #1
0
        /// <summary>
        /// Parses the next token from the input of this tokenizer and records its
        /// type in <c>Ttype</c>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// If a token has been pushed back, that token is returned again without
        /// reading any input. Otherwise <c>StringValue</c> is reset to <c>null</c>
        /// and a new token is scanned:
        /// </para>
        /// <list type="bullet">
        /// <item><description><c>TtEof</c> when the input is exhausted.</description></item>
        /// <item><description><c>TtEol</c> for a line terminator, but only when end-of-line is
        /// flagged as significant; otherwise line terminators are skipped as whitespace.</description></item>
        /// <item><description><c>TtNumber</c> for a number; <c>NumberValue</c> holds the value and
        /// <c>StringValue</c> its invariant-culture text (with a trailing <c>.0</c> when the
        /// literal contained a decimal point but the value is a whole number).</description></item>
        /// <item><description><c>TtWord</c> for a word; <c>StringValue</c> holds the text,
        /// lower-cased when forced lower-casing is enabled.</description></item>
        /// <item><description>The quote character itself for a quoted string; <c>StringValue</c>
        /// holds the body with backslash escapes decoded.</description></item>
        /// <item><description>The character itself for any other ordinary character.</description></item>
        /// </list>
        /// <para>
        /// Comments are skipped. <c>LineNumber</c> is incremented for every line
        /// terminator consumed as whitespace or inside a <c>/* ... */</c> comment.
        /// </para>
        /// </remarks>
        /// <returns>The value stored in <c>Ttype</c>.</returns>
        public int NextToken()
        {
            if (_pushedBack)
            {
                _pushedBack = false;
                return Ttype;
            }

            byte[] ct = _characterType;
            StringValue = null;

            int c = _peekc;
            if (c < 0)
            {
                c = NeedChar;
            }
            if (c == SkipLf)
            {
                /* The previous token was an EOL produced by '\r'; swallow the
                 * '\n' of a CRLF pair so that it is not reported twice.
                 */
                c = Read();
                if (c < 0)
                {
                    return Ttype = TtEof;
                }
                if (c == '\n')
                {
                    c = NeedChar;
                }
            }
            if (c == NeedChar)
            {
                c = Read();
                if (c < 0)
                {
                    return Ttype = TtEof;
                }
            }
            Ttype = c; /* Just to be safe */

            /* Set _peekc so that the next invocation of NextToken will read
             * another character unless _peekc is reset in this invocation.
             */
            _peekc = NeedChar;

            /* Characters outside the 256-entry table are treated as alphabetic. */
            int ctype = c < 256 ? ct[c] : CtAlpha;

            /* ---- Whitespace and line terminators ---- */
            while ((ctype & CtWhitespace) != 0)
            {
                if (c == '\r')
                {
                    LineNumber++;
                    if (_eolIsSignificantP)
                    {
                        /* Remember to drop a following '\n' on the next call. */
                        _peekc = SkipLf;
                        return Ttype = TtEol;
                    }
                    c = Read();
                    if (c == '\n')
                    {
                        c = Read();
                    }
                }
                else
                {
                    if (c == '\n')
                    {
                        LineNumber++;
                        if (_eolIsSignificantP)
                        {
                            return Ttype = TtEol;
                        }
                    }
                    c = Read();
                }
                if (c < 0)
                {
                    return Ttype = TtEof;
                }
                ctype = c < 256 ? ct[c] : CtAlpha;
            }

            /* ---- Numbers ---- */
            if ((ctype & CtDigit) != 0)
            {
                bool neg = false;
                if (c == '-')
                {
                    c = Read();
                    if (c != '.' && (c < '0' || c > '9'))
                    {
                        /* A lone '-' is an ordinary character, not a number. */
                        _peekc = c;
                        return Ttype = '-';
                    }
                    neg = true;
                }

                double v = 0;
                int decexp = 0;   /* number of digits seen after the decimal point */
                int seendot = 0;  /* 1 once the decimal point has been consumed */
                while (true)
                {
                    if (c == '.' && seendot == 0)
                    {
                        seendot = 1;
                    }
                    else if ('0' <= c && c <= '9')
                    {
                        v = v * 10 + (c - '0');
                        decexp += seendot;
                    }
                    else
                    {
                        break;
                    }
                    c = Read();
                }
                _peekc = c;

                if (decexp != 0)
                {
                    double denom = 10;
                    decexp--;
                    while (decexp > 0)
                    {
                        denom *= 10;
                        decexp--;
                    }
                    /* Do one division of a likely-to-be-more-accurate number */
                    v = v / denom;
                }

                double value = neg ? -v : v;
                NumberValue = value;
                StringValue = value.ToString(CultureInfo.InvariantCulture);

                /* A literal written with a decimal point keeps one in its text
                 * form. Only whole, finite values rendered without an exponent
                 * ("3", "-3") need the suffix; "1.5" or "1E+20" must be left
                 * alone. The exact comparison is intentional: Math.Truncate
                 * returns its argument unchanged for whole numbers.
                 */
                bool isWhole = !double.IsInfinity(value) && Math.Truncate(value) == value;
                if (seendot == 1 && isWhole && StringValue.IndexOf('E') < 0)
                {
                    StringValue += ".0";
                }
                return Ttype = TtNumber;
            }

            /* ---- Words ---- */
            if ((ctype & CtAlpha) != 0)
            {
                var word = new System.Text.StringBuilder();
                do
                {
                    word.Append((char)c);
                    c = Read();
                    /* End of input terminates the word like whitespace would. */
                    ctype = c < 0 ? CtWhitespace : c < 256 ? ct[c] : CtAlpha;
                } while ((ctype & (CtAlpha | CtDigit)) != 0);
                _peekc = c;

                StringValue = word.ToString();
                if (_forceLower)
                {
                    /* Culture-independent so that tokens do not vary with the
                     * current thread culture (e.g. Turkish dotless i).
                     */
                    StringValue = StringValue.ToLowerInvariant();
                }
                return Ttype = TtWord;
            }

            /* ---- Quoted strings ---- */
            if ((ctype & CtQuote) != 0)
            {
                var text = new System.Text.StringBuilder();

                Ttype = c;

                /* Invariants (because \Octal needs a lookahead):
                 *   (i)  c contains char value
                 *   (ii) d contains the lookahead
                 */
                int d = Read();
                while (d >= 0 && d != Ttype && d != '\n' && d != '\r')
                {
                    if (d == '\\')
                    {
                        c = Read();
                        if (c < 0)
                        {
                            /* Input ended right after the backslash: stop here
                             * instead of storing the end-of-input sentinel as a
                             * character.
                             */
                            d = c;
                            break;
                        }

                        int first = c; /* To allow \377, but not \477 */
                        if (c >= '0' && c <= '7')
                        {
                            /* Up to three octal digits. */
                            c = c - '0';
                            int c2 = Read();
                            if ('0' <= c2 && c2 <= '7')
                            {
                                c = (c << 3) + (c2 - '0');
                                c2 = Read();
                                if ('0' <= c2 && c2 <= '7' && first <= '3')
                                {
                                    c = (c << 3) + (c2 - '0');
                                    d = Read();
                                }
                                else
                                {
                                    d = c2;
                                }
                            }
                            else
                            {
                                d = c2;
                            }
                        }
                        else
                        {
                            /* Single-letter escapes; any other escaped character
                             * stands for itself.
                             */
                            switch (c)
                            {
                            case 'a':
                                c = 0x7;
                                break;

                            case 'b':
                                c = '\b';
                                break;

                            case 'f':
                                c = 0xC;
                                break;

                            case 'n':
                                c = '\n';
                                break;

                            case 'r':
                                c = '\r';
                                break;

                            case 't':
                                c = '\t';
                                break;

                            case 'v':
                                c = 0xB;
                                break;
                            }
                            d = Read();
                        }
                    }
                    else
                    {
                        c = d;
                        d = Read();
                    }
                    text.Append((char)c);
                }

                /* If we broke out of the loop because we found a matching quote
                 * character then arrange to read a new character next time
                 * around; otherwise, save the character.
                 */
                _peekc = (d == Ttype) ? NeedChar : d;

                StringValue = text.ToString();
                return Ttype;
            }

            /* ---- Slash-introduced comments ---- */
            if (c == '/' && (_slashSlashCommentsP || _slashStarCommentsP))
            {
                c = Read();
                if (c == '*' && _slashStarCommentsP)
                {
                    /* Skip to the closing star-slash. Every line terminator is
                     * examined on its own turn so that consecutive blank lines
                     * are all counted; a CRLF pair counts once.
                     */
                    int prevc = 0;
                    c = Read();
                    while (c != '/' || prevc != '*')
                    {
                        if (c < 0)
                        {
                            return Ttype = TtEof;
                        }
                        prevc = c;
                        if (c == '\r')
                        {
                            LineNumber++;
                            c = Read();
                            if (c == '\n')
                            {
                                c = Read();
                            }
                        }
                        else
                        {
                            if (c == '\n')
                            {
                                LineNumber++;
                            }
                            c = Read();
                        }
                    }
                    return NextToken();
                }
                if (c == '/' && _slashSlashCommentsP)
                {
                    while ((c = Read()) != '\n' && c != '\r' && c >= 0)
                    {
                    }
                    /* Leave the terminator for the next call so the line is counted. */
                    _peekc = c;
                    return NextToken();
                }
                /* Now see if it is still a single line comment */
                if ((ct['/'] & CtComment) != 0)
                {
                    while ((c = Read()) != '\n' && c != '\r' && c >= 0)
                    {
                    }
                    _peekc = c;
                    return NextToken();
                }
                _peekc = c;
                return Ttype = '/';
            }

            /* ---- Single-line comments started by a comment character ---- */
            if ((ctype & CtComment) != 0)
            {
                while ((c = Read()) != '\n' && c != '\r' && c >= 0)
                {
                }
                _peekc = c;
                return NextToken();
            }

            /* ---- Ordinary character ---- */
            return Ttype = c;
        }