Beispiel #1
0
        /// <summary>
        /// Parse a string known to be a hex string.  This is faster
        /// than Parse which doesn't assume the number is Hex.  The value is
        /// parsed as a 32-bit integer first; only if it does not fit is it
        /// re-parsed as a 64-bit integer.
        /// </summary>
        /// <param name="s">The hex number as a string.</param>
        /// <param name="lineNumber">The line where this token was found.</param>
        /// <returns>A new IntToken set to the value in the input string.</returns>
        /// <exception cref="FormatException">
        /// The input contains characters that are not valid hex digits.
        /// </exception>
        /// <exception cref="OverflowException">
        /// The value does not fit in a 64-bit integer either.
        /// </exception>
        public static IntToken ParseHex(string s, int lineNumber)
        {
            try
            {
                return new IntToken(Convert.ToInt32(s, 16), lineNumber);
            }
            catch (OverflowException)
            {
                // Only an overflow can be cured by widening to 64 bits.  Any other
                // failure (malformed input, empty string, ...) would fail in exactly
                // the same way on a second attempt, so it is allowed to propagate
                // directly instead of being swallowed and re-thrown by a retry.
                return new IntToken(Convert.ToInt64(s, 16), lineNumber);
            }
        }
        /// <summary>
        /// Get the next token.  The last token will be an EofToken unless
        /// there's an unterminated quote or unterminated block comment
        /// and Settings.DoUntermCheck is true, in which case this throws
        /// an exception of type UntermException or sub-class.
        /// A token that was previously pushed back is returned again before
        /// any new input is read.
        /// </summary>
        /// <param name="token">
        /// The output token.  It is an EofToken when the end of input is reached,
        /// and null when the state machine ends up in an unrecognized state.
        /// </param>
        /// <returns>
        /// bool - true when a token was produced; false when the end of input
        /// was reached (token is then the EofToken) or when an unrecognized
        /// state was hit (token is then null).
        /// </returns>
        public bool NextToken(out Token token)
        {
            // A pushed-back token takes priority: hand out the remembered
            // token again and clear the flag so that it is only replayed once.
            if (_pushedBack)
            {
                _pushedBack = false;
                token       = _token;
                return(true);
            }

            token = null;
            int  thisChar = 0;      // current character
            byte ctype;             // type of this character

            NextTokenState state     = NextTokenState.Start;
            int            prevChar  = 0; // previous character
            byte           prevCtype = (byte)CharTypeBits.Eof;

            // get previous char from nextTokenSb if there
            // (nextTokenSb is a StringBuilder containing the characters
            //  of the next token to be emitted)
            // The character that terminated the previous token is appended to
            // nextTokenSb at the bottom of the loop below, after the buffer has
            // been cleared, so it is still there on the following call and
            // decides the state in which this call starts.
            if (nextTokenSb.Length > 0)
            {
                prevChar  = nextTokenSb[nextTokenSb.Length - 1];
                prevCtype = settings.CharTypes[prevChar];
                state     = PickNextState(prevCtype, prevChar);
            }

            // extra state for number parse
            int  seenDot   = 0;        // how many .'s in the number
            int  seenE     = 0;        // how many e's or E's have we seen in the number
            bool seenDigit = false;    // seen any digits (numbers can start with -)

            // lineNumber can change with each GetNextChar()
            // tokenLineNumber is the line on which the token started
            int tokenLineNumber = lineNumber;

            // State Machine: Produces a single token.
            // Enter a state based on a single character.
            // Generally, being in a state means we're currently collecting chars
            // in that type of token.
            // We do state machine until it builds a token (Eof is a token), then
            // return that token.
            thisChar = prevChar;           // for first iteration, since prevChar is set to this
            bool done = false;             // optimization

            while (!done)
            {
                prevChar = thisChar;
                thisChar = GetNextChar();
                ctype    = settings.CharTypes[thisChar];

                // see if we need to change states, or emit a token
                // Each case looks at the character just read (thisChar) to decide
                // whether the token collected so far in nextTokenSb is complete.
                // Unless a case returns or throws, thisChar is appended to
                // nextTokenSb after the switch.
                switch (state)
                {
                case NextTokenState.Start:
                    // RESET
                    state           = PickNextState(ctype, thisChar);
                    tokenLineNumber = lineNumber;
                    break;

                case NextTokenState.Char:
                    // single-character token: the character itself was read on the
                    // previous iteration (or carried over from the previous call)
                    token = new CharToken((char)prevChar, tokenLineNumber);
                    done  = true;
                    nextTokenSb.Length = 0;
                    break;

                case NextTokenState.Word:
                    // a word continues as long as word or digit characters follow
                    if ((!settings.IsCharType(ctype, CharTypeBits.Word)) &&
                        (!settings.IsCharType(ctype, CharTypeBits.Digit)))
                    {
                        // end of word, emit
                        token = new WordToken(nextTokenSb.ToString(), tokenLineNumber);
                        done  = true;
                        nextTokenSb.Length = 0;
                    }
                    break;

                case NextTokenState.Whitespace:
                    // The run ends at the first non-whitespace character.  When
                    // whitespace is being grabbed, a line feed (char code 10) also
                    // ends the run.
                    if (!settings.IsCharType(ctype, CharTypeBits.Whitespace) ||
                        (settings.GrabWhitespace && (thisChar == 10)))
                    {
                        // end of whitespace, emit
                        if (settings.GrabWhitespace)
                        {
                            token = new WhitespaceToken(nextTokenSb.ToString(), tokenLineNumber);
                            done  = true;
                            nextTokenSb.Length = 0;
                        }
                        else
                        {
                            // RESET
                            // whitespace is not wanted: drop it and start a new
                            // token at the current character
                            nextTokenSb.Length = 0;
                            tokenLineNumber    = lineNumber;
                            state = PickNextState(ctype, thisChar);
                        }
                    }
                    break;

                case NextTokenState.EndQuote:
                    // we're now 1 char after end of quote
                    // (the closing quote character was appended on the previous
                    //  iteration, so the buffer holds the quote with its delimiters)
                    token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
                    done  = true;
                    nextTokenSb.Length = 0;
                    break;

                case NextTokenState.Quote:
                    // looking for end quote matching char that started the quote
                    if (thisChar == nextTokenSb[0])
                    {
                        // handle escaped backslashes: count the immediately prior backslashes
                        // - even (including 0) means it's not escaped
                        // - odd means it is escaped
                        int backSlashCount = 0;
                        for (int i = nextTokenSb.Length - 1; i >= 0; i--)
                        {
                            if (nextTokenSb[i] == '\\')
                            {
                                backSlashCount++;
                            }
                            else
                            {
                                break;
                            }
                        }

                        if ((backSlashCount % 2) == 0)
                        {
                            state = NextTokenState.EndQuote;
                        }
                    }

                    // Input ended before a closing quote was found: either report
                    // it as an error or emit what has been collected so far.
                    if ((state != NextTokenState.EndQuote) && (thisChar == EOF))
                    {
                        if (settings.DoUntermCheck)
                        {
                            // clear the buffer before throwing so no partial quote
                            // is left behind for a later call
                            nextTokenSb.Length = 0;
                            throw new UntermQuoteException("Unterminated quote");
                        }

                        token = new QuoteToken(nextTokenSb.ToString(), tokenLineNumber);
                        done  = true;
                        nextTokenSb.Length = 0;
                    }
                    break;

                case NextTokenState.MaybeComment:
                    // The buffered character may start a comment (presumably it is
                    // a '/', as chosen by PickNextState - confirm there); the
                    // current character decides.
                    if (thisChar == EOF)
                    {
                        // nothing follows, so it was just an ordinary character
                        token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
                        done  = true;
                        nextTokenSb.Length = 0;
                    }
                    else
                    {
                        // if we get the right char, we're in a comment
                        if (settings.SlashSlashComments && (thisChar == '/'))
                        {
                            state = NextTokenState.LineComment;
                        }
                        else if (settings.SlashStarComments && (thisChar == '*'))
                        {
                            state = NextTokenState.BlockComment;
                        }
                        else
                        {
                            // not a comment after all: emit the buffered character
                            token = new CharToken(nextTokenSb.ToString(), tokenLineNumber);
                            done  = true;
                            nextTokenSb.Length = 0;
                        }
                    }
                    break;

                case NextTokenState.LineComment:
                    // A line comment ends at end of input or at a newline.  It is
                    // emitted as a CommentToken when comments are grabbed; otherwise
                    // it is discarded and tokenizing restarts at the current character.
                    if (thisChar == EOF)
                    {
                        if (settings.GrabComments)
                        {
                            token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
                            done  = true;
                            nextTokenSb.Length = 0;
                        }
                        else
                        {
                            // RESET
                            nextTokenSb.Length = 0;
                            tokenLineNumber    = lineNumber;
                            state = PickNextState(ctype, thisChar);
                        }
                    }
                    else
                    {
                        if (thisChar == '\n')
                        {
                            if (settings.GrabComments)
                            {
                                token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
                                done  = true;
                                nextTokenSb.Length = 0;
                            }
                            else
                            {
                                // RESET
                                nextTokenSb.Length = 0;
                                tokenLineNumber    = lineNumber;
                                state = PickNextState(ctype, thisChar);
                            }
                        }
                    }
                    break;

                case NextTokenState.BlockComment:
                    if (thisChar == EOF)
                    {
                        // input ended inside the block comment
                        if (settings.DoUntermCheck)
                        {
                            nextTokenSb.Length = 0;
                            throw new UntermCommentException("Unterminated comment.");
                        }

                        // no unterminated check: treat the comment as ending here
                        if (settings.GrabComments)
                        {
                            token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
                            done  = true;
                            nextTokenSb.Length = 0;
                        }
                        else
                        {
                            // RESET
                            nextTokenSb.Length = 0;
                            tokenLineNumber    = lineNumber;
                            state = PickNextState(ctype, thisChar);
                        }
                    }
                    else
                    {
                        // "*/" closes the comment; the '/' is appended after the
                        // switch and the token is finished on the next iteration
                        if ((thisChar == '/') && (prevChar == '*'))
                        {
                            state = NextTokenState.EndBlockComment;
                        }
                    }
                    break;

                // special case for 2-character token termination
                case NextTokenState.EndBlockComment:
                    // we're now 1 char after the closing "*/"
                    if (settings.GrabComments)
                    {
                        token = new CommentToken(nextTokenSb.ToString(), tokenLineNumber);
                        done  = true;
                        nextTokenSb.Length = 0;
                    }
                    else
                    {
                        // RESET
                        nextTokenSb.Length = 0;
                        tokenLineNumber    = lineNumber;
                        state = PickNextState(ctype, thisChar);
                    }
                    break;

                case NextTokenState.MaybeHex:
                    // previous char was 0
                    // (only a lowercase 'x' is accepted as the hex marker here)
                    if (thisChar != 'x')
                    {
                        // back up and try non-hex
                        // back up to the 0
                        // Everything buffered so far plus the current character is
                        // moved to backString (presumably the pushback buffer that
                        // GetNextChar reads from first - confirm).
                        nextTokenSb.Append((char)thisChar);
                        backString.Append(nextTokenSb);
                        nextTokenSb.Length = 0;

                        // reset state and don't choose MaybeNumber state.
                        // pull char from backString
                        // NOTE(review): the state passed to PickNextState here is
                        // MaybeHex, which looks like the state to be excluded - confirm.
                        thisChar = backString[0];
                        backString.Remove(0, 1);
                        state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
                                              NextTokenState.MaybeHex);
                    }
                    else
                    {
                        state = NextTokenState.HexGot0x;
                    }
                    break;

                case NextTokenState.HexGot0x:
                    // "0x" has been read; at least one hex digit must follow
                    if (!settings.IsCharType(ctype, CharTypeBits.HexDigit))
                    {
                        // got 0x but now a non-hex char
                        // back up to the 0
                        nextTokenSb.Append((char)thisChar);
                        backString.Append(nextTokenSb);
                        nextTokenSb.Length = 0;

                        // reset state and don't choose MaybeNumber state.
                        // pull char from backString
                        thisChar = backString[0];
                        backString.Remove(0, 1);
                        state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
                                              NextTokenState.MaybeHex);
                    }
                    else
                    {
                        state = NextTokenState.HexNumber;
                    }
                    break;

                case NextTokenState.HexNumber:
                    // keep collecting hex digits until a non-hex character shows up
                    if (!settings.IsCharType(ctype, CharTypeBits.HexDigit))
                    {
                        // emit the hex number we've collected
                        // (the whole buffer is handed to ParseHex; at this point it
                        //  should still start with the "0x" read in the earlier states)
                        token = IntToken.ParseHex(nextTokenSb.ToString(), tokenLineNumber);
                        done  = true;
                        nextTokenSb.Length = 0;
                    }
                    break;

                case NextTokenState.MaybeNumber:
                    //
                    // Determine whether or not to stop collecting characters for
                    // the number parse.  We terminate when it's clear it's not
                    // a number or no longer a number.
                    //
                    bool term = false;

                    // a digit in either the current or the previous character counts
                    if (settings.IsCharType(ctype, CharTypeBits.Digit) ||
                        settings.IsCharType(prevChar, CharTypeBits.Digit))
                    {
                        seenDigit = true;
                    }

                    // term conditions
                    if (thisChar == '.')
                    {
                        seenDot++;
                        if (seenDot > 1)
                        {
                            term = true;    // more than one dot, it isn't a number
                        }
                    }
                    else if (((thisChar == 'e') || (thisChar == 'E')))
                    {
                        seenE++;
                        if (!seenDigit)
                        {
                            term = true;    // e before any digits is bad
                        }
                        else if (seenE > 1)
                        {
                            term = true;    // more than 1 e is bad
                        }
                        else
                        {
                            term = true;    // done regardless

                            // scan the exponent, put its characters into
                            // nextTokenSb, if there are any
                            // (expSb starts with the e/E itself; GrabInt is expected
                            //  to add the exponent digits and report the character
                            //  that follows them in c - confirm against GrabInt)
                            char c;
                            expSb.Clear();
                            expSb.Append((char)thisChar);
                            if (GrabInt(expSb, true, out c))
                            {
                                // we got a good exponent, tack it on
                                nextTokenSb.Append(expSb);
                                thisChar = c;    // and continue after the exponent's characters
                            }
                            // otherwise thisChar is still the e/E: the number is
                            // emitted without an exponent and the e/E is carried
                            // over as the start of the next token
                        }
                    }
                    else if (thisChar == EOF)
                    {
                        term = true;
                    }
                    // or a char that can't be in a number
                    // ('.', 'e' and 'E' were already handled by the branches above,
                    //  and '-' is handled by the branch below)
                    else if ((!settings.IsCharType(ctype, CharTypeBits.Digit) &&
                              (thisChar != 'e') && (thisChar != 'E') &&
                              (thisChar != '-') && (thisChar != '.')) ||
                             ((thisChar == '+') && (seenE == 0)))
                    {
                        // it's not a normal number character
                        term = true;
                    }
                    // or a dash not after e
                    else if ((thisChar == '-') &&
                             (!((prevChar == 'e') || (prevChar == 'E'))))
                    {
                        term = true;
                    }

                    if (term)
                    {
                        // we are terminating a number, or it wasn't a number
                        if (seenDigit)
                        {
                            // a dot, an exponent or a very long digit string makes
                            // it a float, anything else is an integer
                            // NOTE(review): IndexOf is not a member of the BCL
                            // StringBuilder; presumably an extension method defined
                            // elsewhere in the project - confirm.
                            if ((nextTokenSb.IndexOf('.') >= 0) ||
                                (nextTokenSb.IndexOf('e') >= 0) ||
                                (nextTokenSb.IndexOf('E') >= 0) ||
                                (nextTokenSb.Length >= 19)    // probably too large for Int64, use float
                                )
                            {
                                token = new FloatToken(nextTokenSb.ToString(), tokenLineNumber);
                            }
                            else
                            {
                                token = new IntToken(nextTokenSb.ToString(), tokenLineNumber);
                            }
                            done = true;
                            nextTokenSb.Length = 0;
                        }
                        else
                        {
                            // -whatever or -.whatever
                            // didn't see any digits, must have gotten here by a leading -
                            // and no digits after it
                            // back up to -, pick next state excluding numbers
                            nextTokenSb.Append((char)thisChar);
                            backString.Append(nextTokenSb);
                            nextTokenSb.Length = 0;

                            // restart on the - and don't choose MaybeNumber state
                            // pull char from backString
                            thisChar = backString[0];
                            backString.Remove(0, 1);
                            state = PickNextState(settings.CharTypes[thisChar], (int)thisChar,
                                                  NextTokenState.MaybeNumber);
                        }
                    }
                    break;

                case NextTokenState.Eol:
                    // tokenLineNumber - 1 because the newline char is on the previous line
                    token = new EolToken(tokenLineNumber - 1);
                    done  = true;
                    nextTokenSb.Length = 0;
                    _token             = token;
                    break;

                case NextTokenState.Eof:
                    // End of input: the EofToken is handed out through the out
                    // parameter and remembered in _token, but the method reports
                    // false.  Returning here skips the append below.
                    token = new EofToken(tokenLineNumber);
                    done  = true;
                    nextTokenSb.Length = 0;
                    _token             = token;
                    return(false);

                case NextTokenState.Invalid:
                default:
                    // not a good sign, some unrepresented state?
                    // (disabled logging) log.Error("NextToken: Hit unrepresented state {0}", state);
                    // token is still null at this point, so the caller gets null and false
                    _token = null;
                    return(false);
                }

                // use a StringBuilder to accumulate characters which are part of this token
                // When a token was just emitted the buffer has been cleared, so the
                // character appended here is the first character of the next token.
                if (thisChar != EOF)
                {
                    nextTokenSb.Append((char)thisChar);
                }
            }

            // remember the token that is being returned (it is what gets replayed
            // when _pushedBack is set)
            _token = token;

            return(true);
        }