/// <summary>
/// Compares this token against a single character.
/// </summary>
/// <param name="c">The character to test for.</param>
/// <returns>
/// true only when this instance is a CharToken and its Object
/// reports equality with <paramref name="c"/>; false for every other token type.
/// </returns>
public bool Equals(char c)
{
    // "as" yields null for anything that is not a CharToken.
    CharToken charToken = this as CharToken;

    // Reference comparison, so no user-defined equality operator is involved.
    if (object.ReferenceEquals(charToken, null))
    {
        return false;
    }

    return charToken.Object.Equals(c);
}
/// <summary>
/// Produces the next token from the input and returns it through
/// <paramref name="token"/>. If the pushed-back flag is set, the remembered
/// token is handed out again and no input is read.
/// </summary>
/// <param name="token">
/// Receives the token that was built. It is null when the state machine
/// ends up in a state it has no case for.
/// </param>
/// <returns>
/// false when this call generates the EofToken or hits an unhandled state;
/// true otherwise (including when a pushed-back token is replayed).
/// </returns>
/// <exception cref="UntermQuoteException">
/// Input ended inside a quote while settings.DoUntermCheck is set.
/// </exception>
/// <exception cref="UntermCommentException">
/// Input ended inside a block comment while settings.DoUntermCheck is set.
/// </exception>
public bool NextToken(out Token token)
{
    // Replay a pushed-back token before touching the input.
    if (_pushedBack)
    {
        _pushedBack = false;
        token = _token;
        return true;
    }

    token = null;

    NextTokenState state = NextTokenState.Start;
    int previous = 0;

    // nextTokenSb holds the characters of the token being assembled. If the
    // prior call left something in it, resume from the state implied by the
    // last character stored there.
    if (nextTokenSb.Length > 0)
    {
        previous = nextTokenSb[nextTokenSb.Length - 1];
        byte previousType = settings.CharTypes[previous];
        state = PickNextState(previousType, previous);
    }

    // Bookkeeping that only the number-recognition state uses.
    int dotCount = 0;        // '.' characters seen so far
    int exponentCount = 0;   // 'e' / 'E' characters seen so far
    bool sawDigit = false;   // a number may begin with '-', so track real digits

    // lineNumber may advance on every GetNextChar(); startLine remembers
    // where the current token began.
    int startLine = lineNumber;

    // Seeded from "previous" so the first pass of the loop shifts it back in.
    int current = previous;
    bool finished = false;

    // Each pass reads one character, then either stays in the current state
    // (still collecting), moves to another state, or emits a token and stops.
    do
    {
        previous = current;
        current = GetNextChar();
        byte currentType = settings.CharTypes[current];

        switch (state)
        {
            case NextTokenState.Start:
                // Fresh start: choose a state from the character just read.
                state = PickNextState(currentType, current);
                startLine = lineNumber;
                break;

            case NextTokenState.Char:
                // The single-character token is the character read one step ago.
                token = new CharToken((char)previous, startLine);
                finished = true;
                nextTokenSb.Length = 0;
                break;

            case NextTokenState.Word:
                // A word continues through word characters and digits.
                if (!(settings.IsCharType(currentType, CharTypeBits.Word)
                      || settings.IsCharType(currentType, CharTypeBits.Digit)))
                {
                    token = new WordToken(nextTokenSb.ToString(), startLine);
                    finished = true;
                    nextTokenSb.Length = 0;
                }
                break;

            case NextTokenState.Whitespace:
                // Whitespace ends at the first non-whitespace character, or at
                // character code 10 when whitespace tokens are being kept.
                if (!settings.IsCharType(currentType, CharTypeBits.Whitespace)
                    || (settings.GrabWhitespace && (current == 10)))
                {
                    if (settings.GrabWhitespace)
                    {
                        token = new WhitespaceToken(nextTokenSb.ToString(), startLine);
                        finished = true;
                        nextTokenSb.Length = 0;
                    }
                    else
                    {
                        // Discard the whitespace and start over on this character.
                        nextTokenSb.Length = 0;
                        startLine = lineNumber;
                        state = PickNextState(currentType, current);
                    }
                }
                break;

            case NextTokenState.EndQuote:
                // One character past the closing quote: emit what was collected.
                token = new QuoteToken(nextTokenSb.ToString(), startLine);
                finished = true;
                nextTokenSb.Length = 0;
                break;

            case NextTokenState.Quote:
                // The quote closes on the same character that opened it
                // (stored at index 0), unless that character is escaped.
                if (current == nextTokenSb[0])
                {
                    // Count the run of backslashes directly before this
                    // character: an even count (zero included) means the
                    // quote character itself is not escaped.
                    int precedingBackslashes = 0;
                    int index = nextTokenSb.Length - 1;
                    while ((index >= 0) && (nextTokenSb[index] == '\\'))
                    {
                        precedingBackslashes++;
                        index--;
                    }

                    if ((precedingBackslashes % 2) == 0)
                    {
                        state = NextTokenState.EndQuote;
                    }
                }

                if ((state != NextTokenState.EndQuote) && (current == EOF))
                {
                    // Input ran out before the quote was closed.
                    if (settings.DoUntermCheck)
                    {
                        nextTokenSb.Length = 0;
                        throw new UntermQuoteException("Unterminated quote");
                    }

                    token = new QuoteToken(nextTokenSb.ToString(), startLine);
                    finished = true;
                    nextTokenSb.Length = 0;
                }
                break;

            case NextTokenState.MaybeComment:
                // The second character decides whether a comment has begun.
                if ((current != EOF) && settings.SlashSlashComments && (current == '/'))
                {
                    state = NextTokenState.LineComment;
                }
                else if ((current != EOF) && settings.SlashStarComments && (current == '*'))
                {
                    state = NextTokenState.BlockComment;
                }
                else
                {
                    // End of input or not a comment opener: emit what was held.
                    token = new CharToken(nextTokenSb.ToString(), startLine);
                    finished = true;
                    nextTokenSb.Length = 0;
                }
                break;

            case NextTokenState.LineComment:
                // A line comment is closed by end of input or by a newline.
                if ((current == EOF) || (current == '\n'))
                {
                    if (settings.GrabComments)
                    {
                        token = new CommentToken(nextTokenSb.ToString(), startLine);
                        finished = true;
                        nextTokenSb.Length = 0;
                    }
                    else
                    {
                        // Drop the comment text and start over on this character.
                        nextTokenSb.Length = 0;
                        startLine = lineNumber;
                        state = PickNextState(currentType, current);
                    }
                }
                break;

            case NextTokenState.BlockComment:
                if (current == EOF)
                {
                    // Input ran out before "*/" was seen.
                    if (settings.DoUntermCheck)
                    {
                        nextTokenSb.Length = 0;
                        throw new UntermCommentException("Unterminated comment.");
                    }

                    if (settings.GrabComments)
                    {
                        token = new CommentToken(nextTokenSb.ToString(), startLine);
                        finished = true;
                        nextTokenSb.Length = 0;
                    }
                    else
                    {
                        nextTokenSb.Length = 0;
                        startLine = lineNumber;
                        state = PickNextState(currentType, current);
                    }
                }
                else if ((current == '/') && (previous == '*'))
                {
                    // Two-character terminator seen; finish on the next pass.
                    state = NextTokenState.EndBlockComment;
                }
                break;

            case NextTokenState.EndBlockComment:
                // One character past the "*/" terminator.
                if (settings.GrabComments)
                {
                    token = new CommentToken(nextTokenSb.ToString(), startLine);
                    finished = true;
                    nextTokenSb.Length = 0;
                }
                else
                {
                    nextTokenSb.Length = 0;
                    startLine = lineNumber;
                    state = PickNextState(currentType, current);
                }
                break;

            case NextTokenState.MaybeHex:
                // The character before this one was '0'.
                if (current == 'x')
                {
                    state = NextTokenState.HexGot0x;
                }
                else
                {
                    // Not hex after all. Move everything collected (plus this
                    // character) into backString, then restart on the first
                    // of those characters with MaybeHex passed as the extra
                    // argument to PickNextState.
                    nextTokenSb.Append((char)current);
                    backString.Append(nextTokenSb);
                    nextTokenSb.Length = 0;

                    current = backString[0];
                    backString.Remove(0, 1);
                    state = PickNextState(settings.CharTypes[current], (int)current,
                        NextTokenState.MaybeHex);
                }
                break;

            case NextTokenState.HexGot0x:
                if (settings.IsCharType(currentType, CharTypeBits.HexDigit))
                {
                    state = NextTokenState.HexNumber;
                }
                else
                {
                    // "0x" followed by a non-hex character: same back-up
                    // procedure as in MaybeHex.
                    nextTokenSb.Append((char)current);
                    backString.Append(nextTokenSb);
                    nextTokenSb.Length = 0;

                    current = backString[0];
                    backString.Remove(0, 1);
                    state = PickNextState(settings.CharTypes[current], (int)current,
                        NextTokenState.MaybeHex);
                }
                break;

            case NextTokenState.HexNumber:
                // The first non-hex character ends the hex literal.
                if (!settings.IsCharType(currentType, CharTypeBits.HexDigit))
                {
                    token = IntToken.ParseHex(nextTokenSb.ToString(), startLine);
                    finished = true;
                    nextTokenSb.Length = 0;
                }
                break;

            case NextTokenState.MaybeNumber:
                // Collect until the text either stops being a number or is
                // shown never to have been one.
                bool terminate = false;

                if (settings.IsCharType(currentType, CharTypeBits.Digit)
                    || settings.IsCharType(previous, CharTypeBits.Digit))
                {
                    sawDigit = true;
                }

                if (current == '.')
                {
                    // A second dot cannot belong to the same number.
                    dotCount++;
                    terminate = dotCount > 1;
                }
                else if ((current == 'e') || (current == 'E'))
                {
                    // Collection always stops at an exponent marker; the only
                    // question is whether a valid exponent gets attached first.
                    exponentCount++;
                    terminate = true;

                    if (sawDigit && (exponentCount <= 1))
                    {
                        // Try to read the exponent's characters into expSb.
                        char afterExponent;
                        expSb.Clear();
                        expSb.Append((char)current);
                        if (GrabInt(expSb, true, out afterExponent))
                        {
                            // Good exponent: attach it and carry on from the
                            // character GrabInt handed back.
                            nextTokenSb.Append(expSb);
                            current = afterExponent;
                        }
                    }
                }
                else if ((current == EOF)
                    // a character that cannot appear inside a number
                    || (!settings.IsCharType(currentType, CharTypeBits.Digit)
                        && (current != 'e')
                        && (current != 'E')
                        && (current != '-')
                        && (current != '.'))
                    || ((current == '+') && (exponentCount == 0))
                    // a dash that does not directly follow an exponent marker
                    || ((current == '-') && (previous != 'e') && (previous != 'E')))
                {
                    terminate = true;
                }

                if (terminate)
                {
                    if (sawDigit)
                    {
                        // Text containing '.', 'e' or 'E' becomes a float, as
                        // does text of 19 or more characters (likely too big
                        // for an Int64).
                        bool treatAsFloat =
                            (nextTokenSb.IndexOf('.') >= 0)
                            || (nextTokenSb.IndexOf('e') >= 0)
                            || (nextTokenSb.IndexOf('E') >= 0)
                            || (nextTokenSb.Length >= 19);

                        if (treatAsFloat)
                        {
                            token = new FloatToken(nextTokenSb.ToString(), startLine);
                        }
                        else
                        {
                            token = new IntToken(nextTokenSb.ToString(), startLine);
                        }

                        finished = true;
                        nextTokenSb.Length = 0;
                    }
                    else
                    {
                        // No digit was ever seen, so this was a leading '-'
                        // with nothing numeric after it. Push everything back
                        // and restart on the first character, passing
                        // MaybeNumber as the extra argument to PickNextState.
                        nextTokenSb.Append((char)current);
                        backString.Append(nextTokenSb);
                        nextTokenSb.Length = 0;

                        current = backString[0];
                        backString.Remove(0, 1);
                        state = PickNextState(settings.CharTypes[current], (int)current,
                            NextTokenState.MaybeNumber);
                    }
                }
                break;

            case NextTokenState.Eol:
                // The newline character sits on the line before startLine.
                token = new EolToken(startLine - 1);
                finished = true;
                nextTokenSb.Length = 0;
                _token = token;
                break;

            case NextTokenState.Eof:
                // End of input: publish the EofToken and report false.
                token = new EofToken(startLine);
                nextTokenSb.Length = 0;
                _token = token;
                return false;

            case NextTokenState.Invalid:
            default:
                // A state with no handler; nothing sensible can be emitted.
                _token = null;
                return false;
        }

        // Whatever was just read (or substituted for it above) joins the
        // buffer for the token under construction.
        if (current != EOF)
        {
            nextTokenSb.Append((char)current);
        }
    }
    while (!finished);

    _token = token;
    return true;
}