/* * Method: FindNextToken * * Find the next token. Return 'true' if one was found. False, otherwise. */ internal override bool FindNextToken() { int startPosition = _reader.Position; // VB docs claim whitespace is Unicode category Zs. However, // this category does not contain tabs. Assuming a less restrictive // definition for whitespace... if (_reader.SinkWhiteSpace()) { while (_reader.SinkWhiteSpace()) { } // Now, we need to check for the line continuation character. if (_reader.SinkLineContinuationCharacter()) // Line continuation is '_' { // Save the current position because we may need to come back here. int savePosition = _reader.Position - 1; // Skip all whitespace after the '_' while (_reader.SinkWhiteSpace()) { } // Now, skip all the newlines. // Need at least one newline for this to count as line continuation. int count = 0; while (_reader.SinkNewLine()) { ++count; } if (count > 0) { current = new VisualBasicTokenizer.LineContinuationToken(); return true; } // Otherwise, fall back to plain old whitespace. _reader.Position = savePosition; } current = new WhitespaceToken(); return true; } // Line terminators are separate from whitespace and are significant. else if (_reader.SinkNewLine()) { // We want one token per line terminator. current = new VisualBasicTokenizer.LineTerminatorToken(); return true; } // Check for a comment--either those that start with ' or rem. else if (_reader.SinkLineCommentStart()) { // Skip to the first EOL. _reader.SinkToEndOfLine(); current = new CommentToken(); return true; } // Identifier or keyword? else if ( // VB allows escaping of identifiers by surrounding them with [] // In other words, // Date is a keyword but, // [Date] is an identifier. _reader.CurrentCharacter == '[' || _reader.MatchNextIdentifierStart() ) { bool escapedIdentifier = false; if (_reader.CurrentCharacter == '[') { escapedIdentifier = true; _reader.SinkCharacter(); // Now, the next character must be an identifier start. if (!_reader.SinkIdentifierStart()) { current = new ExpectedIdentifierToken(); return true; } } // Sink the rest of the identifier. while (_reader.SinkIdentifierPart()) { } // If this was an escaped identifier the we need to get the terminating ']'. if (escapedIdentifier) { if (!_reader.Sink("]")) { current = new ExpectedIdentifierToken(); return true; } } else { // Escaped identifiers are not allowed to have trailing type character. _reader.SinkTypeCharacter(); // Type character is optional. } // An identifier that is only a '_' is illegal because it is // ambiguous with line continuation string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition); if (identifierOrKeyword == "_" || identifierOrKeyword == "[_]" || identifierOrKeyword == "[]") { current = new ExpectedIdentifierToken(); return true; } // Make an upper-case version in order to check whether this may be a keyword. string upper = identifierOrKeyword.ToUpper(CultureInfo.InvariantCulture); switch (upper) { default: if (Array.IndexOf(s_keywordList, upper) >= 0) { current = new KeywordToken(); return true; } // Create the token. current = new IdentifierToken(); // Trim off the [] if this is an escaped identifier. if (escapedIdentifier) { current.InnerText = identifierOrKeyword.Substring(1, identifierOrKeyword.Length - 2); } return true; case "FALSE": case "TRUE": current = new BooleanLiteralToken(); return true; } } // Is it a hex integer? else if (_reader.SinkHexIntegerPrefix()) { if (!_reader.SinkMultipleHexDigits()) { current = new ExpectedValidHexDigitToken(); return true; } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new HexIntegerLiteralToken(); return true; } // Is it an octal integer? else if (_reader.SinkOctalIntegerPrefix()) { if (!_reader.SinkMultipleOctalDigits()) { current = new VisualBasicTokenizer.ExpectedValidOctalDigitToken(); return true; } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new VisualBasicTokenizer.OctalIntegerLiteralToken(); return true; } // Is it a decimal integer? else if (_reader.SinkMultipleDecimalDigits()) { // Sink a suffix if there is one. _reader.SinkDecimalIntegerSuffix(); current = new DecimalIntegerLiteralToken(); return true; } // Preprocessor line else if (_reader.CurrentCharacter == '#') { if (_reader.SinkIgnoreCase("#if")) { current = new OpenConditionalDirectiveToken(); } else if (_reader.SinkIgnoreCase("#end if")) { current = new CloseConditionalDirectiveToken(); } else { current = new PreprocessorToken(); } _reader.SinkToEndOfLine(); return true; } // Is it a separator? else if (_reader.SinkSeparatorCharacter()) { current = new VisualBasicTokenizer.SeparatorToken(); return true; } // Is it an operator? else if (_reader.SinkOperator()) { current = new OperatorToken(); return true; } // A string? else if (_reader.Sink("\"")) { do { // Inside a verbatim string "" is treated as a special character while (_reader.Sink("\"\"")) { } } while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"'); // Can't end a file inside a string if (_reader.EndOfLines) { current = new EndOfFileInsideStringToken(); return true; } current = new StringLiteralToken(); return true; } // We didn't recognize the token, so this is a syntax error. _reader.SinkCharacter(); current = new UnrecognizedToken(); return true; }
/* * Method: FindNextToken * * Find the next token. Return 'true' if one was found. False, otherwise. */ override internal bool FindNextToken() { int startPosition = _reader.Position; // VB docs claim whitespace is Unicode category Zs. However, // this category does not contain tabs. Assuming a less restrictive // definition for whitespace... if (_reader.SinkWhiteSpace()) { while (_reader.SinkWhiteSpace()) { } // Now, we need to check for the line continuation character. if (_reader.SinkLineContinuationCharacter()) // Line continuation is '_' { // Save the current position because we may need to come back here. int savePosition = _reader.Position - 1; // Skip all whitespace after the '_' while (_reader.SinkWhiteSpace()) { } // Now, skip all the newlines. // Need at least one newline for this to count as line continuation. int count = 0; while (_reader.SinkNewLine()) { ++count; } if (count > 0) { current = new VisualBasicTokenizer.LineContinuationToken(); return(true); } // Otherwise, fall back to plain old whitespace. _reader.Position = savePosition; } current = new WhitespaceToken(); return(true); } // Line terminators are separate from whitespace and are significant. else if (_reader.SinkNewLine()) { // We want one token per line terminator. current = new VisualBasicTokenizer.LineTerminatorToken(); return(true); } // Check for a comment--either those that start with ' or rem. else if (_reader.SinkLineCommentStart()) { // Skip to the first EOL. _reader.SinkToEndOfLine(); current = new CommentToken(); return(true); } // Identifier or keyword? else if ( // VB allows escaping of identifiers by surrounding them with [] // In other words, // Date is a keyword but, // [Date] is an identifier. _reader.CurrentCharacter == '[' || _reader.MatchNextIdentifierStart() ) { bool escapedIdentifier = false; if (_reader.CurrentCharacter == '[') { escapedIdentifier = true; _reader.SinkCharacter(); // Now, the next character must be an identifier start. if (!_reader.SinkIdentifierStart()) { current = new ExpectedIdentifierToken(); return(true); } } // Sink the rest of the identifier. while (_reader.SinkIdentifierPart()) { } // If this was an escaped identifier the we need to get the terminating ']'. if (escapedIdentifier) { if (!_reader.Sink("]")) { current = new ExpectedIdentifierToken(); return(true); } } else { // Escaped identifiers are not allowed to have trailing type character. _reader.SinkTypeCharacter(); // Type character is optional. } // An identifier that is only a '_' is illegal because it is // ambiguous with line continuation string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition); if (identifierOrKeyword == "_" || identifierOrKeyword == "[_]" || identifierOrKeyword == "[]") { current = new ExpectedIdentifierToken(); return(true); } // Make an upper-case version in order to check whether this may be a keyword. string upper = identifierOrKeyword.ToUpperInvariant(); switch (upper) { default: if (Array.IndexOf(s_keywordList, upper) >= 0) { current = new KeywordToken(); return(true); } // Create the token. current = new IdentifierToken(); // Trim off the [] if this is an escaped identifier. if (escapedIdentifier) { current.InnerText = identifierOrKeyword.Substring(1, identifierOrKeyword.Length - 2); } return(true); case "FALSE": case "TRUE": current = new BooleanLiteralToken(); return(true); } } // Is it a hex integer? else if (_reader.SinkHexIntegerPrefix()) { if (!_reader.SinkMultipleHexDigits()) { current = new ExpectedValidHexDigitToken(); return(true); } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new HexIntegerLiteralToken(); return(true); } // Is it an octal integer? else if (_reader.SinkOctalIntegerPrefix()) { if (!_reader.SinkMultipleOctalDigits()) { current = new VisualBasicTokenizer.ExpectedValidOctalDigitToken(); return(true); } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new VisualBasicTokenizer.OctalIntegerLiteralToken(); return(true); } // Is it a decimal integer? else if (_reader.SinkMultipleDecimalDigits()) { // Sink a suffix if there is one. _reader.SinkDecimalIntegerSuffix(); current = new DecimalIntegerLiteralToken(); return(true); } // Preprocessor line else if (_reader.CurrentCharacter == '#') { if (_reader.SinkIgnoreCase("#if")) { current = new OpenConditionalDirectiveToken(); } else if (_reader.SinkIgnoreCase("#end if")) { current = new CloseConditionalDirectiveToken(); } else { current = new PreprocessorToken(); } _reader.SinkToEndOfLine(); return(true); } // Is it a separator? else if (_reader.SinkSeparatorCharacter()) { current = new VisualBasicTokenizer.SeparatorToken(); return(true); } // Is it an operator? else if (_reader.SinkOperator()) { current = new OperatorToken(); return(true); } // A string? else if (_reader.Sink("\"")) { do { // Inside a verbatim string "" is treated as a special character while (_reader.Sink("\"\"")) { } }while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"'); // Can't end a file inside a string if (_reader.EndOfLines) { current = new EndOfFileInsideStringToken(); return(true); } current = new StringLiteralToken(); return(true); } // We didn't recognize the token, so this is a syntax error. _reader.SinkCharacter(); current = new UnrecognizedToken(); return(true); }