public void EncodeTest(string Expected) { var Token = new WhitespaceToken(Expected); var Actual = Token.Encode(); Assert.Equal(Expected, Actual); }
public void Dispatch(WhitespaceToken token) { if (this.currentWordToken.Length == 0) { // We haven't read any actual data from the current line yet so lets ignore leading spaces return; } if (this.isWhitespace) { // The last token was whitespace too so let's skip this one return; } // Increment the line word count this.currentLineWordCount++; // Save off the current word token and create a new instance for the next word. var wordToken = this.currentWordToken; this.currentWordToken = new WordToken(); // Dispatch the current word token so it can be processed this.Dispatch(wordToken); this.isWhitespace = true; }
public void EscapeStateTokenizeWhitespace() { var lexer = new Lexer(); var tokens = lexer.Tokenize("\\s"); var expected = new WhitespaceToken(); Assert.AreEqual(tokens.Last(), expected); }
public void When_HasNoLines_DoesNotAdd() { var token = new WhitespaceToken(); Assert.That(() => token.Condense(), Throws.Nothing); Assert.That(token.Lines, Has.Count.EqualTo(0)); }
[TestCase("\r")] //Mac public void When_HasOneLine_WithCustomNewline(string newline) { var token = new WhitespaceToken(); token.Lines.Add(" "); Assert.That(() => token.Condense(newline), Throws.Nothing); Assert.That(token.Lines, Has.Count.EqualTo(1)); Assert.That(token.GetStringLines(), Is.EquivalentTo(new[] { newline })); }
public void When_HasOneLine() { var token = new WhitespaceToken(); token.Lines.Add(" "); Assert.That(() => token.Condense(), Throws.Nothing); Assert.That(token.Lines, Has.Count.EqualTo(1)); Assert.That(token.GetStringLines(), Is.EquivalentTo(new [] { Environment.NewLine })); }
public void When_HasSingleWhitespaceTokens() { var section = new ConfigIniSection(); var token1 = new WhitespaceToken(new[] { "", "\t", " " }, LineEnding.None); section.Tokens.Add(token1); section.MergeConsecutiveTokens(); Assert.That(section.Tokens, Has.Count.EqualTo(1)); Assert.That(section.Tokens[0], Is.SameAs(token1)); Assert.That(token1.GetStringLines(), Is.EquivalentTo(new[] { "", "\t", " " })); }
public void Test_3_5_11_Spaces_C() { Scanner lexer = this.GetLexer(" \t \r \n "); object obj = lexer.GetToken(); Assert.IsInstanceOfType(obj, typeof(WhitespaceToken)); WhitespaceToken token = (WhitespaceToken)obj; Assert.IsTrue(token.IsValid); Assert.IsNull(token.ScanError); Assert.AreEqual(0, token.StartPosition.Position); Assert.AreEqual(8, token.StopPosition.Position); }
public override TokenizationResult Tokenize(ref LexerRuntimeInfo info) { var startPosition = info.Reader.CurrentPosition; var whitespaceSpan = info.Reader.ReadTillEndOfWhitespace(); if (whitespaceSpan.Length == 0) { return(NoWhitespaceResult); } var token = new WhitespaceToken(startPosition, whitespaceSpan.Length); return(TokenizationResult.Successful(token)); }
public override InputStream TryRead(InputStream stream, out LexicToken token) { token = null; char firstChar = stream.Content[0]; if (firstChar == ' ' || firstChar == '\t' || firstChar == '\n' || firstChar == '\r') { token = new WhitespaceToken(); return stream.Move(1); } return stream; }
public override InputStream TryRead(InputStream stream, out LexicToken token) { token = null; char firstChar = stream.Content[0]; if (firstChar == ' ' || firstChar == '\t' || firstChar == '\n' || firstChar == '\r') { token = new WhitespaceToken(); return(stream.Move(1)); } return(stream); }
public void When_Has3NonConsecutiveTokens() { var section = new ConfigIniSection(); var token1 = new CommentToken(new[] { "; Hey", ";Whats", " ;Up?" }, LineEnding.None); var token2 = new WhitespaceToken(new[] { " ", "\t", "" }, LineEnding.None); var token3 = new CommentToken(new[] { ";Baz" }, LineEnding.None); section.Tokens.Add(token1); section.Tokens.Add(token2); section.Tokens.Add(token3); section.MergeConsecutiveTokens(); Assert.That(section.Tokens, Has.Count.EqualTo(3)); Assert.That(section.Tokens[0], Is.SameAs(token1)); Assert.That(section.Tokens[1], Is.SameAs(token2)); Assert.That(section.Tokens[2], Is.SameAs(token3)); Assert.That(token1.GetStringLines(), Is.EquivalentTo(new[] { "; Hey", ";Whats", " ;Up?" })); Assert.That(token2.GetStringLines(), Is.EquivalentTo(new[] { " ", "\t", "" })); Assert.That(token3.GetStringLines(), Is.EquivalentTo(new[] { ";Baz" })); }
/* * Method: FindNextToken * * Find the next token. Return 'true' if one was found. False, otherwise. */ override internal bool FindNextToken() { int startPosition = _reader.Position; // Dealing with whitespace? if (_reader.SinkMultipleWhiteSpace()) { current = new WhitespaceToken(); return(true); } // Check for one-line comment else if (_reader.Sink("//")) { // Looks like a one-line comment. Follow it to the End-of-line _reader.SinkToEndOfLine(); current = new CommentToken(); return(true); } // Check for multi-line comment else if (_reader.Sink("/*")) { _reader.SinkUntil("*/"); // Was the ending */ found? if (_reader.EndOfLines) { // No. There was a /* without a */. Return this a syntax error token. current = new CSharpTokenizer.EndOfFileInsideCommentToken(); return(true); } current = new CommentToken(); return(true); } // Handle chars else if (_reader.Sink("\'")) { while (_reader.CurrentCharacter != '\'') { if (_reader.Sink("\\")) { /* reader.Skip the escape sequence. * This isn't exactly right. We should detect: * * simple-escape-sequence: one of \' \" \\ \0 \a \b \f \n \r \t \v * * hexadecimal-escape-sequence: * \x hex-digit hex-digit[opt] hex-digit[opt] hex-digit[opt] */ } _reader.SinkCharacter(); } if (_reader.SinkCharacter() != '\'') { Debug.Assert(false, "Code defect in tokenizer: Should have yielded a closing tick."); } current = new CSharpTokenizer.CharLiteralToken(); return(true); } // Check for verbatim string else if (_reader.Sink("@\"")) { do { // Inside a verbatim string "" is treated as a special character while (_reader.Sink("\"\"")) { } }while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"'); // Can't end a file inside a string if (_reader.EndOfLines) { current = new EndOfFileInsideStringToken(); return(true); } // reader.Skip the ending quote. current = new StringLiteralToken(); current.InnerText = _reader.GetCurrentMatchedString(startPosition).Substring(1); return(true); } // Check for a quoted string. else if (_reader.Sink("\"")) { while (_reader.CurrentCharacter == '\\' || _reader.MatchRegularStringLiteral()) { // See if we have an escape sequence. if (_reader.SinkCharacter() == '\\') { // This is probably an escape character. if (_reader.SinkStringEscape()) { // This isn't nearly right. We just do barely enough to make a string // with an embedded escape sequence return _some_ string whose start and // end match the real bounds of the string. } else { // This is a compiler error. _reader.SinkCharacter(); current = new CSharpTokenizer.UnrecognizedStringEscapeToken(); return(true); } } } // Is it a newline? if (TokenChar.IsNewLine(_reader.CurrentCharacter)) { current = new CSharpTokenizer.NewlineInsideStringToken(); return(true); } // Create the token. if (_reader.SinkCharacter() != '\"') { Debug.Assert(false, "Defect in tokenizer: Should have yielded a terminating quote."); } current = new StringLiteralToken(); return(true); } // Identifier or keyword? else if ( // From 2.4.2 Identifiers: A '@' can be used to prefix an identifier so that a keyword can be used as an identifier. _reader.CurrentCharacter == '@' || _reader.MatchNextIdentifierStart() ) { if (_reader.CurrentCharacter == '@') { _reader.SinkCharacter(); } // Now, the next character must be an identifier start. if (!_reader.SinkIdentifierStart()) { current = new ExpectedIdentifierToken(); return(true); } // Sink the rest of the identifier. while (_reader.SinkIdentifierPart()) { } string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition); switch (identifierOrKeyword) { default: if (Array.IndexOf(s_keywordList, identifierOrKeyword) >= 0) { current = new KeywordToken(); return(true); } // If the identifier starts with '@' then we need to strip it off. // The '@' is for escaping so that we can have an identifier called // the same thing as a reserved keyword (i.e. class, if, foreach, etc) string identifier = _reader.GetCurrentMatchedString(startPosition); if (identifier.StartsWith("@", StringComparison.Ordinal)) { identifier = identifier.Substring(1); } // Create the token. current = new IdentifierToken(); current.InnerText = identifier; return(true); case "false": case "true": current = new BooleanLiteralToken(); return(true); case "null": current = new CSharpTokenizer.NullLiteralToken(); return(true); } } // Open scope else if (_reader.Sink("{")) { current = new CSharpTokenizer.OpenScopeToken(); return(true); } // Close scope else if (_reader.Sink("}")) { current = new CSharpTokenizer.CloseScopeToken(); return(true); } // Hexidecimal integer literal else if (_reader.SinkIgnoreCase("0x")) { // Sink the hex digits. if (!_reader.SinkMultipleHexDigits()) { current = new ExpectedValidHexDigitToken(); return(true); } // Skip the L, U, l, u, ul, etc. _reader.SinkLongIntegerSuffix(); current = new HexIntegerLiteralToken(); return(true); } // Decimal integer literal else if (_reader.SinkMultipleDecimalDigits()) { // reader.Skip the L, U, l, u, ul, etc. _reader.SinkLongIntegerSuffix(); current = new DecimalIntegerLiteralToken(); return(true); } // Check for single-digit operators and punctuators else if (_reader.SinkOperatorOrPunctuator()) { current = new OperatorOrPunctuatorToken(); return(true); } // Preprocessor line else if (_reader.CurrentCharacter == '#') { if (_reader.Sink("#if")) { current = new OpenConditionalDirectiveToken(); } else if (_reader.Sink("#endif")) { current = new CloseConditionalDirectiveToken(); } else { current = new PreprocessorToken(); } _reader.SinkToEndOfLine(); return(true); } // We didn't recognize the token, so this is a syntax error. _reader.SinkCharacter(); current = new UnrecognizedToken(); return(true); }
private static void AssertAreEqualInternal(Token expectedToken, Token actualToken, int index = -1) { if ((expectedToken == null) && (actualToken == null)) { return; } if (expectedToken == null) { Assert.Fail(LexerHelper.GetAssertErrorMessage("expected is null, but actual is not null", index)); } if (actualToken == null) { Assert.Fail(LexerHelper.GetAssertErrorMessage("expected is not null, but actual is null", index)); } Assert.AreEqual(expectedToken.GetType(), actualToken.GetType(), LexerHelper.GetAssertErrorMessage($"actual type does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.StartPosition.Position, actualToken.Extent.StartPosition.Position, LexerHelper.GetAssertErrorMessage($"actual Start Position does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.StartPosition.LineNumber, actualToken.Extent.StartPosition.LineNumber, LexerHelper.GetAssertErrorMessage($"actual Start Line does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.StartPosition.ColumnNumber, actualToken.Extent.StartPosition.ColumnNumber, LexerHelper.GetAssertErrorMessage($"actual Start Column does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.EndPosition.Position, actualToken.Extent.EndPosition.Position, LexerHelper.GetAssertErrorMessage($"actual End Position does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.EndPosition.LineNumber, actualToken.Extent.EndPosition.LineNumber, LexerHelper.GetAssertErrorMessage($"actual End Line does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.EndPosition.ColumnNumber, actualToken.Extent.EndPosition.ColumnNumber, LexerHelper.GetAssertErrorMessage($"actual End Column does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.Text, actualToken.Extent.Text, LexerHelper.GetAssertErrorMessage($"actual Text does not match expected value", index)); switch (expectedToken) { case AliasIdentifierToken token: Assert.IsTrue( AliasIdentifierToken.AreEqual((AliasIdentifierToken)expectedToken, (AliasIdentifierToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case AttributeCloseToken token: Assert.IsTrue( AttributeCloseToken.AreEqual((AttributeCloseToken)expectedToken, (AttributeCloseToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case AttributeOpenToken token: Assert.IsTrue( AttributeOpenToken.AreEqual((AttributeOpenToken)expectedToken, (AttributeOpenToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case BlockCloseToken token: Assert.IsTrue( BlockCloseToken.AreEqual((BlockCloseToken)expectedToken, (BlockCloseToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case BlockOpenToken token: Assert.IsTrue( BlockOpenToken.AreEqual((BlockOpenToken)expectedToken, (BlockOpenToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case BooleanLiteralToken token: Assert.IsTrue( BooleanLiteralToken.AreEqual((BooleanLiteralToken)expectedToken, (BooleanLiteralToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case ColonToken token: Assert.IsTrue( ColonToken.AreEqual((ColonToken)expectedToken, (ColonToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case CommaToken token: Assert.IsTrue( CommaToken.AreEqual((CommaToken)expectedToken, (CommaToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case CommentToken token: Assert.IsTrue( CommentToken.AreEqual((CommentToken)expectedToken, (CommentToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case DotOperatorToken token: Assert.IsTrue( DotOperatorToken.AreEqual((DotOperatorToken)expectedToken, (DotOperatorToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case EqualsOperatorToken token: Assert.IsTrue( EqualsOperatorToken.AreEqual((EqualsOperatorToken)expectedToken, (EqualsOperatorToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case IdentifierToken token: Assert.IsTrue( IdentifierToken.AreEqual((IdentifierToken)expectedToken, (IdentifierToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case IntegerLiteralToken token: Assert.IsTrue( IntegerLiteralToken.AreEqual((IntegerLiteralToken)expectedToken, (IntegerLiteralToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case NullLiteralToken token: Assert.IsTrue( NullLiteralToken.AreEqual((NullLiteralToken)expectedToken, (NullLiteralToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case ParenthesisCloseToken token: Assert.IsTrue( ParenthesisCloseToken.AreEqual((ParenthesisCloseToken)expectedToken, (ParenthesisCloseToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case ParenthesisOpenToken token: Assert.IsTrue( ParenthesisOpenToken.AreEqual((ParenthesisOpenToken)expectedToken, (ParenthesisOpenToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case PragmaToken token: Assert.IsTrue( PragmaToken.AreEqual((PragmaToken)expectedToken, (PragmaToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case RealLiteralToken token: Assert.IsTrue( RealLiteralToken.AreEqual((RealLiteralToken)expectedToken, (RealLiteralToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case StatementEndToken token: Assert.IsTrue( StatementEndToken.AreEqual((StatementEndToken)expectedToken, (StatementEndToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case StringLiteralToken token: Assert.IsTrue( StringLiteralToken.AreEqual((StringLiteralToken)expectedToken, (StringLiteralToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case WhitespaceToken token: Assert.IsTrue( WhitespaceToken.AreEqual((WhitespaceToken)expectedToken, (WhitespaceToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; default: throw new NotImplementedException($"Cannot compare type '{expectedToken.GetType().Name}'"); } }
/* * Method: FindNextToken * * Find the next token. Return 'true' if one was found. False, otherwise. */ override internal bool FindNextToken() { int startPosition = _reader.Position; // VB docs claim whitespace is Unicode category Zs. However, // this category does not contain tabs. Assuming a less restrictive // definition for whitespace... if (_reader.SinkWhiteSpace()) { while (_reader.SinkWhiteSpace()) { } // Now, we need to check for the line continuation character. if (_reader.SinkLineContinuationCharacter()) // Line continuation is '_' { // Save the current position because we may need to come back here. int savePosition = _reader.Position - 1; // Skip all whitespace after the '_' while (_reader.SinkWhiteSpace()) { } // Now, skip all the newlines. // Need at least one newline for this to count as line continuation. int count = 0; while (_reader.SinkNewLine()) { ++count; } if (count > 0) { current = new VisualBasicTokenizer.LineContinuationToken(); return(true); } // Otherwise, fall back to plain old whitespace. _reader.Position = savePosition; } current = new WhitespaceToken(); return(true); } // Line terminators are separate from whitespace and are significant. else if (_reader.SinkNewLine()) { // We want one token per line terminator. current = new VisualBasicTokenizer.LineTerminatorToken(); return(true); } // Check for a comment--either those that start with ' or rem. else if (_reader.SinkLineCommentStart()) { // Skip to the first EOL. _reader.SinkToEndOfLine(); current = new CommentToken(); return(true); } // Identifier or keyword? else if ( // VB allows escaping of identifiers by surrounding them with [] // In other words, // Date is a keyword but, // [Date] is an identifier. _reader.CurrentCharacter == '[' || _reader.MatchNextIdentifierStart() ) { bool escapedIdentifier = false; if (_reader.CurrentCharacter == '[') { escapedIdentifier = true; _reader.SinkCharacter(); // Now, the next character must be an identifier start. if (!_reader.SinkIdentifierStart()) { current = new ExpectedIdentifierToken(); return(true); } } // Sink the rest of the identifier. while (_reader.SinkIdentifierPart()) { } // If this was an escaped identifier the we need to get the terminating ']'. if (escapedIdentifier) { if (!_reader.Sink("]")) { current = new ExpectedIdentifierToken(); return(true); } } else { // Escaped identifiers are not allowed to have trailing type character. _reader.SinkTypeCharacter(); // Type character is optional. } // An identifier that is only a '_' is illegal because it is // ambiguous with line continuation string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition); if (identifierOrKeyword == "_" || identifierOrKeyword == "[_]" || identifierOrKeyword == "[]") { current = new ExpectedIdentifierToken(); return(true); } // Make an upper-case version in order to check whether this may be a keyword. string upper = identifierOrKeyword.ToUpperInvariant(); switch (upper) { default: if (Array.IndexOf(s_keywordList, upper) >= 0) { current = new KeywordToken(); return(true); } // Create the token. current = new IdentifierToken(); // Trim off the [] if this is an escaped identifier. if (escapedIdentifier) { current.InnerText = identifierOrKeyword.Substring(1, identifierOrKeyword.Length - 2); } return(true); case "FALSE": case "TRUE": current = new BooleanLiteralToken(); return(true); } } // Is it a hex integer? else if (_reader.SinkHexIntegerPrefix()) { if (!_reader.SinkMultipleHexDigits()) { current = new ExpectedValidHexDigitToken(); return(true); } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new HexIntegerLiteralToken(); return(true); } // Is it an octal integer? else if (_reader.SinkOctalIntegerPrefix()) { if (!_reader.SinkMultipleOctalDigits()) { current = new VisualBasicTokenizer.ExpectedValidOctalDigitToken(); return(true); } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new VisualBasicTokenizer.OctalIntegerLiteralToken(); return(true); } // Is it a decimal integer? else if (_reader.SinkMultipleDecimalDigits()) { // Sink a suffix if there is one. _reader.SinkDecimalIntegerSuffix(); current = new DecimalIntegerLiteralToken(); return(true); } // Preprocessor line else if (_reader.CurrentCharacter == '#') { if (_reader.SinkIgnoreCase("#if")) { current = new OpenConditionalDirectiveToken(); } else if (_reader.SinkIgnoreCase("#end if")) { current = new CloseConditionalDirectiveToken(); } else { current = new PreprocessorToken(); } _reader.SinkToEndOfLine(); return(true); } // Is it a separator? else if (_reader.SinkSeparatorCharacter()) { current = new VisualBasicTokenizer.SeparatorToken(); return(true); } // Is it an operator? else if (_reader.SinkOperator()) { current = new OperatorToken(); return(true); } // A string? else if (_reader.Sink("\"")) { do { // Inside a verbatim string "" is treated as a special character while (_reader.Sink("\"\"")) { } }while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"'); // Can't end a file inside a string if (_reader.EndOfLines) { current = new EndOfFileInsideStringToken(); return(true); } current = new StringLiteralToken(); return(true); } // We didn't recognize the token, so this is a syntax error. _reader.SinkCharacter(); current = new UnrecognizedToken(); return(true); }
/* * Method: FindNextToken * * Find the next token. Return 'true' if one was found. False, otherwise. */ internal override bool FindNextToken() { int startPosition = _reader.Position; // VB docs claim whitespace is Unicode category Zs. However, // this category does not contain tabs. Assuming a less restrictive // definition for whitespace... if (_reader.SinkWhiteSpace()) { while (_reader.SinkWhiteSpace()) { } // Now, we need to check for the line continuation character. if (_reader.SinkLineContinuationCharacter()) // Line continuation is '_' { // Save the current position because we may need to come back here. int savePosition = _reader.Position - 1; // Skip all whitespace after the '_' while (_reader.SinkWhiteSpace()) { } // Now, skip all the newlines. // Need at least one newline for this to count as line continuation. int count = 0; while (_reader.SinkNewLine()) { ++count; } if (count > 0) { current = new VisualBasicTokenizer.LineContinuationToken(); return true; } // Otherwise, fall back to plain old whitespace. _reader.Position = savePosition; } current = new WhitespaceToken(); return true; } // Line terminators are separate from whitespace and are significant. else if (_reader.SinkNewLine()) { // We want one token per line terminator. current = new VisualBasicTokenizer.LineTerminatorToken(); return true; } // Check for a comment--either those that start with ' or rem. else if (_reader.SinkLineCommentStart()) { // Skip to the first EOL. _reader.SinkToEndOfLine(); current = new CommentToken(); return true; } // Identifier or keyword? else if ( // VB allows escaping of identifiers by surrounding them with [] // In other words, // Date is a keyword but, // [Date] is an identifier. _reader.CurrentCharacter == '[' || _reader.MatchNextIdentifierStart() ) { bool escapedIdentifier = false; if (_reader.CurrentCharacter == '[') { escapedIdentifier = true; _reader.SinkCharacter(); // Now, the next character must be an identifier start. if (!_reader.SinkIdentifierStart()) { current = new ExpectedIdentifierToken(); return true; } } // Sink the rest of the identifier. while (_reader.SinkIdentifierPart()) { } // If this was an escaped identifier the we need to get the terminating ']'. if (escapedIdentifier) { if (!_reader.Sink("]")) { current = new ExpectedIdentifierToken(); return true; } } else { // Escaped identifiers are not allowed to have trailing type character. _reader.SinkTypeCharacter(); // Type character is optional. } // An identifier that is only a '_' is illegal because it is // ambiguous with line continuation string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition); if (identifierOrKeyword == "_" || identifierOrKeyword == "[_]" || identifierOrKeyword == "[]") { current = new ExpectedIdentifierToken(); return true; } // Make an upper-case version in order to check whether this may be a keyword. string upper = identifierOrKeyword.ToUpper(CultureInfo.InvariantCulture); switch (upper) { default: if (Array.IndexOf(s_keywordList, upper) >= 0) { current = new KeywordToken(); return true; } // Create the token. current = new IdentifierToken(); // Trim off the [] if this is an escaped identifier. if (escapedIdentifier) { current.InnerText = identifierOrKeyword.Substring(1, identifierOrKeyword.Length - 2); } return true; case "FALSE": case "TRUE": current = new BooleanLiteralToken(); return true; } } // Is it a hex integer? else if (_reader.SinkHexIntegerPrefix()) { if (!_reader.SinkMultipleHexDigits()) { current = new ExpectedValidHexDigitToken(); return true; } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new HexIntegerLiteralToken(); return true; } // Is it an octal integer? else if (_reader.SinkOctalIntegerPrefix()) { if (!_reader.SinkMultipleOctalDigits()) { current = new VisualBasicTokenizer.ExpectedValidOctalDigitToken(); return true; } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new VisualBasicTokenizer.OctalIntegerLiteralToken(); return true; } // Is it a decimal integer? else if (_reader.SinkMultipleDecimalDigits()) { // Sink a suffix if there is one. _reader.SinkDecimalIntegerSuffix(); current = new DecimalIntegerLiteralToken(); return true; } // Preprocessor line else if (_reader.CurrentCharacter == '#') { if (_reader.SinkIgnoreCase("#if")) { current = new OpenConditionalDirectiveToken(); } else if (_reader.SinkIgnoreCase("#end if")) { current = new CloseConditionalDirectiveToken(); } else { current = new PreprocessorToken(); } _reader.SinkToEndOfLine(); return true; } // Is it a separator? else if (_reader.SinkSeparatorCharacter()) { current = new VisualBasicTokenizer.SeparatorToken(); return true; } // Is it an operator? else if (_reader.SinkOperator()) { current = new OperatorToken(); return true; } // A string? else if (_reader.Sink("\"")) { do { // Inside a verbatim string "" is treated as a special character while (_reader.Sink("\"\"")) { } } while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"'); // Can't end a file inside a string if (_reader.EndOfLines) { current = new EndOfFileInsideStringToken(); return true; } current = new StringLiteralToken(); return true; } // We didn't recognize the token, so this is a syntax error. _reader.SinkCharacter(); current = new UnrecognizedToken(); return true; }
/* * Method: FindNextToken * * Find the next token. Return 'true' if one was found. False, otherwise. */ override internal bool FindNextToken() { int startPosition = _reader.Position; // Dealing with whitespace? if (_reader.SinkMultipleWhiteSpace()) { current = new WhitespaceToken(); return true; } // Check for one-line comment else if (_reader.Sink("//")) { // Looks like a one-line comment. Follow it to the End-of-line _reader.SinkToEndOfLine(); current = new CommentToken(); return true; } // Check for multi-line comment else if (_reader.Sink("/*")) { _reader.SinkUntil("*/"); // Was the ending */ found? if (_reader.EndOfLines) { // No. There was a /* without a */. Return this a syntax error token. current = new CSharpTokenizer.EndOfFileInsideCommentToken(); return true; } current = new CommentToken(); return true; } // Handle chars else if (_reader.Sink("\'")) { while (_reader.CurrentCharacter != '\'') { if (_reader.Sink("\\")) { /* reader.Skip the escape sequence. This isn't exactly right. We should detect: simple-escape-sequence: one of \' \" \\ \0 \a \b \f \n \r \t \v hexadecimal-escape-sequence: \x hex-digit hex-digit[opt] hex-digit[opt] hex-digit[opt] */ } _reader.SinkCharacter(); } if (_reader.SinkCharacter() != '\'') { Debug.Assert(false, "Code defect in tokenizer: Should have yielded a closing tick."); } current = new CSharpTokenizer.CharLiteralToken(); return true; } // Check for verbatim string else if (_reader.Sink("@\"")) { do { // Inside a verbatim string "" is treated as a special character while (_reader.Sink("\"\"")) { } } while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"'); // Can't end a file inside a string if (_reader.EndOfLines) { current = new EndOfFileInsideStringToken(); return true; } // reader.Skip the ending quote. current = new StringLiteralToken(); current.InnerText = _reader.GetCurrentMatchedString(startPosition).Substring(1); return true; } // Check for a quoted string. else if (_reader.Sink("\"")) { while (_reader.CurrentCharacter == '\\' || _reader.MatchRegularStringLiteral()) { // See if we have an escape sequence. if (_reader.SinkCharacter() == '\\') { // This is probably an escape character. if (_reader.SinkStringEscape()) { // This isn't nearly right. We just do barely enough to make a string // with an embedded escape sequence return _some_ string whose start and // end match the real bounds of the string. } else { // This is a compiler error. _reader.SinkCharacter(); current = new CSharpTokenizer.UnrecognizedStringEscapeToken(); return true; } } } // Is it a newline? if (TokenChar.IsNewLine(_reader.CurrentCharacter)) { current = new CSharpTokenizer.NewlineInsideStringToken(); return true; } // Create the token. if (_reader.SinkCharacter() != '\"') { Debug.Assert(false, "Defect in tokenizer: Should have yielded a terminating quote."); } current = new StringLiteralToken(); return true; } // Identifier or keyword? else if ( // From 2.4.2 Identifiers: A '@' can be used to prefix an identifier so that a keyword can be used as an identifier. _reader.CurrentCharacter == '@' || _reader.MatchNextIdentifierStart() ) { if (_reader.CurrentCharacter == '@') { _reader.SinkCharacter(); } // Now, the next character must be an identifier start. if (!_reader.SinkIdentifierStart()) { current = new ExpectedIdentifierToken(); return true; } // Sink the rest of the identifier. while (_reader.SinkIdentifierPart()) { } string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition); switch (identifierOrKeyword) { default: if (Array.IndexOf(s_keywordList, identifierOrKeyword) >= 0) { current = new KeywordToken(); return true; } // If the identifier starts with '@' then we need to strip it off. // The '@' is for escaping so that we can have an identifier called // the same thing as a reserved keyword (i.e. class, if, foreach, etc) string identifier = _reader.GetCurrentMatchedString(startPosition); if (identifier.StartsWith("@", StringComparison.Ordinal)) { identifier = identifier.Substring(1); } // Create the token. current = new IdentifierToken(); current.InnerText = identifier; return true; case "false": case "true": current = new BooleanLiteralToken(); return true; case "null": current = new CSharpTokenizer.NullLiteralToken(); return true; } } // Open scope else if (_reader.Sink("{")) { current = new CSharpTokenizer.OpenScopeToken(); return true; } // Close scope else if (_reader.Sink("}")) { current = new CSharpTokenizer.CloseScopeToken(); return true; } // Hexidecimal integer literal else if (_reader.SinkIgnoreCase("0x")) { // Sink the hex digits. if (!_reader.SinkMultipleHexDigits()) { current = new ExpectedValidHexDigitToken(); return true; } // Skip the L, U, l, u, ul, etc. _reader.SinkLongIntegerSuffix(); current = new HexIntegerLiteralToken(); return true; } // Decimal integer literal else if (_reader.SinkMultipleDecimalDigits()) { // reader.Skip the L, U, l, u, ul, etc. _reader.SinkLongIntegerSuffix(); current = new DecimalIntegerLiteralToken(); return true; } // Check for single-digit operators and punctuators else if (_reader.SinkOperatorOrPunctuator()) { current = new OperatorOrPunctuatorToken(); return true; } // Preprocessor line else if (_reader.CurrentCharacter == '#') { if (_reader.Sink("#if")) { current = new OpenConditionalDirectiveToken(); } else if (_reader.Sink("#endif")) { current = new CloseConditionalDirectiveToken(); } else { current = new PreprocessorToken(); } _reader.SinkToEndOfLine(); return true; } // We didn't recognize the token, so this is a syntax error. _reader.SinkCharacter(); current = new UnrecognizedToken(); return true; }