private IExpression ParseLiteralExpression(ref IToken token) { if (token is NumberLiteralToken) { NumberLiteralToken numberLiteralToken = token as NumberLiteralToken; return(new NumericLiteralExpression(numberLiteralToken.Value)); } else if (token is StringLiteralToken) { StringLiteralToken stringLiteralToken = token as StringLiteralToken; return(new StringLiteralExpression(stringLiteralToken.Value)); } else if (token is BooleanLiteralToken) { BooleanLiteralToken booleanLiteralToken = token as BooleanLiteralToken; return(new BooleanLiteralExpression(booleanLiteralToken.Value)); } return(null); }
/* * Method: FindNextToken * * Find the next token. Return 'true' if one was found. False, otherwise. */ internal override bool FindNextToken() { int startPosition = _reader.Position; // VB docs claim whitespace is Unicode category Zs. However, // this category does not contain tabs. Assuming a less restrictive // definition for whitespace... if (_reader.SinkWhiteSpace()) { while (_reader.SinkWhiteSpace()) { } // Now, we need to check for the line continuation character. if (_reader.SinkLineContinuationCharacter()) // Line continuation is '_' { // Save the current position because we may need to come back here. int savePosition = _reader.Position - 1; // Skip all whitespace after the '_' while (_reader.SinkWhiteSpace()) { } // Now, skip all the newlines. // Need at least one newline for this to count as line continuation. int count = 0; while (_reader.SinkNewLine()) { ++count; } if (count > 0) { current = new VisualBasicTokenizer.LineContinuationToken(); return true; } // Otherwise, fall back to plain old whitespace. _reader.Position = savePosition; } current = new WhitespaceToken(); return true; } // Line terminators are separate from whitespace and are significant. else if (_reader.SinkNewLine()) { // We want one token per line terminator. current = new VisualBasicTokenizer.LineTerminatorToken(); return true; } // Check for a comment--either those that start with ' or rem. else if (_reader.SinkLineCommentStart()) { // Skip to the first EOL. _reader.SinkToEndOfLine(); current = new CommentToken(); return true; } // Identifier or keyword? else if ( // VB allows escaping of identifiers by surrounding them with [] // In other words, // Date is a keyword but, // [Date] is an identifier. _reader.CurrentCharacter == '[' || _reader.MatchNextIdentifierStart() ) { bool escapedIdentifier = false; if (_reader.CurrentCharacter == '[') { escapedIdentifier = true; _reader.SinkCharacter(); // Now, the next character must be an identifier start. if (!_reader.SinkIdentifierStart()) { current = new ExpectedIdentifierToken(); return true; } } // Sink the rest of the identifier. while (_reader.SinkIdentifierPart()) { } // If this was an escaped identifier the we need to get the terminating ']'. if (escapedIdentifier) { if (!_reader.Sink("]")) { current = new ExpectedIdentifierToken(); return true; } } else { // Escaped identifiers are not allowed to have trailing type character. _reader.SinkTypeCharacter(); // Type character is optional. } // An identifier that is only a '_' is illegal because it is // ambiguous with line continuation string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition); if (identifierOrKeyword == "_" || identifierOrKeyword == "[_]" || identifierOrKeyword == "[]") { current = new ExpectedIdentifierToken(); return true; } // Make an upper-case version in order to check whether this may be a keyword. string upper = identifierOrKeyword.ToUpper(CultureInfo.InvariantCulture); switch (upper) { default: if (Array.IndexOf(s_keywordList, upper) >= 0) { current = new KeywordToken(); return true; } // Create the token. current = new IdentifierToken(); // Trim off the [] if this is an escaped identifier. if (escapedIdentifier) { current.InnerText = identifierOrKeyword.Substring(1, identifierOrKeyword.Length - 2); } return true; case "FALSE": case "TRUE": current = new BooleanLiteralToken(); return true; } } // Is it a hex integer? else if (_reader.SinkHexIntegerPrefix()) { if (!_reader.SinkMultipleHexDigits()) { current = new ExpectedValidHexDigitToken(); return true; } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new HexIntegerLiteralToken(); return true; } // Is it an octal integer? else if (_reader.SinkOctalIntegerPrefix()) { if (!_reader.SinkMultipleOctalDigits()) { current = new VisualBasicTokenizer.ExpectedValidOctalDigitToken(); return true; } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new VisualBasicTokenizer.OctalIntegerLiteralToken(); return true; } // Is it a decimal integer? else if (_reader.SinkMultipleDecimalDigits()) { // Sink a suffix if there is one. _reader.SinkDecimalIntegerSuffix(); current = new DecimalIntegerLiteralToken(); return true; } // Preprocessor line else if (_reader.CurrentCharacter == '#') { if (_reader.SinkIgnoreCase("#if")) { current = new OpenConditionalDirectiveToken(); } else if (_reader.SinkIgnoreCase("#end if")) { current = new CloseConditionalDirectiveToken(); } else { current = new PreprocessorToken(); } _reader.SinkToEndOfLine(); return true; } // Is it a separator? else if (_reader.SinkSeparatorCharacter()) { current = new VisualBasicTokenizer.SeparatorToken(); return true; } // Is it an operator? else if (_reader.SinkOperator()) { current = new OperatorToken(); return true; } // A string? else if (_reader.Sink("\"")) { do { // Inside a verbatim string "" is treated as a special character while (_reader.Sink("\"\"")) { } } while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"'); // Can't end a file inside a string if (_reader.EndOfLines) { current = new EndOfFileInsideStringToken(); return true; } current = new StringLiteralToken(); return true; } // We didn't recognize the token, so this is a syntax error. _reader.SinkCharacter(); current = new UnrecognizedToken(); return true; }
/* * Method: FindNextToken * * Find the next token. Return 'true' if one was found. False, otherwise. */ override internal bool FindNextToken() { int startPosition = _reader.Position; // VB docs claim whitespace is Unicode category Zs. However, // this category does not contain tabs. Assuming a less restrictive // definition for whitespace... if (_reader.SinkWhiteSpace()) { while (_reader.SinkWhiteSpace()) { } // Now, we need to check for the line continuation character. if (_reader.SinkLineContinuationCharacter()) // Line continuation is '_' { // Save the current position because we may need to come back here. int savePosition = _reader.Position - 1; // Skip all whitespace after the '_' while (_reader.SinkWhiteSpace()) { } // Now, skip all the newlines. // Need at least one newline for this to count as line continuation. int count = 0; while (_reader.SinkNewLine()) { ++count; } if (count > 0) { current = new VisualBasicTokenizer.LineContinuationToken(); return(true); } // Otherwise, fall back to plain old whitespace. _reader.Position = savePosition; } current = new WhitespaceToken(); return(true); } // Line terminators are separate from whitespace and are significant. else if (_reader.SinkNewLine()) { // We want one token per line terminator. current = new VisualBasicTokenizer.LineTerminatorToken(); return(true); } // Check for a comment--either those that start with ' or rem. else if (_reader.SinkLineCommentStart()) { // Skip to the first EOL. _reader.SinkToEndOfLine(); current = new CommentToken(); return(true); } // Identifier or keyword? else if ( // VB allows escaping of identifiers by surrounding them with [] // In other words, // Date is a keyword but, // [Date] is an identifier. _reader.CurrentCharacter == '[' || _reader.MatchNextIdentifierStart() ) { bool escapedIdentifier = false; if (_reader.CurrentCharacter == '[') { escapedIdentifier = true; _reader.SinkCharacter(); // Now, the next character must be an identifier start. if (!_reader.SinkIdentifierStart()) { current = new ExpectedIdentifierToken(); return(true); } } // Sink the rest of the identifier. while (_reader.SinkIdentifierPart()) { } // If this was an escaped identifier the we need to get the terminating ']'. if (escapedIdentifier) { if (!_reader.Sink("]")) { current = new ExpectedIdentifierToken(); return(true); } } else { // Escaped identifiers are not allowed to have trailing type character. _reader.SinkTypeCharacter(); // Type character is optional. } // An identifier that is only a '_' is illegal because it is // ambiguous with line continuation string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition); if (identifierOrKeyword == "_" || identifierOrKeyword == "[_]" || identifierOrKeyword == "[]") { current = new ExpectedIdentifierToken(); return(true); } // Make an upper-case version in order to check whether this may be a keyword. string upper = identifierOrKeyword.ToUpperInvariant(); switch (upper) { default: if (Array.IndexOf(s_keywordList, upper) >= 0) { current = new KeywordToken(); return(true); } // Create the token. current = new IdentifierToken(); // Trim off the [] if this is an escaped identifier. if (escapedIdentifier) { current.InnerText = identifierOrKeyword.Substring(1, identifierOrKeyword.Length - 2); } return(true); case "FALSE": case "TRUE": current = new BooleanLiteralToken(); return(true); } } // Is it a hex integer? else if (_reader.SinkHexIntegerPrefix()) { if (!_reader.SinkMultipleHexDigits()) { current = new ExpectedValidHexDigitToken(); return(true); } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new HexIntegerLiteralToken(); return(true); } // Is it an octal integer? else if (_reader.SinkOctalIntegerPrefix()) { if (!_reader.SinkMultipleOctalDigits()) { current = new VisualBasicTokenizer.ExpectedValidOctalDigitToken(); return(true); } // Sink a suffix if there is one. _reader.SinkIntegerSuffix(); current = new VisualBasicTokenizer.OctalIntegerLiteralToken(); return(true); } // Is it a decimal integer? else if (_reader.SinkMultipleDecimalDigits()) { // Sink a suffix if there is one. _reader.SinkDecimalIntegerSuffix(); current = new DecimalIntegerLiteralToken(); return(true); } // Preprocessor line else if (_reader.CurrentCharacter == '#') { if (_reader.SinkIgnoreCase("#if")) { current = new OpenConditionalDirectiveToken(); } else if (_reader.SinkIgnoreCase("#end if")) { current = new CloseConditionalDirectiveToken(); } else { current = new PreprocessorToken(); } _reader.SinkToEndOfLine(); return(true); } // Is it a separator? else if (_reader.SinkSeparatorCharacter()) { current = new VisualBasicTokenizer.SeparatorToken(); return(true); } // Is it an operator? else if (_reader.SinkOperator()) { current = new OperatorToken(); return(true); } // A string? else if (_reader.Sink("\"")) { do { // Inside a verbatim string "" is treated as a special character while (_reader.Sink("\"\"")) { } }while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"'); // Can't end a file inside a string if (_reader.EndOfLines) { current = new EndOfFileInsideStringToken(); return(true); } current = new StringLiteralToken(); return(true); } // We didn't recognize the token, so this is a syntax error. _reader.SinkCharacter(); current = new UnrecognizedToken(); return(true); }
/* * Method: FindNextToken * * Find the next token. Return 'true' if one was found. False, otherwise. */ override internal bool FindNextToken() { int startPosition = _reader.Position; // Dealing with whitespace? if (_reader.SinkMultipleWhiteSpace()) { current = new WhitespaceToken(); return(true); } // Check for one-line comment else if (_reader.Sink("//")) { // Looks like a one-line comment. Follow it to the End-of-line _reader.SinkToEndOfLine(); current = new CommentToken(); return(true); } // Check for multi-line comment else if (_reader.Sink("/*")) { _reader.SinkUntil("*/"); // Was the ending */ found? if (_reader.EndOfLines) { // No. There was a /* without a */. Return this a syntax error token. current = new CSharpTokenizer.EndOfFileInsideCommentToken(); return(true); } current = new CommentToken(); return(true); } // Handle chars else if (_reader.Sink("\'")) { while (_reader.CurrentCharacter != '\'') { if (_reader.Sink("\\")) { /* reader.Skip the escape sequence. * This isn't exactly right. We should detect: * * simple-escape-sequence: one of \' \" \\ \0 \a \b \f \n \r \t \v * * hexadecimal-escape-sequence: * \x hex-digit hex-digit[opt] hex-digit[opt] hex-digit[opt] */ } _reader.SinkCharacter(); } if (_reader.SinkCharacter() != '\'') { Debug.Assert(false, "Code defect in tokenizer: Should have yielded a closing tick."); } current = new CSharpTokenizer.CharLiteralToken(); return(true); } // Check for verbatim string else if (_reader.Sink("@\"")) { do { // Inside a verbatim string "" is treated as a special character while (_reader.Sink("\"\"")) { } }while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"'); // Can't end a file inside a string if (_reader.EndOfLines) { current = new EndOfFileInsideStringToken(); return(true); } // reader.Skip the ending quote. current = new StringLiteralToken(); current.InnerText = _reader.GetCurrentMatchedString(startPosition).Substring(1); return(true); } // Check for a quoted string. else if (_reader.Sink("\"")) { while (_reader.CurrentCharacter == '\\' || _reader.MatchRegularStringLiteral()) { // See if we have an escape sequence. if (_reader.SinkCharacter() == '\\') { // This is probably an escape character. if (_reader.SinkStringEscape()) { // This isn't nearly right. We just do barely enough to make a string // with an embedded escape sequence return _some_ string whose start and // end match the real bounds of the string. } else { // This is a compiler error. _reader.SinkCharacter(); current = new CSharpTokenizer.UnrecognizedStringEscapeToken(); return(true); } } } // Is it a newline? if (TokenChar.IsNewLine(_reader.CurrentCharacter)) { current = new CSharpTokenizer.NewlineInsideStringToken(); return(true); } // Create the token. if (_reader.SinkCharacter() != '\"') { Debug.Assert(false, "Defect in tokenizer: Should have yielded a terminating quote."); } current = new StringLiteralToken(); return(true); } // Identifier or keyword? else if ( // From 2.4.2 Identifiers: A '@' can be used to prefix an identifier so that a keyword can be used as an identifier. _reader.CurrentCharacter == '@' || _reader.MatchNextIdentifierStart() ) { if (_reader.CurrentCharacter == '@') { _reader.SinkCharacter(); } // Now, the next character must be an identifier start. if (!_reader.SinkIdentifierStart()) { current = new ExpectedIdentifierToken(); return(true); } // Sink the rest of the identifier. while (_reader.SinkIdentifierPart()) { } string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition); switch (identifierOrKeyword) { default: if (Array.IndexOf(s_keywordList, identifierOrKeyword) >= 0) { current = new KeywordToken(); return(true); } // If the identifier starts with '@' then we need to strip it off. // The '@' is for escaping so that we can have an identifier called // the same thing as a reserved keyword (i.e. class, if, foreach, etc) string identifier = _reader.GetCurrentMatchedString(startPosition); if (identifier.StartsWith("@", StringComparison.Ordinal)) { identifier = identifier.Substring(1); } // Create the token. current = new IdentifierToken(); current.InnerText = identifier; return(true); case "false": case "true": current = new BooleanLiteralToken(); return(true); case "null": current = new CSharpTokenizer.NullLiteralToken(); return(true); } } // Open scope else if (_reader.Sink("{")) { current = new CSharpTokenizer.OpenScopeToken(); return(true); } // Close scope else if (_reader.Sink("}")) { current = new CSharpTokenizer.CloseScopeToken(); return(true); } // Hexidecimal integer literal else if (_reader.SinkIgnoreCase("0x")) { // Sink the hex digits. if (!_reader.SinkMultipleHexDigits()) { current = new ExpectedValidHexDigitToken(); return(true); } // Skip the L, U, l, u, ul, etc. _reader.SinkLongIntegerSuffix(); current = new HexIntegerLiteralToken(); return(true); } // Decimal integer literal else if (_reader.SinkMultipleDecimalDigits()) { // reader.Skip the L, U, l, u, ul, etc. _reader.SinkLongIntegerSuffix(); current = new DecimalIntegerLiteralToken(); return(true); } // Check for single-digit operators and punctuators else if (_reader.SinkOperatorOrPunctuator()) { current = new OperatorOrPunctuatorToken(); return(true); } // Preprocessor line else if (_reader.CurrentCharacter == '#') { if (_reader.Sink("#if")) { current = new OpenConditionalDirectiveToken(); } else if (_reader.Sink("#endif")) { current = new CloseConditionalDirectiveToken(); } else { current = new PreprocessorToken(); } _reader.SinkToEndOfLine(); return(true); } // We didn't recognize the token, so this is a syntax error. _reader.SinkCharacter(); current = new UnrecognizedToken(); return(true); }
public static (Token Token, Lexer NextLexer) ReadToken(Lexer lexer) { var reader = lexer.Reader; var peek = reader.Peek(); switch (peek.Value) { case '$': return(LexerEngine.ReadAliasIdentifierToken(reader)); case ']': return(LexerEngine.ReadAttributeCloseToken(reader)); case '[': return(LexerEngine.ReadAttributeOpenToken(reader)); case '}': return(LexerEngine.ReadBlockCloseToken(reader)); case '{': return(LexerEngine.ReadBlockOpenToken(reader)); case ':': return(LexerEngine.ReadColonToken(reader)); case ',': return(LexerEngine.ReadCommaToken(reader)); case '/': return(LexerEngine.ReadCommentToken(reader)); case '=': return(LexerEngine.ReadEqualsOperatorToken(reader)); case ')': return(LexerEngine.ReadParenthesisCloseToken(reader)); case '(': return(LexerEngine.ReadParenthesisOpenToken(reader)); case '#': return(LexerEngine.ReadPragmaToken(reader)); case ';': return(LexerEngine.ReadStatementEndToken(reader)); case '"': return(LexerEngine.ReadStringLiteralToken(reader)); case '+': case '-': return(LexerEngine.ReadNumericLiteralToken(reader)); case '.': // if the next character is a decimalDigit then we're reading a RealLiteralToken with // no leading digits before the decimal point (e.g. ".45"), otherwise we're reading // a DotOperatorToken (e.g. the "." in "MyPropertyValue = MyEnum.Value;") var readAhead = reader.Read().NextReader; if (!readAhead.Eof() && StringValidator.IsDecimalDigit(readAhead.Peek().Value)) { return(LexerEngine.ReadNumericLiteralToken(reader)); } else { return(LexerEngine.ReadDotOperatorToken(reader)); } case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': // firstIdentifierChar var(identifierToken, nextLexer) = LexerEngine.ReadIdentifierToken(reader); var normalized = identifierToken.Name.ToLowerInvariant(); switch (normalized) { case "false": var falseToken = new BooleanLiteralToken(identifierToken.Extent, false); return(falseToken, nextLexer); case "true": var trueToken = new BooleanLiteralToken(identifierToken.Extent, true); return(trueToken, nextLexer); case "null": var nullLiteralToken = new NullLiteralToken(identifierToken.Extent); return(nullLiteralToken, nextLexer); default: return(identifierToken, nextLexer); } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': // decimalDigit return(LexerEngine.ReadNumericLiteralToken(reader)); case '\u0020': // space case '\u0009': // horizontal tab case '\u000D': // carriage return case '\u000A': // line feed // WS return(LexerEngine.ReadWhitespaceToken(reader)); default: throw new UnexpectedCharacterException(peek); } }
private static void AssertAreEqualInternal(Token expectedToken, Token actualToken, int index = -1) { if ((expectedToken == null) && (actualToken == null)) { return; } if (expectedToken == null) { Assert.Fail(LexerHelper.GetAssertErrorMessage("expected is null, but actual is not null", index)); } if (actualToken == null) { Assert.Fail(LexerHelper.GetAssertErrorMessage("expected is not null, but actual is null", index)); } Assert.AreEqual(expectedToken.GetType(), actualToken.GetType(), LexerHelper.GetAssertErrorMessage($"actual type does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.StartPosition.Position, actualToken.Extent.StartPosition.Position, LexerHelper.GetAssertErrorMessage($"actual Start Position does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.StartPosition.LineNumber, actualToken.Extent.StartPosition.LineNumber, LexerHelper.GetAssertErrorMessage($"actual Start Line does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.StartPosition.ColumnNumber, actualToken.Extent.StartPosition.ColumnNumber, LexerHelper.GetAssertErrorMessage($"actual Start Column does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.EndPosition.Position, actualToken.Extent.EndPosition.Position, LexerHelper.GetAssertErrorMessage($"actual End Position does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.EndPosition.LineNumber, actualToken.Extent.EndPosition.LineNumber, LexerHelper.GetAssertErrorMessage($"actual End Line does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.EndPosition.ColumnNumber, actualToken.Extent.EndPosition.ColumnNumber, LexerHelper.GetAssertErrorMessage($"actual End Column does not match expected value", index)); Assert.AreEqual(expectedToken.Extent.Text, actualToken.Extent.Text, LexerHelper.GetAssertErrorMessage($"actual Text does not match expected value", index)); switch (expectedToken) { case AliasIdentifierToken token: Assert.IsTrue( AliasIdentifierToken.AreEqual((AliasIdentifierToken)expectedToken, (AliasIdentifierToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case AttributeCloseToken token: Assert.IsTrue( AttributeCloseToken.AreEqual((AttributeCloseToken)expectedToken, (AttributeCloseToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case AttributeOpenToken token: Assert.IsTrue( AttributeOpenToken.AreEqual((AttributeOpenToken)expectedToken, (AttributeOpenToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case BlockCloseToken token: Assert.IsTrue( BlockCloseToken.AreEqual((BlockCloseToken)expectedToken, (BlockCloseToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case BlockOpenToken token: Assert.IsTrue( BlockOpenToken.AreEqual((BlockOpenToken)expectedToken, (BlockOpenToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case BooleanLiteralToken token: Assert.IsTrue( BooleanLiteralToken.AreEqual((BooleanLiteralToken)expectedToken, (BooleanLiteralToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case ColonToken token: Assert.IsTrue( ColonToken.AreEqual((ColonToken)expectedToken, (ColonToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case CommaToken token: Assert.IsTrue( CommaToken.AreEqual((CommaToken)expectedToken, (CommaToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case CommentToken token: Assert.IsTrue( CommentToken.AreEqual((CommentToken)expectedToken, (CommentToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case DotOperatorToken token: Assert.IsTrue( DotOperatorToken.AreEqual((DotOperatorToken)expectedToken, (DotOperatorToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case EqualsOperatorToken token: Assert.IsTrue( EqualsOperatorToken.AreEqual((EqualsOperatorToken)expectedToken, (EqualsOperatorToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case IdentifierToken token: Assert.IsTrue( IdentifierToken.AreEqual((IdentifierToken)expectedToken, (IdentifierToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case IntegerLiteralToken token: Assert.IsTrue( IntegerLiteralToken.AreEqual((IntegerLiteralToken)expectedToken, (IntegerLiteralToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case NullLiteralToken token: Assert.IsTrue( NullLiteralToken.AreEqual((NullLiteralToken)expectedToken, (NullLiteralToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case ParenthesisCloseToken token: Assert.IsTrue( ParenthesisCloseToken.AreEqual((ParenthesisCloseToken)expectedToken, (ParenthesisCloseToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case ParenthesisOpenToken token: Assert.IsTrue( ParenthesisOpenToken.AreEqual((ParenthesisOpenToken)expectedToken, (ParenthesisOpenToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case PragmaToken token: Assert.IsTrue( PragmaToken.AreEqual((PragmaToken)expectedToken, (PragmaToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case RealLiteralToken token: Assert.IsTrue( RealLiteralToken.AreEqual((RealLiteralToken)expectedToken, (RealLiteralToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case StatementEndToken token: Assert.IsTrue( StatementEndToken.AreEqual((StatementEndToken)expectedToken, (StatementEndToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case StringLiteralToken token: Assert.IsTrue( StringLiteralToken.AreEqual((StringLiteralToken)expectedToken, (StringLiteralToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; case WhitespaceToken token: Assert.IsTrue( WhitespaceToken.AreEqual((WhitespaceToken)expectedToken, (WhitespaceToken)actualToken), LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index) ); break; default: throw new NotImplementedException($"Cannot compare type '{expectedToken.GetType().Name}'"); } }
public BooleanValueAst(BooleanLiteralToken token) { this.Token = token ?? throw new ArgumentNullException(nameof(token)); }
/* * Method: FindNextToken * * Find the next token. Return 'true' if one was found. False, otherwise. */ override internal bool FindNextToken() { int startPosition = _reader.Position; // Dealing with whitespace? if (_reader.SinkMultipleWhiteSpace()) { current = new WhitespaceToken(); return true; } // Check for one-line comment else if (_reader.Sink("//")) { // Looks like a one-line comment. Follow it to the End-of-line _reader.SinkToEndOfLine(); current = new CommentToken(); return true; } // Check for multi-line comment else if (_reader.Sink("/*")) { _reader.SinkUntil("*/"); // Was the ending */ found? if (_reader.EndOfLines) { // No. There was a /* without a */. Return this a syntax error token. current = new CSharpTokenizer.EndOfFileInsideCommentToken(); return true; } current = new CommentToken(); return true; } // Handle chars else if (_reader.Sink("\'")) { while (_reader.CurrentCharacter != '\'') { if (_reader.Sink("\\")) { /* reader.Skip the escape sequence. This isn't exactly right. We should detect: simple-escape-sequence: one of \' \" \\ \0 \a \b \f \n \r \t \v hexadecimal-escape-sequence: \x hex-digit hex-digit[opt] hex-digit[opt] hex-digit[opt] */ } _reader.SinkCharacter(); } if (_reader.SinkCharacter() != '\'') { Debug.Assert(false, "Code defect in tokenizer: Should have yielded a closing tick."); } current = new CSharpTokenizer.CharLiteralToken(); return true; } // Check for verbatim string else if (_reader.Sink("@\"")) { do { // Inside a verbatim string "" is treated as a special character while (_reader.Sink("\"\"")) { } } while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"'); // Can't end a file inside a string if (_reader.EndOfLines) { current = new EndOfFileInsideStringToken(); return true; } // reader.Skip the ending quote. current = new StringLiteralToken(); current.InnerText = _reader.GetCurrentMatchedString(startPosition).Substring(1); return true; } // Check for a quoted string. else if (_reader.Sink("\"")) { while (_reader.CurrentCharacter == '\\' || _reader.MatchRegularStringLiteral()) { // See if we have an escape sequence. if (_reader.SinkCharacter() == '\\') { // This is probably an escape character. if (_reader.SinkStringEscape()) { // This isn't nearly right. We just do barely enough to make a string // with an embedded escape sequence return _some_ string whose start and // end match the real bounds of the string. } else { // This is a compiler error. _reader.SinkCharacter(); current = new CSharpTokenizer.UnrecognizedStringEscapeToken(); return true; } } } // Is it a newline? if (TokenChar.IsNewLine(_reader.CurrentCharacter)) { current = new CSharpTokenizer.NewlineInsideStringToken(); return true; } // Create the token. if (_reader.SinkCharacter() != '\"') { Debug.Assert(false, "Defect in tokenizer: Should have yielded a terminating quote."); } current = new StringLiteralToken(); return true; } // Identifier or keyword? else if ( // From 2.4.2 Identifiers: A '@' can be used to prefix an identifier so that a keyword can be used as an identifier. _reader.CurrentCharacter == '@' || _reader.MatchNextIdentifierStart() ) { if (_reader.CurrentCharacter == '@') { _reader.SinkCharacter(); } // Now, the next character must be an identifier start. if (!_reader.SinkIdentifierStart()) { current = new ExpectedIdentifierToken(); return true; } // Sink the rest of the identifier. while (_reader.SinkIdentifierPart()) { } string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition); switch (identifierOrKeyword) { default: if (Array.IndexOf(s_keywordList, identifierOrKeyword) >= 0) { current = new KeywordToken(); return true; } // If the identifier starts with '@' then we need to strip it off. // The '@' is for escaping so that we can have an identifier called // the same thing as a reserved keyword (i.e. class, if, foreach, etc) string identifier = _reader.GetCurrentMatchedString(startPosition); if (identifier.StartsWith("@", StringComparison.Ordinal)) { identifier = identifier.Substring(1); } // Create the token. current = new IdentifierToken(); current.InnerText = identifier; return true; case "false": case "true": current = new BooleanLiteralToken(); return true; case "null": current = new CSharpTokenizer.NullLiteralToken(); return true; } } // Open scope else if (_reader.Sink("{")) { current = new CSharpTokenizer.OpenScopeToken(); return true; } // Close scope else if (_reader.Sink("}")) { current = new CSharpTokenizer.CloseScopeToken(); return true; } // Hexidecimal integer literal else if (_reader.SinkIgnoreCase("0x")) { // Sink the hex digits. if (!_reader.SinkMultipleHexDigits()) { current = new ExpectedValidHexDigitToken(); return true; } // Skip the L, U, l, u, ul, etc. _reader.SinkLongIntegerSuffix(); current = new HexIntegerLiteralToken(); return true; } // Decimal integer literal else if (_reader.SinkMultipleDecimalDigits()) { // reader.Skip the L, U, l, u, ul, etc. _reader.SinkLongIntegerSuffix(); current = new DecimalIntegerLiteralToken(); return true; } // Check for single-digit operators and punctuators else if (_reader.SinkOperatorOrPunctuator()) { current = new OperatorOrPunctuatorToken(); return true; } // Preprocessor line else if (_reader.CurrentCharacter == '#') { if (_reader.Sink("#if")) { current = new OpenConditionalDirectiveToken(); } else if (_reader.Sink("#endif")) { current = new CloseConditionalDirectiveToken(); } else { current = new PreprocessorToken(); } _reader.SinkToEndOfLine(); return true; } // We didn't recognize the token, so this is a syntax error. _reader.SinkCharacter(); current = new UnrecognizedToken(); return true; }