Beispiel #1
0
        public void EncodeTest(string Expected)
        {
            var Token  = new WhitespaceToken(Expected);
            var Actual = Token.Encode();

            Assert.Equal(Expected, Actual);
        }
Beispiel #2
0
        public void Dispatch(WhitespaceToken token)
        {
            if (this.currentWordToken.Length == 0)
            {
                // We haven't read any actual data from the current line yet so lets ignore leading spaces
                return;
            }

            if (this.isWhitespace)
            {
                // The last token was whitespace too so let's skip this one
                return;
            }

            // Increment the line word count
            this.currentLineWordCount++;

            // Save off the current word token and create a new instance for the next word.
            var wordToken = this.currentWordToken;

            this.currentWordToken = new WordToken();

            // Dispatch the current word token so it can be processed
            this.Dispatch(wordToken);

            this.isWhitespace = true;
        }
Beispiel #3
0
        public void EscapeStateTokenizeWhitespace()
        {
            var lexer    = new Lexer();
            var tokens   = lexer.Tokenize("\\s");
            var expected = new WhitespaceToken();

            Assert.AreEqual(tokens.Last(), expected);
        }
            public void When_HasNoLines_DoesNotAdd()
            {
                var token = new WhitespaceToken();

                Assert.That(() => token.Condense(), Throws.Nothing);

                Assert.That(token.Lines, Has.Count.EqualTo(0));
            }
            [TestCase("\r")]   //Mac
            public void When_HasOneLine_WithCustomNewline(string newline)
            {
                var token = new WhitespaceToken();

                token.Lines.Add("  ");

                Assert.That(() => token.Condense(newline), Throws.Nothing);

                Assert.That(token.Lines, Has.Count.EqualTo(1));
                Assert.That(token.GetStringLines(), Is.EquivalentTo(new[] { newline }));
            }
            public void When_HasOneLine()
            {
                var token = new WhitespaceToken();

                token.Lines.Add("    ");

                Assert.That(() => token.Condense(), Throws.Nothing);

                Assert.That(token.Lines, Has.Count.EqualTo(1));
                Assert.That(token.GetStringLines(), Is.EquivalentTo(new [] { Environment.NewLine }));
            }
            public void When_HasSingleWhitespaceTokens()
            {
                var section = new ConfigIniSection();
                var token1  = new WhitespaceToken(new[] { "", "\t", "  " }, LineEnding.None);

                section.Tokens.Add(token1);
                section.MergeConsecutiveTokens();

                Assert.That(section.Tokens, Has.Count.EqualTo(1));
                Assert.That(section.Tokens[0], Is.SameAs(token1));
                Assert.That(token1.GetStringLines(), Is.EquivalentTo(new[] { "", "\t", "  " }));
            }
Beispiel #8
0
        public void Test_3_5_11_Spaces_C()
        {
            Scanner lexer = this.GetLexer(" \t \r \n   ");
            object  obj   = lexer.GetToken();

            Assert.IsInstanceOfType(obj, typeof(WhitespaceToken));
            WhitespaceToken token = (WhitespaceToken)obj;

            Assert.IsTrue(token.IsValid);
            Assert.IsNull(token.ScanError);
            Assert.AreEqual(0, token.StartPosition.Position);
            Assert.AreEqual(8, token.StopPosition.Position);
        }
Beispiel #9
0
        public override TokenizationResult Tokenize(ref LexerRuntimeInfo info)
        {
            var startPosition  = info.Reader.CurrentPosition;
            var whitespaceSpan = info.Reader.ReadTillEndOfWhitespace();

            if (whitespaceSpan.Length == 0)
            {
                return(NoWhitespaceResult);
            }

            var token = new WhitespaceToken(startPosition, whitespaceSpan.Length);

            return(TokenizationResult.Successful(token));
        }
Beispiel #10
0
		public override InputStream TryRead(InputStream stream, out LexicToken token)
		{
			token = null;

			char firstChar = stream.Content[0];

			if (firstChar == ' ' ||
				firstChar == '\t' ||
				firstChar == '\n' ||
				firstChar == '\r')
			{
				token = new WhitespaceToken();
				return stream.Move(1);
			}

			return stream;
		}
Beispiel #11
0
        public override InputStream TryRead(InputStream stream, out LexicToken token)
        {
            token = null;

            char firstChar = stream.Content[0];

            if (firstChar == ' ' ||
                firstChar == '\t' ||
                firstChar == '\n' ||
                firstChar == '\r')
            {
                token = new WhitespaceToken();
                return(stream.Move(1));
            }

            return(stream);
        }
            public void When_Has3NonConsecutiveTokens()
            {
                var section = new ConfigIniSection();
                var token1  = new CommentToken(new[] { "; Hey", ";Whats", " ;Up?" }, LineEnding.None);
                var token2  = new WhitespaceToken(new[] { " ", "\t", "" }, LineEnding.None);
                var token3  = new CommentToken(new[] { ";Baz" }, LineEnding.None);

                section.Tokens.Add(token1);
                section.Tokens.Add(token2);
                section.Tokens.Add(token3);
                section.MergeConsecutiveTokens();

                Assert.That(section.Tokens, Has.Count.EqualTo(3));
                Assert.That(section.Tokens[0], Is.SameAs(token1));
                Assert.That(section.Tokens[1], Is.SameAs(token2));
                Assert.That(section.Tokens[2], Is.SameAs(token3));
                Assert.That(token1.GetStringLines(), Is.EquivalentTo(new[] { "; Hey", ";Whats", " ;Up?" }));
                Assert.That(token2.GetStringLines(), Is.EquivalentTo(new[] { " ", "\t", "" }));
                Assert.That(token3.GetStringLines(), Is.EquivalentTo(new[] { ";Baz" }));
            }
        /*
         * Method:  FindNextToken
         *
         * Find the next token. Return 'true' if one was found. False, otherwise.
         */
        override internal bool FindNextToken()
        {
            int startPosition = _reader.Position;

            // Dealing with whitespace?
            if (_reader.SinkMultipleWhiteSpace())
            {
                current = new WhitespaceToken();
                return(true);
            }
            // Check for one-line comment
            else if (_reader.Sink("//"))
            {
                // Looks like a one-line comment. Follow it to the End-of-line
                _reader.SinkToEndOfLine();

                current = new CommentToken();
                return(true);
            }
            // Check for multi-line comment
            else if (_reader.Sink("/*"))
            {
                _reader.SinkUntil("*/");

                // Was the ending */ found?
                if (_reader.EndOfLines)
                {
                    // No. There was a /* without a */. Return this a syntax error token.
                    current = new CSharpTokenizer.EndOfFileInsideCommentToken();
                    return(true);
                }

                current = new CommentToken();
                return(true);
            }
            // Handle chars
            else if (_reader.Sink("\'"))
            {
                while (_reader.CurrentCharacter != '\'')
                {
                    if (_reader.Sink("\\"))
                    {
                        /* reader.Skip the escape sequence.
                         *  This isn't exactly right. We should detect:
                         *
                         *  simple-escape-sequence: one of
                         \' \" \\ \0 \a \b \f \n \r \t \v
                         *
                         *  hexadecimal-escape-sequence:
                         *  \x   hex-digit   hex-digit[opt]   hex-digit[opt]  hex-digit[opt]
                         */
                    }

                    _reader.SinkCharacter();
                }

                if (_reader.SinkCharacter() != '\'')
                {
                    Debug.Assert(false, "Code defect in tokenizer: Should have yielded a closing tick.");
                }
                current = new CSharpTokenizer.CharLiteralToken();
                return(true);
            }
            // Check for verbatim string
            else if (_reader.Sink("@\""))
            {
                do
                {
                    // Inside a verbatim string "" is treated as a special character
                    while (_reader.Sink("\"\""))
                    {
                    }
                }while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"');

                // Can't end a file inside a string
                if (_reader.EndOfLines)
                {
                    current = new EndOfFileInsideStringToken();
                    return(true);
                }

                // reader.Skip the ending quote.
                current           = new StringLiteralToken();
                current.InnerText = _reader.GetCurrentMatchedString(startPosition).Substring(1);
                return(true);
            }
            // Check for a quoted string.
            else if (_reader.Sink("\""))
            {
                while (_reader.CurrentCharacter == '\\' || _reader.MatchRegularStringLiteral())
                {
                    // See if we have an escape sequence.
                    if (_reader.SinkCharacter() == '\\')
                    {
                        // This is probably an escape character.
                        if (_reader.SinkStringEscape())
                        {
                            // This isn't nearly right. We just do barely enough to make a string
                            // with an embedded escape sequence return _some_ string whose start and
                            // end match the real bounds of the string.
                        }
                        else
                        {
                            // This is a compiler error.
                            _reader.SinkCharacter();
                            current = new CSharpTokenizer.UnrecognizedStringEscapeToken();
                            return(true);
                        }
                    }
                }

                // Is it a newline?
                if (TokenChar.IsNewLine(_reader.CurrentCharacter))
                {
                    current = new CSharpTokenizer.NewlineInsideStringToken();
                    return(true);
                }

                // Create the token.
                if (_reader.SinkCharacter() != '\"')
                {
                    Debug.Assert(false, "Defect in tokenizer: Should have yielded a terminating quote.");
                }
                current = new StringLiteralToken();
                return(true);
            }
            // Identifier or keyword?
            else if
            (
                // From 2.4.2 Identifiers: A '@' can be used to prefix an identifier so that a keyword can be used as an identifier.
                _reader.CurrentCharacter == '@' ||
                _reader.MatchNextIdentifierStart()
            )
            {
                if (_reader.CurrentCharacter == '@')
                {
                    _reader.SinkCharacter();
                }

                // Now, the next character must be an identifier start.
                if (!_reader.SinkIdentifierStart())
                {
                    current = new ExpectedIdentifierToken();
                    return(true);
                }

                // Sink the rest of the identifier.
                while (_reader.SinkIdentifierPart())
                {
                }
                string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition);

                switch (identifierOrKeyword)
                {
                default:

                    if (Array.IndexOf(s_keywordList, identifierOrKeyword) >= 0)
                    {
                        current = new KeywordToken();
                        return(true);
                    }

                    // If the identifier starts with '@' then we need to strip it off.
                    // The '@' is for escaping so that we can have an identifier called
                    // the same thing as a reserved keyword (i.e. class, if, foreach, etc)
                    string identifier = _reader.GetCurrentMatchedString(startPosition);
                    if (identifier.StartsWith("@", StringComparison.Ordinal))
                    {
                        identifier = identifier.Substring(1);
                    }

                    // Create the token.
                    current           = new IdentifierToken();
                    current.InnerText = identifier;
                    return(true);

                case "false":
                case "true":
                    current = new BooleanLiteralToken();
                    return(true);

                case "null":
                    current = new CSharpTokenizer.NullLiteralToken();
                    return(true);
                }
            }
            // Open scope
            else if (_reader.Sink("{"))
            {
                current = new CSharpTokenizer.OpenScopeToken();
                return(true);
            }
            // Close scope
            else if (_reader.Sink("}"))
            {
                current = new CSharpTokenizer.CloseScopeToken();
                return(true);
            }
            // Hexidecimal integer literal
            else if (_reader.SinkIgnoreCase("0x"))
            {
                // Sink the hex digits.
                if (!_reader.SinkMultipleHexDigits())
                {
                    current = new ExpectedValidHexDigitToken();
                    return(true);
                }

                // Skip the L, U, l, u, ul, etc.
                _reader.SinkLongIntegerSuffix();

                current = new HexIntegerLiteralToken();
                return(true);
            }
            // Decimal integer literal
            else if (_reader.SinkMultipleDecimalDigits())
            {
                // reader.Skip the L, U, l, u, ul, etc.
                _reader.SinkLongIntegerSuffix();

                current = new DecimalIntegerLiteralToken();
                return(true);
            }
            // Check for single-digit operators and punctuators
            else if (_reader.SinkOperatorOrPunctuator())
            {
                current = new OperatorOrPunctuatorToken();
                return(true);
            }
            // Preprocessor line
            else if (_reader.CurrentCharacter == '#')
            {
                if (_reader.Sink("#if"))
                {
                    current = new OpenConditionalDirectiveToken();
                }
                else if (_reader.Sink("#endif"))
                {
                    current = new CloseConditionalDirectiveToken();
                }
                else
                {
                    current = new PreprocessorToken();
                }

                _reader.SinkToEndOfLine();

                return(true);
            }

            // We didn't recognize the token, so this is a syntax error.
            _reader.SinkCharacter();
            current = new UnrecognizedToken();
            return(true);
        }
        private static void AssertAreEqualInternal(Token expectedToken, Token actualToken, int index = -1)
        {
            if ((expectedToken == null) && (actualToken == null))
            {
                return;
            }
            if (expectedToken == null)
            {
                Assert.Fail(LexerHelper.GetAssertErrorMessage("expected is null, but actual is not null", index));
            }
            if (actualToken == null)
            {
                Assert.Fail(LexerHelper.GetAssertErrorMessage("expected is not null, but actual is null", index));
            }
            Assert.AreEqual(expectedToken.GetType(), actualToken.GetType(),
                            LexerHelper.GetAssertErrorMessage($"actual type does not match expected value", index));
            Assert.AreEqual(expectedToken.Extent.StartPosition.Position, actualToken.Extent.StartPosition.Position,
                            LexerHelper.GetAssertErrorMessage($"actual Start Position does not match expected value", index));
            Assert.AreEqual(expectedToken.Extent.StartPosition.LineNumber, actualToken.Extent.StartPosition.LineNumber,
                            LexerHelper.GetAssertErrorMessage($"actual Start Line does not match expected value", index));
            Assert.AreEqual(expectedToken.Extent.StartPosition.ColumnNumber, actualToken.Extent.StartPosition.ColumnNumber,
                            LexerHelper.GetAssertErrorMessage($"actual Start Column does not match expected value", index));
            Assert.AreEqual(expectedToken.Extent.EndPosition.Position, actualToken.Extent.EndPosition.Position,
                            LexerHelper.GetAssertErrorMessage($"actual End Position does not match expected value", index));
            Assert.AreEqual(expectedToken.Extent.EndPosition.LineNumber, actualToken.Extent.EndPosition.LineNumber,
                            LexerHelper.GetAssertErrorMessage($"actual End Line does not match expected value", index));
            Assert.AreEqual(expectedToken.Extent.EndPosition.ColumnNumber, actualToken.Extent.EndPosition.ColumnNumber,
                            LexerHelper.GetAssertErrorMessage($"actual End Column does not match expected value", index));
            Assert.AreEqual(expectedToken.Extent.Text, actualToken.Extent.Text,
                            LexerHelper.GetAssertErrorMessage($"actual Text does not match expected value", index));
            switch (expectedToken)
            {
            case AliasIdentifierToken token:
                Assert.IsTrue(
                    AliasIdentifierToken.AreEqual((AliasIdentifierToken)expectedToken, (AliasIdentifierToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case AttributeCloseToken token:
                Assert.IsTrue(
                    AttributeCloseToken.AreEqual((AttributeCloseToken)expectedToken, (AttributeCloseToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case AttributeOpenToken token:
                Assert.IsTrue(
                    AttributeOpenToken.AreEqual((AttributeOpenToken)expectedToken, (AttributeOpenToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case BlockCloseToken token:
                Assert.IsTrue(
                    BlockCloseToken.AreEqual((BlockCloseToken)expectedToken, (BlockCloseToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case BlockOpenToken token:
                Assert.IsTrue(
                    BlockOpenToken.AreEqual((BlockOpenToken)expectedToken, (BlockOpenToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case BooleanLiteralToken token:
                Assert.IsTrue(
                    BooleanLiteralToken.AreEqual((BooleanLiteralToken)expectedToken, (BooleanLiteralToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case ColonToken token:
                Assert.IsTrue(
                    ColonToken.AreEqual((ColonToken)expectedToken, (ColonToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case CommaToken token:
                Assert.IsTrue(
                    CommaToken.AreEqual((CommaToken)expectedToken, (CommaToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case CommentToken token:
                Assert.IsTrue(
                    CommentToken.AreEqual((CommentToken)expectedToken, (CommentToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case DotOperatorToken token:
                Assert.IsTrue(
                    DotOperatorToken.AreEqual((DotOperatorToken)expectedToken, (DotOperatorToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case EqualsOperatorToken token:
                Assert.IsTrue(
                    EqualsOperatorToken.AreEqual((EqualsOperatorToken)expectedToken, (EqualsOperatorToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case IdentifierToken token:
                Assert.IsTrue(
                    IdentifierToken.AreEqual((IdentifierToken)expectedToken, (IdentifierToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case IntegerLiteralToken token:
                Assert.IsTrue(
                    IntegerLiteralToken.AreEqual((IntegerLiteralToken)expectedToken, (IntegerLiteralToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case NullLiteralToken token:
                Assert.IsTrue(
                    NullLiteralToken.AreEqual((NullLiteralToken)expectedToken, (NullLiteralToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case ParenthesisCloseToken token:
                Assert.IsTrue(
                    ParenthesisCloseToken.AreEqual((ParenthesisCloseToken)expectedToken, (ParenthesisCloseToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case ParenthesisOpenToken token:
                Assert.IsTrue(
                    ParenthesisOpenToken.AreEqual((ParenthesisOpenToken)expectedToken, (ParenthesisOpenToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case PragmaToken token:
                Assert.IsTrue(
                    PragmaToken.AreEqual((PragmaToken)expectedToken, (PragmaToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case RealLiteralToken token:
                Assert.IsTrue(
                    RealLiteralToken.AreEqual((RealLiteralToken)expectedToken, (RealLiteralToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case StatementEndToken token:
                Assert.IsTrue(
                    StatementEndToken.AreEqual((StatementEndToken)expectedToken, (StatementEndToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case StringLiteralToken token:
                Assert.IsTrue(
                    StringLiteralToken.AreEqual((StringLiteralToken)expectedToken, (StringLiteralToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            case WhitespaceToken token:
                Assert.IsTrue(
                    WhitespaceToken.AreEqual((WhitespaceToken)expectedToken, (WhitespaceToken)actualToken),
                    LexerHelper.GetAssertErrorMessage($"actual token does not match expected token", index)
                    );
                break;

            default:
                throw new NotImplementedException($"Cannot compare type '{expectedToken.GetType().Name}'");
            }
        }
Beispiel #15
0
        /*
         * Method:  FindNextToken
         *
         * Find the next token. Return 'true' if one was found. False, otherwise.
         */
        override internal bool FindNextToken()
        {
            int startPosition = _reader.Position;

            // VB docs claim whitespace is Unicode category Zs. However,
            // this category does not contain tabs. Assuming a less restrictive
            // definition for whitespace...
            if (_reader.SinkWhiteSpace())
            {
                while (_reader.SinkWhiteSpace())
                {
                }

                // Now, we need to check for the line continuation character.
                if (_reader.SinkLineContinuationCharacter())    // Line continuation is '_'
                {
                    // Save the current position because we may need to come back here.
                    int savePosition = _reader.Position - 1;

                    // Skip all whitespace after the '_'
                    while (_reader.SinkWhiteSpace())
                    {
                    }

                    // Now, skip all the newlines.
                    // Need at least one newline for this to count as line continuation.
                    int count = 0;
                    while (_reader.SinkNewLine())
                    {
                        ++count;
                    }

                    if (count > 0)
                    {
                        current = new VisualBasicTokenizer.LineContinuationToken();
                        return(true);
                    }

                    // Otherwise, fall back to plain old whitespace.
                    _reader.Position = savePosition;
                }

                current = new WhitespaceToken();
                return(true);
            }
            // Line terminators are separate from whitespace and are significant.
            else if (_reader.SinkNewLine())
            {
                // We want one token per line terminator.
                current = new VisualBasicTokenizer.LineTerminatorToken();
                return(true);
            }
            // Check for a comment--either those that start with ' or rem.
            else if (_reader.SinkLineCommentStart())
            {
                // Skip to the first EOL.
                _reader.SinkToEndOfLine();

                current = new CommentToken();
                return(true);
            }
            // Identifier or keyword?
            else if
            (
                // VB allows escaping of identifiers by surrounding them with []
                // In other words,
                //      Date is a keyword but,
                //      [Date] is an identifier.
                _reader.CurrentCharacter == '[' ||
                _reader.MatchNextIdentifierStart()
            )
            {
                bool escapedIdentifier = false;
                if (_reader.CurrentCharacter == '[')
                {
                    escapedIdentifier = true;
                    _reader.SinkCharacter();

                    // Now, the next character must be an identifier start.
                    if (!_reader.SinkIdentifierStart())
                    {
                        current = new ExpectedIdentifierToken();
                        return(true);
                    }
                }

                // Sink the rest of the identifier.
                while (_reader.SinkIdentifierPart())
                {
                }

                // If this was an escaped identifier the we need to get the terminating ']'.
                if (escapedIdentifier)
                {
                    if (!_reader.Sink("]"))
                    {
                        current = new ExpectedIdentifierToken();
                        return(true);
                    }
                }
                else
                {
                    // Escaped identifiers are not allowed to have trailing type character.
                    _reader.SinkTypeCharacter(); // Type character is optional.
                }

                // An identifier that is only a '_' is illegal because it is
                // ambiguous with line continuation
                string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition);
                if (identifierOrKeyword == "_" || identifierOrKeyword == "[_]" || identifierOrKeyword == "[]")
                {
                    current = new ExpectedIdentifierToken();
                    return(true);
                }

                // Make an upper-case version in order to check whether this may be a keyword.
                string upper = identifierOrKeyword.ToUpperInvariant();

                switch (upper)
                {
                default:

                    if (Array.IndexOf(s_keywordList, upper) >= 0)
                    {
                        current = new KeywordToken();
                        return(true);
                    }

                    // Create the token.
                    current = new IdentifierToken();

                    // Trim off the [] if this is an escaped identifier.
                    if (escapedIdentifier)
                    {
                        current.InnerText = identifierOrKeyword.Substring(1, identifierOrKeyword.Length - 2);
                    }
                    return(true);

                case "FALSE":
                case "TRUE":
                    current = new BooleanLiteralToken();
                    return(true);
                }
            }
            // Is it a hex integer?
            else if (_reader.SinkHexIntegerPrefix())
            {
                if (!_reader.SinkMultipleHexDigits())
                {
                    current = new ExpectedValidHexDigitToken();
                    return(true);
                }

                // Sink a suffix if there is one.
                _reader.SinkIntegerSuffix();

                current = new HexIntegerLiteralToken();
                return(true);
            }
            // Is it an octal integer?
            else if (_reader.SinkOctalIntegerPrefix())
            {
                if (!_reader.SinkMultipleOctalDigits())
                {
                    current = new VisualBasicTokenizer.ExpectedValidOctalDigitToken();
                    return(true);
                }

                // Sink a suffix if there is one.
                _reader.SinkIntegerSuffix();

                current = new VisualBasicTokenizer.OctalIntegerLiteralToken();
                return(true);
            }
            // Is it a decimal integer?
            else if (_reader.SinkMultipleDecimalDigits())
            {
                // Sink a suffix if there is one.
                _reader.SinkDecimalIntegerSuffix();

                current = new DecimalIntegerLiteralToken();
                return(true);
            }
            // Preprocessor line
            else if (_reader.CurrentCharacter == '#')
            {
                if (_reader.SinkIgnoreCase("#if"))
                {
                    current = new OpenConditionalDirectiveToken();
                }
                else if (_reader.SinkIgnoreCase("#end if"))
                {
                    current = new CloseConditionalDirectiveToken();
                }
                else
                {
                    current = new PreprocessorToken();
                }

                _reader.SinkToEndOfLine();

                return(true);
            }
            // Is it a separator?
            else if (_reader.SinkSeparatorCharacter())
            {
                current = new VisualBasicTokenizer.SeparatorToken();
                return(true);
            }
            // Is it an operator?
            else if (_reader.SinkOperator())
            {
                current = new OperatorToken();
                return(true);
            }
            // A string?
            else if (_reader.Sink("\""))
            {
                do
                {
                    // Inside a verbatim string "" is treated as a special character
                    while (_reader.Sink("\"\""))
                    {
                    }
                }while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"');

                // Can't end a file inside a string
                if (_reader.EndOfLines)
                {
                    current = new EndOfFileInsideStringToken();
                    return(true);
                }

                current = new StringLiteralToken();
                return(true);
            }


            // We didn't recognize the token, so this is a syntax error.
            _reader.SinkCharacter();
            current = new UnrecognizedToken();
            return(true);
        }
Beispiel #16
0
        /*
        * Method:  FindNextToken
        *
        * Find the next token. Return 'true' if one was found. False, otherwise.
        */
        internal override bool FindNextToken()
        {
            int startPosition = _reader.Position;

            // VB docs claim whitespace is Unicode category Zs. However,
            // this category does not contain tabs. Assuming a less restrictive
            // definition for whitespace...
            if (_reader.SinkWhiteSpace())
            {
                while (_reader.SinkWhiteSpace())
                {
                }

                // Now, we need to check for the line continuation character.
                if (_reader.SinkLineContinuationCharacter())    // Line continuation is '_'
                {
                    // Save the current position because we may need to come back here.
                    int savePosition = _reader.Position - 1;

                    // Skip all whitespace after the '_'
                    while (_reader.SinkWhiteSpace())
                    {
                    }

                    // Now, skip all the newlines.
                    // Need at least one newline for this to count as line continuation.
                    int count = 0;
                    while (_reader.SinkNewLine())
                    {
                        ++count;
                    }

                    if (count > 0)
                    {
                        current = new VisualBasicTokenizer.LineContinuationToken();
                        return true;
                    }

                    // Otherwise, fall back to plain old whitespace.
                    _reader.Position = savePosition;
                }

                current = new WhitespaceToken();
                return true;
            }
            // Line terminators are separate from whitespace and are significant.
            else if (_reader.SinkNewLine())
            {
                // We want one token per line terminator.
                current = new VisualBasicTokenizer.LineTerminatorToken();
                return true;
            }
            // Check for a comment--either those that start with ' or rem.
            else if (_reader.SinkLineCommentStart())
            {
                // Skip to the first EOL.
                _reader.SinkToEndOfLine();

                current = new CommentToken();
                return true;
            }
            // Identifier or keyword?
            else if
            (
                // VB allows escaping of identifiers by surrounding them with []
                // In other words,
                //      Date is a keyword but,
                //      [Date] is an identifier.
                _reader.CurrentCharacter == '[' ||
                _reader.MatchNextIdentifierStart()
            )
            {
                bool escapedIdentifier = false;
                if (_reader.CurrentCharacter == '[')
                {
                    escapedIdentifier = true;
                    _reader.SinkCharacter();

                    // Now, the next character must be an identifier start.
                    if (!_reader.SinkIdentifierStart())
                    {
                        current = new ExpectedIdentifierToken();
                        return true;
                    }
                }

                // Sink the rest of the identifier.
                while (_reader.SinkIdentifierPart())
                {
                }

                // If this was an escaped identifier the we need to get the terminating ']'.
                if (escapedIdentifier)
                {
                    if (!_reader.Sink("]"))
                    {
                        current = new ExpectedIdentifierToken();
                        return true;
                    }
                }
                else
                {
                    // Escaped identifiers are not allowed to have trailing type character.
                    _reader.SinkTypeCharacter(); // Type character is optional.
                }

                // An identifier that is only a '_' is illegal because it is
                // ambiguous with line continuation
                string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition);
                if (identifierOrKeyword == "_" || identifierOrKeyword == "[_]" || identifierOrKeyword == "[]")
                {
                    current = new ExpectedIdentifierToken();
                    return true;
                }

                // Make an upper-case version in order to check whether this may be a keyword.
                string upper = identifierOrKeyword.ToUpper(CultureInfo.InvariantCulture);

                switch (upper)
                {
                    default:

                        if (Array.IndexOf(s_keywordList, upper) >= 0)
                        {
                            current = new KeywordToken();
                            return true;
                        }

                        // Create the token.
                        current = new IdentifierToken();

                        // Trim off the [] if this is an escaped identifier.
                        if (escapedIdentifier)
                        {
                            current.InnerText = identifierOrKeyword.Substring(1, identifierOrKeyword.Length - 2);
                        }
                        return true;
                    case "FALSE":
                    case "TRUE":
                        current = new BooleanLiteralToken();
                        return true;
                }
            }
            // Is it a hex integer?
            else if (_reader.SinkHexIntegerPrefix())
            {
                if (!_reader.SinkMultipleHexDigits())
                {
                    current = new ExpectedValidHexDigitToken();
                    return true;
                }

                // Sink a suffix if there is one.
                _reader.SinkIntegerSuffix();

                current = new HexIntegerLiteralToken();
                return true;
            }
            // Is it an octal integer?
            else if (_reader.SinkOctalIntegerPrefix())
            {
                if (!_reader.SinkMultipleOctalDigits())
                {
                    current = new VisualBasicTokenizer.ExpectedValidOctalDigitToken();
                    return true;
                }

                // Sink a suffix if there is one.
                _reader.SinkIntegerSuffix();

                current = new VisualBasicTokenizer.OctalIntegerLiteralToken();
                return true;
            }
            // Is it a decimal integer?
            else if (_reader.SinkMultipleDecimalDigits())
            {
                // Sink a suffix if there is one.
                _reader.SinkDecimalIntegerSuffix();

                current = new DecimalIntegerLiteralToken();
                return true;
            }
            // Preprocessor line
            else if (_reader.CurrentCharacter == '#')
            {
                if (_reader.SinkIgnoreCase("#if"))
                {
                    current = new OpenConditionalDirectiveToken();
                }
                else if (_reader.SinkIgnoreCase("#end if"))
                {
                    current = new CloseConditionalDirectiveToken();
                }
                else
                {
                    current = new PreprocessorToken();
                }

                _reader.SinkToEndOfLine();

                return true;
            }
            // Is it a separator?
            else if (_reader.SinkSeparatorCharacter())
            {
                current = new VisualBasicTokenizer.SeparatorToken();
                return true;
            }
            // Is it an operator?
            else if (_reader.SinkOperator())
            {
                current = new OperatorToken();
                return true;
            }
            // A string?
            else if (_reader.Sink("\""))
            {
                do
                {
                    // Inside a verbatim string "" is treated as a special character
                    while (_reader.Sink("\"\""))
                    {
                    }
                }
                while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"');

                // Can't end a file inside a string
                if (_reader.EndOfLines)
                {
                    current = new EndOfFileInsideStringToken();
                    return true;
                }

                current = new StringLiteralToken();
                return true;
            }

            // We didn't recognize the token, so this is a syntax error.
            _reader.SinkCharacter();
            current = new UnrecognizedToken();
            return true;
        }
        /*
        * Method:  FindNextToken
        * 
        * Find the next token. Return 'true' if one was found. False, otherwise.
        */
        override internal bool FindNextToken()
        {
            int startPosition = _reader.Position;

            // Dealing with whitespace?
            if (_reader.SinkMultipleWhiteSpace())
            {
                current = new WhitespaceToken();
                return true;
            }
            // Check for one-line comment
            else if (_reader.Sink("//"))
            {
                // Looks like a one-line comment. Follow it to the End-of-line
                _reader.SinkToEndOfLine();

                current = new CommentToken();
                return true;
            }
            // Check for multi-line comment
            else if (_reader.Sink("/*"))
            {
                _reader.SinkUntil("*/");

                // Was the ending */ found?
                if (_reader.EndOfLines)
                {
                    // No. There was a /* without a */. Return this a syntax error token.
                    current = new CSharpTokenizer.EndOfFileInsideCommentToken();
                    return true;
                }

                current = new CommentToken();
                return true;
            }
            // Handle chars
            else if (_reader.Sink("\'"))
            {
                while (_reader.CurrentCharacter != '\'')
                {
                    if (_reader.Sink("\\"))
                    {
                        /* reader.Skip the escape sequence. 
                            This isn't exactly right. We should detect:
                            
                            simple-escape-sequence: one of 
                            \' \" \\ \0 \a \b \f \n \r \t \v 
                            
                            hexadecimal-escape-sequence: 
                            \x   hex-digit   hex-digit[opt]   hex-digit[opt]  hex-digit[opt]                                
                        */
                    }

                    _reader.SinkCharacter();
                }

                if (_reader.SinkCharacter() != '\'')
                {
                    Debug.Assert(false, "Code defect in tokenizer: Should have yielded a closing tick.");
                }
                current = new CSharpTokenizer.CharLiteralToken();
                return true;
            }
            // Check for verbatim string
            else if (_reader.Sink("@\""))
            {
                do
                {
                    // Inside a verbatim string "" is treated as a special character
                    while (_reader.Sink("\"\""))
                    {
                    }
                }
                while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"');

                // Can't end a file inside a string 
                if (_reader.EndOfLines)
                {
                    current = new EndOfFileInsideStringToken();
                    return true;
                }

                // reader.Skip the ending quote.
                current = new StringLiteralToken();
                current.InnerText = _reader.GetCurrentMatchedString(startPosition).Substring(1);
                return true;
            }
            // Check for a quoted string.
            else if (_reader.Sink("\""))
            {
                while (_reader.CurrentCharacter == '\\' || _reader.MatchRegularStringLiteral())
                {
                    // See if we have an escape sequence.
                    if (_reader.SinkCharacter() == '\\')
                    {
                        // This is probably an escape character.
                        if (_reader.SinkStringEscape())
                        {
                            // This isn't nearly right. We just do barely enough to make a string
                            // with an embedded escape sequence return _some_ string whose start and 
                            // end match the real bounds of the string.
                        }
                        else
                        {
                            // This is a compiler error. 
                            _reader.SinkCharacter();
                            current = new CSharpTokenizer.UnrecognizedStringEscapeToken();
                            return true;
                        }
                    }
                }

                // Is it a newline?
                if (TokenChar.IsNewLine(_reader.CurrentCharacter))
                {
                    current = new CSharpTokenizer.NewlineInsideStringToken();
                    return true;
                }

                // Create the token.
                if (_reader.SinkCharacter() != '\"')
                {
                    Debug.Assert(false, "Defect in tokenizer: Should have yielded a terminating quote.");
                }
                current = new StringLiteralToken();
                return true;
            }
            // Identifier or keyword?
            else if
            (
                // From 2.4.2 Identifiers: A '@' can be used to prefix an identifier so that a keyword can be used as an identifier.
                _reader.CurrentCharacter == '@' ||
                _reader.MatchNextIdentifierStart()
            )
            {
                if (_reader.CurrentCharacter == '@')
                {
                    _reader.SinkCharacter();
                }

                // Now, the next character must be an identifier start.
                if (!_reader.SinkIdentifierStart())
                {
                    current = new ExpectedIdentifierToken();
                    return true;
                }

                // Sink the rest of the identifier.                     
                while (_reader.SinkIdentifierPart())
                {
                }
                string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition);

                switch (identifierOrKeyword)
                {
                    default:

                        if (Array.IndexOf(s_keywordList, identifierOrKeyword) >= 0)
                        {
                            current = new KeywordToken();
                            return true;
                        }

                        // If the identifier starts with '@' then we need to strip it off.
                        // The '@' is for escaping so that we can have an identifier called
                        // the same thing as a reserved keyword (i.e. class, if, foreach, etc)
                        string identifier = _reader.GetCurrentMatchedString(startPosition);
                        if (identifier.StartsWith("@", StringComparison.Ordinal))
                        {
                            identifier = identifier.Substring(1);
                        }

                        // Create the token.
                        current = new IdentifierToken();
                        current.InnerText = identifier;
                        return true;
                    case "false":
                    case "true":
                        current = new BooleanLiteralToken();
                        return true;
                    case "null":
                        current = new CSharpTokenizer.NullLiteralToken();
                        return true;
                }
            }
            // Open scope
            else if (_reader.Sink("{"))
            {
                current = new CSharpTokenizer.OpenScopeToken();
                return true;
            }
            // Close scope
            else if (_reader.Sink("}"))
            {
                current = new CSharpTokenizer.CloseScopeToken();
                return true;
            }
            // Hexidecimal integer literal
            else if (_reader.SinkIgnoreCase("0x"))
            {
                // Sink the hex digits.
                if (!_reader.SinkMultipleHexDigits())
                {
                    current = new ExpectedValidHexDigitToken();
                    return true;
                }

                // Skip the L, U, l, u, ul, etc.                    
                _reader.SinkLongIntegerSuffix();

                current = new HexIntegerLiteralToken();
                return true;
            }
            // Decimal integer literal
            else if (_reader.SinkMultipleDecimalDigits())
            {
                // reader.Skip the L, U, l, u, ul, etc.                    
                _reader.SinkLongIntegerSuffix();

                current = new DecimalIntegerLiteralToken();
                return true;
            }
            // Check for single-digit operators and punctuators
            else if (_reader.SinkOperatorOrPunctuator())
            {
                current = new OperatorOrPunctuatorToken();
                return true;
            }
            // Preprocessor line
            else if (_reader.CurrentCharacter == '#')
            {
                if (_reader.Sink("#if"))
                {
                    current = new OpenConditionalDirectiveToken();
                }
                else if (_reader.Sink("#endif"))
                {
                    current = new CloseConditionalDirectiveToken();
                }
                else
                {
                    current = new PreprocessorToken();
                }

                _reader.SinkToEndOfLine();

                return true;
            }

            // We didn't recognize the token, so this is a syntax error. 
            _reader.SinkCharacter();
            current = new UnrecognizedToken();
            return true;
        }