Пример #1
0
        /// <summary>
        /// Builds an <see cref="ExternStatement"/> from the code following <paramref name="keywordToken"/>:
        /// an optional data type, the function name and, when that name resolves to a known
        /// function definition, its argument list and function attributes.
        /// </summary>
        /// <param name="scope">Scope supplying the code reader and the definition provider.</param>
        /// <param name="keywordToken">The keyword token that introduced the statement; added first.</param>
        /// <returns>The statement with every token that could be read; never null.</returns>
        public static ExternStatement Parse(Scope scope, KeywordToken keywordToken)
        {
            var code      = scope.Code;
            var statement = new ExternStatement(scope);

            statement.AddToken(keywordToken);

            // Tokens produced while parsing the data type are attached to the statement through the callback.
            var parseArgs = new DataType.ParseArgs
            {
                Code                = code,
                Scope               = scope,
                VariableCallback    = varName => scope.DefinitionProvider.GetLocal<VariableDefinition>(code.Position, varName).FirstOrDefault(),
                DataTypeCallback    = typeName => scope.DefinitionProvider.GetAny<DataTypeDefinition>(code.Position, typeName).FirstOrDefault(),
                TokenCreateCallback = token => { statement.AddToken(token); },
                VisibleModel        = true
            };

            var dataType = DataType.TryParse(parseArgs);
            if (dataType == null)
            {
                // Fall back to int when no data type could be parsed.
                // NOTE(review): the value is not read again within this method.
                dataType = DataType.Int;
            }

            // Function name.
            if (!code.ReadWord())
            {
                return statement;
            }

            var identifier     = code.Text;
            var identifierSpan = code.Span;
            var definition     = scope.DefinitionProvider
                                      .GetAny<FunctionDefinition>(identifierSpan.Start, identifier)
                                      .FirstOrDefault();

            if (definition == null)
            {
                // No matching function definition: keep the word as an unknown token.
                statement.AddToken(new UnknownToken(scope, identifierSpan, identifier));
                return statement;
            }

            statement.AddToken(new IdentifierToken(scope, identifierSpan, identifier, definition));

            // Arguments and attributes are only parsed when an opening bracket follows the name.
            if (code.ReadExact('('))
            {
                var openBracket = new OperatorToken(scope, code.Span, "(");
                statement.AddToken(ArgsToken.Parse(scope, openBracket, definition.Signature));

                ParseFunctionAttributes(scope, statement);
            }

            return statement;
        }
Пример #2
0
        /// <summary>
        /// Reads the input from the current position to its end and converts it into tokens.
        /// </summary>
        /// <returns>The tokens in the order they were encountered.</returns>
        /// <exception cref="Exception">
        /// Thrown when the next character's type matches none of the handled <c>CharType</c> cases.
        /// </exception>
        public Token[] Tokenize()
        {
            var tokens  = new List <Token>();
            var builder = new StringBuilder();

            while (!eof())
            {
                // White space has no meaning besides separating tokens (we're not Python!).
                skip(CharType.WhiteSpace);

                // Skipping may have consumed the remainder of the input (trailing white space);
                // stop instead of classifying a character that is not there.
                if (eof())
                {
                    break;
                }

                switch (peekType())
                {
                case CharType.Alpha:     // start of an identifier or keyword
                    readToken(builder, CharType.AlphaNumeric);
                    string word = builder.ToString();
                    builder.Clear();
                    if (KeywordToken.IsKeyword(word))
                    {
                        tokens.Add(new KeywordToken(word));
                    }
                    else
                    {
                        tokens.Add(new IdentifierToken(word));
                    }
                    break;

                case CharType.Numeric:     // start of a number literal
                    readToken(builder, CharType.Numeric);
                    tokens.Add(new NumberLiteralToken(builder.ToString()));
                    builder.Clear();
                    break;

                case CharType.Operator:
                    readToken(builder, CharType.Operator);
                    tokens.Add(new OperatorToken(builder.ToString()));
                    builder.Clear();
                    break;

                // The remaining token kinds are built from the single character returned by next().
                case CharType.OpenBrace:
                    tokens.Add(new OpenBraceToken(next().ToString()));
                    break;

                case CharType.CloseBrace:
                    tokens.Add(new CloseBraceToken(next().ToString()));
                    break;

                case CharType.ArgSeperator:
                    tokens.Add(new ArgSeperatorToken(next().ToString()));
                    break;

                case CharType.StatementSeperator:
                    tokens.Add(new StatementSperatorToken(next().ToString()));
                    break;

                default:
                    throw new Exception("The tokenizer found an unidentifiable character.");
                }
            }

            return tokens.ToArray();
        }
Пример #3
0
        /// <summary>
        /// Adds <paramref name="tagToken"/> to <paramref name="parent"/> and then parses what follows it:
        /// either a valid tag name with a string literal, or a general expression.
        /// </summary>
        /// <param name="scope">Scope supplying the code reader.</param>
        /// <param name="parent">Group that receives every token produced here.</param>
        /// <param name="tagToken">The keyword token that introduced the tag.</param>
        /// <param name="endTokens">Passed through to <c>ExpressionToken.TryParse</c>.</param>
        private static void ParseTag(Scope scope, GroupToken parent, KeywordToken tagToken, string[] endTokens)
        {
            var code = scope.Code;

            parent.AddToken(tagToken);

            var startPosition = code.Position;
            var hasTagName    = code.ReadTagName() && ProbeEnvironment.IsValidTagName(code.Text);

            if (!hasTagName)
            {
                // Not a recognised tag name: rewind so the text is parsed as an expression instead.
                code.Position = startPosition;
            }
            else
            {
                parent.AddToken(new KeywordToken(scope, code.Span, code.Text));

                if (code.ReadStringLiteral())
                {
                    // Tag name followed by a string literal completes the tag.
                    parent.AddToken(new StringLiteralToken(scope, code.Span, code.Text));
                    return;
                }
            }

            var expression = ExpressionToken.TryParse(scope, endTokens);
            if (expression != null)
            {
                parent.AddToken(expression);
            }
        }
Пример #4
0
        /// <summary>
        /// Builds a <see cref="ReturnStatement"/>: the return keyword, an optional expression
        /// (parsed against the scope's return data type) and an optional terminating ';'.
        /// </summary>
        /// <param name="scope">Scope supplying the code reader and the expected return data type.</param>
        /// <param name="returnToken">The keyword token that introduced the statement.</param>
        /// <returns>The statement with every token that could be read; never null.</returns>
        public static ReturnStatement Parse(Scope scope, KeywordToken returnToken)
        {
            var statement = new ReturnStatement(scope);

            statement.AddToken(returnToken);

            var code = scope.Code;

            if (!code.ReadExact(';'))
            {
                // No immediate terminator, so try to read the returned expression first.
                var value = ExpressionToken.TryParse(scope, null, expectedDataType: scope.ReturnDataType);
                if (value != null)
                {
                    statement.AddToken(value);
                }

                if (!code.ReadExact(';'))
                {
                    // Statement is left without a terminator token.
                    return statement;
                }
            }

            // Both paths reach here right after a successful ReadExact(';'), so code.Span covers the ';'.
            statement.AddToken(new StatementEndToken(scope, code.Span));
            return statement;
        }
Пример #5
0
        /// <summary>
        /// Builds a <see cref="RowColStatement"/>: the keyword, an optional '+'/'-' sign, an optional
        /// number, an optional expression and an optional terminating ';'.
        /// </summary>
        /// <param name="scope">Scope supplying the code reader.</param>
        /// <param name="rowColToken">The keyword token that introduced the statement.</param>
        /// <returns>The statement with every token that could be read; never null.</returns>
        public static RowColStatement Parse(Scope scope, KeywordToken rowColToken)
        {
            var ret = new RowColStatement(scope);

            ret.AddToken(rowColToken);

            var code = scope.Code;

            // Optional sign: peeked first, then consumed through MovePeekedSpan().
            if (code.PeekExact('+') || code.PeekExact('-'))
            {
                ret.AddToken(new OperatorToken(scope, code.MovePeekedSpan(), code.Text));
            }
            if (code.ReadNumber())
            {
                ret.AddToken(new NumberToken(scope, code.Span, code.Text));
            }

            var exp = ExpressionToken.TryParse(scope, null);

            if (exp != null)
            {
                ret.AddToken(exp);
            }

            // Consume the terminator with ReadExact so that code.Span refers to the ';' itself.
            // The previous PeekExact left the ';' unread and built the end token from the span
            // of whatever had been read before it.
            if (code.ReadExact(';'))
            {
                ret.AddToken(new StatementEndToken(scope, code.Span));
            }

            return ret;
        }
Пример #6
0
        public void Test_3_5_4_Keywords_C()
        {
            // Rule under test: an unadorned identifier is one not immediately preceded by '#'.
            // When ':' followed by '=' comes directly after it (no white space), the input is an
            // identifier followed by an assignment operator, not a keyword followed by '='.
            // Here ':' is followed by "true", so a keyword and then an identifier are expected.
            Scanner scanner = this.GetLexer("isGood1Time:true");

            // First token: the keyword "isGood1Time:".
            object current = scanner.GetToken();
            Assert.IsInstanceOfType(current, typeof(KeywordToken));
            KeywordToken keyword = (KeywordToken)current;

            Assert.IsTrue(keyword.IsValid);
            Assert.IsNull(keyword.ScanError);
            Assert.AreEqual(0, keyword.StartPosition.Position);
            Assert.AreEqual(11, keyword.StopPosition.Position);
            Assert.AreEqual("isGood1Time:", keyword.Value);

            // Second token: the identifier "true".
            current = scanner.GetToken();
            Assert.IsInstanceOfType(current, typeof(IdentifierToken));
            IdentifierToken identifier = (IdentifierToken)current;

            Assert.IsTrue(identifier.IsValid);
            Assert.IsNull(identifier.ScanError);
            Assert.AreEqual(12, identifier.StartPosition.Position);
            Assert.AreEqual(15, identifier.StopPosition.Position);
            Assert.AreEqual("true", identifier.Value);

            // Nothing but end-of-file may follow.
            current = scanner.GetToken();
            Assert.IsInstanceOfType(current, typeof(EofToken));
        }
Пример #7
0
        /*
         * Grammar rule handled here:
         *
         * type_spec : 'void' | 'char' | 'short' | 'int' | 'long' | 'float'
         *           | 'double' | 'signed' | 'unsigned'
         *           | struct_or_union_spec
         *           | enum_spec            (not implemented yet)
         *           | typedef_name         (not implemented yet)
         *           ;
         *
         * Returns a successful TypeSpec result for a built-in type keyword, otherwise the
         * struct-spec result when that failed or produced something, otherwise a successful NullStat.
         */

        private IParseResult ParseTypeSpec()
        {
            KeywordToken typeKeyword = null;

            var builtInTypes = new[]
            {
                KeywordType.VOID, KeywordType.CHAR, KeywordType.UNSIGNED, KeywordType.LONG, KeywordType.INT,
                KeywordType.SIGNED, KeywordType.DOUBLE, KeywordType.SHORT, KeywordType.FLOAT
            };

            if (Accept(builtInTypes, ref typeKeyword))
            {
                return new SuccessParseResult(new TypeSpec(typeKeyword));
            }

            var structSpec = ParseStructSpec();

            if (structSpec.IsSuccess && structSpec.IsNullStat())
            {
                // Nothing matched so far.
                // TODO enum_spec
                // TODO typedef_name
                return new SuccessParseResult(new NullStat());
            }

            // Either a failure to propagate or an actual struct/union specifier.
            return structSpec;
        }
        /// <summary>
        /// Tries to read a keyword (a ':' followed by symbol-name characters) at the current
        /// context position.
        /// </summary>
        /// <returns>
        /// A <see cref="KeywordToken"/> including the leading ':' when the current character is ':';
        /// otherwise null, leaving the context untouched.
        /// </returns>
        public IToken Produce()
        {
            var context = this.Context;
            var text    = context.Text;
            var length  = context.Length;

            if (text[context.Index] != ':')
            {
                return null;
            }

            var start  = context.Index;
            var cursor = start + 1;

            // Walk over the name characters that follow the leading ':'.
            while (cursor != length)
            {
                var current = text[cursor];

                if (current == ':')
                {
                    // A second ':' inside a keyword is reported as a bad keyword.
                    ThrowBadKeywordException(context.Line, context.Column);
                }

                if (!TinyLispHelper.IsAcceptableSymbolNameChar(current))
                {
                    break;
                }

                cursor++;
            }

            // Total characters consumed, counting the leading ':'.
            var consumed = cursor - start;

            if (consumed == 1)
            {
                // A lone ':' with no name characters after it.
                ThrowBadKeywordException(context.Line, context.Column);
            }

            var keywordText = text.Substring(start, consumed);
            var endColumn   = context.Column + consumed;
            var keyword     = new KeywordToken(
                keywordText,
                new Position(context.Line, context.Column),
                consumed);

            context.Advance(consumed, 0, endColumn);
            return keyword;
        }
Пример #9
0
        /// <summary>
        /// Turns the buffered characters into a token and empties the buffer.
        /// </summary>
        /// <param name="buffer">Accumulated lexeme characters; cleared when non-empty.</param>
        /// <param name="absolutePosition">Advanced by the lexeme length when a token is produced.</param>
        /// <param name="lineNumber">Line number handed to the token factories.</param>
        /// <param name="lexemeStartPositionInLine">Advanced by the lexeme length when a token is produced.</param>
        /// <returns>
        /// <c>Option&lt;IToken&gt;.None</c> for an empty buffer; otherwise the first result accepted by
        /// the <c>||</c> chain of keyword, identifier, integer literal, real literal and, last,
        /// unrecognized token.
        /// </returns>
        private Option <IToken> FlushBuffer(
            StringBuilder buffer,
            ref uint absolutePosition,
            uint lineNumber,
            ref uint lexemeStartPositionInLine
            )
        {
            if (buffer.Length == 0)
            {
                return Option<IToken>.None;
            }

            var text = buffer.ToString();

            // The token factories are tried in this fixed order via the Option's || operator.
            var token =
                KeywordToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine) ||
                IdentifierToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine) ||
                IntegerLiteralToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine) ||
                RealLiteralToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine) ||
                new UnrecognizedToken(text, absolutePosition, lineNumber, lexemeStartPositionInLine);

            buffer.Clear();

            // Move both positions past the lexeme that was just consumed.
            var consumed = (uint)text.Length;
            absolutePosition          += consumed;
            lexemeStartPositionInLine += consumed;

            return token;
        }
Пример #10
0
        // Lexes a dense multi-line snippet and compares every produced token, including its
        // (line, column) position, with a hand-written expectation.
        void LexerSmokeTestSecond()
        {
            var source =
                @"fn+let-const*as/while//hello
continue:=if->(else=> _==int){""as\nc\rc\tc""
<=p>1.5E-3!=231231231231</p>
}
";
            var expected = new Token[]
            {
                // Line 0 (the trailing "//hello" contributes no tokens to the expectation).
                new KeywordToken(Keyword.Fn, (0, 0)),
                new OperatorToken(Operator.Plus, (0, 2)),
                new KeywordToken(Keyword.Let, (0, 3)),
                new OperatorToken(Operator.Minus, (0, 6)),
                new KeywordToken(Keyword.Const, (0, 7)),
                new OperatorToken(Operator.Mult, (0, 12)),
                new KeywordToken(Keyword.As, (0, 13)),
                new OperatorToken(Operator.Divide, (0, 15)),
                new KeywordToken(Keyword.While, (0, 16)),

                // Line 1
                new KeywordToken(Keyword.Continue, (1, 0)),
                new OperatorToken(Operator.Colon, (1, 8)),
                new OperatorToken(Operator.Assign, (1, 9)),
                new KeywordToken(Keyword.If, (1, 10)),
                new OperatorToken(Operator.Arrow, (1, 12)),
                new OperatorToken(Operator.LeftParen, (1, 14)),
                new KeywordToken(Keyword.Else, (1, 15)),
                new OperatorToken(Operator.Assign, (1, 19)),
                new OperatorToken(Operator.GreaterThan, (1, 20)),
                new IdentifierToken("_", (1, 22)),
                new OperatorToken(Operator.Equal, (1, 23)),
                new IdentifierToken("int", (1, 25)),
                new OperatorToken(Operator.RightParen, (1, 28)),
                new OperatorToken(Operator.LeftBrace, (1, 29)),
                new StringLiteralToken("as\nc\rc\tc", (1, 30)),

                // Line 2
                new OperatorToken(Operator.LessEqual, (2, 0)),
                new IdentifierToken("p", (2, 2)),
                new OperatorToken(Operator.GreaterThan, (2, 3)),
                new DoubleLiteralToken(1.5E-3, (2, 4)),
                new OperatorToken(Operator.NotEqual, (2, 10)),
                new UInt64LiteralToken(231231231231, (2, 12)),
                new OperatorToken(Operator.LessThan, (2, 24)),
                new OperatorToken(Operator.Divide, (2, 25)),
                new IdentifierToken("p", (2, 26)),
                new OperatorToken(Operator.GreaterThan, (2, 27)),

                // Line 3
                new OperatorToken(Operator.RightBrace, (3, 0)),
            };

            var actual = new Lexer(source).Parse().ToList();

            Assert.Equal(expected.Length, actual.Count);

            // Counts are equal at this point, so walking by index covers every token pair in order.
            var index = 0;
            foreach (var expectedToken in expected)
            {
                AssertJsonEqual(expectedToken, actual[index]);
                index++;
            }
        }
Пример #11
0
        /*
         * Grammar rule handled here:
         *
         * type_qualifier : 'const' | 'volatile'
         *                ;
         *
         * Returns a successful TypeQualifier result when one of the keywords is accepted,
         * otherwise a successful NullStat result.
         */

        private IParseResult ParseTypeQualifier()
        {
            KeywordToken qualifier = null;
            var accepted = Accept(new [] { KeywordType.CONST, KeywordType.VOLATILE }, ref qualifier);

            if (!accepted)
            {
                return new SuccessParseResult(new NullStat());
            }

            return new SuccessParseResult(new TypeQualifier(qualifier));
        }
Пример #12
0
 /// <summary>
 /// Initializes the node after being parsed by the parser: stores the delimiters and the API
 /// convention and replaces the contents of <c>ApiParameters</c> with <paramref name="parameters"/>.
 /// </summary>
 /// <param name="openingDelimiter">Stored in <c>OpeningDelimiter</c>.</param>
 /// <param name="closingDelimiter">Stored in <c>ClosingDelimiter</c>.</param>
 /// <param name="apiConvention">Stored in <c>ApiConvention</c>.</param>
 /// <param name="parameters">Parameter tokens to store, in enumeration order; must not be null.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="parameters"/> is null.</exception>
 protected internal void SetContents(BinarySelectorToken openingDelimiter, BinarySelectorToken closingDelimiter,
                                     KeywordToken apiConvention, IEnumerable <IPrimitiveCallParameterToken> parameters)
 {
     // Validate before touching any state so a bad call cannot leave the node half-initialized.
     if (parameters == null)
     {
         throw new System.ArgumentNullException("parameters");
     }

     // Snapshot before clearing: if the caller passes this node's own ApiParameters (or a lazy
     // sequence over it), clearing first would discard the very items that are to be stored.
     List <IPrimitiveCallParameterToken> snapshot = new List <IPrimitiveCallParameterToken>(parameters);

     this.OpeningDelimiter = openingDelimiter;
     this.ClosingDelimiter = closingDelimiter;
     this.ApiConvention    = apiConvention;
     this.ApiParameters.Clear();
     foreach (IPrimitiveCallParameterToken param in snapshot)
     {
         this.ApiParameters.Add(param);
     }
 }
Пример #13
0
        /// <summary>
        /// Builds a <see cref="HeaderStatement"/> from the header keyword and, when a '{' follows,
        /// the braces block parsed by <c>BracesToken.Parse</c>.
        /// </summary>
        /// <param name="scope">Scope supplying the code reader.</param>
        /// <param name="headerToken">The keyword token that introduced the statement.</param>
        /// <returns>The statement; never null.</returns>
        public static HeaderStatement Parse(Scope scope, KeywordToken headerToken)
        {
            var statement = new HeaderStatement(scope);

            statement.AddToken(headerToken);

            // The '{' is only peeked here; BracesToken.Parse is left to read the block itself.
            if (scope.Code.PeekExact('{'))
            {
                statement.AddToken(BracesToken.Parse(scope, null));
            }

            return statement;
        }
Пример #14
0
        /// <summary>
        /// Tries to match a keyword from <c>KeywordToken.Map</c> at <c>chBaseIndex</c>.
        /// A keyword is only considered at the start of the token list or directly after a
        /// separator/operator token, and only when the character following it is itself a
        /// separator or an operator (so "int" is not matched inside "integral").
        /// </summary>
        /// <returns>
        /// The updated <c>chBaseIndex</c>: just past the keyword on a match, unchanged otherwise.
        /// </returns>
        private int Keyword()
        {
            if (Tokens.Count > 0)
            {
                // Exact type comparison, not an "is" test: subclasses do not count.
                var previousType = Tokens.Last().GetType();
                if (previousType != typeof(SeparatorToken) && previousType != typeof(OperatorToken))
                {
                    return chBaseIndex;
                }
            }

            foreach (KeyValuePair <string, NssKeywords> kvp in KeywordToken.Map)
            {
                string candidate        = kvp.Key;
                int    chNextAlongIndex = chBaseIndex + candidate.Length;

                // The keyword plus one following character must fit into the data.
                if (chNextAlongIndex >= data.Length)
                {
                    continue; // This would overrun us.
                }

                // Compare in place rather than allocating a substring for every candidate.
                if (string.CompareOrdinal(data, chBaseIndex, candidate, 0, candidate.Length) != 0)
                {
                    continue;
                }

                // We've matched a keyword, e.g. 'int ', but we might have, e.g. 'int integral', and the
                // 'integral' is an identifier. So only accept the keyword if the character following it
                // is a separator or an operator.
                char chNextAlong = data[chNextAlongIndex];
                if (!SeparatorToken.Map.ContainsKey(chNextAlong) && !OperatorToken.Map.ContainsKey(chNextAlong))
                {
                    continue;
                }

                KeywordToken keyword = new KeywordToken();
                keyword.m_Keyword = kvp.Value;

                // Debug range covers the keyword's own characters (end index inclusive).
                AttachDebugData(keyword, DebugRanges, chBaseIndex, chNextAlongIndex - 1);

                Tokens.Add(keyword);
                chBaseIndex = chNextAlongIndex;
                break;
            }

            return chBaseIndex;
        }
Пример #15
0
        /*
         * Grammar rule handled here:
         *
         * storage_class_spec : 'auto' | 'register' | 'static' | 'extern' | 'typedef'
         *                    ;
         *
         * Returns a successful StorageClassSpec result when one of the keywords is accepted,
         * otherwise a successful NullStat result.
         */

        private IParseResult ParseStorageClassSpec()
        {
            KeywordToken storageKeyword = null;

            var storageClasses = new[]
            {
                KeywordType.AUTO, KeywordType.REGISTER, KeywordType.STATIC,
                KeywordType.EXTERN, KeywordType.TYPEDEF
            };

            if (!Accept(storageClasses, ref storageKeyword))
            {
                return new SuccessParseResult(new NullStat());
            }

            return new SuccessParseResult(new StorageClassSpec(storageKeyword));
        }
Пример #16
0
        /// <summary>
        /// Looks up a keyword in the keyword trie at the reader's current position.
        /// </summary>
        /// <param name="info">Lexer runtime state; its reader is handed to the trie lookup.</param>
        /// <returns>
        /// A failed result when the trie finds nothing; otherwise the successful keyword result
        /// after it has been passed through <c>EnsureTrailingSpecialChar</c>.
        /// </returns>
        public override TokenizationResult Tokenize(ref LexerRuntimeInfo info)
        {
            // Capture the position before the lookup runs.
            var start = info.Reader.CaptureCurrentPosition();

            if (!_keywordsTrie.TryFind(info.Reader, out var keyword, out var readLength))
            {
                return TokenizationResult.Failed();
            }

            var keywordToken = new KeywordToken(start.Value, readLength, keyword);

            return EnsureTrailingSpecialChar(ref info, TokenizationResult.Successful(keywordToken));
        }
Пример #17
0
        /// <summary>
        /// Builds a <see cref="FormatStatement"/>: the keyword followed by any number of
        /// <c>rows</c>/<c>cols</c>/<c>genpages</c>/<c>outfile</c> options, each with an optional '='
        /// and an expression, up to a terminating ';'.
        /// </summary>
        /// <param name="scope">Scope supplying the code reader.</param>
        /// <param name="formatToken">The keyword token that introduced the statement.</param>
        /// <returns>The statement with every token that could be read; never null.</returns>
        public static FormatStatement Parse(Scope scope, KeywordToken formatToken)
        {
            var statement = new FormatStatement(scope);

            statement.AddToken(formatToken);

            var code = scope.Code;

            for (;;)
            {
                if (code.ReadExact(';'))
                {
                    statement.AddToken(new StatementEndToken(scope, code.Span));
                    break;
                }

                var option = code.PeekWordR();
                if (string.IsNullOrEmpty(option))
                {
                    break;
                }

                var isKnownOption = option == "rows" || option == "cols" || option == "genpages" || option == "outfile";
                if (!isKnownOption)
                {
                    // Anything else ends the statement; the word stays unread.
                    break;
                }

                statement.AddToken(new KeywordToken(scope, code.MovePeekedSpan(), option));

                // The '=' between the option and its value is optional.
                if (code.ReadExact('='))
                {
                    statement.AddToken(new OperatorToken(scope, code.Span, "="));
                }

                var value = ExpressionToken.TryParse(scope, _endTokens);
                if (value == null)
                {
                    break;
                }

                statement.AddToken(value);
            }

            return statement;
        }
Пример #18
0
        public void Test_3_5_4_Keywords_A()
        {
            // A name immediately followed by ':' must be scanned as one keyword token.
            Scanner scanner = this.GetLexer("isGood1Time:");

            object current = scanner.GetToken();
            Assert.IsInstanceOfType(current, typeof(KeywordToken));
            KeywordToken keyword = (KeywordToken)current;

            Assert.IsTrue(keyword.IsValid);
            Assert.IsNull(keyword.ScanError);
            Assert.AreEqual(0, keyword.StartPosition.Position);
            Assert.AreEqual(11, keyword.StopPosition.Position);
            Assert.AreEqual("isGood1Time:", keyword.Value);

            // Nothing but end-of-file may follow.
            current = scanner.GetToken();
            Assert.IsInstanceOfType(current, typeof(EofToken));
        }
Пример #19
0
        /// <summary>
        /// Creates an if-statement after checking that the condition yields a boolean.
        /// </summary>
        /// <param name="condition">Value getter for the condition; its value type must be <c>bool</c>.</param>
        /// <param name="trueBlock">Stored as the true block.</param>
        /// <param name="falseBlock">Stored as the false block.</param>
        /// <param name="keywordToken">Token used as the error source and in the error message.</param>
        /// <exception cref="ScriptParsingException">The condition's value type is not <c>bool</c>.</exception>
        public IfStatement(
            IValueGetter condition,
            IExecutable trueBlock,
            IExecutable falseBlock,
            KeywordToken keywordToken)
        {
            var conditionType = condition.GetValueType();
            var isBoolean     = conditionType == typeof(bool);

            if (!isBoolean)
            {
                throw new ScriptParsingException(
                          source: keywordToken,
                          message: $"Condition of {keywordToken} statement is not a boolean value: type {conditionType.Name}");
            }

            this.condition  = condition;
            this.trueBlock  = trueBlock;
            this.falseBlock = falseBlock;
        }
        /// <summary>
        /// Parses the remainder of an annotation: a key string, the <c>value:</c> keyword, a value
        /// string and then end of input. Problems are reported through <c>ReportParserError</c>;
        /// parsing continues after each report.
        /// </summary>
        /// <param name="nodeForAnnotation">Element node passed to <c>CreateAnnotationNode</c>.</param>
        /// <returns>
        /// The annotation node. Key/Value are null when the corresponding string was missing, and
        /// both are null when extra tokens follow the value.
        /// </returns>
        protected virtual AnnotationNode ParseAnnotation(InterchangeElementNode nodeForAnnotation)
        {
            // TODO : Move constants out of code into the InterchangeFormatConstants class
            // TODO : Move error messages out of code into the InterchangeFormatErrors class

            // PARSE: <annotation> ::= 'Annotation' 'key:' quotedString 'value:' quotedString <elementSeparator>
            AnnotationNode result = this.CreateAnnotationNode(nodeForAnnotation);

            // Key string.
            Token       token = this.GetNextTokenxx();
            StringToken str   = token as StringToken;

            if (str == null)
            {
                this.ReportParserError(result, "Missing annotation key.", token);
            }
            result.Key = str;

            // 'value:' keyword.
            token = this.GetNextTokenxx();
            KeywordToken cmd = token as KeywordToken;

            if ((cmd == null) || (cmd.Value != "value:"))
            {
                // Reported against the annotation node, like every other error in this method.
                this.ReportParserError(result, "Missing annotation #value: keyword.", token);
            }

            // Value string.
            token = this.GetNextTokenxx();
            str   = token as StringToken;
            if (str == null)
            {
                this.ReportParserError(result, "Missing annotation value.", token);
            }
            result.Value = str;

            // Nothing may follow the value.
            token = this.GetNextTokenxx();
            if (!(token is EofToken))
            {
                this.ReportParserError(result, "Unexpected code found after annotation value.", token);
                // Discard both parts to avoid accepting something like:
                //   Annotation key: 'x' value: 'y' crash: 'yes'.
                result.Key   = null;
                result.Value = null;
            }

            return result;
        }
Пример #21
0
        /// <summary>
        /// Builds a <see cref="CreateStatement"/> for an alter statement, parsed in a cloned scope
        /// with statement starts, function definitions, function calls and logic suppressed.
        /// </summary>
        /// <param name="scope">Scope to clone; the clone receives the additional hints.</param>
        /// <param name="alterToken">The keyword token that introduced the statement.</param>
        /// <returns>The statement; only the <c>table</c> form is parsed further.</returns>
        public static CreateStatement ParseAlter(Scope scope, KeywordToken alterToken)
        {
            const ScopeHint suppressed = ScopeHint.SuppressStatementStarts
                                       | ScopeHint.SuppressFunctionDefinition
                                       | ScopeHint.SuppressFunctionCall
                                       | ScopeHint.SuppressLogic;

            // Work on a clone so the hints are applied to the copy only.
            var alterScope = scope.Clone();
            alterScope.Hint |= suppressed;

            var statement = new CreateStatement(alterScope);

            statement.AddToken(alterToken);

            var code = alterScope.Code;

            if (code.PeekWordR() == "table")
            {
                statement.AddToken(new KeywordToken(alterScope, code.MovePeekedSpan(), "table"));
                statement.ParseAlterTable();
            }

            return statement;
        }
Пример #22
0
        /*
         * Grammar rule handled here:
         *
         * jump_stat : 'continue' ';'
         *           | 'break' ';'
         *           | 'return' exp ';'
         *           | 'return' ';'
         *           ;
         *
         * Returns a JumpStat for continue/break, a ReturnStat for return (or the failed expression
         * result), and a successful NullStat when none of the keywords is present.
         */

        private IParseResult ParseJumpStat()
        {
            KeywordToken jumpKeyword = null;

            if (Accept(new [] { KeywordType.CONTINUE, KeywordType.BREAK }, ref jumpKeyword))
            {
                Expect(OperatorType.SEMICOLON);
                return new SuccessParseResult(new JumpStat(jumpKeyword));
            }

            if (!Accept(KeywordType.RETURN))
            {
                // Not a jump statement at all.
                return new SuccessParseResult(new NullStat());
            }

            var returned = ParseExp();
            if (!returned.IsSuccess)
            {
                // Propagate the expression failure unchanged.
                return returned;
            }

            Expect(OperatorType.SEMICOLON);
            return new SuccessParseResult(new ReturnStat(returned.ResultNode));
        }
        /// <summary>
        /// Parses the interchange version header: the identifier <c>Smalltalk</c>, the keyword
        /// <c>interchangeVersion:</c> and a quoted version string.
        /// </summary>
        /// <returns>
        /// A new node wrapping the version string on success. On any mismatch the error is reported
        /// against the node from <c>CreateInterchangeVersionIdentifierNode</c> and that node is returned.
        /// </returns>
        protected internal virtual InterchangeVersionIdentifierNode ParseVersionId()
        {
            // TODO : Move constants out of code into the InterchangeFormatConstants class

            // PARSE: <interchangeVersionIdentifier> ::= 'Smalltalk' 'interchangeVersion:' <versionId>
            //      <versionId> ::= quotedString
            InterchangeVersionIdentifierNode errorNode = this.CreateInterchangeVersionIdentifierNode();

            // 'Smalltalk'
            Token           token      = this.GetNextTokenxx();
            IdentifierToken identifier = token as IdentifierToken;
            bool            hasPrefix  = (identifier != null) && (identifier.Value == "Smalltalk");

            if (!hasPrefix)
            {
                this.ReportParserError(errorNode, InterchangeFormatErrors.MissingInterchangeVersionIdentifier, token);
                return errorNode;
            }

            // 'interchangeVersion:'
            token = this.GetNextTokenxx();
            KeywordToken keyword    = token as KeywordToken;
            bool         hasKeyword = (keyword != null) && (keyword.Value == "interchangeVersion:");

            if (!hasKeyword)
            {
                this.ReportParserError(errorNode, InterchangeFormatErrors.MissingInterchangeVersionIdentifier, token);
                return errorNode;
            }

            // <versionId>
            token = this.GetNextTokenxx();
            StringToken versionId = token as StringToken;

            if (versionId == null)
            {
                this.ReportParserError(errorNode, InterchangeFormatErrors.MissingInterchangeVersionIdentifier, token);
                return errorNode;
            }

            return new InterchangeVersionIdentifierNode(versionId);
        }
Пример #24
0
        /*
         * Method:  FindNextToken
         *
         * Find the next token. Return 'true' if one was found. False, otherwise.
         */
        override internal bool FindNextToken()
        {
            int startPosition = _reader.Position;

            // VB docs claim whitespace is Unicode category Zs. However,
            // this category does not contain tabs. Assuming a less restrictive
            // definition for whitespace...
            if (_reader.SinkWhiteSpace())
            {
                while (_reader.SinkWhiteSpace())
                {
                }

                // Now, we need to check for the line continuation character.
                if (_reader.SinkLineContinuationCharacter())    // Line continuation is '_'
                {
                    // Save the current position because we may need to come back here.
                    int savePosition = _reader.Position - 1;

                    // Skip all whitespace after the '_'
                    while (_reader.SinkWhiteSpace())
                    {
                    }

                    // Now, skip all the newlines.
                    // Need at least one newline for this to count as line continuation.
                    int count = 0;
                    while (_reader.SinkNewLine())
                    {
                        ++count;
                    }

                    if (count > 0)
                    {
                        current = new VisualBasicTokenizer.LineContinuationToken();
                        return(true);
                    }

                    // Otherwise, fall back to plain old whitespace.
                    _reader.Position = savePosition;
                }

                current = new WhitespaceToken();
                return(true);
            }
            // Line terminators are separate from whitespace and are significant.
            else if (_reader.SinkNewLine())
            {
                // We want one token per line terminator.
                current = new VisualBasicTokenizer.LineTerminatorToken();
                return(true);
            }
            // Check for a comment--either those that start with ' or rem.
            else if (_reader.SinkLineCommentStart())
            {
                // Skip to the first EOL.
                _reader.SinkToEndOfLine();

                current = new CommentToken();
                return(true);
            }
            // Identifier or keyword?
            else if
            (
                // VB allows escaping of identifiers by surrounding them with []
                // In other words,
                //      Date is a keyword but,
                //      [Date] is an identifier.
                _reader.CurrentCharacter == '[' ||
                _reader.MatchNextIdentifierStart()
            )
            {
                bool escapedIdentifier = false;
                if (_reader.CurrentCharacter == '[')
                {
                    escapedIdentifier = true;
                    _reader.SinkCharacter();

                    // Now, the next character must be an identifier start.
                    if (!_reader.SinkIdentifierStart())
                    {
                        current = new ExpectedIdentifierToken();
                        return(true);
                    }
                }

                // Sink the rest of the identifier.
                while (_reader.SinkIdentifierPart())
                {
                }

                // If this was an escaped identifier the we need to get the terminating ']'.
                if (escapedIdentifier)
                {
                    if (!_reader.Sink("]"))
                    {
                        current = new ExpectedIdentifierToken();
                        return(true);
                    }
                }
                else
                {
                    // Escaped identifiers are not allowed to have trailing type character.
                    _reader.SinkTypeCharacter(); // Type character is optional.
                }

                // An identifier that is only a '_' is illegal because it is
                // ambiguous with line continuation
                string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition);
                if (identifierOrKeyword == "_" || identifierOrKeyword == "[_]" || identifierOrKeyword == "[]")
                {
                    current = new ExpectedIdentifierToken();
                    return(true);
                }

                // Make an upper-case version in order to check whether this may be a keyword.
                string upper = identifierOrKeyword.ToUpperInvariant();

                switch (upper)
                {
                default:

                    if (Array.IndexOf(s_keywordList, upper) >= 0)
                    {
                        current = new KeywordToken();
                        return(true);
                    }

                    // Create the token.
                    current = new IdentifierToken();

                    // Trim off the [] if this is an escaped identifier.
                    if (escapedIdentifier)
                    {
                        current.InnerText = identifierOrKeyword.Substring(1, identifierOrKeyword.Length - 2);
                    }
                    return(true);

                case "FALSE":
                case "TRUE":
                    current = new BooleanLiteralToken();
                    return(true);
                }
            }
            // Is it a hex integer?
            else if (_reader.SinkHexIntegerPrefix())
            {
                if (!_reader.SinkMultipleHexDigits())
                {
                    current = new ExpectedValidHexDigitToken();
                    return(true);
                }

                // Sink a suffix if there is one.
                _reader.SinkIntegerSuffix();

                current = new HexIntegerLiteralToken();
                return(true);
            }
            // Is it an octal integer?
            else if (_reader.SinkOctalIntegerPrefix())
            {
                if (!_reader.SinkMultipleOctalDigits())
                {
                    current = new VisualBasicTokenizer.ExpectedValidOctalDigitToken();
                    return(true);
                }

                // Sink a suffix if there is one.
                _reader.SinkIntegerSuffix();

                current = new VisualBasicTokenizer.OctalIntegerLiteralToken();
                return(true);
            }
            // Is it a decimal integer?
            else if (_reader.SinkMultipleDecimalDigits())
            {
                // Sink a suffix if there is one.
                _reader.SinkDecimalIntegerSuffix();

                current = new DecimalIntegerLiteralToken();
                return(true);
            }
            // Preprocessor line
            else if (_reader.CurrentCharacter == '#')
            {
                if (_reader.SinkIgnoreCase("#if"))
                {
                    current = new OpenConditionalDirectiveToken();
                }
                else if (_reader.SinkIgnoreCase("#end if"))
                {
                    current = new CloseConditionalDirectiveToken();
                }
                else
                {
                    current = new PreprocessorToken();
                }

                _reader.SinkToEndOfLine();

                return(true);
            }
            // Is it a separator?
            else if (_reader.SinkSeparatorCharacter())
            {
                current = new VisualBasicTokenizer.SeparatorToken();
                return(true);
            }
            // Is it an operator?
            else if (_reader.SinkOperator())
            {
                current = new OperatorToken();
                return(true);
            }
            // A string?
            else if (_reader.Sink("\""))
            {
                do
                {
                    // Inside a verbatim string "" is treated as a special character
                    while (_reader.Sink("\"\""))
                    {
                    }
                }while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"');

                // Can't end a file inside a string
                if (_reader.EndOfLines)
                {
                    current = new EndOfFileInsideStringToken();
                    return(true);
                }

                current = new StringLiteralToken();
                return(true);
            }


            // We didn't recognize the token, so this is a syntax error.
            _reader.SinkCharacter();
            current = new UnrecognizedToken();
            return(true);
        }
Пример #25
0
        /// <summary>
        /// Consumes one C# token from the reader and stores a token object
        /// describing it in <c>current</c>.
        /// </summary>
        /// <returns>
        /// Always <c>true</c>. Malformed or unknown input is reported through a
        /// dedicated error token type rather than through the return value.
        /// </returns>
        override internal bool FindNextToken()
        {
            // Every recognizer below ends in a return, so they are laid out as a flat
            // list of guards. Their order matters: most reader calls consume input
            // when they succeed, and earlier checks shadow later ones (for example
            // the verbatim-string check must come before the '@' identifier check).
            int tokenStart = _reader.Position;

            // A run of whitespace is one token.
            if (_reader.SinkMultipleWhiteSpace())
            {
                current = new WhitespaceToken();
                return true;
            }

            // "//" comment: extends to the end of the line.
            if (_reader.Sink("//"))
            {
                _reader.SinkToEndOfLine();
                current = new CommentToken();
                return true;
            }

            // "/* ... */" comment.
            if (_reader.Sink("/*"))
            {
                _reader.SinkUntil("*/");

                // The reader sitting at the end of the input is taken to mean that
                // the closing marker was never found; that is a syntax error token.
                if (_reader.EndOfLines)
                {
                    current = new CSharpTokenizer.EndOfFileInsideCommentToken();
                }
                else
                {
                    current = new CommentToken();
                }

                return true;
            }

            // Character literal.
            if (_reader.Sink("\'"))
            {
                while (_reader.CurrentCharacter != '\'')
                {
                    // Escape sequences are not validated. A backslash is simply
                    // swallowed so that the character following it (an escaped tick,
                    // for instance) is consumed below instead of ending the literal.
                    // A complete implementation would recognize
                    //   simple escapes:      \' \" \\ \0 \a \b \f \n \r \t \v
                    //   hexadecimal escapes: \x followed by one to four hex digits
                    _reader.Sink("\\");
                    _reader.SinkCharacter();
                }

                if (_reader.SinkCharacter() != '\'')
                {
                    Debug.Assert(false, "Code defect in tokenizer: Should have yielded a closing tick.");
                }

                current = new CSharpTokenizer.CharLiteralToken();
                return true;
            }

            // Verbatim string: @"...".
            if (_reader.Sink("@\""))
            {
                while (true)
                {
                    // A doubled quote stands for a literal quote, not the terminator.
                    while (_reader.Sink("\"\""))
                    {
                    }

                    // Stop at the end of the input or once the closing quote is consumed.
                    if (_reader.EndOfLines || _reader.SinkCharacter() == '\"')
                    {
                        break;
                    }
                }

                // Can't end a file inside a string.
                if (_reader.EndOfLines)
                {
                    current = new EndOfFileInsideStringToken();
                    return true;
                }

                // The inner text is the matched text without its leading '@'.
                current = new StringLiteralToken();
                current.InnerText = _reader.GetCurrentMatchedString(tokenStart).Substring(1);
                return true;
            }

            // Regular quoted string.
            if (_reader.Sink("\""))
            {
                while (_reader.CurrentCharacter == '\\' || _reader.MatchRegularStringLiteral())
                {
                    // Ordinary characters need no further handling.
                    if (_reader.SinkCharacter() != '\\')
                    {
                        continue;
                    }

                    // A backslash was consumed. Escapes are not decoded; sinking them
                    // is just enough for the token's start and end to line up with
                    // the real bounds of the string.
                    if (!_reader.SinkStringEscape())
                    {
                        // Unknown escape: this is a compiler error.
                        _reader.SinkCharacter();
                        current = new CSharpTokenizer.UnrecognizedStringEscapeToken();
                        return true;
                    }
                }

                // A regular string may not continue onto the next line.
                if (TokenChar.IsNewLine(_reader.CurrentCharacter))
                {
                    current = new CSharpTokenizer.NewlineInsideStringToken();
                    return true;
                }

                if (_reader.SinkCharacter() != '\"')
                {
                    Debug.Assert(false, "Defect in tokenizer: Should have yielded a terminating quote.");
                }

                current = new StringLiteralToken();
                return true;
            }

            // Identifier or keyword. From 2.4.2 Identifiers: a '@' prefix lets a
            // keyword be used as an identifier.
            if (_reader.CurrentCharacter == '@' || _reader.MatchNextIdentifierStart())
            {
                if (_reader.CurrentCharacter == '@')
                {
                    _reader.SinkCharacter();
                }

                // With or without the prefix, an identifier start must come next.
                if (!_reader.SinkIdentifierStart())
                {
                    current = new ExpectedIdentifierToken();
                    return true;
                }

                // Consume the remainder of the identifier.
                while (_reader.SinkIdentifierPart())
                {
                }

                string word = _reader.GetCurrentMatchedString(tokenStart);

                // Literal keywords get their own token types.
                if (word == "false" || word == "true")
                {
                    current = new BooleanLiteralToken();
                    return true;
                }

                if (word == "null")
                {
                    current = new CSharpTokenizer.NullLiteralToken();
                    return true;
                }

                // An '@'-prefixed word still carries its '@' here, so it is only
                // found in the keyword list if the list itself contains that text.
                if (Array.IndexOf(s_keywordList, word) >= 0)
                {
                    current = new KeywordToken();
                    return true;
                }

                // Plain identifier: the escaping '@', if present, is not part of the name.
                string name = _reader.GetCurrentMatchedString(tokenStart);
                if (name.StartsWith("@", StringComparison.Ordinal))
                {
                    name = name.Substring(1);
                }

                current = new IdentifierToken();
                current.InnerText = name;
                return true;
            }

            // Open scope.
            if (_reader.Sink("{"))
            {
                current = new CSharpTokenizer.OpenScopeToken();
                return true;
            }

            // Close scope.
            if (_reader.Sink("}"))
            {
                current = new CSharpTokenizer.CloseScopeToken();
                return true;
            }

            // Hexadecimal integer literal.
            if (_reader.SinkIgnoreCase("0x"))
            {
                // At least one hex digit has to follow the prefix.
                if (!_reader.SinkMultipleHexDigits())
                {
                    current = new ExpectedValidHexDigitToken();
                    return true;
                }

                // Optional suffix: L, U, l, u, ul, etc.
                _reader.SinkLongIntegerSuffix();

                current = new HexIntegerLiteralToken();
                return true;
            }

            // Decimal integer literal.
            if (_reader.SinkMultipleDecimalDigits())
            {
                // Optional suffix: L, U, l, u, ul, etc.
                _reader.SinkLongIntegerSuffix();

                current = new DecimalIntegerLiteralToken();
                return true;
            }

            // Operators and punctuators.
            if (_reader.SinkOperatorOrPunctuator())
            {
                current = new OperatorOrPunctuatorToken();
                return true;
            }

            // Preprocessor line: classified by its directive, then consumed whole.
            if (_reader.CurrentCharacter == '#')
            {
                if (_reader.Sink("#if"))
                {
                    current = new OpenConditionalDirectiveToken();
                }
                else if (_reader.Sink("#endif"))
                {
                    current = new CloseConditionalDirectiveToken();
                }
                else
                {
                    current = new PreprocessorToken();
                }

                _reader.SinkToEndOfLine();
                return true;
            }

            // Nothing matched: consume one character and report a syntax error.
            _reader.SinkCharacter();
            current = new UnrecognizedToken();
            return true;
        }
Пример #26
0
        /// <summary>
        /// Lexes a small program that packs many token kinds close together and
        /// checks the result token by token against a hand-written expectation.
        /// </summary>
        void LexerSmokeTest()
        {
            const string program = @"fn foo(i: int) -> int {
    let a=1;const _b=2 as ifelse// boring
    return -i+break 1.0E-5+'\n'0268==
    putint(foo(*123456));
}
";

            // The tuple given to each token is the zero-based (line, column) at
            // which that token starts in the program text above.
            Token[] expected =
            {
                // Line 0: fn foo(i: int) -> int {
                new KeywordToken(Keyword.Fn, (0, 0)),
                new IdentifierToken("foo", (0, 3)),
                new OperatorToken(Operator.LeftParen, (0, 6)),
                new IdentifierToken("i", (0, 7)),
                new OperatorToken(Operator.Colon, (0, 8)),
                new IdentifierToken("int", (0, 10)),
                new OperatorToken(Operator.RightParen, (0, 13)),
                new OperatorToken(Operator.Arrow, (0, 15)),
                new IdentifierToken("int", (0, 18)),
                new OperatorToken(Operator.LeftBrace, (0, 22)),

                // Line 1: the trailing "// boring" comment contributes no token.
                new KeywordToken(Keyword.Let, (1, 4)),
                new IdentifierToken("a", (1, 8)),
                new OperatorToken(Operator.Assign, (1, 9)),
                new UInt64LiteralToken(1, (1, 10)),
                new OperatorToken(Operator.Semicolon, (1, 11)),
                new KeywordToken(Keyword.Const, (1, 12)),
                new IdentifierToken("_b", (1, 18)),
                new OperatorToken(Operator.Assign, (1, 20)),
                new UInt64LiteralToken(2, (1, 21)),
                new KeywordToken(Keyword.As, (1, 23)),
                new IdentifierToken("ifelse", (1, 26)),

                // Line 2: tokens written back to back without separators.
                new KeywordToken(Keyword.Return, (2, 4)),
                new OperatorToken(Operator.Minus, (2, 11)),
                new IdentifierToken("i", (2, 12)),
                new OperatorToken(Operator.Plus, (2, 13)),
                new KeywordToken(Keyword.Break, (2, 14)),
                new DoubleLiteralToken(1.0E-5, (2, 20)),
                new OperatorToken(Operator.Plus, (2, 26)),
                new CharLiteralToken('\n', (2, 27)),
                new UInt64LiteralToken(0268, (2, 31)),
                new OperatorToken(Operator.Equal, (2, 35)),

                // Line 3: putint(foo(*123456));
                new IdentifierToken("putint", (3, 4)),
                new OperatorToken(Operator.LeftParen, (3, 10)),
                new IdentifierToken("foo", (3, 11)),
                new OperatorToken(Operator.LeftParen, (3, 14)),
                new OperatorToken(Operator.Mult, (3, 15)),
                new UInt64LiteralToken(123456, (3, 16)),
                new OperatorToken(Operator.RightParen, (3, 22)),
                new OperatorToken(Operator.RightParen, (3, 23)),
                new OperatorToken(Operator.Semicolon, (3, 24)),

                // Line 4: }
                new OperatorToken(Operator.RightBrace, (4, 0)),
            };

            var actual = new Lexer(program).Parse().ToList();

            // Compare the counts first so that a missing or extra token fails here
            // instead of as an index error in the loop below.
            Assert.Equal(expected.Length, actual.Count);

            for (var index = 0; index < expected.Length; index++)
            {
                AssertJsonEqual(expected[index], actual[index]);
            }
        }
Пример #27
0
 /// <summary>
 /// Handles a keyword token by forwarding it to <c>applyToParseTreeNode</c>.
 /// Declared virtual so that a derived class can replace this behavior.
 /// </summary>
 /// <param name="operand">The keyword token to process.</param>
 /// <returns>The value <c>applyToParseTreeNode</c> returns for <paramref name="operand"/>.</returns>
 public virtual T applyToKeywordToken(KeywordToken operand)
 {
     T result = applyToParseTreeNode(operand);
     return result;
 }
Пример #28
0
 /// <summary>
 /// Creates the node from the keyword token it was parsed from.
 /// </summary>
 /// <param name="kwtok">
 /// Token stored in <c>Null</c>. Its <c>Location</c> is handed to the base
 /// constructor, so it must not be null.
 /// </param>
 public ANodeNullLiteral(KeywordToken kwtok) : base(kwtok.Location)
 {
     this.Null = kwtok;
 }
Пример #29
0
 /// <summary>
 /// Creates the node from the keyword token it was parsed from.
 /// </summary>
 /// <param name="kwtok">
 /// Token stored in <c>Bool</c>. Its <c>Location</c> is handed to the base
 /// constructor, so it must not be null.
 /// </param>
 public ANodeBoolLiteral(KeywordToken kwtok) : base(kwtok.Location)
 {
     this.Bool = kwtok;
 }
Пример #30
0
        /// <summary>
        /// Reads the remaining input and splits it into tokens.
        /// </summary>
        /// <returns>The tokens, in the order in which they were read.</returns>
        /// <exception cref="Exception">
        /// The next character belongs to none of the character classes handled here.
        /// </exception>
        public Token[] Tokenize()
        {
            var tokens  = new List <Token>();
            var builder = new StringBuilder();

            while (!Eof())
            {
                // Skip whitespace.
                Skip(CharType.WhiteSpace);

                // Skipping may have used up the rest of the input (trailing
                // whitespace). Stop here instead of asking PeekType to classify a
                // character that is not there.
                if (Eof())
                {
                    break;
                }

                switch (PeekType())
                {
                case CharType.Alpha:
                    // A word is either a keyword or an identifier.
                    ReadToken(builder, CharType.AlphaNumeric);
                    string s = builder.ToString();
                    if (KeywordToken.IsKeyword(s))
                    {
                        tokens.Add(new KeywordToken(s));
                    }
                    else
                    {
                        tokens.Add(new IdentifierToken(s));
                    }
                    builder.Clear();
                    break;

                case CharType.Numeric:
                    ReadToken(builder, CharType.Numeric);
                    tokens.Add(new NumberLiteralToken(builder.ToString()));
                    builder.Clear();
                    break;

                case CharType.Operator:
                    ReadToken(builder, CharType.Operator);
                    tokens.Add(new OperatorToken(builder.ToString()));
                    builder.Clear();
                    break;

                // The remaining classes are single-character tokens taken with Next().
                case CharType.OpenBrace:
                    tokens.Add(new OpenBraceToken(Next().ToString()));
                    break;

                case CharType.CloseBrace:
                    tokens.Add(new CloseBraceToken(Next().ToString()));
                    break;

                case CharType.ArgSeperator:
                    tokens.Add(new ArgSeperatorToken(Next().ToString()));
                    break;

                case CharType.StatementSeperator:
                    tokens.Add(new StatementSperatorToken(Next().ToString()));
                    break;

                default:
                    throw new Exception("The tokenizer found an unidentifiable character.");
                }
            }

            return(tokens.ToArray());
        }
Пример #31
0
        /// <summary>
        /// Consumes one Visual Basic token from the reader and stores a token
        /// object describing it in <c>current</c>.
        /// </summary>
        /// <returns>
        /// Always <c>true</c>. Malformed or unknown input is reported through a
        /// dedicated error token type rather than through the return value.
        /// </returns>
        internal override bool FindNextToken()
        {
            // Every recognizer below ends in a return, so they are laid out as a flat
            // list of guards. Their order matters because most reader calls consume
            // input when they succeed.
            int tokenStart = _reader.Position;

            // VB docs claim whitespace is Unicode category Zs. However, that category
            // does not contain tabs, so a less restrictive definition is assumed.
            if (_reader.SinkWhiteSpace())
            {
                while (_reader.SinkWhiteSpace())
                {
                }

                // Whitespace followed by '_' may be a line continuation.
                if (_reader.SinkLineContinuationCharacter())
                {
                    // One position back from the current one, i.e. where the
                    // continuation character just consumed sits. Needed if this
                    // turns out not to be a continuation after all.
                    int underscorePosition = _reader.Position - 1;

                    // Whitespace is allowed between the '_' and the line break.
                    while (_reader.SinkWhiteSpace())
                    {
                    }

                    // At least one newline is required for this to count as a
                    // line continuation; any further newlines are consumed too.
                    bool sawNewLine = false;
                    while (_reader.SinkNewLine())
                    {
                        sawNewLine = true;
                    }

                    if (sawNewLine)
                    {
                        current = new VisualBasicTokenizer.LineContinuationToken();
                        return true;
                    }

                    // Not a continuation: rewind so the '_' is tokenized separately
                    // and report only the whitespace in front of it.
                    _reader.Position = underscorePosition;
                }

                current = new WhitespaceToken();
                return true;
            }

            // Line terminators are significant and separate from whitespace; each
            // one gets its own token.
            if (_reader.SinkNewLine())
            {
                current = new VisualBasicTokenizer.LineTerminatorToken();
                return true;
            }

            // Comment, introduced by ' or rem: extends to the end of the line.
            if (_reader.SinkLineCommentStart())
            {
                _reader.SinkToEndOfLine();
                current = new CommentToken();
                return true;
            }

            // Identifier or keyword. VB lets an identifier be escaped by wrapping it
            // in []: Date is a keyword, but [Date] is an identifier.
            if (_reader.CurrentCharacter == '[' || _reader.MatchNextIdentifierStart())
            {
                bool isEscaped = _reader.CurrentCharacter == '[';
                if (isEscaped)
                {
                    _reader.SinkCharacter();

                    // The bracket must be followed by an identifier start.
                    if (!_reader.SinkIdentifierStart())
                    {
                        current = new ExpectedIdentifierToken();
                        return true;
                    }
                }

                // Consume the remainder of the identifier.
                while (_reader.SinkIdentifierPart())
                {
                }

                if (!isEscaped)
                {
                    // Only unescaped identifiers may carry the optional trailing
                    // type character.
                    _reader.SinkTypeCharacter();
                }
                else if (!_reader.Sink("]"))
                {
                    // An escaped identifier needs its terminating ']'.
                    current = new ExpectedIdentifierToken();
                    return true;
                }

                // A lone '_' is not a legal identifier because it is ambiguous with
                // line continuation.
                string word = _reader.GetCurrentMatchedString(tokenStart);
                if (word == "_" || word == "[_]" || word == "[]")
                {
                    current = new ExpectedIdentifierToken();
                    return true;
                }

                // Keyword matching is done on an upper-cased copy of the text.
                string upper = word.ToUpper(CultureInfo.InvariantCulture);

                if (upper == "FALSE" || upper == "TRUE")
                {
                    current = new BooleanLiteralToken();
                    return true;
                }

                if (Array.IndexOf(s_keywordList, upper) >= 0)
                {
                    current = new KeywordToken();
                    return true;
                }

                current = new IdentifierToken();

                // For an escaped identifier the inner text excludes the brackets.
                if (isEscaped)
                {
                    current.InnerText = word.Substring(1, word.Length - 2);
                }

                return true;
            }

            // Hex integer literal.
            if (_reader.SinkHexIntegerPrefix())
            {
                // At least one hex digit has to follow the prefix.
                if (!_reader.SinkMultipleHexDigits())
                {
                    current = new ExpectedValidHexDigitToken();
                    return true;
                }

                // The suffix is optional.
                _reader.SinkIntegerSuffix();

                current = new HexIntegerLiteralToken();
                return true;
            }

            // Octal integer literal.
            if (_reader.SinkOctalIntegerPrefix())
            {
                // At least one octal digit has to follow the prefix.
                if (!_reader.SinkMultipleOctalDigits())
                {
                    current = new VisualBasicTokenizer.ExpectedValidOctalDigitToken();
                    return true;
                }

                // The suffix is optional.
                _reader.SinkIntegerSuffix();

                current = new VisualBasicTokenizer.OctalIntegerLiteralToken();
                return true;
            }

            // Decimal integer literal.
            if (_reader.SinkMultipleDecimalDigits())
            {
                // The suffix is optional.
                _reader.SinkDecimalIntegerSuffix();

                current = new DecimalIntegerLiteralToken();
                return true;
            }

            // Preprocessor line: classified by its directive, then consumed whole.
            if (_reader.CurrentCharacter == '#')
            {
                if (_reader.SinkIgnoreCase("#if"))
                {
                    current = new OpenConditionalDirectiveToken();
                }
                else if (_reader.SinkIgnoreCase("#end if"))
                {
                    current = new CloseConditionalDirectiveToken();
                }
                else
                {
                    current = new PreprocessorToken();
                }

                _reader.SinkToEndOfLine();
                return true;
            }

            // Separator.
            if (_reader.SinkSeparatorCharacter())
            {
                current = new VisualBasicTokenizer.SeparatorToken();
                return true;
            }

            // Operator.
            if (_reader.SinkOperator())
            {
                current = new OperatorToken();
                return true;
            }

            // String literal.
            if (_reader.Sink("\""))
            {
                while (true)
                {
                    // A doubled quote stands for a literal quote, not the terminator.
                    while (_reader.Sink("\"\""))
                    {
                    }

                    // Stop at the end of the input or once the closing quote is consumed.
                    if (_reader.EndOfLines || _reader.SinkCharacter() == '\"')
                    {
                        break;
                    }
                }

                // Can't end a file inside a string.
                if (_reader.EndOfLines)
                {
                    current = new EndOfFileInsideStringToken();
                    return true;
                }

                current = new StringLiteralToken();
                return true;
            }

            // Nothing matched: consume one character and report a syntax error.
            _reader.SinkCharacter();
            current = new UnrecognizedToken();
            return true;
        }
Пример #32
0
        /*
        * Method:  FindNextToken
        * 
        * Find the next token. Return 'true' if one was found. False, otherwise.
        */
        override internal bool FindNextToken()
        {
            int startPosition = _reader.Position;

            // Dealing with whitespace?
            if (_reader.SinkMultipleWhiteSpace())
            {
                current = new WhitespaceToken();
                return true;
            }
            // Check for one-line comment
            else if (_reader.Sink("//"))
            {
                // Looks like a one-line comment. Follow it to the End-of-line
                _reader.SinkToEndOfLine();

                current = new CommentToken();
                return true;
            }
            // Check for multi-line comment
            else if (_reader.Sink("/*"))
            {
                _reader.SinkUntil("*/");

                // Was the ending */ found?
                if (_reader.EndOfLines)
                {
                    // No. There was a /* without a */. Return this a syntax error token.
                    current = new CSharpTokenizer.EndOfFileInsideCommentToken();
                    return true;
                }

                current = new CommentToken();
                return true;
            }
            // Handle chars
            else if (_reader.Sink("\'"))
            {
                while (_reader.CurrentCharacter != '\'')
                {
                    if (_reader.Sink("\\"))
                    {
                        /* reader.Skip the escape sequence. 
                            This isn't exactly right. We should detect:
                            
                            simple-escape-sequence: one of 
                            \' \" \\ \0 \a \b \f \n \r \t \v 
                            
                            hexadecimal-escape-sequence: 
                            \x   hex-digit   hex-digit[opt]   hex-digit[opt]  hex-digit[opt]                                
                        */
                    }

                    _reader.SinkCharacter();
                }

                if (_reader.SinkCharacter() != '\'')
                {
                    Debug.Assert(false, "Code defect in tokenizer: Should have yielded a closing tick.");
                }
                current = new CSharpTokenizer.CharLiteralToken();
                return true;
            }
            // Check for verbatim string
            else if (_reader.Sink("@\""))
            {
                do
                {
                    // Inside a verbatim string "" is treated as a special character
                    while (_reader.Sink("\"\""))
                    {
                    }
                }
                while (!_reader.EndOfLines && _reader.SinkCharacter() != '\"');

                // Can't end a file inside a string 
                if (_reader.EndOfLines)
                {
                    current = new EndOfFileInsideStringToken();
                    return true;
                }

                // reader.Skip the ending quote.
                current = new StringLiteralToken();
                current.InnerText = _reader.GetCurrentMatchedString(startPosition).Substring(1);
                return true;
            }
            // Check for a quoted string.
            else if (_reader.Sink("\""))
            {
                while (_reader.CurrentCharacter == '\\' || _reader.MatchRegularStringLiteral())
                {
                    // See if we have an escape sequence.
                    if (_reader.SinkCharacter() == '\\')
                    {
                        // This is probably an escape character.
                        if (_reader.SinkStringEscape())
                        {
                            // This isn't nearly right. We just do barely enough to make a string
                            // with an embedded escape sequence return _some_ string whose start and 
                            // end match the real bounds of the string.
                        }
                        else
                        {
                            // This is a compiler error. 
                            _reader.SinkCharacter();
                            current = new CSharpTokenizer.UnrecognizedStringEscapeToken();
                            return true;
                        }
                    }
                }

                // Is it a newline?
                if (TokenChar.IsNewLine(_reader.CurrentCharacter))
                {
                    current = new CSharpTokenizer.NewlineInsideStringToken();
                    return true;
                }

                // Create the token.
                if (_reader.SinkCharacter() != '\"')
                {
                    Debug.Assert(false, "Defect in tokenizer: Should have yielded a terminating quote.");
                }
                current = new StringLiteralToken();
                return true;
            }
            // Identifier or keyword?
            else if
            (
                // From 2.4.2 Identifiers: A '@' can be used to prefix an identifier so that a keyword can be used as an identifier.
                _reader.CurrentCharacter == '@' ||
                _reader.MatchNextIdentifierStart()
            )
            {
                if (_reader.CurrentCharacter == '@')
                {
                    _reader.SinkCharacter();
                }

                // Now, the next character must be an identifier start.
                if (!_reader.SinkIdentifierStart())
                {
                    current = new ExpectedIdentifierToken();
                    return true;
                }

                // Sink the rest of the identifier.                     
                while (_reader.SinkIdentifierPart())
                {
                }
                string identifierOrKeyword = _reader.GetCurrentMatchedString(startPosition);

                switch (identifierOrKeyword)
                {
                    default:

                        if (Array.IndexOf(s_keywordList, identifierOrKeyword) >= 0)
                        {
                            current = new KeywordToken();
                            return true;
                        }

                        // If the identifier starts with '@' then we need to strip it off.
                        // The '@' is for escaping so that we can have an identifier called
                        // the same thing as a reserved keyword (i.e. class, if, foreach, etc)
                        string identifier = _reader.GetCurrentMatchedString(startPosition);
                        if (identifier.StartsWith("@", StringComparison.Ordinal))
                        {
                            identifier = identifier.Substring(1);
                        }

                        // Create the token.
                        current = new IdentifierToken();
                        current.InnerText = identifier;
                        return true;
                    case "false":
                    case "true":
                        current = new BooleanLiteralToken();
                        return true;
                    case "null":
                        current = new CSharpTokenizer.NullLiteralToken();
                        return true;
                }
            }
            // Open scope
            else if (_reader.Sink("{"))
            {
                current = new CSharpTokenizer.OpenScopeToken();
                return true;
            }
            // Close scope
            else if (_reader.Sink("}"))
            {
                current = new CSharpTokenizer.CloseScopeToken();
                return true;
            }
            // Hexadecimal integer literal
            else if (_reader.SinkIgnoreCase("0x"))
            {
                // Sink the hex digits.
                if (!_reader.SinkMultipleHexDigits())
                {
                    current = new ExpectedValidHexDigitToken();
                    return true;
                }

                // Skip the L, U, l, u, ul, etc.                    
                _reader.SinkLongIntegerSuffix();

                current = new HexIntegerLiteralToken();
                return true;
            }
            // Decimal integer literal
            else if (_reader.SinkMultipleDecimalDigits())
            {
                // Skip the L, U, l, u, ul, etc.
                _reader.SinkLongIntegerSuffix();

                current = new DecimalIntegerLiteralToken();
                return true;
            }
            // Check for single-digit operators and punctuators
            else if (_reader.SinkOperatorOrPunctuator())
            {
                current = new OperatorOrPunctuatorToken();
                return true;
            }
            // Preprocessor line
            else if (_reader.CurrentCharacter == '#')
            {
                if (_reader.Sink("#if"))
                {
                    current = new OpenConditionalDirectiveToken();
                }
                else if (_reader.Sink("#endif"))
                {
                    current = new CloseConditionalDirectiveToken();
                }
                else
                {
                    current = new PreprocessorToken();
                }

                _reader.SinkToEndOfLine();

                return true;
            }

            // We didn't recognize the token, so this is a syntax error. 
            _reader.SinkCharacter();
            current = new UnrecognizedToken();
            return true;
        }