Example #1
0
        /// <summary>
        /// Reads the input until <c>eof()</c> reports the end and converts it into tokens.
        /// </summary>
        /// <returns>The tokens in the order they were produced.</returns>
        /// <exception cref="Exception">
        /// Thrown when <c>peekType()</c> returns a <c>CharType</c> that none of the cases below handle.
        /// </exception>
        public Token[] Tokenize()
        {
            var tokens = new List<Token>();

            // Scratch buffer shared by all multi-character tokens; cleared after each use.
            var builder = new StringBuilder();

            while (!eof())
            {
                // White space has no meaning besides separating tokens (we're not python!)
                skip(CharType.WhiteSpace);

                // Skipping may have consumed the rest of the input (e.g. trailing white space).
                // Stop here rather than classifying a character that does not exist.
                if (eof())
                {
                    break;
                }

                switch (peekType())
                {
                case CharType.Alpha:     // start of identifier or keyword
                    readToken(builder, CharType.AlphaNumeric);
                    string word = builder.ToString();
                    if (KeywordToken.IsKeyword(word))
                    {
                        tokens.Add(new KeywordToken(word));
                    }
                    else
                    {
                        tokens.Add(new IdentifierToken(word));
                    }
                    builder.Clear();
                    break;

                case CharType.Numeric:     // start of number literal
                    readToken(builder, CharType.Numeric);
                    tokens.Add(new NumberLiteralToken(builder.ToString()));
                    builder.Clear();
                    break;

                case CharType.Operator:
                    readToken(builder, CharType.Operator);
                    tokens.Add(new OperatorToken(builder.ToString()));
                    builder.Clear();
                    break;

                // The remaining token kinds are built from the single value returned by next().
                case CharType.OpenBrace:
                    tokens.Add(new OpenBraceToken(next().ToString()));
                    break;

                case CharType.CloseBrace:
                    tokens.Add(new CloseBraceToken(next().ToString()));
                    break;

                case CharType.ArgSeperator:
                    tokens.Add(new ArgSeperatorToken(next().ToString()));
                    break;

                case CharType.StatementSeperator:
                    tokens.Add(new StatementSperatorToken(next().ToString()));
                    break;

                default:
                    throw new Exception("The tokenizer found an unidentifiable character.");
                }
            }

            return tokens.ToArray();
        }
Example #2
0
        /// <summary>
        /// Reads the input until <c>Eof()</c> reports the end and converts it into tokens.
        /// </summary>
        /// <returns>The tokens in the order they were produced.</returns>
        /// <exception cref="Exception">
        /// Thrown when <c>PeekType()</c> returns a <c>CharType</c> that none of the cases below handle.
        /// </exception>
        public Token[] Tokenize()
        {
            var tokens  = new List<Token>();

            // Scratch buffer shared by all multi-character tokens; cleared after each use.
            var builder = new StringBuilder();

            while (!Eof())
            {
                // Skip white space
                Skip(CharType.WhiteSpace);

                // Skipping may have consumed the rest of the input (e.g. trailing white space).
                // Stop here rather than classifying a character that does not exist.
                if (Eof())
                {
                    break;
                }

                switch (PeekType())
                {
                case CharType.Alpha:     // start of identifier or keyword
                    ReadToken(builder, CharType.AlphaNumeric);
                    string word = builder.ToString();
                    if (KeywordToken.IsKeyword(word))
                    {
                        tokens.Add(new KeywordToken(word));
                    }
                    else
                    {
                        tokens.Add(new IdentifierToken(word));
                    }
                    builder.Clear();
                    break;

                case CharType.Numeric:     // start of number literal
                    ReadToken(builder, CharType.Numeric);
                    tokens.Add(new NumberLiteralToken(builder.ToString()));
                    builder.Clear();
                    break;

                case CharType.Operator:
                    ReadToken(builder, CharType.Operator);
                    tokens.Add(new OperatorToken(builder.ToString()));
                    builder.Clear();
                    break;

                // The remaining token kinds are built from the single value returned by Next().
                case CharType.OpenBrace:
                    tokens.Add(new OpenBraceToken(Next().ToString()));
                    break;

                case CharType.CloseBrace:
                    tokens.Add(new CloseBraceToken(Next().ToString()));
                    break;

                case CharType.ArgSeperator:
                    tokens.Add(new ArgSeperatorToken(Next().ToString()));
                    break;

                case CharType.StatementSeperator:
                    tokens.Add(new StatementSperatorToken(Next().ToString()));
                    break;

                default:
                    throw new Exception("The tokenizer found an unidentifiable character.");
                }
            }

            return tokens.ToArray();
        }
Example #3
0
File: Lexer.cs Project: crclz/CZero
        /// <summary>
        /// Lazily produces tokens from <c>_reader</c> until it reports that the end was reached.
        /// </summary>
        /// <remarks>
        /// The kind of token to attempt is chosen from the current character: letter or underscore
        /// (identifier/keyword), digit (double first, then unsigned), double quote (string literal),
        /// single quote (char literal), otherwise comment and then operator. Comments are consumed
        /// without producing a token.
        /// </remarks>
        /// <returns>A deferred sequence of tokens.</returns>
        /// <exception cref="LexerException">
        /// Thrown when white-space skipping stops without reaching the end, when a string or char
        /// literal fails to match, or when the current character matches no token kind.
        /// </exception>
        public IEnumerable<Token> Parse()
        {
            while (!_reader.ReachedEnd)
            {
                bool foundNonWhite = _reader.AdvanceUntilNonWhite();
                if (!foundNonWhite)
                {
                    if (ReaderReachedEnd)
                    {
                        // Nothing but white space was left: stop producing tokens.
                        yield break;
                    }

                    throw new LexerException("Bad input. Reader did not reach end");
                }

                var startPosition = _reader.Position;
                char c = _reader.CurrentChar();

                // Identifier or keyword
                if (c == '_' || char.IsLetter(c))
                {
                    bool matched = RegexMatch(RegexList.IdentifierOrKeyword, out string word);
                    // The current character alone already starts an identifier,
                    // so the match is expected to succeed.
                    Debug.Assert(matched);

                    Token wordToken;
                    if (KeywordToken.IsKeyword(word))
                    {
                        wordToken = KeywordToken.FromKeywordString(word, startPosition);
                    }
                    else
                    {
                        wordToken = new IdentifierToken(word, startPosition);
                    }

                    yield return wordToken;
                    continue;
                }

                // Numeric literal: a double is attempted before falling back to an unsigned integer.
                if (char.IsDigit(c))
                {
                    if (TryMatchDouble(out DoubleLiteralToken asDouble))
                    {
                        yield return asDouble;
                        continue;
                    }

                    bool matched = TryMatchUnsigned(out UInt64LiteralToken asUnsigned);
                    // A single digit is expected to match as an unsigned literal at the very least.
                    Debug.Assert(matched);

                    yield return asUnsigned;
                    continue;
                }

                // String literal
                if (c == '"')
                {
                    if (TryMatchStringLiteral(out StringLiteralToken stringToken))
                    {
                        yield return stringToken;
                        continue;
                    }

                    throw new LexerException("Failed to parse String Literal");
                }

                // Char literal
                if (c == '\'')
                {
                    if (TryMatchCharLiteral(out CharLiteralToken charToken))
                    {
                        yield return charToken;
                        continue;
                    }

                    throw new LexerException("Failed to parse Char Literal");
                }

                // Comments must be tried before operators, because "//" would
                // otherwise be picked up as the divide operator "/".
                if (RegexMatch(RegexList.Comment, out string _))
                {
                    // A comment produces no token.
                    continue;
                }

                if (TryMatchOperator(out OperatorToken operatorToken))
                {
                    yield return operatorToken;
                    continue;
                }

                // Nothing matched.
                throw new LexerException($"Unexpected character '{c}' at " +
                                         $"({startPosition.Line},{startPosition.Column})");
            }
        }
Example #4
0
        /// <summary>
        /// Parses the input code and returns an array of Token objects.
        /// </summary>
        /// <remarks>
        /// <c>Code</c> is overwritten with the result of
        /// <c>StripCommentsAndNormalizeNewlines(Code)</c> before scanning starts.
        /// When a member access follows an identifier, that identifier token is
        /// replaced by a <c>TableIdentifierToken</c> carrying the same content.
        /// </remarks>
        /// <returns>The tokens in the order they were produced.</returns>
        /// <exception cref="Exception">
        /// Thrown when the next character's <c>CharType</c> is not handled by any case below.
        /// </exception>
        public Token[] Tokenize()
        {
            var tokens  = new List<Token>();

            // Scratch buffer shared by all multi-character tokens; cleared after each use.
            var builder = new StringBuilder();

            Code = StripCommentsAndNormalizeNewlines(Code);

            while (!EndOfCode)
            {
                SkipCharacter(CharType.WhiteSpace);

                // Skipping may have consumed the rest of the code (e.g. trailing white space).
                // Stop here rather than peeking at a character that does not exist.
                if (EndOfCode)
                {
                    break;
                }

                var nextChar     = PeekNextCharacter();
                var nextCharType = nextChar.GetCharType();
                switch (nextCharType)
                {
                case CharType.Alpha:     //start of identifier or keyword
                    ReadTokens(builder, CharType.AlphaNumeric);
                    string word = builder.ToString();
                    if (KeywordToken.IsKeyword(word))
                    {
                        tokens.Add(new KeywordToken(word));
                    }
                    else
                    {
                        tokens.Add(new IdentifierToken(word));
                    }
                    builder.Clear();
                    break;

                case CharType.MemberAccess: //start of member access
                    NextCharacter();        //Skip the period
                    ReadTokens(builder, CharType.AlphaNumeric);
                    tokens.Add(new MemberAccessToken(builder.ToString()));
                    builder.Clear();
                    //TODO: Find an alternative method
                    //Patch the token in front of the member access (second-to-last element)
                    //as a TableIdentifierToken. A member access that is the very first token
                    //has nothing in front of it, so the list must hold at least two elements
                    //before indexing.
                    if (tokens.Count >= 2)
                    {
                        var previousToken = tokens[tokens.Count - 2];
                        if (previousToken is IdentifierToken)
                        {
                            tokens[tokens.Count - 2] = new TableIdentifierToken(previousToken.Content);
                        }
                    }
                    break;

                case CharType.StringDelimiter:
                    NextCharacter();     //Skip the opening quote
                    ReadTokensUntil(builder, CharType.StringDelimiter);
                    // NOTE(review): an unterminated string reaches this call at the end of the
                    // code; how NextCharacter() behaves there is not visible from here - confirm.
                    NextCharacter();     //Skip the ending quote
                    tokens.Add(new StringLiteralToken(builder.ToString()));
                    builder.Clear();
                    break;

                case CharType.Numeric:     //start of number literal, allow for decimal numbers too
                    ReadTokens(builder, CharType.DecimalNumeric);
                    tokens.Add(new NumberLiteralToken(builder.ToString()));
                    builder.Clear();
                    break;

                case CharType.Operator:
                    ReadTokens(builder, CharType.Operator);
                    tokens.Add(new OperatorToken(builder.ToString()));
                    builder.Clear();
                    break;

                // The remaining token kinds are built from the single value returned by NextCharacter().
                case CharType.OpenBrace:
                    tokens.Add(new OpenBraceToken(NextCharacter().ToString()));
                    break;

                case CharType.CloseBrace:
                    tokens.Add(new CloseBraceToken(NextCharacter().ToString()));
                    break;

                case CharType.ArgSeperator:
                    tokens.Add(new ArgSeperatorToken(NextCharacter().ToString()));
                    break;

                case CharType.StatementSeperator:
                    tokens.Add(new StatementSeparatorToken(NextCharacter().ToString()));
                    break;

                default:
                    throw new Exception($"The tokenizer found an unidentifiable character: {nextChar}");
                }
            }

            return tokens.ToArray();
        }