/// <summary>
/// Scans the input from the current position to its end and converts it into tokens.
/// </summary>
/// <returns>
/// The tokens in the order they were read; an empty array when the input yields no tokens.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the next character's type is not handled by any of the cases below.
/// </exception>
public Token[] Tokenize()
{
    var tokens = new List<Token>();
    var builder = new StringBuilder();

    while (!eof())
    {
        // Whitespace has no meaning besides separating tokens (we're not Python!).
        skip(CharType.WhiteSpace);

        // Skipping may have consumed trailing whitespace and exhausted the input;
        // without this check the switch below would inspect a character that is not there.
        if (eof())
        {
            break;
        }

        switch (peekType())
        {
            case CharType.Alpha:
            {
                // Start of an identifier or keyword; the rest may be alphanumeric.
                readToken(builder, CharType.AlphaNumeric);
                string text = builder.ToString();
                builder.Clear();

                if (KeywordToken.IsKeyword(text))
                {
                    tokens.Add(new KeywordToken(text));
                }
                else
                {
                    tokens.Add(new IdentifierToken(text));
                }
                break;
            }

            case CharType.Numeric:
                // Start of a number literal.
                readToken(builder, CharType.Numeric);
                tokens.Add(new NumberLiteralToken(builder.ToString()));
                builder.Clear();
                break;

            case CharType.Operator:
                readToken(builder, CharType.Operator);
                tokens.Add(new OperatorToken(builder.ToString()));
                builder.Clear();
                break;

            // The remaining token kinds are built from exactly one character.
            case CharType.OpenBrace:
                tokens.Add(new OpenBraceToken(next().ToString()));
                break;

            case CharType.CloseBrace:
                tokens.Add(new CloseBraceToken(next().ToString()));
                break;

            case CharType.ArgSeperator:
                tokens.Add(new ArgSeperatorToken(next().ToString()));
                break;

            case CharType.StatementSeperator:
                tokens.Add(new StatementSperatorToken(next().ToString()));
                break;

            default:
                throw new Exception("The tokenizer found an unidentifiable character.");
        }
    }

    return tokens.ToArray();
}
/// <summary>
/// Reads the input from the current position to its end and returns the tokens found.
/// </summary>
/// <returns>
/// The tokens in the order they were read; an empty array when the input yields no tokens.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the next character's type is not handled by any of the cases below.
/// </exception>
public Token[] Tokenize()
{
    var tokens = new List<Token>();
    var builder = new StringBuilder();

    while (!Eof())
    {
        // Skip whitespace between tokens.
        Skip(CharType.WhiteSpace);

        // Skipping may have consumed trailing whitespace and exhausted the input;
        // without this check the switch below would inspect a character that is not there.
        if (Eof())
        {
            break;
        }

        switch (PeekType())
        {
            case CharType.Alpha:
            {
                // A leading letter starts an identifier or keyword; the rest may be alphanumeric.
                ReadToken(builder, CharType.AlphaNumeric);
                string text = builder.ToString();
                builder.Clear();

                if (KeywordToken.IsKeyword(text))
                {
                    tokens.Add(new KeywordToken(text));
                }
                else
                {
                    tokens.Add(new IdentifierToken(text));
                }
                break;
            }

            case CharType.Numeric:
                ReadToken(builder, CharType.Numeric);
                tokens.Add(new NumberLiteralToken(builder.ToString()));
                builder.Clear();
                break;

            case CharType.Operator:
                ReadToken(builder, CharType.Operator);
                tokens.Add(new OperatorToken(builder.ToString()));
                builder.Clear();
                break;

            // The remaining token kinds are built from exactly one character.
            case CharType.OpenBrace:
                tokens.Add(new OpenBraceToken(Next().ToString()));
                break;

            case CharType.CloseBrace:
                tokens.Add(new CloseBraceToken(Next().ToString()));
                break;

            case CharType.ArgSeperator:
                tokens.Add(new ArgSeperatorToken(Next().ToString()));
                break;

            case CharType.StatementSeperator:
                tokens.Add(new StatementSperatorToken(Next().ToString()));
                break;

            default:
                throw new Exception("The tokenizer found an unidentifiable character.");
        }
    }

    return tokens.ToArray();
}
/// <summary>
/// Lazily lexes the reader's input, yielding one token per iteration step.
/// </summary>
/// <returns>
/// Keyword, identifier, numeric literal, string literal, char literal and operator tokens
/// in input order. Comments are consumed without producing a token.
/// </returns>
/// <exception cref="LexerException">
/// Thrown when whitespace skipping stops without the reader being at its end, when a string
/// or char literal fails to match, or when the current character starts no known token.
/// </exception>
public IEnumerable<Token> Parse()
{
    while (!_reader.ReachedEnd)
    {
        bool foundNonWhite = _reader.AdvanceUntilNonWhite();
        if (!foundNonWhite)
        {
            if (ReaderReachedEnd)
            {
                // Nothing but whitespace was left: stop producing tokens.
                yield break;
            }

            throw new LexerException("Bad input. Reader did not reach end");
        }

        var startPosition = _reader.Position;
        char current = _reader.CurrentChar();

        // Identifier or keyword: starts with a letter or an underscore.
        if (char.IsLetter(current) || current == '_')
        {
            var matched = RegexMatch(RegexList.IdentifierOrKeyword, out string word);
            Debug.Assert(matched); // Worst case: an identifier made of the current character only

            Token wordToken;
            if (KeywordToken.IsKeyword(word))
            {
                wordToken = KeywordToken.FromKeywordString(word, startPosition);
            }
            else
            {
                wordToken = new IdentifierToken(word, startPosition);
            }

            yield return wordToken;
            continue;
        }

        // Numeric literal: a double is attempted first, then an unsigned integer.
        if (char.IsDigit(current))
        {
            if (TryMatchDouble(out DoubleLiteralToken doubleToken))
            {
                yield return doubleToken;
            }
            else
            {
                var matched = TryMatchUnsigned(out UInt64LiteralToken unsignedToken);
                Debug.Assert(matched); // Worst case: a 1-digit unsigned literal token
                yield return unsignedToken;
            }

            continue;
        }

        // String literal.
        if (current == '"')
        {
            if (!TryMatchStringLiteral(out StringLiteralToken stringLiteral))
            {
                throw new LexerException("Failed to parse String Literal");
            }

            yield return stringLiteral;
            continue;
        }

        // Char literal.
        if (current == '\'')
        {
            if (!TryMatchCharLiteral(out CharLiteralToken charLiteral))
            {
                throw new LexerException("Failed to parse Char Literal");
            }

            yield return charLiteral;
            continue;
        }

        // NOTE: comments must be tried before operators, because the divide
        // operator is "/" and a comment starts with "//".
        if (RegexMatch(RegexList.Comment, out string _))
        {
            // Comments are ignored: no token is produced.
            continue;
        }

        if (TryMatchOperator(out OperatorToken operatorToken))
        {
            yield return operatorToken;
            continue;
        }

        // Nothing matched.
        throw new LexerException($"Unexpected character '{current}' at " +
                                 $"({startPosition.Line},{startPosition.Column})");
    }
}
/// <summary>
/// Parses the input code and returns an array of Token objects.
/// </summary>
/// <remarks>
/// <c>Code</c> is replaced by the result of <c>StripCommentsAndNormalizeNewlines</c> before
/// scanning starts, so calling this method modifies that member.
/// </remarks>
/// <returns>
/// The tokens in the order they were read; an empty array when the code yields no tokens.
/// </returns>
/// <exception cref="Exception">
/// Thrown when the next character's type is not handled by any of the cases below.
/// </exception>
public Token[] Tokenize()
{
    var tokens = new List<Token>();
    var builder = new StringBuilder();

    Code = StripCommentsAndNormalizeNewlines(Code);

    while (!EndOfCode)
    {
        SkipCharacter(CharType.WhiteSpace);

        // Skipping may have consumed trailing whitespace and exhausted the code;
        // without this check the peek below would look past the end.
        if (EndOfCode)
        {
            break;
        }

        var nextChar = PeekNextCharacter();
        var nextCharType = nextChar.GetCharType();

        switch (nextCharType)
        {
            case CharType.Alpha:
            {
                // Start of an identifier or keyword; the rest may be alphanumeric.
                ReadTokens(builder, CharType.AlphaNumeric);
                string text = builder.ToString();
                builder.Clear();

                if (KeywordToken.IsKeyword(text))
                {
                    tokens.Add(new KeywordToken(text));
                }
                else
                {
                    tokens.Add(new IdentifierToken(text));
                }
                break;
            }

            case CharType.MemberAccess:
            {
                // Start of a member access.
                NextCharacter(); // Skip the period
                ReadTokens(builder, CharType.AlphaNumeric);

                // Index of the token that precedes the member access, or -1 when the
                // member access is the very first token (nothing to patch in that case).
                int ownerIndex = tokens.Count - 1;

                tokens.Add(new MemberAccessToken(builder.ToString()));
                builder.Clear();

                // TODO: Find an alternative method
                // Patch the preceding identifier into a TableIdentifierToken.
                if (ownerIndex >= 0 && tokens[ownerIndex] is IdentifierToken)
                {
                    tokens[ownerIndex] = new TableIdentifierToken(tokens[ownerIndex].Content);
                }
                break;
            }

            case CharType.StringDelimiter:
                NextCharacter(); // Skip the opening quote
                ReadTokensUntil(builder, CharType.StringDelimiter);
                // NOTE(review): assumes a closing quote exists; behaviour for an
                // unterminated string depends on NextCharacter at end of code - confirm.
                NextCharacter(); // Skip the ending quote
                tokens.Add(new StringLiteralToken(builder.ToString()));
                builder.Clear();
                break;

            case CharType.Numeric:
                // Start of a number literal; decimal numbers are allowed too.
                ReadTokens(builder, CharType.DecimalNumeric);
                tokens.Add(new NumberLiteralToken(builder.ToString()));
                builder.Clear();
                break;

            case CharType.Operator:
                ReadTokens(builder, CharType.Operator);
                tokens.Add(new OperatorToken(builder.ToString()));
                builder.Clear();
                break;

            // The remaining token kinds are built from exactly one character.
            case CharType.OpenBrace:
                tokens.Add(new OpenBraceToken(NextCharacter().ToString()));
                break;

            case CharType.CloseBrace:
                tokens.Add(new CloseBraceToken(NextCharacter().ToString()));
                break;

            case CharType.ArgSeperator:
                tokens.Add(new ArgSeperatorToken(NextCharacter().ToString()));
                break;

            case CharType.StatementSeperator:
                tokens.Add(new StatementSeparatorToken(NextCharacter().ToString()));
                break;

            default:
                throw new Exception($"The tokenizer found an unidentifiable character: {nextChar}");
        }
    }

    return tokens.ToArray();
}