/// <summary>
/// Reads an identifier or a keyword starting at the reader's current character.
/// </summary>
/// <param name="reader">Reader whose current character is expected to be a valid identifier start.</param>
/// <returns>
/// A token holding the characters read. Its kind is <c>TokenKind.Keyword</c> when <c>IsKeyword</c>
/// accepts the text and <c>TokenKind.Identifier</c> otherwise. Its position is the line and column
/// captured before any character was consumed.
/// </returns>
private Token ReadIdentifierOrKeyword(CodeReader reader)
{
    int tokenLine = reader.Line;
    int tokenColumn = reader.Column;

    if (!reader.CheckCurrent(IsValidIdentifierStartChar))
    {
        OnError("Invalid identifier name start", tokenLine, tokenColumn);
    }

    // A builder avoids re-allocating the whole string for every consumed character.
    var identifier = new System.Text.StringBuilder();

    // The first character is consumed regardless of the outcome of the check above.
    identifier.Append(reader.Consume());

    while (reader.CheckCurrent(IsValidIdentifierChar))
    {
        identifier.Append(reader.Consume());
    }

    string name = identifier.ToString();
    TokenKind kind = IsKeyword(name) ? TokenKind.Keyword : TokenKind.Identifier;

    return new Token(name, kind, tokenLine, tokenColumn);
}
/// <summary>
/// Reads a text literal delimited by double quotes.
/// </summary>
/// <remarks>
/// The literal must be closed before the end of the current line. If the end of the line or the end of
/// the file is reached first, an "Invalid text literal ending" error is reported through <c>OnError</c>
/// and a token with the text gathered so far is still returned.
/// </remarks>
/// <param name="reader">Reader whose current character is expected to be the opening double quote.</param>
/// <returns>
/// A <c>TokenKind.Text</c> token containing the characters between the delimiters (delimiters excluded).
/// Its position is the line and column read right after the opening delimiter was matched.
/// </returns>
private Token ReadTextLiteral(CodeReader reader)
{
    // Match is defined elsewhere; presumably it consumes the opening quote (or reports an error) - verify.
    Match(reader, '\"');

    int tokenLine = reader.Line;
    int tokenColumn = reader.Column;

    // A builder avoids re-allocating the whole string for every consumed character.
    var text = new System.Text.StringBuilder();

    while (!reader.EndOfFile && !reader.EndOfLine)
    {
        // A closing quote ends the literal successfully.
        if (reader.TryMatch('\"'))
        {
            return new Token(text.ToString(), TokenKind.Text, tokenLine, tokenColumn);
        }

        text.Append(reader.Consume());
    }

    // No closing delimiter was found: report the error at the column just past the text read so far.
    OnError("Invalid text literal ending", tokenLine, tokenColumn + text.Length);

    return new Token(text.ToString(), TokenKind.Text, tokenLine, tokenColumn);
}
/// <summary>
/// Splits source code into a sequence of tokens, each carrying its content, its kind and its position in the code.
/// </summary>
/// <param name="code">The source code, written in the language being tokenized.</param>
/// <returns>An <see cref="IEnumerable{T}"/> that lazily yields every token found in <paramref name="code"/>.</returns>
public IEnumerable<Token> GetTokens(string code)
{
    // The reader lets us consume characters, test conditions and detect line/file endings.
    CodeReader reader = new CodeReader(code);

    // Nothing to read: the resulting sequence is empty.
    if (reader.EndOfFile)
    {
        yield break;
    }

    // Position the reader on the first character of the code.
    reader.MoveHead();

    while (!reader.EndOfFile)
    {
        // The current character decides which kind of token is read next.
        // The order of the checks below matters: the first matching rule wins.
        char ch = reader.Current;

        if (IsSeparatorChar(ch))
        {
            // Instruction separator.
            yield return ReadSeparator(reader);
        }
        else if (IsEmptySpaceChar(ch))
        {
            // White space produces no token; it is simply skipped.
            reader.Consume();
        }
        else if (ch == '/' && reader.HasNext && reader.Next == '/')
        {
            // Two consecutive slashes start a comment.
            yield return ReadComment(reader);
        }
        else if (IsSymbol(ch))
        {
            yield return ReadSymbol(reader);
        }
        else if (IsTextLiteralDelimiterChar(ch))
        {
            // Opening delimiter of a text literal.
            yield return ReadTextLiteral(reader);
        }
        else if (IsValidIdentifierStartChar(ch))
        {
            // Beginning of an identifier or a keyword.
            yield return ReadIdentifierOrKeyword(reader);
        }
        else if (reader.CheckCurrent(char.IsDigit))
        {
            // A digit starts a number.
            yield return ReadNumber(reader);
        }
        else
        {
            // Anything else is reported as a token of unknown kind.
            yield return ReadUnknownSymbol(reader);
        }
    }
}