/**
 * Parses a simple name, with special handling for the reserved word \c value, which may also be used as a name.
 *
 * \return The token matched as a name.  When the current token is the \c value keyword, the keyword is consumed
 *         and a substitute token of kind \c TokenKind.Name with the text "value" is returned in its place.
 */
private Token ParseName()
{
    // The common case: an ordinary name.
    if (_matcher.This.Kind != TokenKind.Keyword_Value)
        return _matcher.Match(TokenKind.Name);

    // Consume the bogus keyword token, keeping hold of it so that the substitute can be given the keyword's
    // own position.  Reading _matcher.This.Cursor after the match would presumably yield the position of the
    // token that follows the keyword instead.
    Token keyword = _matcher.Match(TokenKind.Keyword_Value);

    // Substitute a new token that is a plain name instead of the discarded 'value' keyword token.
    Token result = new Token(keyword.Cursor);
    result.Kind = TokenKind.Name;
    result.Text = "value";
    return result;
}
/**
 * Filters one scanned token into the output token list.
 *
 * Space tokens are buffered in \c _spaces, comment tokens are dropped, and the first end-of-line token of a run
 * is held back in \c _blank until an ordinary token arrives.  An ordinary token flushes either the held-back
 * end-of-line token or, when there is none, the buffered spaces, and is then appended itself.
 *
 * \param token The token to process.
 * \throws ParserError If an end-of-line token lies two or more lines below the one currently held back.
 */
public void PrintToken(Token token)
{
    TokenKind kind = token.Kind;

    // Spaces are only buffered; whether they are output depends on what follows them.
    if (kind == TokenKind.Space)
    {
        _spaces.Add(token);
        return;
    }

    if (kind == TokenKind.EndOfLine)
    {
        if (_blank == null)
        {
            // First line terminator of this run: hold it back.
            _blank = token;
        }
        else if (token.Cursor.Line - _blank.Cursor.Line + 1 >= 3)
        {
            throw new ParserError(token.Cursor, "Two or more consecutive blank lines detected");
        }

        // Spaces buffered before a line terminator are trailing spaces; discard them silently.
        _spaces.Clear();
        return;
    }

    if (kind == TokenKind.Comment)
    {
        // Drop the comment, forget any held-back line terminator and discard the spaces in front of the comment.
        _blank = null;
        _spaces.Clear();
        return;
    }

    // Any other token: flush what has been held back, then output the token itself.
    if (_blank != null)
    {
        _tokens.Add(_blank);
        _blank = null;
    }
    else
    {
        foreach (Token buffered in _spaces)
            _tokens.Add(buffered);
    }

    _spaces.Clear();
    _tokens.Add(token);

#if false
    System.Console.WriteLine("{0} Token: {1} = |{2}|", token.Cursor.ToString(), token.Kind, token.Text);
#endif
}
/**
 * Scans (reads) a single lexical element, taking indentation and dedentation into consideration.
 *
 * Results are delivered through OnTokenReady() and problems through OnErrorReady().  A single call may deliver
 * no token at all (unchanged indentation, or certain errors), exactly one token, or several tokens (one Space
 * token per embedded space, several Dedent tokens, or the trailing Dedent tokens followed by EndOfFile).
 *
 * \return \c false once the end-of-file token has been delivered, otherwise \c true.
 * \throws ScannerError If the end of the input is reached on a line that is not terminated with a linefeed.
 */
public bool ProduceToken()
{
    // reset the shared token string buffer
    _text.Length = 0;

    Token token = new Token(_cursor);
    char ch = ReadChar();
    bool keyword = true;            // false => don't try to convert identifiers into keywords

    switch (ch)
    {
        case ' ':
        case '\t':
        {
            // gather indentation or embedded spaces
            _text.Append(ch);
            while (_nextChar == ' ' || _nextChar == '\t')
                _text.Append(ReadChar());

            // handle embedded spaces first (anything that does not start in the first column of the line)
            if (token.Cursor.Char > 1)
            {
                // Work out how many spaces the run expands to.  Columns are 1-based, so a tab at column 'pos'
                // extends to the next tab stop, which is TABSIZE - ((pos - 1) % TABSIZE) columns away; plain
                // spaces advance the column by one.
                int pos = token.Cursor.Char;
                int count = 0;
                foreach (char ws in _text.ToString())
                {
                    int width = (ws == ' ') ? 1 : TABSIZE - ((pos - 1) % TABSIZE);
                    count += width;
                    pos += width;
                }

                // Output a separate Space token for each space.  Every space gets its own Token instance so
                // that consumers which store the tokens do not end up holding aliases of a single object.
                // NOTE(review): assumes the Token constructor takes its own copy of the cursor - confirm.
                for (int i = 0; i < count; i++)
                {
                    Token space = new Token(token.Cursor);
                    space.Kind = TokenKind.Space;
                    space.Text = " ";
                    OnTokenReady(space);
                    token.Cursor.Char += 1;
                }
                return true;
            }

            // handle indentation and produce synthetic indent/dedent tokens if needed
            // NOTE(review): indentation is only examined here, i.e. for lines that begin with whitespace; for a
            // line that starts in column one this method emits no Dedent tokens (except at end of file) -
            // confirm that this case is handled elsewhere.
            string indentation = _text.ToString();
            bool usesSpaces = indentation.IndexOf(' ') != -1;

            // disallow mixing spaces and tabs in indentation (use one or the other!)
            if (usesSpaces && indentation.IndexOf('\t') != -1)
                OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Mixture of spaces and tabs in indentation");

            // silently treat a multiple of TABSIZE spaces as tabs so as to make life easier for the user
            int level = indentation.Length;
            if (usesSpaces)
            {
                if ((indentation.Length % TABSIZE) != 0)
                    OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Incorrect number of spaces in indentation");
                level = indentation.Length / TABSIZE;
            }

            if (level > _indent + 1)
            {
                // the indentation increase exceeds one level
                OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Multiple indents in a single line not allowed");
            }
            else if (level == _indent + 1)
            {
                _indent = level;

                Token indent = new Token(token.Cursor);
                indent.Kind = TokenKind.Indent;
                indent.Text = "(indent)";
                OnTokenReady(indent);
            }
            else if (level < _indent)
            {
                // produce one synthetic Dedent token for every level that was closed
                int dedents = _indent - level;
                _indent = level;

                for (int i = 0; i < dedents; i++)
                {
                    Token dedent = new Token(token.Cursor);
                    dedent.Kind = TokenKind.Dedent;
                    dedent.Text = "(dedent)";
                    OnTokenReady(dedent);
                }
            }
            // else: no change in indentation, don't emit a token

            return true;
        }

        case '#':
            // a comment runs to the end of the line; the leading '#' is not part of the token text
            while (_nextChar != '\n' && _nextChar != EOF)
                _text.Append(ReadChar());
            token.Kind = TokenKind.Comment;
            token.Text = _text.ToString();
            break;

        case EOF:
            // mandate the presence of a trailing line terminator on the last line (required by the language syntax)
            if (token.Cursor.Char != 1)
                throw new ScannerError(token.Cursor, "Last line must be terminated with linefeed");

            // create trailing dedent tokens
            for (; _indent > 0; _indent--)
            {
                Token dedent = new Token(token.Cursor);
                dedent.Kind = TokenKind.Dedent;
                dedent.Text = "(dedent)";
                OnTokenReady(dedent);
            }

            token.Kind = TokenKind.EndOfFile;
            token.Text = "(eof)";
            OnTokenReady(token);
            return false;

        case '\n':
            token.Kind = TokenKind.EndOfLine;
            token.Text = string.Empty;
            break;

        case '.':
#if false
            /** \todo We don't need the range token (..) for anything, do we? */
            if (_nextChar == '.')
            {
                ReadChar();
                token.Kind = TokenKind.Range;
                token.Text = "..";
                break;
            }
#endif
            token.Kind = TokenKind.Dot;
            token.Text = ".";
            break;

        case '&':
            // '&' is only valid as part of '&='
            if (_nextChar != '=')
            {
                // Report the stray character and stop here; the following character is left for the next
                // call instead of being swallowed into a bogus assignment token.
                OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Invalid token");
                return true;
            }
            ReadChar();
            token.Kind = TokenKind.Assign_Bitwise_And;
            token.Text = "&=";
            break;

        case '/':
            if (_nextChar != '=')
            {
                token.Kind = TokenKind.Operator_Divide;
                token.Text = "/";
                break;
            }
            ReadChar();
            token.Kind = TokenKind.Assign_Divide;
            token.Text = "/=";
            break;

        case ':':
            if (_nextChar == '=')
            {
                ReadChar();
                token.Kind = TokenKind.Assign_Identity;
                token.Text = ":=";
                break;
            }
            token.Kind = TokenKind.Colon;
            token.Text = ":";
            break;

        case '-':
            // a minus sign directly followed by a digit starts a numeric literal
            if (IsDigit(_nextChar))
                goto case '0';

            if (_nextChar != '=')
            {
                token.Kind = TokenKind.Operator_Subtract;
                token.Text = "-";
                break;
            }
            ReadChar();
            token.Kind = TokenKind.Assign_Subtract;
            token.Text = "-=";
            break;

        case '*':
            if (_nextChar != '=')
            {
                token.Kind = TokenKind.Operator_Multiply;
                token.Text = "*";
                break;
            }
            ReadChar();
            token.Kind = TokenKind.Assign_Multiply;
            token.Text = "*=";
            break;

        case '|':
            // '|' is only valid as part of '|='
            if (_nextChar != '=')
            {
                // see case '&': report and leave the following character untouched
                OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Invalid token");
                return true;
            }
            ReadChar();
            token.Kind = TokenKind.Assign_Bitwise_Ior;
            token.Text = "|=";
            break;

        case '+':
            if (_nextChar != '=')
            {
                token.Kind = TokenKind.Operator_Add;
                token.Text = "+";
                break;
            }
            ReadChar();
            token.Kind = TokenKind.Assign_Add;
            token.Text = "+=";
            break;

        case '?':
            if (_nextChar != '=')
            {
                token.Kind = TokenKind.Operator_IfElse;
                token.Text = "?";
                break;
            }
            ReadChar();
            token.Kind = TokenKind.Assign_Default;
            token.Text = "?=";
            break;

        case '^':
            // '^' is only valid as part of '^='
            if (_nextChar != '=')
            {
                // see case '&': report and leave the following character untouched
                OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Invalid token");
                return true;
            }
            ReadChar();
            token.Kind = TokenKind.Assign_Bitwise_Xor;
            token.Text = "^=";
            break;

        case '\'':
            token.Kind = TokenKind.Literal_Character;
            token.Text = "" + ReadEscapedChar(token.Cursor);
            if (_nextChar != '\'')
                OnErrorReady(token.Cursor, ErrorKind.Error, 1, "Missing character literal terminator");
            else
                ReadChar();         // skip terminating quote
            break;

        case '%':
            if (_nextChar == '=')
            {
                ReadChar();
                token.Kind = TokenKind.Assign_Modulus;
                token.Text = "%=";
                break;
            }
            token.Kind = TokenKind.Operator_Modulus;
            token.Text = "%";
            break;

        case ',':
            token.Kind = TokenKind.Comma;
            token.Text = ",";
            break;

        case '(':
            token.Kind = TokenKind.ParenthesisBegin;
            token.Text = "(";
            break;

        case ')':
            token.Kind = TokenKind.ParenthesisClose;
            token.Text = ")";
            break;

        case '<':
            if (_nextChar == '<')
            {
                ReadChar();
                token.Kind = TokenKind.Print;
                token.Text = "<<";
                break;
            }
            else if (_nextChar == '=')
            {
                ReadChar();
                token.Kind = TokenKind.Relational_LessEqual;
                token.Text = "<=";
                break;
            }
            else if (_nextChar == '>')
            {
                ReadChar();
                token.Kind = TokenKind.Relational_Difference;
                token.Text = "<>";
                break;
            }
            token.Kind = TokenKind.Relational_LessThan;
            token.Text = "<";
            break;

        case '>':
            if (_nextChar == '>')
            {
                ReadChar();
                token.Kind = TokenKind.Parse;
                token.Text = ">>";
                break;
            }
            else if (_nextChar == '=')
            {
                ReadChar();
                token.Kind = TokenKind.Relational_GreaterEqual;
                token.Text = ">=";
                break;
            }
            token.Kind = TokenKind.Relational_GreaterThan;
            token.Text = ">";
            break;

        case '[':
            token.Kind = TokenKind.BracketBegin;
            token.Text = "[";
            break;

        case ']':
            token.Kind = TokenKind.BracketClose;
            token.Text = "]";
            break;

        case '=':
            if (_nextChar != '=')
            {
                // report the mistake, then treat single '=' as ':='
                OnErrorReady(token.Cursor, ErrorKind.Error, 1, "Braceless uses Pascal-style assignments (:=)");
                token.Kind = TokenKind.Assign_Identity;
                token.Text = ":=";
                break;
            }
            ReadChar();             // skip second equal sign
            token.Kind = TokenKind.Relational_Equality;
            token.Text = "==";
            break;

        case '!':
            token.Kind = TokenKind.Exclamation;
            token.Text = "!";
            break;

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            // also entered from case '-' with ch == '-', so the sign becomes part of the literal text
            _text.Append(ch);
            while (IsDigit(_nextChar))
                _text.Append(ReadChar());

            if (_nextChar != '.')
                token.Kind = TokenKind.Literal_Cardinal;
            else
            {
                token.Kind = TokenKind.Literal_Real;

                // read the decimal point and the decimals
                _text.Append(ReadChar());
                while (IsDigit(_nextChar))
                    _text.Append(ReadChar());

                // at least one digit must follow the decimal point
                if (_text[_text.Length - 1] == '.')
                    OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Invalid real literal");
            }

            token.Text = _text.ToString();

            // a whole number with a leading minus sign is an integer rather than a cardinal
            if (token.Kind == TokenKind.Literal_Cardinal && token.Text[0] == '-')
                token.Kind = TokenKind.Literal_Integer;
            break;

        case '\"':
            // Stop at end of input as well as at the closing quote, so that an unterminated literal cannot
            // keep the loop running forever.
            while (_nextChar != '\"' && _nextChar != EOF)
                _text.Append(ReadEscapedChar(token.Cursor));

            if (_nextChar == '\"')
                ReadChar();         // skip terminating quote
            else
                OnErrorReady(token.Cursor, ErrorKind.Error, 1, "Missing text literal terminator");

            token.Kind = TokenKind.Literal_Text;
            token.Text = _text.ToString();
            break;

        case '`':
            // Stop at end of input as well as at the closing back-quote (see the text literal case).
            while (_nextChar != '`' && _nextChar != EOF)
                _text.Append(ReadChar());

            if (_nextChar == '`')
                ReadChar();         // skip terminating quote
            else
                OnErrorReady(token.Cursor, ErrorKind.Error, 1, "Missing quoted name terminator");

            token.Kind = TokenKind.Name;
            token.Text = _text.ToString();
            if (!ValidBackquotedName(token.Text))
                OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Malformed quoted name (check spaces and underscores)");
            /** \note No need to check if quoted names are keywords; keywords never contain spaces and/or underscores. */
            break;

        case '@':
            // a leading at sign (@) forces the following identifier to be a plain name, never a keyword
            keyword = false;
            ch = ReadChar();        // discard the at sign and continue with the character that follows it
            goto default;

        default:
        {
            // scan the first character (must be a letter)
            if (!IsLetter(ch))
            {
                OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Invalid character: " + ch);
                return true;
            }
            _text.Append(ch);

            // scan the rest of the name
            while (IsName(_nextChar))
                _text.Append(ReadChar());

            // check that the name is well-formed
            token.Text = _text.ToString();
            if (!ValidUnquotedName(token.Text))
                OnErrorReady(token.Cursor, ErrorKind.Error, 1, "Malformed unquoted name (check underscores)");

            // try to look up the keyword and retrieve its token kind
            TokenKind kind;
            if (keyword && _keywords.TryGetValue(token.Text, out kind))
                token.Kind = kind;
            else
                token.Kind = TokenKind.Name;
            break;
        }
    }

    OnTokenReady(token);
    return true;
}