Exemple #1
0
        /**
         * Parses a simple name with special handling for the reserved word \c value which may be used as a name.
         *
         * \return The token matched as a name.  When the current token is the \c value keyword, it is consumed and
         *         replaced by a freshly created name token with the text "value" at the keyword's position.
         */
        private Token ParseName()
        {
            // The common case: an ordinary name token.
            if (_matcher.This.Kind != TokenKind.Keyword_Value)
                return _matcher.Match(TokenKind.Name);

            // Consume the bogus keyword token, but keep it: once it has been matched, _matcher.This no longer
            // refers to it, so its cursor must be taken from the matched token itself.
            Token keyword = _matcher.Match(TokenKind.Keyword_Value);

            // Substitute a new token that is a plain name instead of the discarded 'value' keyword token.
            Token result = new Token(keyword.Cursor);
            result.Kind = TokenKind.Name;
            result.Text = "value";
            return result;
        }
Exemple #2
0
        /**
         * Filters a single token into the output token list.
         *
         * Spaces are buffered, comments are dropped, and of a run of end-of-line tokens only the first one is kept.
         * Any other token flushes either the pending end-of-line or the buffered spaces before being appended itself.
         *
         * \throws ParserError if an end-of-line token lies two or more lines below the pending end-of-line token.
         */
        public void PrintToken(Token token)
        {
            // The checks are ordered on decreasing expected frequency of appearance.

            if (token.Kind == TokenKind.Space)
            {
                // Buffer the space; it is only output if a regular token follows on the same line.
                _spaces.Add(token);
                return;
            }

            if (token.Kind == TokenKind.EndOfLine)
            {
                if (_blank == null)
                {
                    // First end-of-line of a run: remember it until a regular token arrives.
                    _blank = token;
                }
                else if (token.Cursor.Line - _blank.Cursor.Line + 1 >= 3)
                {
                    // The run now spans three lines or more, i.e. it contains at least two blank lines.
                    throw new ParserError(token.Cursor, "Two or more consecutive blank lines detected");
                }

                // Whatever spaces were buffered on this line were trailing ones: drop them.
                _spaces.Clear();
                return;
            }

            if (token.Kind == TokenKind.Comment)
            {
                // Drop the comment together with the pending end-of-line and any spaces in front of the comment.
                _blank = null;
                _spaces.Clear();
                return;
            }

            // A regular token: flush the pending end-of-line if there is one, otherwise the buffered spaces.
            if (_blank == null)
            {
                foreach (Token pending in _spaces)
                    _tokens.Add(pending);
            }
            else
            {
                _tokens.Add(_blank);
                _blank = null;
            }
            _spaces.Clear();

            _tokens.Add(token);

            #if false
            System.Console.WriteLine("{0} Token: {1} = |{2}|", token.Cursor.ToString(), token.Kind, token.Text);
            #endif
        }
Exemple #3
0
        /**
         * Scans (reads) a single token, taking all sorts of bizarre indenting and dedenting issues into consideration.
         *
         * Tokens are reported through OnTokenReady and problems through OnErrorReady.  A single call may report
         * several tokens (one per embedded space, or a series of synthetic dedents) or none at all (unchanged
         * indentation, invalid character).
         *
         * \return \c false once the end-of-file token has been reported, \c true otherwise.
         * \throws ScannerError if the input ends without a trailing linefeed or inside a text literal or quoted name.
         * \throws InternalError if the number of synthetic dedents cannot be computed.
         */
        public bool ProduceToken()
        {
            // reset the shared token string buffer
            _text.Length = 0;

            Token token = new Token(_cursor);
            char ch = ReadChar();
            bool keyword = true;        // false => don't try to convert identifiers into keywords
            switch (ch)
            {
                case ' ':
                case '\t':
                    // gather indentation or embedded spaces
                    _text.Append(ch);
                    while (_nextChar == ' ' || _nextChar == '\t')
                        _text.Append(ReadChar());

                    // handle embedded spaces first (whitespace that does not start in the first column)
                    if (token.Cursor.Char > 1)
                    {
                        // expand tabs to spaces while tracking the 1-based column of every whitespace character
                        string whitespace = _text.ToString();
                        _text.Length = 0;
                        int pos = token.Cursor.Char;
                        foreach (char ws in whitespace)
                        {
                            if (ws == ' ')
                            {
                                _text.Append(ws);
                                pos += 1;       // a space occupies a column too; later tabs depend on it
                                continue;
                            }

                            // columns are 1-based, so the tab stops are at 1, TABSIZE + 1, 2 * TABSIZE + 1, ...
                            int width = TABSIZE - ((pos - 1) % TABSIZE);
                            for (int i = 0; i < width; i += 1)
                                _text.Append(' ');
                            pos += width;
                        }
                        int count = _text.Length;

                        // output a separate space token for each space scanned; 'token' only tracks the position
                        for (int i = 0; i < count; i++)
                        {
                            Token space = new Token(token.Cursor);
                            space.Kind = TokenKind.Space;
                            space.Text = " ";
                            OnTokenReady(space);
                            token.Cursor.Char += 1;
                        }

                        return true;
                    }

                    // handle indentation and produce synthetic dedent tokens if needed

                    // assume we'll produce an indent token
                    token.Kind = TokenKind.Indent;
                    token.Text = _text.ToString();

                    // disallow mixing spaces and tabs in indentation (use one or the other!)
                    if (token.Text.IndexOf(' ') != -1 && token.Text.IndexOf('\t') != -1)
                        OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Mixture of spaces and tabs in indentation");

                    // silently convert a multiple of TABSIZE spaces into tabs so as to make life easier for the user
                    if (token.Text.IndexOf(' ') != -1)
                    {
                        if ((token.Text.Length % TABSIZE) != 0)
                            OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Incorrect number of spaces in indentation");
                        token.Text = new string('\t', token.Text.Length / TABSIZE);
                    }

                    // the indentation is now expressed in tabs, one per level
                    int level = token.Text.Length;

                    // check if the indentation increase exceeds one level
                    if (level > _indent + 1)
                        OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Multiple indents in a single line not allowed");
                    else if (level == _indent + 1)
                    {
                        _indent = level;
                        Token indent = new Token(token.Cursor);
                        indent.Kind = TokenKind.Indent;
                        indent.Text = "(indent)";
                        OnTokenReady(indent);
                    }
                    else if (level == _indent)
                    {
                        // no change in indentation, don't emit a token
                        return true;
                    }
                    else
                    {
                        // produce synthetic Dedent tokens as needed
                        int dedents = _indent - level;
                        if (dedents < 1)
                            throw new InternalError("Error computing number of syntetic dedents to produce");

                        _indent = level;

                        for (int i = 0; i < dedents; i++)
                        {
                            Token dedent = new Token(token.Cursor);
                            dedent.Kind = TokenKind.Dedent;
                            dedent.Text = "(dedent)";
                            OnTokenReady(dedent);
                        }
                    }

                    // the whitespace itself is never reported; only the synthetic indent/dedent tokens are
                    return true;

                case '#':
                    // a comment runs up to, but does not include, the end of the line
                    while (_nextChar != '\n' && _nextChar != EOF)
                        _text.Append(ReadChar());
                    token.Kind = TokenKind.Comment;
                    token.Text = _text.ToString();
                    break;

                case EOF:
                    // mandate the presence of a trailing line terminator on the last line (required by the language syntax)
                    if (token.Cursor.Char != 1)
                        throw new ScannerError(token.Cursor, "Last line must be terminated with linefeed");

                    // create trailing dedent tokens
                    for (; _indent > 0; _indent--)
                    {
                        Token dedent = new Token(token.Cursor);
                        dedent.Kind = TokenKind.Dedent;
                        dedent.Text = "(dedent)";
                        OnTokenReady(dedent);
                    }

                    token.Kind = TokenKind.EndOfFile;
                    token.Text = "(eof)";
                    OnTokenReady(token);

                    return false;

                case '\n':
                    token.Kind = TokenKind.EndOfLine;
                    token.Text = string.Empty;
                    break;

                case '.':
            #if false
                    /** \todo We don't need the range token (..) for anything, do we? */
                    if (_nextChar == '.')
                    {
                        ReadChar();
                        token.Kind = TokenKind.Range;
                        token.Text = "..";
                        break;
                    }
            #endif

                    token.Kind = TokenKind.Dot;
                    token.Text = ".";
                    break;

                case '&':
                    // NOTE(review): the next character is consumed even when it is not '='; presumably
                    // OnErrorReady does not return for fatal errors - confirm.  Same for '|' and '^' below.
                    if (_nextChar != '=')
                        OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Invalid token");

                    ReadChar();
                    token.Kind = TokenKind.Assign_Bitwise_And;
                    token.Text = "&=";
                    break;

                case '/':
                    if (_nextChar != '=')
                    {
                        token.Kind = TokenKind.Operator_Divide;
                        token.Text = "/";
                        break;
                    }

                    ReadChar();
                    token.Kind = TokenKind.Assign_Divide;
                    token.Text = "/=";
                    break;

                case ':':
                    if (_nextChar == '=')
                    {
                        ReadChar();
                        token.Kind = TokenKind.Assign_Identity;
                        token.Text = ":=";
                        break;
                    }

                    token.Kind = TokenKind.Colon;
                    token.Text = ":";
                    break;

                case '-':
                    // a minus sign directly followed by a digit starts a (negative) numeric literal
                    if (IsDigit(_nextChar))
                        goto case '0';

                    if (_nextChar != '=')
                    {
                        token.Kind = TokenKind.Operator_Subtract;
                        token.Text = "-";
                        break;
                    }

                    ReadChar();
                    token.Kind = TokenKind.Assign_Subtract;
                    token.Text = "-=";
                    break;

                case '*':
                    if (_nextChar != '=')
                    {
                        token.Kind = TokenKind.Operator_Multiply;
                        token.Text = "*";
                        break;
                    }

                    ReadChar();
                    token.Kind = TokenKind.Assign_Multiply;
                    token.Text = "*=";
                    break;

                case '|':
                    if (_nextChar != '=')
                        OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Invalid token");

                    ReadChar();
                    token.Kind = TokenKind.Assign_Bitwise_Ior;
                    token.Text = "|=";
                    break;

                case '+':
                    if (_nextChar != '=')
                    {
                        token.Kind = TokenKind.Operator_Add;
                        token.Text = "+";
                        break;
                    }

                    ReadChar();
                    token.Kind = TokenKind.Assign_Add;
                    token.Text = "+=";
                    break;

                case '?':
                    if (_nextChar != '=')
                    {
                        token.Kind = TokenKind.Operator_IfElse;
                        token.Text = "?";
                        break;
                    }

                    ReadChar();
                    token.Kind = TokenKind.Assign_Default;
                    token.Text = "?=";
                    break;

                case '^':
                    if (_nextChar != '=')
                        OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Invalid token");

                    ReadChar();
                    token.Kind = TokenKind.Assign_Bitwise_Xor;
                    token.Text = "^=";
                    break;

                case '\'':
                    token.Kind = TokenKind.Literal_Character;
                    token.Text = "" + ReadEscapedChar(token.Cursor);
                    if (_nextChar != '\'')
                        OnErrorReady(token.Cursor, ErrorKind.Error, 1, "Missing character literal terminator");
                    else
                        ReadChar();
                    break;

                case '%':
                    if (_nextChar == '=')
                    {
                        ReadChar();
                        token.Kind = TokenKind.Assign_Modulus;
                        token.Text = "%=";
                        break;
                    }

                    token.Kind = TokenKind.Operator_Modulus;
                    token.Text = "%";
                    break;

                case ',':
                    token.Kind = TokenKind.Comma;
                    token.Text = ",";
                    break;

                case '(':
                    token.Kind = TokenKind.ParenthesisBegin;
                    token.Text = "(";
                    break;

                case ')':
                    token.Kind = TokenKind.ParenthesisClose;
                    token.Text = ")";
                    break;

                case '<':
                    if (_nextChar == '<')
                    {
                        ReadChar();
                        token.Kind = TokenKind.Print;
                        token.Text = "<<";
                        break;
                    }
                    else if (_nextChar == '=')
                    {
                        ReadChar();
                        token.Kind = TokenKind.Relational_LessEqual;
                        token.Text = "<=";
                        break;
                    }
                    else if (_nextChar == '>')
                    {
                        ReadChar();
                        token.Kind = TokenKind.Relational_Difference;
                        token.Text = "<>";
                        break;
                    }

                    token.Kind = TokenKind.Relational_LessThan;
                    token.Text = "<";
                    break;

                case '>':
                    if (_nextChar == '>')
                    {
                        ReadChar();
                        token.Kind = TokenKind.Parse;
                        token.Text = ">>";
                        break;
                    }
                    else if (_nextChar == '=')
                    {
                        ReadChar();
                        token.Kind = TokenKind.Relational_GreaterEqual;
                        token.Text = ">=";
                        break;
                    }

                    token.Kind = TokenKind.Relational_GreaterThan;
                    token.Text = ">";
                    break;

                case '[':
                    token.Kind = TokenKind.BracketBegin;
                    token.Text = "[";
                    break;

                case ']':
                    token.Kind = TokenKind.BracketClose;
                    token.Text = "]";
                    break;

                case '=':
                    if (_nextChar != '=')
                    {
                        // treat single '=' as ':='
                        OnErrorReady(token.Cursor, ErrorKind.Error, 1, "Braceless uses Pascal-style assignments (:=)");
                        token.Kind = TokenKind.Assign_Identity;
                        token.Text = ":=";
                        break;
                    }

                    ReadChar();         // skip second equal sign
                    token.Kind = TokenKind.Relational_Equality;
                    token.Text = "==";
                    break;

                case '!':
                    token.Kind = TokenKind.Exclamation;
                    token.Text = "!";
                    break;

                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    // 'ch' is the first digit, or the minus sign when arriving here from case '-'
                    _text.Append(ch);
                    while (IsDigit(_nextChar))
                        _text.Append(ReadChar());

                    if (_nextChar != '.')
                        token.Kind = TokenKind.Literal_Cardinal;
                    else
                    {
                        token.Kind = TokenKind.Literal_Real;

                        // read real decimals
                        _text.Append(ReadChar());
                        while (IsDigit(_nextChar))
                        {
                            _text.Append(ReadChar());
                        }

                        // at least one digit must follow the decimal point
                        if (_text[_text.Length - 1] == '.')
                            OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Invalid real literal");
                    }

                    token.Text = _text.ToString();

                    // a whole number with a leading minus sign is an integer rather than a cardinal
                    if (token.Kind == TokenKind.Literal_Cardinal && token.Text[0] == '-')
                        token.Kind = TokenKind.Literal_Integer;
                    break;

                case '\"':
                    while (_nextChar != '\"')
                    {
                        // guard against running past the end of the input when the closing quote is missing
                        if (_nextChar == EOF)
                            throw new ScannerError(token.Cursor, "Unterminated text literal");

                        _text.Append(ReadEscapedChar(token.Cursor));
                    }
                    ReadChar();         // skip terminating quote
                    token.Kind = TokenKind.Literal_Text;
                    token.Text = _text.ToString();
                    break;

                case '`':
                    while (_nextChar != '`')
                    {
                        // guard against running past the end of the input when the closing backquote is missing
                        if (_nextChar == EOF)
                            throw new ScannerError(token.Cursor, "Unterminated quoted name");

                        _text.Append(ReadChar());
                    }
                    ReadChar();         // skip terminating quote
                    token.Kind = TokenKind.Name;
                    token.Text = _text.ToString();

                    if (!ValidBackquotedName(token.Text))
                        OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Malformed quoted name (check spaces and underscores)");

                    /** \note No need to check if quoted names are keywords; keywords never contain spaces and/or underscores. */
                    break;

                case '@':
                    // an at sign in front of a name prevents the name from being treated as a keyword
                    keyword = false;
                    ch = ReadChar();    // discard leading at sign (@)
                    goto default;

                default:
                {
                    // scan the first character (must be a letter)
                    if (!IsLetter(ch))
                    {
                        OnErrorReady(token.Cursor, ErrorKind.Fatal, 1, "Invalid character: " + ch);
                        return true;
                    }
                    _text.Append(ch);

                    // scan the name
                    while (IsName(_nextChar))
                        _text.Append(ReadChar());

                    // check that the name is well-formed
                    token.Text = _text.ToString();
                    if (!ValidUnquotedName(token.Text))
                        OnErrorReady(token.Cursor, ErrorKind.Error, 1, "Malformed unquoted name (check underscores)");

                    // try to look up keyword and retrieve its numerical identifier
                    TokenKind kind;
                    if (keyword && _keywords.TryGetValue(token.Text, out kind))
                        token.Kind = kind;
                    else
                        token.Kind = TokenKind.Name;

                    break;
                }
            }

            OnTokenReady(token);
            return true;
        }