示例#1
0
        /*
         * Expect functions return Lexemes of the types they expect.
         * If the input contains any unexpected characters,
         * an exception is thrown.
         *
         * Expected regex: \a(\a|\d|_)*
         */
        public Lexeme ExpectIdentifierOrKeyword()
        {
            // Position of the first character; reported in errors and stored in the lexeme.
            InputBuffer.Position start = input.GetPosition();

            // Nothing left to read, so there cannot be an identifier here.
            if (!input.HasCharacter())
            {
                throw new LexerException("identifier or keyword expected", start);
            }

            // The very first character has to be a letter (an underscore is not accepted here).
            char first = input.PeekCharacter();
            if (!Char.IsLetter(first))
            {
                throw new LexerException("identifier or keyword must start with a letter", start);
            }

            // Collect letters, digits and underscores until something else
            // shows up or the input runs out.
            string text = "";
            while (true)
            {
                char ch = input.PeekCharacter();
                bool isWordCharacter = ch == '_' || Char.IsLetterOrDigit(ch);
                if (!isWordCharacter)
                {
                    break;
                }
                text += ch;
                if (!input.Next())
                {
                    break;
                }
            }

            // Text found in the keywords collection is a keyword, anything else an identifier.
            TokenType kind = keywords.Contains(text) ? TokenType.KEYWORD : TokenType.IDENTIFIER;
            return new Lexeme(kind, start, text);
        }
示例#2
0
 /*
  * Expect functions return Lexemes of the tokens they expect.
  * If the input contains any unexpected characters,
  * an exception is thrown.
  *
  * Expected regex: \d+
  */
 public Lexeme ExpectNumber()
 {
     // Position of the first character; reported in errors and stored in the lexeme.
     InputBuffer.Position start = input.GetPosition();
     string digits = "";

     // Gather consecutive digit characters, stopping at the first
     // non-digit or when the input is exhausted.
     if (input.HasCharacter())
     {
         while (true)
         {
             char ch = input.PeekCharacter();
             if (!Char.IsDigit(ch))
             {
                 break;
             }
             digits += ch;
             if (!input.Next())
             {
                 break;
             }
         }
     }

     // Not a single digit was read.
     if (digits.Length == 0)
     {
         throw new LexerException("number expected", start);
     }

     // The parsed value itself is discarded; the conversion only checks
     // that the literal can be represented as an int.
     int value;
     if (!int.TryParse(digits, out value))
     {
         throw new LexerException("too high constant value", start);
     }
     return new Lexeme(TokenType.NUMBER, start, digits);
 }
示例#3
0
        /*
         * Expect functions return Lexemes of the tokens they expect.
         * If the input contains any unexpected characters,
         * an exception is thrown.
         *
         * Expected regular definition: comment
         */
        // comment := "/*" commentend
        // commentend := ([^/][^*] | comment)* "*/"
        public Lexeme ExpectBlockComment()
        {
            // Position of the opening delimiter; stored in the lexeme and quoted in the error text.
            InputBuffer.Position start = input.GetPosition();
            bool opened = ExpectToken("/*");
            if (!opened)
            {
                throw new LexerException("blockcomment expected", start);
            }

            string body  = ""; // comment text, including any nested comment delimiters
            int    depth = 0;  // number of nested comments currently open

            bool more = input.HasCharacter();
            while (more)
            {
                char ch      = input.PeekCharacter();
                bool hasPair = input.HasNextCharacter();
                bool opens   = hasPair && ch == '/' && input.PeekNext() == '*';
                bool closes  = hasPair && ch == '*' && input.PeekNext() == '/';

                if (closes && depth <= 0)
                {
                    // Outermost terminator: step over both of its characters
                    // without adding them to the comment text.
                    input.Next();
                    input.Next();
                    return new Lexeme(TokenType.BLOCKCOMMENT, start, body);
                }

                body += ch;
                if (opens || closes)
                {
                    // Nested delimiter: keep its second character as well
                    // and adjust the nesting level.
                    if (input.Next())
                    {
                        body += input.PeekCharacter();
                    }
                    depth += opens ? 1 : -1;
                }

                more = input.Next();
            }

            // Input ended before the outermost comment was closed.
            throw new LexerException(string.Format("unexpected end of blockcomment starting at {0}", start.ToString()), input.GetPosition());
        }
示例#4
0
        /*
         * Expect functions return Lexemes of the tokens they expect.
         * If the input contains any unexpected characters,
         * an exception is thrown.
         *
         * Expected regex: "(\\[^\n]|[^"\n])*"
         */
        public Lexeme ExpectString()
        {
            // Reads a double-quoted string literal and returns a STRING lexeme
            // holding the decoded contents (without the surrounding quotes).
            // Throws LexerException when the opening quote is missing, when an
            // unsupported escape sequence is found, or when a newline or the end
            // of input is reached before the closing quote.
            InputBuffer.Position pos = input.GetPosition();
            if (!ExpectToken("\"")) // Expect string starting quote
            {
                throw new LexerException("string expected", pos);
            }
            string s = "";

            if (input.HasCharacter())
            {
                do
                {
                    char c = input.PeekCharacter();
                    if (c == '\n') // unexpected newline in middle of string
                    {
                        break;
                    }
                    if (c == '\\') // escaped character
                    {
                        // skip escape character
                        if (!input.Next())
                        {
                            break; // end of input after escape character
                        }
                        // handle escaped character
                        switch (input.PeekCharacter())
                        {
                        case 'n': s += '\n'; break;

                        case 't': s += '\t'; break;

                        case '"': s += '"'; break;

                        // An escaped backslash yields a literal backslash; without
                        // this case a string could never contain one.
                        case '\\': s += '\\'; break;

                        default:
                            throw new LexerException(
                                      string.Format("unrecognized escape character {0}", input.PeekCharacter()),
                                      input.GetPosition()
                                      );
                        }
                        // The loop condition's input.Next() steps past the escaped character.
                        continue;
                    }
                    if (c == '"') // string end quote
                    {
                        input.Next();
                        return(new Lexeme(TokenType.STRING, pos, s));
                    }
                    s += c;
                } while (input.Next());
            }
            // Reached on a raw newline or on end of input: the literal was never closed.
            throw new LexerException(string.Format("unexpected end of string starting at {0}", pos.ToString()), input.GetPosition());
        }
示例#5
0
        /*
         * Expect functions return Lexemes of the tokens they expect.
         * If the input contains any unexpected characters,
         * an exception is thrown.
         *
         * Expected regex: //[^\n]*
         */
        public Lexeme ExpectComment()
        {
            // Position of the comment start; reported in errors and stored in the lexeme.
            InputBuffer.Position start = input.GetPosition();
            bool opened = ExpectToken("//");
            if (!opened)
            {
                throw new LexerException("comment expected", start);
            }

            // Everything up to (but not including) the next newline, or up to
            // the end of input, belongs to the comment.
            string text = "";
            for (bool more = input.HasCharacter(); more; more = input.Next())
            {
                char ch = input.PeekCharacter();
                if (ch == '\n')
                {
                    break;
                }
                text += ch;
            }
            return new Lexeme(TokenType.COMMENT, start, text);
        }
示例#6
0
 /*
  * Creates a LexerException.
  * The message is forwarded to the base class constructor and the
  * given input position is stored in the position member.
  */
 public LexerException(string message, InputBuffer.Position position) : base(message)
 {
     this.position = position;
 }
示例#7
0
        /*
         * Tries to read the input to generate a new Lexeme.
         * Returns true if a new Lexeme was generated and false otherwise.
         * Can throw LexerException if Lexer encountered unexpected tokens.
         */
        public bool LexNext()
        {
            // Returns true once a lexeme has been appended to lexemes, or false
            // when the input is exhausted. Lexical errors are reported through
            // io, flagged in errored, and lexing resumes after SkipToNextLine().
            //
            // Skipped input (whitespace, comments, erroneous lines) is handled by
            // looping instead of recursing, so long runs of it cannot exhaust
            // the call stack.
            while (input.HasCharacter())
            {
                // Fetch current and next character from input
                char current = input.PeekCharacter();
                char next    = input.HasNextCharacter() ? input.PeekNext() : ' '; // Whitespace used as next character at end of file

                try
                {
                    // Use conditional blocks as a kind of switch or lookup table
                    if (current == '/' && next == '/')
                    {
                        // line comment: consumed and discarded
                        ExpectComment();
                        continue;
                    }
                    else if (current == '/' && next == '*')
                    {
                        // block comment: consumed and discarded
                        ExpectBlockComment();
                        continue;
                    }
                    else if (Char.IsLetter(current))
                    {
                        lexemes.Add(ExpectIdentifierOrKeyword());
                    }
                    else if (
                        (current == ':' && next == '=') ||
                        (current == '.' && next == '.'))
                    {
                        // two-character separators; must be tested before the
                        // single-character ':' below
                        lexemes.Add(new Lexeme(TokenType.SEPARATOR, input.GetPosition(), current.ToString() + next.ToString()));
                        input.Next();
                        input.Next();
                    }
                    else if (
                        current == '(' ||
                        current == ')' ||
                        current == ':' ||
                        current == ';')
                    {
                        lexemes.Add(new Lexeme(TokenType.SEPARATOR, input.GetPosition(), current.ToString()));
                        input.Next();
                    }
                    else if (
                        current == '<' ||
                        current == '=' ||
                        current == '!' ||
                        current == '&' ||
                        current == '+' ||
                        current == '-' ||
                        current == '/' ||
                        current == '*')
                    {
                        lexemes.Add(new Lexeme(TokenType.OPERATOR, input.GetPosition(), current.ToString()));
                        input.Next();
                    }
                    else if (Char.IsDigit(current))
                    {
                        lexemes.Add(ExpectNumber());
                    }
                    else if (current == '"')
                    {
                        lexemes.Add(ExpectString());
                    }
                    else if (Char.IsWhiteSpace(current))
                    {
                        // skip
                        input.Next();
                        continue;
                    }
                    else
                    {
                        // consume the offending character before reporting it
                        InputBuffer.Position pos = input.GetPosition();
                        input.Next();
                        throw new LexerException(string.Format("unrecognized token beginning with {0} followed by {1}", current, next), pos);
                    }

                    // exactly one lexeme was added by one of the branches above
                    return true;
                }
                catch (LexerException e)
                {
                    // an error occurred
                    // set errored to true, print the error and
                    //  continue on next line.
                    errored = true;
                    io.WriteLine("Lexical error at {0}: {1}", e.position, e.ToString());
                    SkipToNextLine();
                }
            }

            // no more input
            return false;
        }
示例#8
0
 /*
  * Creates a Lexeme from its token type, the input position it
  * was given, and the token text.
  */
 public Lexeme(TokenType type, InputBuffer.Position position, string token)
 {
     // assignments follow the parameter order
     this.type     = type;
     this.position = position;
     this.token    = token;
 }