Пример #1
0
        /// <summary>
        /// Scans through the source and parses the next token it finds. The
        /// token is returned, and is also kept in <c>_token</c> for convenience.
        /// </summary>
        /// <remarks>
        /// Order of precedence: a token already waiting in <c>Buffer</c>; a token
        /// queued by <c>Forward()</c>; a synthetic <c>StartStatement</c> token on
        /// the very first run; otherwise whatever starts at <c>Offset</c>.
        /// Characters that start no known token are reported through
        /// <c>PrintToken('!')</c> and skipped.
        /// </remarks>
        /// <example>
        /// <code>
        /// // expecting an identifier
        /// if (lexer.Scan().type != TokenType.Identifier)
        /// {
        ///     Console.WriteLine("Expecting an identifier but found:");
        ///     lexer.PrintToken('!');
        /// }
        /// </code>
        /// </example>
        /// <returns>The token that was scanned, or the result of <c>Error(...)</c> for a malformed literal.</returns>
        public Token Scan()
        {
            // A token queued by an earlier call wins over reading more input.
            if (Buffer.Count > 0)
            {
                _token = Buffer.Dequeue();
                return _token;
            }

            Forward();

            // Forward() may have queued tokens of its own; hand out the first.
            if (Buffer.Count > 0)
            {
                _token = Buffer.Dequeue();
                return _token;
            }

            // if it's our first run, and we found something we need to start a statement
            if (IsFirstRun)
            {
                _token = CreateToken(TokenType.StartStatement, "");
                IsFirstRun = false;
                return _token;
            }

            // Start from an error token positioned at the cursor; a branch below
            // must overwrite type/value, otherwise the placeholder is visible.
            _token.offset = Offset;
            _token.column = Column;
            _token.indent = Indent;
            _token.line = Line;
            _token.lexer = this;
            _token.value = "Lexer Fell Through";
            _token.length = 0;
            _token.type = TokenType.Error;

            // get the current character
            // NOTE(review): assumes Forward() never leaves Offset at or past the end
            // of Source without queueing a token - confirm, otherwise this throws.
            char c = Source[Offset];

            // Quoted literals return directly. Previously the string branch was a
            // stand-alone `if`, so a finished string fell into the
            // "unknown character" branch below and was thrown away.
            if (c == '"')
            {
                return ScanQuotedLiteral('"', TokenType.String);
            }

            if (c == '\'')
            {
                return ScanQuotedLiteral('\'', TokenType.Character);
            }

            // IsDigit (not IsNumber): the loop below only consumes IsDigit
            // characters, so entering on e.g. a superscript two would produce a
            // zero-length token and never advance Offset.
            if (Char.IsDigit(c))
            {
                // might be a number, but if it has letters afterwards it's an
                // identifier eg 3DObject
                while (Offset < Source.Length && Char.IsDigit(Source[Offset]))
                {
                    Offset++;
                }

                if (Offset < Source.Length && Char.IsLetter(Source[Offset]))
                {
                    // it's actually an identifier
                    while (Offset < Source.Length
                            && (Char.IsLetterOrDigit(Source[Offset])
                                || Source[Offset] == '_'))
                    {
                        Offset++;
                    }

                    _token.type = TokenType.Identifier;
                }
                else if (Offset + 1 < Source.Length
                        && Source[Offset] == '.'
                        && Char.IsDigit(Source[Offset + 1]))
                {
                    // it's a decimal: a '.' that is itself followed by a digit.
                    // Both conditions are required; with `||`, input such as
                    // "1+2" was swallowed whole as one Decimal token.
                    Offset++;
                    while (Offset < Source.Length
                            && Char.IsDigit(Source[Offset]))
                    {
                        Offset++;
                    }

                    _token.type = TokenType.Decimal;
                }
                else
                {
                    // it's an integer
                    _token.type = TokenType.Integer;
                }

                CopyToken();
            }
            else if (Char.IsLetter(c))
            {
                // it's an identifier
                while (Offset < Source.Length
                        && (Char.IsLetterOrDigit(Source[Offset])
                            || Source[Offset] == '_'))
                {
                    Offset++;
                }

                _token.type = TokenType.Identifier;
                CopyToken();
            }
            else if (c == '<'
                    || c == '>'
                    || c == '='
                    || c == '!'
                    || c == '*'
                    || c == '/'
                    || c == '%')
            {
                // might have an equals afterwards
                Offset++;
                if (Offset < Source.Length && Source[Offset] == '=')
                {
                    Offset++;
                }

                _token.type = TokenType.Symbol;
                CopyToken();
            }
            else if (c == '+' || c == '-')
            {
                // '+' / '-' may be doubled ("++", "--") or followed by '='
                Offset++;
                if (Offset < Source.Length
                        && (Source[Offset] == c || Source[Offset] == '='))
                {
                    Offset++;
                }

                _token.type = TokenType.Symbol;
                CopyToken();
            }
            else if (Offset + 1 < Source.Length
                     && ((c == '&' && Source[Offset + 1] == '&')
                     || (c == '|' && Source[Offset + 1] == '|')))
            {
                // only the doubled forms "&&" and "||" are symbols
                Offset += 2;
                _token.type = TokenType.Symbol;
                CopyToken();
            }
            else if (c == '.'
                    || c == ','
                    || c == ':'
                    || c == ';'
                    || c == '('
                    || c == ')'
                    || c == '['
                    || c == ']')
            {
                Offset++;
                _token.type = TokenType.Symbol;
                CopyToken();
            }
            /* remember: take '[' out of the above list to put xml back in

            else if (c == '[')
            {
                // special case, this can contain xml... yay!
                offset++;
                int xmlStart = offset;

                while (xmlStart < source.Length
                       && (source[xmlStart] == ' '
                       || source[xmlStart] == '\n'
                       || source[xmlStart] == '\t'))
                    xmlStart++;

                if (xmlStart < source.Length && source[xmlStart] == '<')
                {
                    // we have xml
                    token.offset = xmlStart;
                    offset = xmlStart + 1;

                    while (offset < source.Length)
                    {
                        if (source[offset] == '>')
                        {
                            offset++;
                            while (offset < source.Length
                                   && (source[offset] == ' '
                                   || source[offset] == '\n'
                                   || source[offset] == '\t'))
                                offset++;

                            if (offset < source.Length && source[offset] == ']')
                            {
                                // end of the xml... yay!
                                token.type = TokenType.XML;
                                CopyToken();
                                offset++;

                                break;
                            }
                        }
                        else
                            offset++;
                    }

                    if (offset >= source.Length)
                        return Error("Reached the end of the file while in an XML block");
                }
                else
                {
                    token.type = TokenType.Symbol;
                    CopyToken();
                }
            }*/
            else
            {
                // unknown character: report it, skip it and try again
                PrintToken('!');
                Offset++;
                return Scan();
            }

            return _token;
        }

        /// <summary>
        /// Lexes a quoted literal whose opening <paramref name="quote"/> is at
        /// <c>Offset</c>, storing the unescaped text in <c>_token.value</c>.
        /// </summary>
        /// <param name="quote">The delimiter character that opens and closes the literal.</param>
        /// <param name="type">The token type to assign when the literal is well formed.</param>
        /// <returns><c>_token</c> on success; the result of <c>Error(...)</c> if a newline or the end of the source is reached first.</returns>
        private Token ScanQuotedLiteral(char quote, TokenType type)
        {
            // step over the opening quote
            Offset++;
            Column++;

            while (Offset < Source.Length && Source[Offset] != quote)
            {
                Column++;
                if (Source[Offset] == '\n')
                {
                    // error
                    Line++;
                    Indent = 0;
                    Column = 0;
                    return Error("Newline within a string literal");
                }

                // A backslash escapes the next character, so consume the pair
                // together. Looking back one character instead would mistake the
                // quote that follows an escaped backslash (\\) for an escaped
                // quote. A newline is never skipped so it is still reported above.
                if (Source[Offset] == '\\'
                        && Offset + 1 < Source.Length
                        && Source[Offset + 1] != '\n')
                {
                    Offset++;
                    Column++;
                }

                Offset++;
            }

            if (Offset >= Source.Length)
            {
                // error
                Offset = Source.Length;
                return Error("Reached the end of the file while in a string literal");
            }

            // step over the closing quote
            Offset++;

            _token.length = Offset - _token.offset;

            // Decode everything strictly between the two quotes.
            int closing = _token.offset + _token.length - 1;
            System.Text.StringBuilder text = new System.Text.StringBuilder();
            for (int i = _token.offset + 1; i < closing; i++)
            {
                char ch = Source[i];
                if (ch == '\\' && i + 1 < closing)
                {
                    // drop the backslash and translate the character after it
                    i++;
                    ch = DecodeEscape(Source[i]);
                }

                text.Append(ch);
            }

            // Replaces the "Lexer Fell Through" placeholder rather than appending to it.
            _token.value = text.ToString();
            _token.type = type;
            return _token;
        }

        /// <summary>
        /// Maps the character following a backslash to the character it stands for.
        /// </summary>
        /// <param name="escaped">The character that followed the backslash.</param>
        /// <returns>The control character for a known escape letter; otherwise <paramref name="escaped"/> unchanged (this covers quotes and the backslash itself).</returns>
        private static char DecodeEscape(char escaped)
        {
            switch (escaped)
            {
                case '0': return '\0';
                case 'v': return '\v';
                case 'f': return '\f';
                case 'n': return '\n';
                case 't': return '\t';
                case 'b': return '\b';
                case 'r': return '\r';
                default:
                    // anything not listed just falls through, probably should
                    // be an error or warning
                    return escaped;
            }
        }
Пример #2
0
 /// <summary>
 /// Creates a lexer over source text supplied directly as a string.
 /// </summary>
 /// <param name="location">Forwarded unchanged to the base constructor; presumably identifies where the text came from - confirm against the base class.</param>
 /// <param name="source">Forwarded unchanged to the base constructor; the text to load.</param>
 public Lexer(string location, string source)
     : base(location, source)
 {
     // Start with an empty token queue and a blank current token.
     Buffer = new Queue<Token>();
     _token = new Token();
 }
Пример #3
0
        /// <summary>
        /// Formats a diagnostic for <paramref name="token"/>: a header line with
        /// the icon, <c>Filename</c>, the token's line and its type, then the
        /// source line(s) the token sits on, then a row of <c>^</c> marks
        /// positioned under the token.
        /// </summary>
        /// <param name="icon">Character placed at the start of the header line.</param>
        /// <param name="token">The token to point at; its <c>offset</c>, <c>length</c>, <c>line</c> and <c>type</c> are read.</param>
        /// <returns>The formatted three-line message.</returns>
        public string PrintToken(char icon, Token token)
        {
            // Clamp both ends into the source. The extra Math.Max keeps an empty
            // source (Length - 1 == -1) from producing a negative index below.
            int lastIndex = Math.Max(Source.Length - 1, 0);
            int start = Math.Max(Math.Min(token.offset, lastIndex), 0);
            int end = Math.Max(Math.Min(token.offset + token.length, lastIndex), 0);

            // back up to the beginning of the line the token starts on
            while (start > 0 && Source[start - 1] != '\n')
            {
                start--;
            }

            // drop the line's leading indentation from the excerpt
            while (start < Source.Length
                    && (Source[start] == '\t' || Source[start] == ' '))
            {
                start++;
            }

            // extend to the end of the line the token finishes on
            while (end < Source.Length && Source[end] != '\n')
            {
                end++;
            }

            // If the token lies inside the indentation that was just skipped,
            // start is past token.offset; clamp so new String(' ', n) is never
            // given a negative count.
            int padding = Math.Max(token.offset - start, 0);

            return String.Format("{0} {1}:{2} {3}\n {4}\n {5}{6}",
                    icon, Filename,
                    token.line, token.type,
                    Source.Substring(start, end - start),
                    new String(' ', padding),
                    new String('^', Math.Max(token.length, 1)));
        }