Exemplo n.º 1
0
        /// <summary>
        /// Scans the next token from the underlying scanner and records its kind, text and
        /// source line number in <paramref name="info"/>.
        /// </summary>
        /// <param name="info">
        /// Token record to fill. Its text and kind are reset before scanning starts.
        /// </param>
        /// <returns>
        /// <c>true</c> when a token was consumed and scanning may continue (this includes a
        /// bad character in the middle of the source, which is left as
        /// <see cref="TokenKind.None"/>); <c>false</c> when the end of the source was reached
        /// or the scanner is in an inconsistent state.
        /// </returns>
        /// <exception cref="IllegalCharacterException">
        /// The current character matches no known token start and the lexer flags do not
        /// allow Unicode.
        /// </exception>
        /// <exception cref="FeatureNotSupportedException">
        /// The current character matches no known token start and Unicode is allowed
        /// (Unicode identifiers are not implemented).
        /// </exception>
        private bool ScanToken(ref TokenInfo info)
        {
            info.Text      = string.Empty;
            info.TokenType = TokenKind.None;

            var currentChar = _scanner.Advance();

            ConsumeWhiteSpace(ref currentChar);

            switch (currentChar)
            {
            // Operators that are always a single character.
            case '+':
            case '-':
            case '%':
            case '*':
            case '/':
            case '\\':
            case '^':
                info.TokenType = TokenKind.Operator;
                break;

            // '&' or '&&'
            case '&':
                if (_scanner.Peek() == '&')
                {
                    _scanner.Advance();
                }
                info.TokenType = TokenKind.Operator;
                break;

            // '|' or '||'
            case '|':
                if (_scanner.Peek() == '|')
                {
                    _scanner.Advance();
                }
                info.TokenType = TokenKind.Operator;
                break;

            // '!' or '!='
            case '!':
                if (_scanner.Peek() == '=')
                {
                    _scanner.Advance();
                }
                info.TokenType = TokenKind.Operator;
                break;

            // '<', '<=' or '<<'
            case '<':
                if (_scanner.Peek() == '=' ||
                    _scanner.Peek() == '<')
                {
                    _scanner.Advance();
                }
                info.TokenType = TokenKind.Operator;
                break;

            // '>', '>=' or '>>'
            case '>':
                if (_scanner.Peek() == '=' ||
                    _scanner.Peek() == '>')
                {
                    _scanner.Advance();
                }
                info.TokenType = TokenKind.Operator;
                break;

            // '=>' (define), '==' (operator) or a plain '=' (assign)
            case '=':
            {
                if (_scanner.Peek() == '>')
                {
                    _scanner.Advance();
                    info.TokenType = TokenKind.Define;
                    break;
                }
                if (_scanner.Peek() == '=')
                {
                    _scanner.Advance();
                    info.TokenType = TokenKind.Operator;
                    break;
                }
                info.TokenType = TokenKind.Assign;
                break;
            }

            case ':':
                info.TokenType = TokenKind.Colon;
                break;

            case ';':
                info.TokenType = TokenKind.Semicolon;
                break;

            case ',':
                info.TokenType = TokenKind.Comma;
                break;

            // '..', the start of a real literal, or a plain '.'
            case '.':
            {
                var peekChar = _scanner.Peek();
                if (peekChar == '.')
                {
                    _scanner.Advance();
                    info.TokenType = TokenKind.DotDot;
                    break;
                }

                // Real literals don't have to start with a digit (e.g. .123)
                if (char.IsDigit(peekChar))
                {
                    ScanNumeric(ref info, ref currentChar);
                    break;
                }

                info.TokenType = TokenKind.Dot;
                break;
            }

            case '?':
                info.TokenType = TokenKind.QuestionMark;
                break;

            // The literal/identifier helpers receive info by ref; the token kind is not set
            // here, so they are expected to set it themselves.
            case '\'':
                ScanCharLiteral(ref info, ref currentChar);
                break;

            case '\"':
                ScanStringLiteral(ref info, ref currentChar);
                break;

            case '(':
            {
                // "(*" opens a comment: skip it, then scan the token that follows.
                if (_scanner.Peek() == '*')
                {
                    currentChar = _scanner.Advance();
                    ConsumeComment(ref currentChar);
                    return ScanToken(ref info);
                }

                info.TokenType = TokenKind.LeftParen;
                break;
            }

            case ')':
                // Comment endings are presumably consumed by ConsumeComment, so a ')' that
                // reaches this point is treated as a plain right parenthesis.
                info.TokenType = TokenKind.RightParen;
                break;

            case '{':
                info.TokenType = TokenKind.LeftBrack;
                break;

            case '}':
                info.TokenType = TokenKind.RightBrack;
                break;

            case '[':
                info.TokenType = TokenKind.LeftSquareBrack;
                break;

            case ']':
                info.TokenType = TokenKind.RightSquareBrack;
                break;

            case '_':
            case char c when char.IsLetter(c):
                ScanIdentifierOrKeyword(ref info, ref currentChar);
                break;

            case char c when char.IsDigit(c):
                ScanNumeric(ref info, ref currentChar);
                break;

            case char c when c == _scanner.InvalidItem:
                // True end of file, exit completely
                if (_scanner.IsAtEnd())
                {
                    return false;
                }

                // Check if it is just a bad char in the source.
                // If not at the end and it is a bad char, don't stop the scanning
                // process - exit the method normally. Parsing will catch the bad token.
                if (_scanner.Position <= _scanner.SourceLength &&
                    _scanner.Base < _scanner.SourceLength)
                {
                    break;
                }

                // Should be end of file, otherwise it is in a bad state, exit
                return false;

            default:
                // Anything else is not a valid token start. Which exception is raised
                // depends on whether Unicode input is permitted by the lexer flags.
                if (!_flags.AllowUnicode())
                {
                    throw new IllegalCharacterException("Character not allowed", _scanner.CurrentSourceLine)
                    {
                        Character = currentChar
                    };
                }

                throw new FeatureNotSupportedException("Unicode identifiers currently not supported",
                                                       _scanner.CurrentSourceLine)
                {
                    FeatureName = "Unicode Identifiers"
                };
            }

            // Capture the scanned window as the token text, then move the window base past it.
            info.Text             = _scanner.CurrentWindow;
            info.SourceLineNumber = _scanner.CurrentSourceLine;
            _scanner.Base         = _scanner.Position;
            return true;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the concrete value-carrying token that corresponds to the scanned
        /// <paramref name="info"/>, converting its text to the typed value.
        /// </summary>
        /// <param name="info">Scanned token record supplying kind, text and source line number.</param>
        /// <param name="flags">Lexer flags; consulted to decide whether non-ASCII char literals are allowed.</param>
        /// <returns>A token whose <c>Value</c> holds the parsed value and whose <c>LineNumber</c> is the source line.</returns>
        /// <exception cref="InvalidTokenException">
        /// The text cannot be converted to the value type implied by the token kind, a char
        /// literal is too long, a bool literal is not exactly lower-case, or an operator is unknown.
        /// </exception>
        /// <exception cref="IllegalCharacterException">
        /// A char literal is above code point 127 and Unicode is not allowed by <paramref name="flags"/>.
        /// </exception>
        /// <exception cref="NotSupportedException">The token kind has no value token.</exception>
        private static IToken CreateValueToken(ref TokenInfo info, ref LexerFlag flags)
        {
            // Source literals are culture-independent, so numeric text is always parsed with
            // the invariant culture instead of whatever culture the host thread is running.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            switch (info.TokenType)
            {
            case TokenKind.SignedInt:
            {
                if (!long.TryParse(info.Text, System.Globalization.NumberStyles.Integer, invariant, out var longValue))
                {
                    throw new InvalidTokenException("Unable to parse 64bit signed integer value", info.SourceLineNumber)
                    {
                        InvalidText = info.Text
                    };
                }

                return new SignedIntToken
                {
                    Value      = longValue,
                    LineNumber = info.SourceLineNumber
                };
            }

            case TokenKind.UnsignedInt:
            {
                // Strip the unsigned suffix before parsing the digits.
                var digits = info.Text.TrimEnd('u').TrimEnd('U');
                if (!ulong.TryParse(digits, System.Globalization.NumberStyles.Integer, invariant, out var ulongValue))
                {
                    throw new InvalidTokenException("Unable to parse 64bit unsigned integer value", info.SourceLineNumber)
                    {
                        InvalidText = info.Text
                    };
                }

                return new UnsignedIntToken
                {
                    Value      = ulongValue,
                    LineNumber = info.SourceLineNumber
                };
            }

            case TokenKind.Real:
            {
                // Same number styles as the two-argument double.TryParse overload uses by default.
                const System.Globalization.NumberStyles realStyles =
                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

                if (!double.TryParse(info.Text, realStyles, invariant, out var doubleValue))
                {
                    throw new InvalidTokenException("Unable to parse double precision floating point value", info.SourceLineNumber)
                    {
                        InvalidText = info.Text
                    };
                }

                return new RealToken
                {
                    Value      = doubleValue,
                    LineNumber = info.SourceLineNumber
                };
            }

            case TokenKind.Keyword:
            case TokenKind.Identifier:
            {
                // A ref parameter cannot be captured by a lambda, hence the local copy.
                var text = info.Text;

                // Keywords match only when the text equals the lower-cased enum member name.
                // Invariant lower-casing keeps the match independent of the thread culture.
                var keyword = System.Enum.GetNames(typeof(Keyword))
                              .FirstOrDefault(kw => kw.ToLowerInvariant() == text);
                if (keyword != null)
                {
                    return new KeywordToken
                    {
                        Value      = System.Enum.Parse<Keyword>(keyword),
                        LineNumber = info.SourceLineNumber
                    };
                }

                return new IdToken
                {
                    Value      = info.Text,
                    LineNumber = info.SourceLineNumber
                };
            }

            case TokenKind.BoolLiteral:
            {
                switch (info.Text)
                {
                case "true":
                case "false":
                    if (!bool.TryParse(info.Text, out var boolValue))
                    {
                        throw new InvalidTokenException("Unable to parse bool literal", info.SourceLineNumber)
                        {
                            InvalidText = info.Text
                        };
                    }

                    return new BoolLiteralToken
                    {
                        Value      = boolValue,
                        LineNumber = info.SourceLineNumber
                    };

                // Right word, wrong casing (e.g. "True"): rejected rather than accepted.
                case string s when s.ToLowerInvariant() == "true" || s.ToLowerInvariant() == "false":
                    throw new InvalidTokenException("Unable to parse bool literal", info.SourceLineNumber)
                    {
                        InvalidText = info.Text
                    };

                default:
                    // Lexer only does a basic check for the literal; there is an edge case where
                    // an identifier can sneak past that check.
                    return new IdToken
                    {
                        Value      = info.Text,
                        LineNumber = info.SourceLineNumber
                    };
                }
            }

            case TokenKind.StringLiteral:
            {
                // Drop the surrounding double quotes.
                var stringValue = info.Text.TrimStart('\"').TrimEnd('\"');
                return new StringLiteralToken
                {
                    Value      = stringValue,
                    LineNumber = info.SourceLineNumber
                };
            }

            case TokenKind.CharLiteral:
            {
                // Drop the surrounding single quotes.
                var rawChar = info.Text.TrimStart('\'').TrimEnd('\'');

                // More than one character is only tolerated for a two-character backslash escape.
                if (rawChar.Length > 1 && (rawChar[0] != '\\' || rawChar.Length > 2))
                {
                    throw new InvalidTokenException("Char literal too long", info.SourceLineNumber)
                    {
                        InvalidText = rawChar
                    };
                }

                // NOTE(review): char.TryParse only accepts single-character strings, so a
                // two-character escape that passed the length check above is rejected here.
                // Confirm whether escapes are meant to be translated.
                if (!char.TryParse(rawChar, out var charValue))
                {
                    throw new InvalidTokenException("Char value invalid", info.SourceLineNumber)
                    {
                        InvalidText = rawChar
                    };
                }

                if (charValue > 127 && !flags.AllowUnicode())
                {
                    throw new IllegalCharacterException("Only ASCII characters are currently supported",
                                                        info.SourceLineNumber)
                    {
                        Character = charValue
                    };
                }

                return new CharLiteralToken
                {
                    Value      = charValue,
                    LineNumber = info.SourceLineNumber
                };
            }

            case TokenKind.Operator:
            {
                // Trim once up front instead of once per map entry inside the lambda.
                var operatorText  = info.Text.Trim();
                var operatorValue = TokenHelper.OperatorValueMap
                                    .FirstOrDefault(kv => kv.Value == operatorText).Key;

                // NOTE(review): default(Operator) doubles as the "not found" sentinel; this
                // assumes no real operator maps to the enum's default value - confirm.
                if (operatorValue == default(Operator))
                {
                    throw new InvalidTokenException("Unknown operator found", info.SourceLineNumber)
                    {
                        InvalidText = info.Text
                    };
                }

                return new OperatorToken
                {
                    Value      = operatorValue,
                    LineNumber = info.SourceLineNumber
                };
            }

            default:
                throw new NotSupportedException($"TokenType {info.TokenType} currently not supported");
            }
        }