/// <summary>
/// Builds the operator token for an already-lexed operator image.
/// The reserved operators listed below get their dedicated token type; every other
/// image is handed to the <c>Token.NewOperator(span, image)</c> overload that takes no type.
/// </summary>
/// <param name="span">Source span covered by the operator.</param>
/// <param name="image">The operator's text.</param>
/// <returns>The operator token created for <paramref name="image"/>.</returns>
public static Token GetOpToken(Span span, string image)
{
    // TODO(kai): make sure we have all special operators
    if (image == "=")
    {
        return Token.NewOperator(EQ, span, image);
    }
    if (image == ":")
    {
        return Token.NewOperator(COLON, span, image);
    }
    if (image == ".")
    {
        return Token.NewOperator(DOT, span, image);
    }
    if (image == "|")
    {
        return Token.NewOperator(PIPE, span, image);
    }
    if (image == "&")
    {
        return Token.NewOperator(AMP, span, image);
    }
    if (image == "^")
    {
        return Token.NewOperator(CARET, span, image);
    }
    if (image == "->")
    {
        return Token.NewOperator(ARROW, span, image);
    }

    // Not a reserved operator.
    return Token.NewOperator(span, image);
}
/// <summary>
/// Lexes a numeric literal that starts at the current character and returns it as an
/// integer or float token, together with whatever <c>LexOptionalNumSuffix</c> reads after it.
/// </summary>
/// <remarks>
/// A literal of the form <c>0</c>, prefix character, digit is lexed as a prefixed integer
/// in the radix reported by <c>GetNumPrefixRadix</c>. Everything else is lexed in base 10,
/// where a single <c>.</c> or a well-formed exponent marks the literal as a float.
/// The token's span runs from the first character up to the location after the suffix.
/// NOTE: <c>Convert.ToUInt64</c> still throws when the digits do not fit into a
/// <c>ulong</c>; no Score-specific number parser exists yet.
/// </remarks>
/// <returns>A token created by <c>Token.NewFloat</c> or <c>Token.NewInteger</c>.</returns>
private Token LexNumLiteral()
{
    // TODO(kai): _ should be allowed as a separator in numbers, do that eventually.
    // ALSO UPDATE LexPrefixedInt WHEN YOU DO
    var start = GetLocation();

    int radix = 10;
    string image = "";

    if (c == '0')
    {
        // Only treat this as a prefixed literal when it has the shape
        // "0 <prefix-char> <digit valid in that prefix's radix>".
        // The digit is validated against the prefix's own radix (previously always 16),
        // so something like 0b9 is no longer sent to LexPrefixedInt with nothing to lex.
        if (IsNumPrefix(Peek()) && IsDigitInRadix(Peek(2), GetNumPrefixRadix(Peek())))
        {
            Advance(); // skips '0'
            radix = GetNumPrefixRadix(c);
            Advance(); // skips the radix character

            // It's a prefix we can use, not a suffix:
            // 0x is 0 with the suffix "x", but 0xF is NOT 0 with the suffix "xF".
            image = LexPrefixedInt(radix);
        }
        // otherwise, fall through and lex a plain base-10 literal.
    }

    // Tracking this rejects things like 1.0.0 as one literal while still allowing
    // member access such as 1.0.abs().
    bool isFloat = false;
    // An exponent may appear only once per literal.
    bool hasExponent = false;

    // TODO(kai): probably refactor this.
    if (radix == 10)
    {
        while (!EndOfSource)
        {
            if (IsDigit(c))
            {
                Bump();
                continue;
            }

            switch (c)
            {
                case '.':
                    // A second '.' ends the literal, and so does '.' followed by an
                    // identifier start: 1.field is a member access, not a broken literal.
                    if (isFloat || IsIdentifierStart(Peek()))
                        goto loop_end;
                    isFloat = true;
                    Bump();
                    break;

                case 'e':
                case 'E':
                {
                    // Only accept the exponent when digits really follow (optionally after
                    // one sign) and no exponent was read yet. Otherwise the marker is left
                    // in the source for the suffix lexer instead of producing an image such
                    // as "1e" or "1e5e5" that the double parser below would reject.
                    var afterMarker = Peek();
                    bool hasSign = afterMarker == '+' || afterMarker == '-';
                    bool digitsFollow = hasSign ? IsDigit(Peek(2)) : IsDigit(afterMarker);
                    if (hasExponent || !digitsFollow)
                        goto loop_end;

                    hasExponent = true;
                    isFloat = true;
                    Bump(); // 'e' / 'E'
                    // signs are allowed in exponents, and only here.
                    if (c == '+' || c == '-')
                        Bump();
                    break;
                }

                default:
                    goto loop_end;
            }
        }
    loop_end:
        image = GetString();
    }

    // The suffix is lexed before the span is closed, so the span includes it.
    var suffix = LexOptionalNumSuffix();

    if (isFloat)
    {
        // Parse with the invariant culture: the literal always uses '.' as the decimal
        // separator, regardless of the culture of the machine running the compiler.
        double value = Convert.ToDouble(image, System.Globalization.CultureInfo.InvariantCulture);
        var span = new Span(fileName, start, GetLocation());
        return Token.NewFloat(span, value, image, suffix);
    }
    else
    {
        // Stored as ulong; per the original notes the LLVM wrapper is expected to want
        // ulong for all integers eventually.
        ulong value = Convert.ToUInt64(image, radix);
        var span = new Span(fileName, start, GetLocation());
        return Token.NewInteger(span, value, image, suffix);
    }
}
/// <summary>
/// Produces the next token from the source, skipping whitespace and comments.
/// </summary>
/// <remarks>
/// Whitespace and comments are skipped with a loop rather than by calling this method
/// again, so long runs of whitespace or comments cannot exhaust the call stack.
/// </remarks>
/// <returns>
/// The next token, or <c>null</c> when the end of the source is reached. <c>null</c> is
/// also returned for a '/' that reaches the switch without starting a comment, and for
/// any character this method has no rule for.
/// </returns>
// TODO(kai): this should really just return a token, Result is getting annoying in C#
public Token GetToken()
{
    while (true)
    {
        if (EndOfSource)
            return null;

        // Where this token starts. Re-read on every pass so skipped whitespace and
        // comments never end up inside a token's span.
        var start = GetLocation();

        // An identifier or keyword, unless it is the start of a prefixed string
        // (such as v"), which is handled in the switch below.
        if (IsIdentifierStart(c) && !IsNotIdentStart())
        {
            var str = LexIdentStr();
            var span = new Span(fileName, start, GetLocation());

            // _ is a special token called Wildcard.
            if (str == "_")
                return Token.New(WILDCARD, span, "_");

            if (IsKw(str))
                return Token.NewKeyword(GetTypeFromKeyword(str), span, str);
            else if (IsBuiltinTyName(str))
                return Token.NewBuiltinTyName(span, str);

            return Token.NewIdentifier(span, str);
        }

        if (IsDigit(c))
            return LexNumLiteral();

        if (IsOperator(c) && !IsCommentStart() && !IsOpIdentStart())
            return LexOperator();

        switch (c)
        {
            // Whitespace is ignored entirely.
            // TODO(kai): this can probably go in a separate method EatWhitespace.
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                Advance();
                continue; // try again from the next character

            case '`':
                return LexIdentOperator();

            case '/':
                if (Peek() == '#')
                {
                    // Lex past the line comment, then try again.
                    EatLineComment();
                    continue;
                }
                else if (Peek() == '*')
                {
                    EatBlockComment();
                    continue;
                }
                // Not a comment.
                // TODO(kai): operator lexing for this case; returns null until then.
                return null;

            // These are NOT identifiers (those were handled above);
            // they introduce cstr / verbatim strings.
            case 'c':
            {
                Advance(); // 'c'
                bool verbatim = c == 'v';
                if (verbatim)
                    Advance(); // 'v'
                var str = LexStrLiteral(verbatim);
                var span = new Span(fileName, start, GetLocation());
                return Token.NewString(span, str, verbatim, true);
            }

            case 'v':
            {
                Advance(); // 'v'
                bool cstr = c == 'c';
                if (cstr)
                    Advance(); // 'c'
                var str = LexStrLiteral(true);
                var span = new Span(fileName, start, GetLocation());
                return Token.NewString(span, str, true, cstr);
            }

            // Just a normal string literal.
            case '"':
            {
                var str = LexStrLiteral(false);
                var span = new Span(fileName, start, GetLocation());
                return Token.NewString(span, str, false, false);
            }

            case '\'':
                // TODO(kai): Not sure what kind of modifiers we can have on chars.
                return LexCharLiteralOrSymbol();

            case ',':
                Advance();
                return Token.New(COMMA, new Span(fileName, start, GetLocation()), ",");
            case '(':
                Advance();
                return Token.New(LPAREN, new Span(fileName, start, GetLocation()), "(");
            case ')':
                Advance();
                return Token.New(RPAREN, new Span(fileName, start, GetLocation()), ")");
            case '[':
                Advance();
                return Token.New(LBRACKET, new Span(fileName, start, GetLocation()), "[");
            case ']':
                Advance();
                return Token.New(RBRACKET, new Span(fileName, start, GetLocation()), "]");
            case '{':
                Advance();
                return Token.New(LBRACE, new Span(fileName, start, GetLocation()), "{");
            case '}':
                Advance();
                return Token.New(RBRACE, new Span(fileName, start, GetLocation()), "}");

            default:
                // TODO(kai): fatal error, we don't know how to continue.
                return null;
        }
    }
}
/// <summary>
/// Creates a new token.
/// This is essentially an alias for the private constructor; the only reason it exists
/// is consistency with the other token factory methods.
/// </summary>
/// <param name="type">The token type.</param>
/// <param name="span">The source span of the token.</param>
/// <param name="image">The token's text; a placeholder text is used when this is <c>null</c>.</param>
/// <returns>The newly created token.</returns>
internal static Token New(TokenType type, Span span, string image)
{
    if (image == null)
    {
        return new Token(type, span, "<no idea, friend>");
    }
    return new Token(type, span, image);
}
/// <summary>
/// Records a formatted error: increments the error count and appends an ERROR detail.
/// </summary>
/// <remarks>
/// Per the original notes, errors are meant to stop later compilation stages (for example,
/// lexing errors prevent parsing); that policy is enforced elsewhere, not in this method.
/// </remarks>
/// <param name="span">Where in the source file this error describes</param>
/// <param name="format">The detail message, as a <c>string.Format</c> format string</param>
/// <param name="args">The arguments used to format the message</param>
public void Error(Span span, string format, params object[] args)
{
    errorCount++;

    string message = string.Format(format, args);
    var detail = new Detail(Detail.Type.ERROR, span, message);
    details.Add(detail);
}
/// <summary>
/// Records a formatted warning by appending a WARN detail. The error count is not touched.
/// </summary>
/// <param name="span">Where in the source file this warning describes</param>
/// <param name="format">The detail message, as a <c>string.Format</c> format string</param>
/// <param name="args">The arguments used to format the message</param>
public void Warn(Span span, string format, params object[] args)
{
    string message = string.Format(format, args);
    var detail = new Detail(Detail.Type.WARN, span, message);
    details.Add(detail);
}
/// <summary>
/// Records an error: increments the error count and appends an ERROR detail.
/// </summary>
/// <remarks>
/// Per the original notes, errors are meant to stop later compilation stages (for example,
/// lexing errors prevent parsing); that policy is enforced elsewhere, not in this method.
/// </remarks>
/// <param name="span">Where in the source file this error describes</param>
/// <param name="message">The detail message</param>
public void Error(Span span, string message)
{
    errorCount++;

    var detail = new Detail(Detail.Type.ERROR, span, message);
    details.Add(detail);
}
/// <summary>
/// Lexes what follows a single quote: either a character literal or a symbol.
/// </summary>
/// <remarks>
/// After the quote is skipped, the input is a symbol only when the current character can
/// start an identifier and the character after it is not another quote. Everything else
/// is lexed as a character literal.
/// NOTE(review): the failure flag reported by <c>LexCharLiteral</c> is currently ignored,
/// so a char token is returned even when that flag is set — confirm this is intended.
/// </remarks>
/// <returns>A char token or a symbol token spanning from the opening quote.</returns>
private Token LexCharLiteralOrSymbol()
{
    var start = GetLocation();
    Advance(); // the opening '

    bool looksLikeSymbol = IsIdentifierStart(c) && Peek() != '\'';
    if (looksLikeSymbol)
    {
        var name = LexIdentStr();
        var symbolSpan = new Span(fileName, start, GetLocation());
        return Token.NewSymbol(symbolSpan, name);
    }

    // It CAN'T be a symbol, so attempt to lex a char literal.
    bool fail;
    var value = LexCharLiteral(out fail);
    var charSpan = new Span(fileName, start, GetLocation());
    return Token.NewChar(charSpan, value);
}
/// <summary>
/// Returns a new token for an identifier operator.
/// The token has type OP, is flagged as an operator, and its debug text is the image
/// prefixed with a backtick.
/// </summary>
/// <param name="span">The source span of the token.</param>
/// <param name="image">The identifier used as an operator.</param>
/// <returns>The newly created operator token.</returns>
internal static Token NewIdentifierOperator(Span span, string image)
{
    return new Token(OP, span, image, tok => "`" + tok.Image)
    {
        IsOp = true,
    };
}
/// <summary>
/// Returns a new token for a specific reserved operator.
/// The token gets the given type and is flagged as an operator.
/// </summary>
/// <param name="type">The reserved operator's token type.</param>
/// <param name="span">The source span of the token.</param>
/// <param name="image">The operator's text.</param>
/// <returns>The newly created operator token.</returns>
internal static Token NewOperator(TokenType type, Span span, string image)
{
    return new Token(type, span, image)
    {
        IsOp = true,
    };
}
/// <summary>
/// Returns a new token for a string literal.
/// The debug text is the value in double quotes, prefixed with "v" for verbatim strings
/// and "c" for C strings (in that order).
/// </summary>
/// <param name="span">The source span of the token.</param>
/// <param name="value">The string's contents, used as the token image.</param>
/// <param name="verbatim">Whether the literal is a verbatim string.</param>
/// <param name="cstr">Whether the literal is a C string.</param>
/// <returns>The newly created string token.</returns>
internal static Token NewString(Span span, string value, bool verbatim, bool cstr)
{
    Func<Token, string> describe = tok =>
    {
        string verbatimPrefix = tok.StrVerbatim ? "v" : "";
        string cPrefix = tok.StrC ? "c" : "";
        return verbatimPrefix + cPrefix + "\"" + value + "\"";
    };

    return new Token(STR, span, value, describe)
    {
        StrVerbatim = verbatim,
        StrC = cstr,
    };
}
/// <summary>
/// Returns a new token for a symbol.
/// The token is created with type IDENT and its debug text is the image prefixed with
/// a single quote.
/// </summary>
/// <param name="span">The source span of the token.</param>
/// <param name="image">The symbol's name.</param>
/// <returns>The newly created symbol token.</returns>
internal static Token NewSymbol(Span span, string image)
{
    // NOTE(review): symbols share the IDENT type with plain identifiers — confirm intended.
    return new Token(IDENT, span, image, tok => "'" + tok.Image);
}
/// <summary>
/// Returns a new token of type IDENT for an identifier.
/// </summary>
/// <param name="span">The source span of the token.</param>
/// <param name="image">The identifier's text.</param>
/// <returns>The newly created identifier token.</returns>
internal static Token NewIdentifier(Span span, string image)
{
    return new Token(IDENT, span, image);
}
/// <summary>
/// Returns a new token for a keyword, using the keyword's own token type.
/// </summary>
/// <param name="keyword">The token type of the keyword.</param>
/// <param name="span">The source span of the token.</param>
/// <param name="image">The keyword's text.</param>
/// <returns>The newly created keyword token.</returns>
internal static Token NewKeyword(TokenType keyword, Span span, string image)
{
    return new Token(keyword, span, image);
}
/// <summary>
/// Returns a new token of type BUILTIN_TY_NAME for a builtin type name.
/// </summary>
/// <param name="span">The source span of the token.</param>
/// <param name="name">The builtin type's name, used as the token image.</param>
/// <returns>The newly created token.</returns>
internal static Token NewBuiltinTyName(Span span, string name)
{
    return new Token(BUILTIN_TY_NAME, span, name);
}
/// <summary>
/// Lexes a run of operator characters and turns it into an operator token.
/// </summary>
/// <returns>The token <c>GetOpToken</c> produces for the collected operator text.</returns>
private Token LexOperator()
{
    var start = GetLocation();

    // Keep taking characters for as long as they are operator characters.
    while (true)
    {
        if (EndOfSource || !IsOperator(c))
        {
            break;
        }
        Bump();
    }

    // What the operator looks like now.
    var image = GetString();
    return GetOpToken(new Span(fileName, start, GetLocation()), image);
}
/// <summary>
/// Lexes an identifier operator: a backtick followed by an identifier.
/// </summary>
/// <returns>An identifier-operator token whose span starts at the backtick.</returns>
private Token LexIdentOperator()
{
    var start = GetLocation();

    // Step over the leading '`' before reading the identifier itself.
    Advance();
    var name = LexIdentStr();

    return Token.NewIdentifierOperator(new Span(fileName, start, GetLocation()), name);
}
/// <summary>
/// Returns a new token for a character literal.
/// The image is the UTF-16 text for the code point and the debug text is that image in
/// single quotes.
/// </summary>
/// <param name="span">The source span of the token.</param>
/// <param name="value">The character's value, converted with <c>char.ConvertFromUtf32</c>.</param>
/// <returns>The newly created char token.</returns>
internal static Token NewChar(Span span, uint value)
{
    string text = char.ConvertFromUtf32((int)value);
    return new Token(CHAR, span, text, tok => "'" + tok.Image + "'")
    {
        CharValue = value,
    };
}
/// <summary>
/// Wraps the given type and LLVM value in a new <c>ScoreVal</c>, pushes it onto the
/// stack and returns it.
/// </summary>
/// <param name="span">The source span passed on to the <c>ScoreVal</c> constructor.</param>
/// <param name="ty">The value's type.</param>
/// <param name="value">The LLVM value.</param>
/// <returns>The value that was pushed.</returns>
private ScoreVal Push(Span span, TyRef ty, LLVMValueRef value)
{
    var pushed = new ScoreVal(span, ty, value);
    stack.Push(pushed);
    return pushed;
}
/// <summary>
/// Returns a new token for an integer literal.
/// The debug text is the image followed by the numeric suffix.
/// </summary>
/// <param name="span">The source span of the token.</param>
/// <param name="value">The parsed integer value.</param>
/// <param name="image">The literal's text.</param>
/// <param name="suffix">The numeric suffix stored on the token.</param>
/// <returns>The newly created integer token.</returns>
internal static Token NewInteger(Span span, ulong value, string image, string suffix)
{
    return new Token(INT, span, image, tok => tok.Image + tok.NumericSuffix)
    {
        IntegerValue = value,
        NumericSuffix = suffix,
    };
}
/// <summary>
/// Creates a detail entry from its kind, source span and message.
/// </summary>
/// <param name="type">The kind of detail.</param>
/// <param name="span">Where in the source file this detail describes.</param>
/// <param name="message">The detail message.</param>
public Detail(Type type, Span span, string message)
{
    this.message = message;
    this.span = span;
    this.type = type;
}
/// <summary>
/// Returns a new token for a float literal.
/// The debug text is the image followed by the numeric suffix.
/// </summary>
/// <param name="span">The source span of the token.</param>
/// <param name="value">The parsed floating-point value.</param>
/// <param name="image">The literal's text.</param>
/// <param name="suffix">The numeric suffix stored on the token.</param>
/// <returns>The newly created float token.</returns>
internal static Token NewFloat(Span span, double value, string image, string suffix)
{
    return new Token(FLOAT, span, image, tok => tok.Image + tok.NumericSuffix)
    {
        FloatValue = value,
        NumericSuffix = suffix,
    };
}
/// <summary>
/// Records a warning by appending a WARN detail. The error count is not touched.
/// </summary>
/// <param name="span">Where in the source file this warning describes</param>
/// <param name="message">The detail message</param>
public void Warn(Span span, string message)
{
    var detail = new Detail(Detail.Type.WARN, span, message);
    details.Add(detail);
}
/// <summary>
/// Creates a token. Private; the static factory methods are the way to build tokens.
/// </summary>
/// <param name="type">The token type.</param>
/// <param name="span">The source span of the token.</param>
/// <param name="image">The token's text.</param>
/// <param name="dbg">
/// Function producing the token's debug text; when <c>null</c>, a function returning
/// the token's image is used.
/// </param>
private Token(TokenType type, Span span, string image, Func<Token, string> dbg = null)
{
    this.type = type;
    this.span = span;
    Image = image;

    if (dbg == null)
    {
        this.dbg = tok => tok.Image;
    }
    else
    {
        this.dbg = dbg;
    }
}
/// <summary>
/// Creates a value from its type and LLVM value.
/// </summary>
/// <param name="span">
/// The source span of the value.
/// NOTE(review): this argument is accepted but never stored — confirm whether a span
/// field is missing or the parameter is intentionally unused.
/// </param>
/// <param name="ty">The value's type.</param>
/// <param name="value">The LLVM value.</param>
public ScoreVal(Span span, TyRef ty, LLVMValueRef value)
{
    this.value = value;
    this.ty = ty;
}
/// <summary>
/// Parses a type at the current token.
/// </summary>
/// <remarks>
/// Recognised forms: <c>()</c> (the void type), <c>^</c> optionally followed by
/// <c>mut</c> and another type (a pointer), an identifier (a path type) and a builtin
/// type name. Anything else is reported through the log.
/// </remarks>
/// <returns>
/// The parsed type together with its span, or <c>null</c> after an error was logged
/// (including when the pointee of a pointer type failed to parse).
/// </returns>
private Spanned<TyRef> ParseTy()
{
    // This check has to come first: everything below reads Current.
    if (!HasCurrent)
    {
        log.Error(GetSpan(), "Expected type, found end of source.");
        return null;
    }

    var start = Current.span.start;

    // "()" is the void type.
    if (Check(LPAREN) && NextCheck(RPAREN))
    {
        Advance(); // '('
        Advance(); // ')'
        return new Spanned<TyRef>(new Span(fileName, start, GetLastSpan().end), TyVoid.VoidTy);
    }

    // TODO(kai): if (CheckOp(CARET) || CheckOp(AMP)) — references are not parsed yet.
    if (CheckOp(CARET))
    {
        AdvanceOp(CARET);

        bool isMut = false;
        if (Check(MUT))
        {
            Advance();
            isMut = true;
        }

        var type = ParseTy();
        if (type == null)
        {
            // The nested call already logged the problem; do not dereference null.
            return null;
        }

        // TODO(kai): return isPointer ? TyRef.PointerTo(type, isMut) as TyRef : TyRef.ReferenceTo(type, isMut) as TyRef;
        return new Spanned<TyRef>(new Span(fileName, start, type.span.end), new PointerTyRef(type.value, isMut));
    }

    switch (Current.type)
    {
        case IDENT:
        {
            var span = Current.span;
            var name = Current.Image;
            Advance();
            return new Spanned<TyRef>(span, new PathTyRef(name));
        }
        case BUILTIN_TY_NAME:
        {
            var name = Current.Image;
            Advance();
            return new Spanned<TyRef>(GetLastSpan(), BuiltinTyRef.GetByName(name));
        }
        default:
        {
            log.Error(GetSpan(), "Failed to parse type.");
            return null;
        }
    }
}