private static IEnumerable <IToken> Lex(CodeFile file, Diagnostics diagnostics) { var code = file.Code; var text = code.Text; var tokenStart = 0; var tokenEnd = -1; // One past the end position to allow for zero length spans while (tokenStart < text.Length) { var currentChar = text[tokenStart]; switch (currentChar) { case '{': yield return(TokenFactory.OpenBrace(SymbolSpan())); break; case '}': yield return(TokenFactory.CloseBrace(SymbolSpan())); break; case '(': yield return(TokenFactory.OpenParen(SymbolSpan())); break; case ')': yield return(TokenFactory.CloseParen(SymbolSpan())); break; case '[': yield return(TokenFactory.OpenBracket(SymbolSpan())); break; case ']': yield return(TokenFactory.CloseBracket(SymbolSpan())); break; case ';': yield return(TokenFactory.Semicolon(SymbolSpan())); break; case ',': yield return(TokenFactory.Comma(SymbolSpan())); break; case '#': if (NextCharIs('#')) { // it is `##` yield return(TokenFactory.HashHash(SymbolSpan(2))); } else { // it is `#` yield return(TokenFactory.Hash(SymbolSpan(1))); } break; case '.': if (NextCharIs('.')) { if (CharAtIs(2, '<')) { // it is `..<` yield return(TokenFactory.DotDotLessThan(SymbolSpan(3))); } else { // it is `..` yield return(TokenFactory.DotDot(SymbolSpan(2))); } } else { yield return(TokenFactory.Dot(SymbolSpan())); } break; case ':': if (NextCharIs(':')) { // it is `::` yield return(TokenFactory.ColonColon(SymbolSpan(2))); } else { // it is `:` yield return(TokenFactory.Colon(SymbolSpan())); } break; case '?': switch (NextChar()) { case '?': // it is `??` yield return(TokenFactory.QuestionQuestion(SymbolSpan(2))); break; case '.': // it is `?.` yield return(TokenFactory.QuestionDot(SymbolSpan(2))); break; default: // it is `?` yield return(TokenFactory.Question(SymbolSpan())); break; } break; case '|': yield return(TokenFactory.Pipe(SymbolSpan())); break; case '$': yield return(TokenFactory.Dollar(SymbolSpan())); break; case '→': yield return(TokenFactory.RightArrow(SymbolSpan())); break; case '@': yield return(TokenFactory.AtSign(SymbolSpan())); break; case '^': if (NextCharIs('.')) { // it is `^.` yield return(TokenFactory.CaretDot(SymbolSpan(2))); } else { // it is `^` yield return(TokenFactory.Caret(SymbolSpan())); } break; case '+': if (NextCharIs('=')) { // it is `+=` yield return(TokenFactory.PlusEquals(SymbolSpan(2))); } else { // it is `+` yield return(TokenFactory.Plus(SymbolSpan())); } break; case '-': switch (NextChar()) { case '=': // it is `-=` yield return(TokenFactory.MinusEquals(SymbolSpan(2))); break; case '>': // it is `->` yield return(TokenFactory.RightArrow(SymbolSpan(2))); break; default: // it is `-` yield return(TokenFactory.Minus(SymbolSpan())); break; } break; case '*': if (NextCharIs('=')) { // it is `*=` yield return(TokenFactory.AsteriskEquals(SymbolSpan(2))); } else { // it is `*` yield return(TokenFactory.Asterisk(SymbolSpan())); } break; case '/': switch (NextChar()) { case '/': // it is a line comment `//` tokenEnd = tokenStart + 2; // Include newline at end while (tokenEnd < text.Length) { currentChar = text[tokenEnd]; tokenEnd += 1; if (currentChar == '\r' || currentChar == '\n') { break; } } yield return(TokenFactory.Comment(TokenSpan())); break; case '*': // it is a block comment `/*` tokenEnd = tokenStart + 2; var lastCharWasStar = false; // Include slash at end for (; ;) { // If we ran into the end of the file, error if (tokenEnd >= text.Length) { diagnostics.Add(LexError.UnclosedBlockComment(file, TextSpan.FromStartEnd(tokenStart, tokenEnd))); break; } currentChar = text[tokenEnd]; tokenEnd += 1; if (lastCharWasStar && currentChar == '/') { break; } lastCharWasStar = currentChar == '*'; } yield return(TokenFactory.Comment(TokenSpan())); break; case '=': // it is `/=` yield return(TokenFactory.SlashEquals(SymbolSpan(2))); break; default: // it is `/` yield return(TokenFactory.Slash(SymbolSpan())); break; } break; case '=': switch (NextChar()) { case '>': // it is `=>` yield return(TokenFactory.EqualsGreaterThan(SymbolSpan(2))); break; case '=': // it is `==` yield return(TokenFactory.EqualsEquals(SymbolSpan(2))); break; case '/': if (CharAtIs(2, '=')) { // it is `=/=` yield return(TokenFactory.NotEqual(SymbolSpan(3))); } else { goto default; } break; default: // it is `=` yield return(TokenFactory.Equals(SymbolSpan())); break; } break; case '≠': yield return(TokenFactory.NotEqual(SymbolSpan())); break; case '>': if (NextCharIs('=')) { // it is `>=` yield return(TokenFactory.GreaterThanOrEqual(SymbolSpan(2))); } else { // it is `>` yield return(TokenFactory.GreaterThan(SymbolSpan())); } break; case '≥': case '⩾': yield return(TokenFactory.GreaterThanOrEqual(SymbolSpan())); break; case '<': switch (NextChar()) { case '=': // it is `<=` yield return(TokenFactory.LessThanOrEqual(SymbolSpan(2))); break; case ':': // it is `<:` yield return(TokenFactory.LessThanColon(SymbolSpan(2))); break; case '.': if (CharAtIs(2, '.')) { if (CharAtIs(3, '<')) { // it is `<..<` yield return(TokenFactory.LessThanDotDotLessThan(SymbolSpan(4))); } else { // it is `<..` yield return(TokenFactory.LessThanDotDot(SymbolSpan(3))); } } else { goto default; } break; default: // it is `<` yield return(TokenFactory.LessThan(SymbolSpan())); break; } break; case '≤': case '⩽': yield return(TokenFactory.LessThanOrEqual(SymbolSpan())); break; case '"': yield return(LexString()); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { tokenEnd = tokenStart + 1; while (tokenEnd < text.Length && IsIntegerCharacter(text[tokenEnd])) { tokenEnd += 1; } var span = TokenSpan(); var value = BigInteger.Parse(code[span]); yield return(TokenFactory.IntegerLiteral(span, value)); break; } case '\\': { tokenEnd = tokenStart + 1; while (tokenEnd < text.Length && IsIdentifierCharacter(text[tokenEnd])) { tokenEnd += 1; } var identifierStart = tokenStart + 1; yield return(TokenFactory.EscapedIdentifier(TokenSpan(), text.Substring(identifierStart, tokenEnd - identifierStart))); break; } default: if (char.IsWhiteSpace(currentChar)) { tokenEnd = tokenStart + 1; // Include whitespace at end while (tokenEnd < text.Length && char.IsWhiteSpace(text[tokenEnd])) { tokenEnd += 1; } yield return(TokenFactory.Whitespace(TokenSpan())); } else if (IsIdentifierStartCharacter(currentChar)) { tokenEnd = tokenStart + 1; while (tokenEnd < text.Length && IsIdentifierCharacter(text[tokenEnd])) { tokenEnd += 1; } yield return(NewIdentifierOrKeywordToken()); } else if (currentChar == '!' && NextCharIs('=')) { var span = SymbolSpan(2); diagnostics.Add(LexError.CStyleNotEquals(file, span)); yield return(TokenFactory.NotEqual(span)); } else { var span = SymbolSpan(); diagnostics.Add(LexError.UnexpectedCharacter(file, span, currentChar)); yield return(TokenFactory.Unexpected(span)); } break; } tokenStart = tokenEnd; } // The end of file token provides something to attach any final errors to yield return(TokenFactory.EndOfFile(SymbolSpan(0))); yield break; TextSpan SymbolSpan(int length = 1) { var end = tokenStart + length; return(TokenSpan(end)); } TextSpan TokenSpan(int?end = null) { tokenEnd = end ?? tokenEnd; return(TextSpan.FromStartEnd(tokenStart, tokenEnd)); } IToken NewIdentifierOrKeywordToken() { var span = TextSpan.FromStartEnd(tokenStart, tokenEnd); var value = code[span]; if (TokenTypes.KeywordFactories.TryGetValue(value, out var keywordFactory)) { return(keywordFactory(span)); } return(TokenFactory.BareIdentifier(span, value)); } char?NextChar() { var index = tokenStart + 1; return(index < text.Length ? text[index] : default);