/// <summary>
/// Builds a <c>SyntaxError</c> carrying the given message and attaches the
/// location obtained from <c>GetLocation(_index)</c> to it.
/// </summary>
/// <param name="s">Message passed to the <c>SyntaxError</c> constructor.</param>
/// <returns>The newly created error; it is returned, not thrown.</returns>
public Error CreateError(string s) {
    var syntaxError = new SyntaxError(s);
    var location = GetLocation(_index);
    syntaxError.AddStackTrace(location);
    return syntaxError;
}
/// <summary>
/// Splits <paramref name="code"/> into a flat list of tokens: text, whitespace,
/// structure characters, operators, numbers, strings, templates, comments and
/// HTML fragments. Each token is created with the current line/character
/// counters at the moment it is emitted.
/// </summary>
/// <param name="code">Source text to tokenize.</param>
/// <param name="fileId">File identifier stored in the location attached to syntax errors.</param>
/// <returns>The tokens in source order.</returns>
/// <exception cref="SyntaxError">
/// Thrown for an operator run that cannot be split into valid operators, an
/// invalid escape sequence, a newline inside a non-template string, or a string
/// that is still open at the end of the input.
/// </exception>
public static IList<Token> Lex(string code, int fileId) {
    var tokens = new List<Token>();
    var index = 0;
    var buffer = "";
    var inString = false;
    var stringStart = '\0';
    var escaping = false;
    var inOperator = false;
    var inNumber = false;
    var inComment = false;
    var commentStart = "";
    var line = 1;
    var character = 1;

    // Creates a syntax error tagged with the file and the current line.
    Error createError(string message) {
        var error = new SyntaxError(message);
        error.AddStackTrace(new Debug.Location(fileId, line));
        return error;
    }

    // Emits the pending buffer as a token of the given group and resets it.
    // With force = true an empty buffer is emitted too (needed for "" literals).
    void clearBuffer(Token.Group type, bool force = false) {
        if (buffer.Length == 0 && !force) {
            return;
        }

        if (type == Token.Group.Operator && !Tokens.IsValidOperator(buffer)) {
            // The run is not a single operator: greedily split it into the
            // longest valid operators, left to right.
            for (var start = 0; start < buffer.Length;) {
                var found = false;
                for (var end = buffer.Length; end > start; end--) {
                    var sub = buffer.Substring(start, end - start);
                    if (Tokens.IsValidOperator(sub)) {
                        tokens.Add(new Token(sub, type, line, character));
                        start = end;
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    throw createError($"Invalid operator '{buffer}'");
                }
            }
        } else {
            tokens.Add(new Token(buffer, type, line, character));
        }

        buffer = "";
    }

    // Advances the line/character counters past one consumed character.
    void incrementPosition(char c) {
        if (c == '\n') {
            line++;
            character = 1;
        } else {
            character++;
        }
    }

    // True when the characters at position 'at' open a "//" or "/*" comment.
    bool startsComment(int at) {
        return code[at] == '/'
            && at + 1 < code.Length
            && (code[at + 1] == '/' || code[at + 1] == '*');
    }

    while (index < code.Length) {
        var c = code[index];
        incrementPosition(c);

        if (inOperator) {
            // A comment opener ends the operator run; otherwise "+//" would
            // swallow the comment into the operator buffer.
            if (IsOperator(c) && !startsComment(index)) {
                buffer += c;
                index++;
                continue;
            }
            inOperator = false;
            clearBuffer(Token.Group.Operator);
        }

        if (inNumber) {
            // A sign belongs to the number only directly after the exponent
            // marker ("1e-5"); elsewhere it is an operator ("1-2").
            var afterExponent = buffer.Length > 0
                && (buffer[buffer.Length - 1] == 'e' || buffer[buffer.Length - 1] == 'E');
            var isExponentSign = (c == '-' || c == '+') && afterExponent;
            if ((c >= '0' && c <= '9') || c == '.' || c == 'e' || c == 'E' || isExponentSign) {
                buffer += c;
                index++;
                continue;
            }
            inNumber = false;
            clearBuffer(Token.Group.Number);
        }

        if (inComment) {
            if (c == '\n' && commentStart == "//") {
                // Line comment ends here; the newline itself falls through and
                // is tokenized as whitespace below.
                clearBuffer(Token.Group.Comment);
                inComment = false;
            } else if (commentStart == "/*" && c == '*' && index + 1 < code.Length && code[index + 1] == '/') {
                buffer += "*/";
                clearBuffer(Token.Group.Comment);
                // Account for the '/' that is skipped without passing through
                // the top of the loop, so later columns do not drift.
                incrementPosition('/');
                index += 2;
                inComment = false;
                continue;
            } else {
                buffer += c;
                index++;
                continue;
            }
        }

        if (inString) {
            if (escaping) {
                switch (c) {
                    case '\'':
                    case '"':
                    case '\\':
                    case '`': // lets a template string contain its own delimiter
                        buffer += c;
                        break;
                    case 'n':
                        buffer += '\n';
                        break;
                    case 'b':
                        buffer += '\b';
                        break;
                    case 'r':
                        buffer += '\r';
                        break;
                    case 'f':
                        buffer += '\f';
                        break;
                    case 't':
                        buffer += '\t';
                        break;
                    case 'v':
                        buffer += '\v';
                        break;
                    default:
                        throw createError("Invalid escaped character '" + c + "'");
                }
                escaping = false;
            } else if (c == stringStart) {
                inString = false;
                // force = true so that empty literals still produce a token.
                clearBuffer(stringStart == '`' ? Token.Group.Template : Token.Group.String, true);
            } else if (c == '\\') {
                escaping = true;
            } else if (c == '\r') {
                // TODO: ignore this? Carriage returns inside strings are dropped.
            } else if (c == '\n' && stringStart != '`') {
                throw createError("Newline in string literal");
            } else {
                buffer += c;
            }
        } else if (IsWhitespace(c)) {
            clearBuffer(Token.Group.Text);
            if (c != '\r') {
                tokens.Add(new Token(c.ToString(), Token.Group.WhiteSpace, line, character));
            }
        } else if (IsStructure(c)) {
            clearBuffer(Token.Group.Text);
            tokens.Add(new Token(c.ToString(), Token.Group.Structure, line, character));
        } else {
            var htmlIndex = IsHtml(code, index);
            if (htmlIndex != -1) {
                clearBuffer(Token.Group.Text);
                var html = code.Substring(index, htmlIndex - index + 1);
                tokens.Add(new Token(html, Token.Group.Html, line, character));
                // The first character was already counted at the top of the loop.
                for (var h = 1; h < html.Length; h++) {
                    incrementPosition(html[h]);
                }
                index = htmlIndex;
            } else if (startsComment(index)) {
                clearBuffer(Token.Group.Text);
                inComment = true;
                commentStart = c.ToString() + code[index + 1];
                buffer = commentStart;
                // Count the second delimiter character that is skipped here.
                incrementPosition(code[index + 1]);
                index++;
            } else if (IsOperator(c)) {
                clearBuffer(Token.Group.Text);
                if (c == '.') {
                    // ".5" starts a number rather than an operator.
                    var next = index + 1 < code.Length ? code[index + 1] : '\0';
                    if (next >= '0' && next <= '9') {
                        inNumber = true;
                    }
                }
                if (!inNumber) {
                    inOperator = true;
                }
                buffer += c;
            } else if (IsString(c)) {
                // Flush pending text so it is not merged into the string body.
                clearBuffer(Token.Group.Text);
                inString = true;
                stringStart = c;
            } else if (c >= '0' && c <= '9' && buffer.Length == 0) {
                // A digit only starts a number when no text is pending;
                // otherwise it is part of the current text (e.g. "abc1").
                inNumber = true;
                buffer += c;
            } else {
                buffer += c;
            }
        }

        index++;
    }

    // Flush whatever is still pending at the end of the input.
    if (inOperator) {
        clearBuffer(Token.Group.Operator);
    } else if (inNumber) {
        clearBuffer(Token.Group.Number);
    } else if (inString) {
        throw createError("Unterminated string literal");
    } else if (inComment) {
        // e.g. a trailing "// note" with no final newline.
        clearBuffer(Token.Group.Comment);
    } else {
        clearBuffer(Token.Group.Text);
    }

    return tokens;
}