/// <summary>
/// Scans through the source and parses the next token it finds. The token
/// is returned and is also kept as the lexer's current token for
/// convenience.
/// </summary>
/// <remarks>
/// Tokens already waiting in <c>Buffer</c> are handed out first, both before
/// and after calling <c>Forward()</c>. On the first run a
/// <c>StartStatement</c> token is emitted before any real token. Characters
/// that do not start any known token are reported through
/// <c>PrintToken('!')</c> and skipped.
/// </remarks>
/// <example>
/// <code>
/// // expecting an identifier
/// if (lexer.Scan().type != TokenType.Identifier)
/// {
///     Console.WriteLine("Expecting an identifier but found:");
///     lexer.PrintToken('!');
/// }
/// </code>
/// </example>
/// <returns>
/// The next token, or whatever <c>Error(...)</c> returns when a quoted
/// literal is broken by a newline or by the end of the source.
/// </returns>
public Token Scan()
{
    // A token left over from the previous scan takes priority.
    if (Buffer.Count > 0)
    {
        _token = Buffer.Dequeue();
        return _token;
    }

    Forward();

    // Forward() may itself have queued tokens; hand those out first.
    if (Buffer.Count > 0)
    {
        _token = Buffer.Dequeue();
        return _token;
    }

    // if it's our first run, and we found something, we need to start a statement
    if (IsFirstRun)
    {
        _token = CreateToken(TokenType.StartStatement, "");
        IsFirstRun = false;
        return _token;
    }

    // Pre-fill the token as an error so that any path which forgets to
    // classify it is visible to the caller.
    _token.offset = Offset;
    _token.column = Column;
    _token.indent = Indent;
    _token.line = Line;
    _token.lexer = this;
    _token.value = "Lexer Fell Through";
    _token.length = 0;
    _token.type = TokenType.Error;

    // NOTE(review): assumes Forward() queues a token (handled above) when the
    // end of the source is reached, so Offset is in range here - confirm.
    char c = Source[Offset];

    if (c == '"')
    {
        return ScanQuotedLiteral('"', TokenType.String);
    }
    else if (c == '\'')
    {
        return ScanQuotedLiteral('\'', TokenType.Character);
    }
    else if (Char.IsDigit(c))
    {
        // Guard with IsDigit (not IsNumber) so it agrees with the loop below;
        // otherwise characters such as '²' enter this branch, consume nothing
        // and yield a zero-length token.
        //
        // might be a number, but if it has letters afterwards it's an
        // identifier eg 3DObject
        while (Offset < Source.Length && Char.IsDigit(Source[Offset]))
        {
            Offset++;
        }

        if (Offset < Source.Length && Char.IsLetter(Source[Offset]))
        {
            // it's actually an identifier
            while (Offset < Source.Length && (Char.IsLetterOrDigit(Source[Offset]) || Source[Offset] == '_'))
            {
                Offset++;
            }
            _token.type = TokenType.Identifier;
            CopyToken();
        }
        else if (Offset + 1 < Source.Length && Source[Offset] == '.' && Char.IsDigit(Source[Offset + 1]))
        {
            // it's a decimal: a '.' that is immediately followed by a digit.
            // Both conditions are required, otherwise "3,4" or "1 2" would be
            // swallowed as a single decimal token.
            Offset++;
            while (Offset < Source.Length && Char.IsDigit(Source[Offset]))
            {
                Offset++;
            }
            _token.type = TokenType.Decimal;
            CopyToken();
        }
        else
        {
            // it's an integer
            _token.type = TokenType.Integer;
            CopyToken();
        }
    }
    else if (Char.IsLetter(c))
    {
        // it's an identifier
        while (Offset < Source.Length && (Char.IsLetterOrDigit(Source[Offset]) || Source[Offset] == '_'))
        {
            Offset++;
        }
        _token.type = TokenType.Identifier;
        CopyToken();
    }
    else if (c == '<' || c == '>' || c == '=' || c == '!' || c == '*' || c == '/' || c == '%')
    {
        // might have an equals afterwards
        Offset++;
        if (Offset < Source.Length && Source[Offset] == '=')
        {
            Offset++;
        }
        _token.type = TokenType.Symbol;
        CopyToken();
    }
    else if (c == '+')
    {
        // "+", "++" or "+="
        Offset++;
        if (Offset < Source.Length && (Source[Offset] == '+' || Source[Offset] == '='))
        {
            Offset++;
        }
        _token.type = TokenType.Symbol;
        CopyToken();
    }
    else if (c == '-')
    {
        // "-", "--" or "-="
        Offset++;
        if (Offset < Source.Length && (Source[Offset] == '-' || Source[Offset] == '='))
        {
            Offset++;
        }
        _token.type = TokenType.Symbol;
        CopyToken();
    }
    else if (Offset + 1 < Source.Length
             && ((c == '&' && Source[Offset + 1] == '&') || (c == '|' && Source[Offset + 1] == '|')))
    {
        // "&&" or "||"; a lone '&' or '|' is treated as an unknown character
        Offset += 2;
        _token.type = TokenType.Symbol;
        CopyToken();
    }
    else if (c == '.' || c == ',' || c == ':' || c == ';' || c == '(' || c == ')' || c == '[' || c == ']')
    {
        // '[' is lexed as a plain symbol. A special case that recognised an
        // XML block inside "[ ... ]" used to live here and was disabled; to
        // bring it back, '[' has to be removed from this list.
        Offset++;
        _token.type = TokenType.Symbol;
        CopyToken();
    }
    else
    {
        // unknown character: report it, skip it and try again
        PrintToken('!');
        Offset++;
        return Scan();
    }

    return _token;
}

/// <summary>
/// Lexes a quoted literal whose opening quote is at the current offset and
/// stores the decoded text in the current token.
/// </summary>
/// <param name="quote">The delimiter that opens and closes the literal.</param>
/// <param name="literalType">The token type to assign on success.</param>
/// <returns>
/// The current token on success, or the result of <c>Error(...)</c> when a
/// newline or the end of the source is hit before the closing quote.
/// </returns>
private Token ScanQuotedLiteral(char quote, TokenType literalType)
{
    // step over the opening quote
    Offset++;
    Column++;

    while (Offset < Source.Length && Source[Offset] != quote)
    {
        Column++;
        if (Source[Offset] == '\n')
        {
            // error
            Line++;
            Indent = 0;
            Column = 0;
            return Error("Newline within a string literal");
        }

        // A backslash consumes the character after it, so an escaped quote
        // does not end the literal and an escaped backslash does not escape
        // the quote that follows it. A newline is left for the next
        // iteration so it is still reported as an error.
        if (Source[Offset] == '\\' && Offset + 1 < Source.Length && Source[Offset + 1] != '\n')
        {
            Offset++;
            Column++;
        }
        Offset++;
    }

    if (Offset >= Source.Length)
    {
        // error
        Offset = Source.Length;
        return Error("Reached the end of the file while in a string literal");
    }

    int closingQuote = Offset;
    Offset++;
    _token.length = Offset - _token.offset;

    // Decode the text between the quotes, replacing escape sequences.
    System.Text.StringBuilder decoded = new System.Text.StringBuilder(_token.length);
    for (int i = _token.offset + 1; i < closingQuote; i++)
    {
        char ch = Source[i];
        if (ch == '\\' && i + 1 < closingQuote)
        {
            i++;
            decoded.Append(DecodeEscape(Source[i]));
        }
        else
        {
            decoded.Append(ch);
        }
    }

    _token.value = decoded.ToString();
    _token.type = literalType;
    return _token;
}

/// <summary>
/// Maps the character that follows a backslash to the character it stands
/// for.
/// </summary>
/// <param name="escaped">The character immediately after the backslash.</param>
/// <returns>
/// The control character for a recognised escape; any other character
/// (including quotes and the backslash itself) is returned unchanged.
/// </returns>
private static char DecodeEscape(char escaped)
{
    switch (escaped)
    {
        case '0': return '\0';
        case 'v': return '\v';
        case 'f': return '\f';
        case 'n': return '\n';
        case 't': return '\t';
        case 'b': return '\b';
        case 'r': return '\r';
        default:
            // anything not listed just falls through unchanged; probably
            // should be an error or warning
            return escaped;
    }
}
/// <summary>
/// Creates a lexer over source text that is supplied as a string.
/// </summary>
/// <param name="location">Forwarded unchanged to the base constructor.</param>
/// <param name="source">Forwarded unchanged to the base constructor.</param>
public Lexer(string location, string source)
    : base(location, source)
{
    // Begin with no pending tokens and a blank current token.
    this.Buffer = new Queue<Token>();
    this._token = new Token();
}
/// <summary>
/// Formats a diagnostic for <paramref name="token"/>: a header line with the
/// icon, <c>Filename</c>, the token's line and type, then the source line the
/// token sits on (leading spaces and tabs removed), then a row of carets
/// under the token.
/// </summary>
/// <param name="icon">Character placed at the start of the header line.</param>
/// <param name="token">The token to point at.</param>
/// <returns>The formatted three-line message.</returns>
public string PrintToken(char icon, Token token)
{
    // Clamp into the source. Math.Max keeps the indices at 0 for an empty
    // source instead of -1, which would index out of range below.
    int lastIndex = Math.Max(Source.Length - 1, 0);
    int start = Math.Min(token.offset, lastIndex);
    int end = Math.Min(token.offset + token.length, lastIndex);

    // Walk back to the beginning of the line the token starts on...
    while (start > 0 && Source[start - 1] != '\n')
    {
        start--;
    }

    // ...then forward past the line's leading indentation.
    while (start < Source.Length && (Source[start] == '\t' || Source[start] == ' '))
    {
        start++;
    }

    // Extend to the end of the line.
    while (end < Source.Length && Source[end] != '\n')
    {
        end++;
    }

    // A token that begins inside the skipped indentation would give a
    // negative pad width, which new String(char, int) rejects with an
    // ArgumentOutOfRangeException; clamp it to zero.
    int padding = Math.Max(token.offset - start, 0);

    return String.Format("{0} {1}:{2} {3}\n {4}\n {5}{6}",
        icon,
        Filename,
        token.line,
        token.type,
        Source.Substring(start, end - start),
        new String(' ', padding),
        new String('^', Math.Max(token.length, 1)));
}