// Exercise the lexer against a series of small scripts: numbers and
// operators, multi-word keywords, doubled-quote string escapes, all three
// line-break conventions, and the LastToken helper.
public static void RunUnitTests() {
	// Numbers and an operator on a single line.
	Lexer lex = new Lexer("42 * 3.14158");
	Check(lex.Dequeue(), Token.Type.Number, "42");
	CheckLineNum(lex.lineNum, 1);
	Check(lex.Dequeue(), Token.Type.OpTimes);
	Check(lex.Dequeue(), Token.Type.Number, "3.14158");
	UnitTest.ErrorIf(!lex.AtEnd, "AtEnd not set when it should be");
	CheckLineNum(lex.lineNum, 1);

	// Punctuation, a leading-dot number, a two-word keyword, and a comment.
	lex = new Lexer("6*(.1-foo) end if // and a comment!");
	Check(lex.Dequeue(), Token.Type.Number, "6");
	CheckLineNum(lex.lineNum, 1);
	Check(lex.Dequeue(), Token.Type.OpTimes);
	Check(lex.Dequeue(), Token.Type.LParen);
	Check(lex.Dequeue(), Token.Type.Number, ".1");
	Check(lex.Dequeue(), Token.Type.OpMinus);
	Check(lex.Peek(), Token.Type.Identifier, "foo");	// Peek must not consume...
	Check(lex.Peek(), Token.Type.Identifier, "foo");	// ...even when called twice.
	Check(lex.Dequeue(), Token.Type.Identifier, "foo");
	Check(lex.Dequeue(), Token.Type.RParen);
	Check(lex.Dequeue(), Token.Type.Keyword, "end if");
	Check(lex.Dequeue(), Token.Type.EOL);
	UnitTest.ErrorIf(!lex.AtEnd, "AtEnd not set when it should be");
	CheckLineNum(lex.lineNum, 1);

	// String literals: "" inside a string is an escaped double quote.
	lex = new Lexer("\"foo\" \"isn't \"\"real\"\"\" \"now \"\"\"\" double!\"");
	Check(lex.Dequeue(), Token.Type.String, "foo");
	Check(lex.Dequeue(), Token.Type.String, "isn't \"real\"");
	Check(lex.Dequeue(), Token.Type.String, "now \"\" double!");
	UnitTest.ErrorIf(!lex.AtEnd, "AtEnd not set when it should be");

	// Line breaks: \n, \r, and \r\n should each count as exactly one EOL.
	lex = new Lexer("foo\nbar\rbaz\r\nbamf");
	Check(lex.Dequeue(), Token.Type.Identifier, "foo");
	CheckLineNum(lex.lineNum, 1);
	Check(lex.Dequeue(), Token.Type.EOL);
	Check(lex.Dequeue(), Token.Type.Identifier, "bar");
	CheckLineNum(lex.lineNum, 2);
	Check(lex.Dequeue(), Token.Type.EOL);
	Check(lex.Dequeue(), Token.Type.Identifier, "baz");
	CheckLineNum(lex.lineNum, 3);
	Check(lex.Dequeue(), Token.Type.EOL);
	Check(lex.Dequeue(), Token.Type.Identifier, "bamf");
	CheckLineNum(lex.lineNum, 4);
	Check(lex.Dequeue(), Token.Type.EOL);
	UnitTest.ErrorIf(!lex.AtEnd, "AtEnd not set when it should be");

	// LastToken: the last token of the last line, ignoring trailing comments.
	Check(LastToken("x=42 // foo"), Token.Type.Number, "42");
	// BUGFIX: this literal previously contained a raw (unescaped) line break,
	// which is not valid in a non-verbatim C# string; restored as \n.
	Check(LastToken("x = \n[1, 2, // foo"), Token.Type.Comma);
	Check(LastToken("x = [1, 2 // foo"), Token.Type.Number, "2");
	Check(LastToken("x = [1, 2 // foo // and \"more\" foo"), Token.Type.Number, "2");
	Check(LastToken("x = [\"foo\", \"//bar\"]"), Token.Type.RSquare);
	Check(LastToken("print 1 // line 1\nprint 2"), Token.Type.Number, "2");
	Check(LastToken("print \"Hi\"\"Quote\" // foo bar"), Token.Type.String, "Hi\"Quote");
}
// Self-test for the Lexer: tokenize several small snippets and verify the
// resulting token stream, line numbering, and end-of-input flag.
public static void RunUnitTests() {
	// A number, an operator, and another number.
	var lexer = new Lexer("42 * 3.14158");
	Check(lexer.Dequeue(), Token.Type.Number, "42");
	CheckLineNum(lexer.lineNum, 1);
	Check(lexer.Dequeue(), Token.Type.OpTimes);
	Check(lexer.Dequeue(), Token.Type.Number, "3.14158");
	UnitTest.ErrorIf(!lexer.AtEnd, "AtEnd not set when it should be");
	CheckLineNum(lexer.lineNum, 1);

	// Punctuation, a leading-dot number, a two-word keyword, plus a comment.
	lexer = new Lexer("6*(.1-foo) end if // and a comment!");
	Check(lexer.Dequeue(), Token.Type.Number, "6");
	CheckLineNum(lexer.lineNum, 1);
	Check(lexer.Dequeue(), Token.Type.OpTimes);
	Check(lexer.Dequeue(), Token.Type.LParen);
	Check(lexer.Dequeue(), Token.Type.Number, ".1");
	Check(lexer.Dequeue(), Token.Type.OpMinus);
	Check(lexer.Peek(), Token.Type.Identifier, "foo");	// Peek must not consume,
	Check(lexer.Peek(), Token.Type.Identifier, "foo");	// even when repeated.
	Check(lexer.Dequeue(), Token.Type.Identifier, "foo");
	Check(lexer.Dequeue(), Token.Type.RParen);
	Check(lexer.Dequeue(), Token.Type.Keyword, "end if");
	Check(lexer.Dequeue(), Token.Type.EOL);
	UnitTest.ErrorIf(!lexer.AtEnd, "AtEnd not set when it should be");
	CheckLineNum(lexer.lineNum, 1);

	// Strings, including doubled-quote ("") escapes.
	lexer = new Lexer("\"foo\" \"isn't \"\"real\"\"\" \"now \"\"\"\" double!\"");
	Check(lexer.Dequeue(), Token.Type.String, "foo");
	Check(lexer.Dequeue(), Token.Type.String, "isn't \"real\"");
	Check(lexer.Dequeue(), Token.Type.String, "now \"\" double!");
	UnitTest.ErrorIf(!lexer.AtEnd, "AtEnd not set when it should be");

	// Each of \n, \r, and \r\n should register as a single line break.
	lexer = new Lexer("foo\nbar\rbaz\r\nbamf");
	Check(lexer.Dequeue(), Token.Type.Identifier, "foo");
	CheckLineNum(lexer.lineNum, 1);
	Check(lexer.Dequeue(), Token.Type.EOL);
	Check(lexer.Dequeue(), Token.Type.Identifier, "bar");
	CheckLineNum(lexer.lineNum, 2);
	Check(lexer.Dequeue(), Token.Type.EOL);
	Check(lexer.Dequeue(), Token.Type.Identifier, "baz");
	CheckLineNum(lexer.lineNum, 3);
	Check(lexer.Dequeue(), Token.Type.EOL);
	Check(lexer.Dequeue(), Token.Type.Identifier, "bamf");
	CheckLineNum(lexer.lineNum, 4);
	Check(lexer.Dequeue(), Token.Type.EOL);
	UnitTest.ErrorIf(!lexer.AtEnd, "AtEnd not set when it should be");
}
// Find the last token in the given source, ignoring any whitespace
// or comment at the end of that line.
public static Token LastToken(string source) {
	// Start by finding the start and logical end of the last line.
	int startPos = source.LastIndexOf('\n') + 1;
	int commentStart = CommentStartPos(source, startPos);

	// Walk back from end of string or start of comment, skipping whitespace.
	int endPos = (commentStart >= 0 ? commentStart - 1 : source.Length - 1);
	while (endPos >= 0 && IsWhitespace(source[endPos])) { endPos--; }
	// Nothing but whitespace (and maybe a comment)?  Then the last token is EOL.
	if (endPos < 0) { return(Token.EOL); }

	// Find the start of that last token.
	// There are several cases to consider here.
	int tokStart = endPos;
	char c = source[endPos];
	if (IsIdentifier(c)) {
		// Identifier/number/keyword: walk back over identifier characters.
		while (tokStart > startPos && IsIdentifier(source[tokStart - 1])) { tokStart--; }
	} else if (c == '"') {
		// String literal: walk back to its opening quote, treating a doubled
		// quote ("") as an escaped quote rather than a string boundary.
		bool inQuote = true;
		while (tokStart > startPos) {
			tokStart--;
			if (source[tokStart] == '"') {
				inQuote = !inQuote;
				// Stop at a lone quote that closes the (reversed) scan,
				// i.e. one not immediately preceded by another quote.
				if (!inQuote && tokStart > startPos && source[tokStart - 1] != '"') { break; }
			}
		}
	} else if (c == '=' && tokStart > startPos) {
		// Two-character comparison operators: >=, <=, ==, !=.
		char c2 = source[tokStart - 1];
		if (c2 == '>' || c2 == '<' || c2 == '=' || c2 == '!') { tokStart--; }
	}

	// Now use the standard lexer to grab just that bit.
	Lexer lex = new Lexer(source);
	lex.position = tokStart;
	return(lex.Dequeue());
}
// Convert one chunk of MiniScript source into rich-text markup, and report
// how it affects indentation.
//   code: source text to style
//   outdentThis: how many levels THIS line should outdent (e.g. "end if", "else")
//   indentNext: how many levels the NEXT line should indent (e.g. after "if...then")
// Returns: code wrapped in <color>-style markup tags.
public string Markup(string code, out int outdentThis, out int indentNext) {
	var sb = new StringBuilder();
	var lexer = new Lexer(code);
	int parenDepth = 0, squareDepth = 0;
	outdentThis = indentNext = 0;
	bool statementStart = true;
	bool ifStatement = false;	// NOTE(review): assigned but never read — confirm whether still needed
	bool justSawThen = false;
	while (!lexer.AtEnd) {
		int start = lexer.position;
		// grab whitespace (normally skipped by the lexer)
		if (Lexer.IsWhitespace(code[lexer.position])) {
			while (!lexer.AtEnd && Lexer.IsWhitespace(code[lexer.position])) lexer.position++;
			sb.Append(code.Substring(start, lexer.position - start));
			if (lexer.AtEnd) break;
			start = lexer.position;
		}
		// also check for a comment (which the lexer would also skip over)
		// BUGFIX: bound was code.Length - 2, which missed a comment whose "//"
		// occupies the last two characters of the string.
		if (lexer.position < code.Length - 1 && code[lexer.position] == '/' && code[lexer.position + 1] == '/') {
			while (!lexer.AtEnd && code[lexer.position] != '\n') lexer.position++;
			sb.Append(comment.startTags);
			sb.Append(code.Substring(start, lexer.position - start));
			sb.Append(comment.endTags);
			if (lexer.AtEnd) break;
			start = lexer.position;
		}
		// now, grab and process the next token (being sure to catch and handle lexer exceptions)
		Token tok = null;
		try {
			tok = lexer.Dequeue();
		} catch (LexerException) {
			tok = new Token();
			lexer.position = code.Length;	// bail out: treat the rest as consumed
		}
		if (tok.text == "self") {
			tok.type = Token.Type.Keyword;	// (special case)
		}
		if (justSawThen && tok.type != Token.Type.Comment && tok.type != Token.Type.EOL) {
			// If anything (other than a comment) comes after "then", then this
			// is a single-line if, and should not indent the next line.
			indentNext = 0;
		}
		justSawThen = false;
		switch (tok.type) {
		case Token.Type.Keyword:
			sb.Append(keyword.startTags);
			sb.Append(tok.text);
			sb.Append(keyword.endTags);
			// Styling's done, but also figure out how this keyword changes indentation.
			if (statementStart) {
				if (tok.text == "if") {
					indentNext++;
					ifStatement = true;
				} else ifStatement = false;
				if (tok.text == "while" || tok.text == "for" || tok.text == "else" || tok.text == "else if") indentNext++;
				if (tok.text.StartsWith("end ") || tok.text == "else" || tok.text == "else if") outdentThis++;
			} else {
				if (tok.text == "function") indentNext++;
				if (tok.text == "then") justSawThen = true;
			}
			break;
		case Token.Type.Colon:
			sb.Append(colon.startTags);
			sb.Append(":");
			sb.Append(colon.endTags);
			// NOTE(review): this flag is unconditionally cleared at the bottom
			// of the loop, so it never survives past this token — confirm
			// whether the clear below was meant to skip Colon/EOL tokens.
			statementStart = true;
			break;
		case Token.Type.Identifier:
			sb.Append(identifier.startTags);
			sb.Append(tok.text);
			sb.Append(identifier.endTags);
			break;
		case Token.Type.String:
			sb.Append(stringLiteral.startTags);
			sb.Append("\"");	// (note that lexer strips the surrounding quotes)
			sb.Append(tok.text.Replace("\"", "\"\""));	// and un-doubles internal quotes
			sb.Append("\"");
			sb.Append(stringLiteral.endTags);
			break;
		case Token.Type.Number:
			sb.Append(numericLiteral.startTags);
			sb.Append(tok.text);
			sb.Append(numericLiteral.endTags);
			break;
		case Token.Type.LParen:
		case Token.Type.RParen: {
			if (tok.type == Token.Type.LParen) parenDepth++;
			// Pick the paren color: rotated by nesting depth, or the fixed base color.
			// BUGFIX: the non-rotating branch previously appended ">(</color>"
			// with no opening <color=#...> tag, producing malformed markup;
			// both branches now emit a complete, balanced color span.
			Color color;
			if (rotatingParenColors) {
				float h, s, v;
				Color.RGBToHSV(baseParenColor, out h, out s, out v);
				h = Mathf.Repeat(h + 0.22f * (parenDepth - 1), 1);
				color = Color.HSVToRGB(h, s, v);
			} else {
				color = baseParenColor;
			}
			if (parenDepth < 1) color = Color.red;	// flag an unbalanced ")"
			sb.Append("<color=#");
			sb.Append(ColorUtility.ToHtmlStringRGB(color));
			sb.Append(tok.type == Token.Type.LParen ? ">(</color>" : ">)</color>");
			if (tok.type == Token.Type.RParen) parenDepth--;
		}	break;
		case Token.Type.LSquare:
		case Token.Type.RSquare: {
			if (tok.type == Token.Type.LSquare) squareDepth++;
			// Same scheme as parens (and same BUGFIX) for square brackets.
			Color color;
			if (rotatingSquareColors) {
				float h, s, v;
				Color.RGBToHSV(baseSquareColor, out h, out s, out v);
				h = Mathf.Repeat(h + 0.22f * (squareDepth - 1), 1);
				color = Color.HSVToRGB(h, s, v);
			} else {
				color = baseSquareColor;
			}
			if (squareDepth < 1) color = Color.red;	// flag an unbalanced "]"
			sb.Append("<color=#");
			sb.Append(ColorUtility.ToHtmlStringRGB(color));
			sb.Append(tok.type == Token.Type.LSquare ? ">[</color>" : ">]</color>");
			if (tok.type == Token.Type.RSquare) squareDepth--;
		}	break;
		case Token.Type.Unknown:
			if (code[start] == '"') {
				// An unterminated string literal: style it as an "open string".
				sb.Append(openString.startTags);
				sb.Append(code.Substring(start, lexer.position - start));
				sb.Append(openString.endTags);
			} else {
				sb.Append(code.Substring(start, lexer.position - start));
			}
			break;
		default:
			// Anything else (operators, EOL, etc.) gets the operator style.
			sb.Append(operators.startTags);
			sb.Append(code.Substring(start, lexer.position - start));
			sb.Append(operators.endTags);
			break;
		}
		statementStart = false;
	}
	return(sb.ToString());
}
// Markup variant that writes the styled result into a SourceLine instead of
// returning a string (avoids per-call string allocation).  Unlike the string
// overload, this one does not compute indentation changes.
//   dst: destination line, reset and then filled with styled text
//   code: source line to style
public void Markup(ref SourceLine dst, SourceLine code) {
	dst.Reset();
	var lexer = new Lexer(code);
	int parenDepth = 0, squareDepth = 0;
	#pragma warning disable CS0219 // Variable is assigned but its value is never used
	bool statementStart = true;
	bool ifStatement = false;
	bool justSawThen = false;
	#pragma warning restore CS0219 // Variable is assigned but its value is never used
	while (!lexer.AtEnd) {
		int start = lexer.position;
		// grab whitespace (normally skipped by the lexer)
		if (Lexer.IsWhitespace(code[lexer.position])) {
			while (!lexer.AtEnd && Lexer.IsWhitespace(code[lexer.position])) lexer.position++;
			dst.Append(ref code, start, lexer.position - start);
			if (lexer.AtEnd) break;
			start = lexer.position;
		}
		// also check for a comment (which the lexer would also skip over)
		// BUGFIX: bound was code.Length - 2, which missed a comment whose "//"
		// occupies the last two characters of the line.
		if (lexer.position < code.Length - 1 && code[lexer.position] == '/' && code[lexer.position + 1] == '/') {
			while (!lexer.AtEnd && code[lexer.position] != '\n') lexer.position++;
			dst.Append(comment.startTags);
			dst.Append(ref code, start, lexer.position - start);
			dst.Append(comment.endTags);
			if (lexer.AtEnd) break;
			start = lexer.position;
		}
		// now, grab and process the next token (being sure to catch and handle lexer exceptions)
		Token tok = null;
		try {
			tok = lexer.Dequeue();
		} catch (LexerException) {
			tok = new Token();
			lexer.position = code.Length;	// bail out: treat the rest as consumed
		}
		if (tok.text == "self") {
			tok.type = Token.Type.Keyword;	// (special case)
		}
		switch (tok.type) {
		case Token.Type.Keyword:
			dst.Append(keyword.startTags);
			dst.Append(tok.text);
			dst.Append(keyword.endTags);
			break;
		case Token.Type.Colon:
			dst.Append(colon.startTags);
			dst.Append(':');
			dst.Append(colon.endTags);
			statementStart = true;
			break;
		case Token.Type.Identifier:
			dst.Append(identifier.startTags);
			dst.Append(tok.text);
			dst.Append(identifier.endTags);
			break;
		case Token.Type.String:
			dst.Append(stringLiteral.startTags);
			dst.Append('\"');	// (note that lexer strips the surrounding quotes)
			dst.Append(tok.text.Replace("\"", "\"\""));	// and un-doubles internal quotes
			dst.Append('\"');
			dst.Append(stringLiteral.endTags);
			break;
		case Token.Type.Number:
			dst.Append(numericLiteral.startTags);
			dst.Append(tok.text);
			dst.Append(numericLiteral.endTags);
			break;
		case Token.Type.LParen:
		case Token.Type.RParen: {
			if (tok.type == Token.Type.LParen) parenDepth++;
			// Pick the paren color: rotated by nesting depth, or the fixed base color.
			// BUGFIX: the non-rotating branch previously appended "(</color>"
			// with no opening <color=#...> tag, producing malformed markup;
			// both branches now emit a complete, balanced color span.
			Color color;
			if (rotatingParenColors) {
				float h, s, v;
				Color.RGBToHSV(baseParenColor, out h, out s, out v);
				h = Mathf.Repeat(h + 0.22f * (parenDepth - 1), 1);
				color = Color.HSVToRGB(h, s, v);
			} else {
				color = baseParenColor;
			}
			if (parenDepth < 1) color = Color.red;	// flag an unbalanced ")"
			dst.Append("<color=#");	// TODO cache html string
			dst.Append(ColorUtility.ToHtmlStringRGB(color));
			dst.Append(tok.type == Token.Type.LParen ? ">(</color>" : ">)</color>");
			if (tok.type == Token.Type.RParen) parenDepth--;
		}	break;
		case Token.Type.LSquare:
		case Token.Type.RSquare: {
			if (tok.type == Token.Type.LSquare) squareDepth++;
			// Same scheme as parens (and same BUGFIX) for square brackets.
			Color color;
			if (rotatingSquareColors) {
				float h, s, v;
				Color.RGBToHSV(baseSquareColor, out h, out s, out v);
				h = Mathf.Repeat(h + 0.22f * (squareDepth - 1), 1);
				color = Color.HSVToRGB(h, s, v);
			} else {
				color = baseSquareColor;
			}
			if (squareDepth < 1) color = Color.red;	// flag an unbalanced "]"
			dst.Append("<color=#");
			dst.Append(ColorUtility.ToHtmlStringRGB(color));
			dst.Append(tok.type == Token.Type.LSquare ? ">[</color>" : ">]</color>");
			if (tok.type == Token.Type.RSquare) squareDepth--;
		}	break;
		case Token.Type.Unknown:
			if (code[start] == '"') {
				// An unterminated string literal: style it as an "open string".
				dst.Append(openString.startTags);
				dst.Append(ref code, start, lexer.position - start);
				dst.Append(openString.endTags);
			} else {
				dst.Append(ref code, start, lexer.position - start);
			}
			break;
		default:
			// Anything else (operators, EOL, etc.) gets the operator style.
			dst.Append(operators.startTags);
			dst.Append(ref code, start, lexer.position - start);
			dst.Append(operators.endTags);
			break;
		}
		statementStart = false;
	}
}