/// <summary>
/// Defines every token of the language on the lexer exposed by
/// <paramref name="lexicon"/> and records which of them are skipped.
/// </summary>
/// <param name="lexicon">Lexicon whose <c>Lexer</c> receives the token definitions.</param>
/// <param name="skippedTokens">
/// Collection to which the whitespace, line-break and comment tokens are added.
/// </param>
/// <remarks>
/// NOTE(review): keywords are defined before <c>ID</c>, and "==" before "=";
/// presumably definition order influences which token wins when several match.
/// The order of the definitions is therefore kept as is. Confirm against the
/// scanner library before reordering anything.
/// </remarks>
protected override void OnDefineLexer(Compilers.Scanners.Lexicon lexicon, ICollection<Token> skippedTokens)
{
    // Line-ending code points: CR, LF, NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR.
    const string lineTerminators = "\u000D\u000A\u0085\u2028\u2029";

    // Unicode categories accepted for a character that may start an identifier.
    var letterCategories = new HashSet<UnicodeCategory>
    {
        UnicodeCategory.LetterNumber,
        UnicodeCategory.LowercaseLetter,
        UnicodeCategory.ModifierLetter,
        UnicodeCategory.OtherLetter,
        UnicodeCategory.TitlecaseLetter,
        UnicodeCategory.UppercaseLetter
    };

    // Character classes; each one is assigned by its callback below.
    RE idStart = null;
    RE spaceSeparator = null;
    RE nonLineTerminator = null;
    RE notSlashOrStar = null;

    var charSets = new CharSetExpressionBuilder();

    // Letters (per the categories above) plus the underscore.
    charSets.DefineCharSet(
        c => letterCategories.Contains(char.GetUnicodeCategory(c)),
        re => idStart = re | RE.Symbol('_'));

    // Characters in the Unicode "space separator" category.
    charSets.DefineCharSet(
        c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator,
        re => spaceSeparator = re);

    // Any character that is not a line terminator.
    charSets.DefineCharSet(
        c => lineTerminators.IndexOf(c) < 0,
        re => nonLineTerminator = re);

    // Any character other than '/' and '*'.
    charSets.DefineCharSet(
        c => c != '/' && c != '*',
        re => notSlashOrStar = re);

    charSets.Build();

    var lex = lexicon.Lexer;

    // ----- keywords -----
    K_CLASS = lex.DefineToken(RE.Literal("class"));
    K_PUBLIC = lex.DefineToken(RE.Literal("public"));
    K_STATIC = lex.DefineToken(RE.Literal("static"));
    K_VOID = lex.DefineToken(RE.Literal("void"));
    K_MAIN = lex.DefineToken(RE.Literal("Main"));
    K_STRING = lex.DefineToken(RE.Literal("string"));
    K_RETURN = lex.DefineToken(RE.Literal("return"));
    K_INT = lex.DefineToken(RE.Literal("int"));
    K_BOOL = lex.DefineToken(RE.Literal("bool"));
    K_IF = lex.DefineToken(RE.Literal("if"));
    K_ELSE = lex.DefineToken(RE.Literal("else"));
    K_WHILE = lex.DefineToken(RE.Literal("while"));
    K_SYSTEM = lex.DefineToken(RE.Literal("System"));
    K_CONSOLE = lex.DefineToken(RE.Literal("Console"));
    K_WRITELINE = lex.DefineToken(RE.Literal("WriteLine"));
    K_LENGTH = lex.DefineToken(RE.Literal("Length"));
    K_TRUE = lex.DefineToken(RE.Literal("true"));
    K_FALSE = lex.DefineToken(RE.Literal("false"));
    K_THIS = lex.DefineToken(RE.Literal("this"));
    K_NEW = lex.DefineToken(RE.Literal("new"));

    // ----- identifiers and literals -----
    // Identifier: a start character followed by any number of start characters or ASCII digits.
    var idRest = (idStart | RE.Range('0', '9')).Many();
    ID = lex.DefineToken(idStart >> idRest, "identifier");

    // Integer literal: one or more ASCII digits.
    INTEGER_LITERAL = lex.DefineToken(RE.Range('0', '9').Many1(), "integer literal");

    // ----- operators and punctuation -----
    LOGICAL_AND = lex.DefineToken(RE.Literal("&&"));
    LOGICAL_OR = lex.DefineToken(RE.Literal("||"));
    LOGICAL_NOT = lex.DefineToken(RE.Symbol('!'));
    LESS = lex.DefineToken(RE.Symbol('<'));
    GREATER = lex.DefineToken(RE.Symbol('>'));
    EQUAL = lex.DefineToken(RE.Literal("=="));
    ASSIGN = lex.DefineToken(RE.Symbol('='));
    PLUS = lex.DefineToken(RE.Symbol('+'));
    MINUS = lex.DefineToken(RE.Symbol('-'));
    ASTERISK = lex.DefineToken(RE.Symbol('*'));
    SLASH = lex.DefineToken(RE.Symbol('/'));
    LEFT_PH = lex.DefineToken(RE.Symbol('('));
    RIGHT_PH = lex.DefineToken(RE.Symbol(')'));
    LEFT_BK = lex.DefineToken(RE.Symbol('['));
    RIGHT_BK = lex.DefineToken(RE.Symbol(']'));
    LEFT_BR = lex.DefineToken(RE.Symbol('{'));
    RIGHT_BR = lex.DefineToken(RE.Symbol('}'));
    COMMA = lex.DefineToken(RE.Symbol(','));
    COLON = lex.DefineToken(RE.Symbol(':'));
    SEMICOLON = lex.DefineToken(RE.Symbol(';'));
    DOT = lex.DefineToken(RE.Symbol('.'));

    // ----- skipped tokens -----
    // Whitespace: a space separator, or horizontal tab / vertical tab / form feed.
    WHITESPACE = lex.DefineToken(spaceSeparator | RE.CharSet("\u0009\u000B\u000C"));

    // Line break: a single line terminator, or the CR LF pair.
    LINE_BREAKER = lex.DefineToken(RE.CharSet(lineTerminators) | RE.Literal("\r\n"));

    // One section inside a delimited comment: either a '/', or any run of '*'
    // followed by a character that is neither '/' nor '*'.
    var commentSection = RE.Symbol('/') | (RE.Symbol('*').Many() >> notSlashOrStar);

    // "//" followed by everything up to (not including) a line terminator.
    var lineComment = RE.Literal("//") >> nonLineTerminator.Many();

    // "/*", any number of sections, then one or more '*' and the closing '/'.
    var blockComment = RE.Literal("/*") >> commentSection.Many() >> RE.Symbol('*').Many1() >> RE.Symbol('/');

    COMMENT = lex.DefineToken(lineComment | blockComment, "comment");

    skippedTokens.Add(WHITESPACE);
    skippedTokens.Add(LINE_BREAKER);
    skippedTokens.Add(COMMENT);
}