예제 #1
0
        /// <summary>
        /// Defines every token of the language on <c>lexicon.Lexer</c>: keywords,
        /// the identifier and integer-literal tokens, operators/punctuation, and
        /// finally the whitespace, line-break and comment tokens, which are also
        /// added to <paramref name="skippedTokens"/>.
        /// </summary>
        /// <param name="lexicon">Lexicon whose <c>Lexer</c> receives the token definitions.</param>
        /// <param name="skippedTokens">
        /// Collection that receives the WHITESPACE, LINE_BREAKER and COMMENT tokens.
        /// </param>
        protected override void OnDefineLexer(Compilers.Scanners.Lexicon lexicon, ICollection <Token> skippedTokens)
        {
            // CR, LF, NEL, LS, PS: the characters treated as line terminators.
            // Shared by the "input character" set and the LINE_BREAKER token.
            const string lineTerminators = "\u000D\u000A\u0085\u2028\u2029";

            // Unicode categories accepted as identifier characters.
            var letterCategories = new HashSet<UnicodeCategory>
            {
                UnicodeCategory.UppercaseLetter,
                UnicodeCategory.LowercaseLetter,
                UnicodeCategory.TitlecaseLetter,
                UnicodeCategory.ModifierLetter,
                UnicodeCategory.OtherLetter,
                UnicodeCategory.LetterNumber
            };

            // Filled in by the callbacks handed to the char-set builder below.
            RE idChar = null;
            RE spaceChar = null;
            RE inputChar = null;
            RE notSlashOrAsterisk = null;

            var builder = new CharSetExpressionBuilder();

            // Letter (per the categories above) or underscore.
            builder.DefineCharSet(
                c => letterCategories.Contains(Char.GetUnicodeCategory(c)),
                re => idChar = re | RE.Symbol('_'));

            // Any character in the Unicode space-separator category.
            builder.DefineCharSet(
                c => Char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator,
                re => spaceChar = re);

            // Any character that is not a line terminator.
            builder.DefineCharSet(
                c => lineTerminators.IndexOf(c) < 0,
                re => inputChar = re);

            // Any character other than '/' and '*'.
            builder.DefineCharSet(
                c => "/*".IndexOf(c) < 0,
                re => notSlashOrAsterisk = re);

            // NOTE(review): the expressions above are only used after this call;
            // presumably Build() is what invokes the callbacks - confirm.
            builder.Build();

            var lexer = lexicon.Lexer;

            // ---- keywords ----
            // NOTE(review): keywords are defined before ID; presumably definition
            // order decides which token wins when both match - keep this order.
            K_CLASS = lexer.DefineToken(RE.Literal("class"));
            K_PUBLIC = lexer.DefineToken(RE.Literal("public"));
            K_STATIC = lexer.DefineToken(RE.Literal("static"));
            K_VOID = lexer.DefineToken(RE.Literal("void"));
            K_MAIN = lexer.DefineToken(RE.Literal("Main"));
            K_STRING = lexer.DefineToken(RE.Literal("string"));
            K_RETURN = lexer.DefineToken(RE.Literal("return"));
            K_INT = lexer.DefineToken(RE.Literal("int"));
            K_BOOL = lexer.DefineToken(RE.Literal("bool"));
            K_IF = lexer.DefineToken(RE.Literal("if"));
            K_ELSE = lexer.DefineToken(RE.Literal("else"));
            K_WHILE = lexer.DefineToken(RE.Literal("while"));
            K_SYSTEM = lexer.DefineToken(RE.Literal("System"));
            K_CONSOLE = lexer.DefineToken(RE.Literal("Console"));
            K_WRITELINE = lexer.DefineToken(RE.Literal("WriteLine"));
            K_LENGTH = lexer.DefineToken(RE.Literal("Length"));
            K_TRUE = lexer.DefineToken(RE.Literal("true"));
            K_FALSE = lexer.DefineToken(RE.Literal("false"));
            K_THIS = lexer.DefineToken(RE.Literal("this"));
            K_NEW = lexer.DefineToken(RE.Literal("new"));

            // ---- identifier & literals ----
            // Identifier: one id character followed by any number of id characters or digits.
            var idTail = (idChar | RE.Range('0', '9')).Many();
            ID = lexer.DefineToken(idChar >> idTail, "identifier");

            // Integer literal: one or more decimal digits.
            var digits = RE.Range('0', '9').Many1();
            INTEGER_LITERAL = lexer.DefineToken(digits, "integer literal");

            // ---- operators & punctuation ----
            LOGICAL_AND = lexer.DefineToken(RE.Literal("&&"));
            LOGICAL_OR = lexer.DefineToken(RE.Literal("||"));
            LOGICAL_NOT = lexer.DefineToken(RE.Symbol('!'));
            LESS = lexer.DefineToken(RE.Symbol('<'));
            GREATER = lexer.DefineToken(RE.Symbol('>'));
            EQUAL = lexer.DefineToken(RE.Literal("=="));
            ASSIGN = lexer.DefineToken(RE.Symbol('='));
            PLUS = lexer.DefineToken(RE.Symbol('+'));
            MINUS = lexer.DefineToken(RE.Symbol('-'));
            ASTERISK = lexer.DefineToken(RE.Symbol('*'));
            SLASH = lexer.DefineToken(RE.Symbol('/'));
            LEFT_PH = lexer.DefineToken(RE.Symbol('('));
            RIGHT_PH = lexer.DefineToken(RE.Symbol(')'));
            LEFT_BK = lexer.DefineToken(RE.Symbol('['));
            RIGHT_BK = lexer.DefineToken(RE.Symbol(']'));
            LEFT_BR = lexer.DefineToken(RE.Symbol('{'));
            RIGHT_BR = lexer.DefineToken(RE.Symbol('}'));
            COMMA = lexer.DefineToken(RE.Symbol(','));
            COLON = lexer.DefineToken(RE.Symbol(':'));
            SEMICOLON = lexer.DefineToken(RE.Symbol(';'));
            DOT = lexer.DefineToken(RE.Symbol('.'));

            // ---- skipped tokens ----
            // Space separators plus horizontal tab, vertical tab and form feed.
            var blank = spaceChar | RE.CharSet("\u0009\u000B\u000C");
            WHITESPACE = lexer.DefineToken(blank);

            // A single line-terminator character, or the two-character CR LF sequence.
            var lineBreak = RE.CharSet(lineTerminators) | RE.Literal("\r\n");
            LINE_BREAKER = lexer.DefineToken(lineBreak);

            // One section of a delimited comment body: a '/' or a run of '*'
            // followed by a character that is neither '/' nor '*'.
            var commentSection = RE.Symbol('/') | (RE.Symbol('*').Many() >> notSlashOrAsterisk);

            // "//" up to (not including) the line terminator.
            var lineComment = RE.Literal("//") >> inputChar.Many();

            // "/*", any number of sections, then one or more '*' and the closing '/'.
            var blockComment = RE.Literal("/*") >> commentSection.Many() >> RE.Symbol('*').Many1() >> RE.Symbol('/');

            COMMENT = lexer.DefineToken(lineComment | blockComment, "comment");

            // Hand the three skip tokens to the caller-supplied collection.
            foreach (Token skipped in new Token[] { WHITESPACE, LINE_BREAKER, COMMENT })
            {
                skippedTokens.Add(skipped);
            }
        }