Ejemplo n.º 1
0
        public void LexerStateToDFATest()
        {
            Lexicon    lexicon  = new Lexicon();
            LexerState global   = lexicon.DefaultLexer;
            LexerState keywords = global.CreateSubState();
            LexerState xml      = keywords.CreateSubState();

            var ID = global.DefineToken(RE.Range('a', 'z').Concat(
                                            (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var NUM   = global.DefineToken(RE.Range('0', '9').Many1());
            var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));

            var IF   = keywords.DefineToken(RE.Literal("if"));
            var ELSE = keywords.DefineToken(RE.Literal("else"));

            var XMLNS = xml.DefineToken(RE.Literal("xmlns"));


            DFAModel dfa = DFAModel.Create(lexicon);

            CompressedTransitionTable tc = CompressedTransitionTable.Compress(dfa);

            ScannerInfo si = lexicon.CreateScannerInfo();

            FiniteAutomationEngine engine = new FiniteAutomationEngine(si.TransitionTable, si.CharClassTable);

            engine.InputString("if");

            Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("12345");
            Assert.AreEqual(NUM.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("asdf12dd");
            Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("A");
            Assert.AreEqual(ERROR.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("AAA");
            Assert.IsTrue(engine.IsAtStoppedState);

            engine.Reset();
            engine.InputString("if ");
            Assert.IsTrue(engine.IsAtStoppedState);

            engine.Reset();
            si.LexerStateIndex = keywords.Index;
            engine.InputString("if");
            Assert.AreEqual(IF.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("else");
            Assert.AreEqual(ELSE.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("xmlns");
            Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            si.LexerStateIndex = xml.Index;
            engine.InputString("if");
            Assert.AreEqual(IF.Index, si.GetTokenIndex(engine.CurrentState));

            engine.Reset();
            engine.InputString("xml");
            Assert.IsFalse(engine.IsAtStoppedState);

            engine.Reset();
            engine.InputString("xmlns");
            Assert.AreEqual(XMLNS.Index, si.GetTokenIndex(engine.CurrentState));
            ;
        }
Ejemplo n.º 2
0
        public void CompactCharSetTest()
        {
            Lexicon    lexicon  = new Lexicon();
            LexerState global   = lexicon.DefaultLexer;
            LexerState keywords = global.CreateSubState();
            LexerState xml      = keywords.CreateSubState();

            var lettersCategories = new[] { UnicodeCategory.LetterNumber,
                                            UnicodeCategory.LowercaseLetter,
                                            UnicodeCategory.ModifierLetter,
                                            UnicodeCategory.OtherLetter,
                                            UnicodeCategory.TitlecaseLetter,
                                            UnicodeCategory.UppercaseLetter };

            var RE_IDCHAR = RE.CharsOf(c => lettersCategories.Contains(Char.GetUnicodeCategory(c)));


            var ID = global.DefineToken(RE_IDCHAR.Concat(
                                            (RE_IDCHAR | RE.Range('0', '9')).Many()));
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());

            var IF   = keywords.DefineToken(RE.Literal("if"));
            var ELSE = keywords.DefineToken(RE.Literal("else"));

            var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

            var scannerInfo = lexicon.CreateScannerInfo();

            scannerInfo.LexerStateIndex = xml.Index;

            Scanner s = new Scanner(scannerInfo);

            string source = "xmlns 你好吗1 123 蘏臦囧綗 ABCD if";

            SourceReader sr = new SourceReader(new StringReader(source));

            s.SetSource(sr);
            s.SetTriviaTokens(WHITESPACE.Index);

            var l1 = s.Read();

            Assert.AreEqual(XMLNS.Index, l1.TokenIndex);

            var l2 = s.Read();

            Assert.AreEqual(ID.Index, l2.TokenIndex);

            var l3 = s.Read();

            Assert.AreEqual(NUM.Index, l3.TokenIndex);

            var l4 = s.Read();

            Assert.AreEqual(ID.Index, l4.TokenIndex);

            var l5 = s.Read();

            Assert.AreEqual(ID.Index, l5.TokenIndex);

            var l6 = s.Read();

            Assert.AreEqual(IF.Index, l6.TokenIndex);
        }
Ejemplo n.º 3
0
        public void SkipTokenTest()
        {
            Lexicon    lexicon  = new Lexicon();
            LexerState global   = lexicon.DefaultLexer;
            LexerState keywords = global.CreateSubState();
            LexerState xml      = keywords.CreateSubState();

            var ID = global.DefineToken(RE.Range('a', 'z').Concat(
                                            (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
            var ERROR      = global.DefineToken(RE.Range(Char.MinValue, (char)255));

            var IF   = keywords.DefineToken(RE.Literal("if"));
            var ELSE = keywords.DefineToken(RE.Literal("else"));

            var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

            ScannerInfo     info    = lexicon.CreateScannerInfo();
            PeekableScanner scanner = new PeekableScanner(info);

            string       source = "asdf04a 1107 else Z if vvv xmlns 772737";
            StringReader sr     = new StringReader(source);

            scanner.SetSource(new SourceReader(sr));
            scanner.SetTriviaTokens(WHITESPACE.Index, ERROR.Index);
            info.LexerStateIndex = xml.Index;

            Lexeme l1 = scanner.Read();

            Assert.AreEqual(ID.Index, l1.TokenIndex);
            Assert.AreEqual("asdf04a", l1.Value);
            Assert.AreEqual(0, l1.PrefixTrivia.Count);

            Lexeme l2 = scanner.Read();

            Assert.AreEqual(NUM.Index, l2.TokenIndex);
            Assert.AreEqual("1107", l2.Value);
            Assert.AreEqual(1, l2.PrefixTrivia.Count);

            Lexeme l3 = scanner.Read();

            Assert.AreEqual(ELSE.Index, l3.TokenIndex);
            Assert.AreEqual("else", l3.Value);
            Assert.AreEqual(1, l2.PrefixTrivia.Count);

            Lexeme l4 = scanner.Read();

            Assert.AreEqual(IF.Index, l4.TokenIndex);
            Assert.AreEqual("if", l4.Value);
            Assert.AreEqual(3, l4.PrefixTrivia.Count);


            int p1 = scanner.Peek();

            Assert.AreEqual(ID.Index, p1);

            int p2   = scanner.Peek2();
            int p3   = scanner.Peek(3);
            int peof = scanner.Peek(4);

            Assert.AreEqual(info.EndOfStreamTokenIndex, peof);

            Lexeme l6 = scanner.Read();
            Lexeme l7 = scanner.Read();

            Assert.AreEqual(XMLNS.Index, l7.TokenIndex);

            Lexeme l8 = scanner.Read();

            Assert.AreEqual(NUM.Index, l8.TokenIndex);

            Lexeme leof = scanner.Read();

            Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
            Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
            Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);
        }
Ejemplo n.º 4
0
        public void ScannerTest()
        {
            Lexicon    lexicon  = new Lexicon();
            LexerState global   = lexicon.DefaultLexer;
            LexerState keywords = global.CreateSubState();
            LexerState xml      = keywords.CreateSubState();

            var ID = global.DefineToken(RE.Range('a', 'z').Concat(
                                            (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
            var ERROR      = global.DefineToken(RE.Range(Char.MinValue, (char)255));

            var IF   = keywords.DefineToken(RE.Literal("if"));
            var ELSE = keywords.DefineToken(RE.Literal("else"));

            var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

            ScannerInfo     info    = lexicon.CreateScannerInfo();
            PeekableScanner scanner = new PeekableScanner(info);

            string       source = "asdf04a 1107 else Z if vvv xmlns 772737";
            StringReader sr     = new StringReader(source);

            scanner.SetSource(new SourceReader(sr));

            Lexeme l1 = scanner.Read();

            Assert.AreEqual(ID.Index, l1.TokenIndex);
            Assert.AreEqual("asdf04a", l1.Value);
            Assert.AreEqual(0, l1.Span.StartLocation.Column);
            Assert.AreEqual(6, l1.Span.EndLocation.Column);

            Lexeme l2 = scanner.Read();

            Assert.AreEqual(WHITESPACE.Index, l2.TokenIndex);
            Assert.AreEqual(" ", l2.Value);

            Lexeme l3 = scanner.Read();

            Assert.AreEqual(NUM.Index, l3.TokenIndex);
            Assert.AreEqual("1107", l3.Value);

            Lexeme l4 = scanner.Read();

            Assert.AreEqual(WHITESPACE.Index, l4.TokenIndex);

            Lexeme l5 = scanner.Read();

            Assert.AreEqual(ID.Index, l5.TokenIndex);

            int p1 = scanner.Peek();

            Assert.AreEqual(WHITESPACE.Index, p1);

            int p2 = scanner.Peek2();

            Assert.AreEqual(ERROR.Index, p2);

            int p3 = scanner.Peek(3);

            Assert.AreEqual(WHITESPACE.Index, p3);

            int p4 = scanner.Peek(4);

            Assert.AreEqual(ID.Index, p4);

            int p5 = scanner.Peek(5);

            Assert.AreEqual(WHITESPACE.Index, p5);

            Lexeme l6 = scanner.Read();
            Lexeme l7 = scanner.Read();

            Assert.AreEqual(ERROR.Index, l7.TokenIndex);

            int p3_2 = scanner.Peek();

            Assert.AreEqual(p3, p3_2);

            Lexeme l8   = scanner.Read(); // whitespace
            Lexeme l9   = scanner.Read(); // ID:if
            Lexeme l10  = scanner.Read(); // whitespace
            Lexeme l11  = scanner.Read(); // ID:vvv
            Lexeme l12  = scanner.Read(); // whitespace
            Lexeme l13  = scanner.Read(); // ID:xmlns
            Lexeme l14  = scanner.Read(); // whitespace
            Lexeme l15  = scanner.Read(); // NUM:772737
            Lexeme leof = scanner.Read(); // eof

            Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
            Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
            Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);

            Lexeme leof2 = scanner.Read(); //after eof, should return eof again

            Assert.AreEqual(info.EndOfStreamTokenIndex, leof2.TokenIndex);
            Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof2.Span.StartLocation.CharIndex);
        }
Ejemplo n.º 5
0
        protected override void OnDefineLexer(Compilers.Scanners.Lexicon lexicon, ICollection <Token> skippedTokens)
        {
            var lettersCategories = new HashSet <UnicodeCategory>()
            {
                UnicodeCategory.LetterNumber,
                UnicodeCategory.LowercaseLetter,
                UnicodeCategory.ModifierLetter,
                UnicodeCategory.OtherLetter,
                UnicodeCategory.TitlecaseLetter,
                UnicodeCategory.UppercaseLetter
            };

            RE RE_IdChar             = null;
            RE RE_SpaceChar          = null;
            RE RE_InputChar          = null;
            RE RE_NotSlashOrAsterisk = null;

            CharSetExpressionBuilder charSetBuilder = new CharSetExpressionBuilder();

            charSetBuilder.DefineCharSet(c => lettersCategories.Contains(Char.GetUnicodeCategory(c)), re => RE_IdChar          = re | RE.Symbol('_'));
            charSetBuilder.DefineCharSet(c => Char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator, re => RE_SpaceChar = re);
            charSetBuilder.DefineCharSet(c => "\u000D\u000A\u0085\u2028\u2029".IndexOf(c) < 0, re => RE_InputChar = re);
            charSetBuilder.DefineCharSet(c => "/*".IndexOf(c) < 0, re => RE_NotSlashOrAsterisk = re);

            charSetBuilder.Build();

            var lex = lexicon.Lexer;

            //keywords
            K_CLASS     = lex.DefineToken(RE.Literal("class"));
            K_PUBLIC    = lex.DefineToken(RE.Literal("public"));
            K_STATIC    = lex.DefineToken(RE.Literal("static"));
            K_VOID      = lex.DefineToken(RE.Literal("void"));
            K_MAIN      = lex.DefineToken(RE.Literal("Main"));
            K_STRING    = lex.DefineToken(RE.Literal("string"));
            K_RETURN    = lex.DefineToken(RE.Literal("return"));
            K_INT       = lex.DefineToken(RE.Literal("int"));
            K_BOOL      = lex.DefineToken(RE.Literal("bool"));
            K_IF        = lex.DefineToken(RE.Literal("if"));
            K_ELSE      = lex.DefineToken(RE.Literal("else"));
            K_WHILE     = lex.DefineToken(RE.Literal("while"));
            K_SYSTEM    = lex.DefineToken(RE.Literal("System"));
            K_CONSOLE   = lex.DefineToken(RE.Literal("Console"));
            K_WRITELINE = lex.DefineToken(RE.Literal("WriteLine"));
            K_LENGTH    = lex.DefineToken(RE.Literal("Length"));
            K_TRUE      = lex.DefineToken(RE.Literal("true"));
            K_FALSE     = lex.DefineToken(RE.Literal("false"));
            K_THIS      = lex.DefineToken(RE.Literal("this"));
            K_NEW       = lex.DefineToken(RE.Literal("new"));

            //id & literals

            ID = lex.DefineToken(RE_IdChar >>
                                 (RE_IdChar | RE.Range('0', '9')).Many(), "identifier");
            INTEGER_LITERAL = lex.DefineToken(RE.Range('0', '9').Many1(), "integer literal");

            //symbols

            LOGICAL_AND = lex.DefineToken(RE.Literal("&&"));
            LOGICAL_OR  = lex.DefineToken(RE.Literal("||"));
            LOGICAL_NOT = lex.DefineToken(RE.Symbol('!'));
            LESS        = lex.DefineToken(RE.Symbol('<'));
            GREATER     = lex.DefineToken(RE.Symbol('>'));
            EQUAL       = lex.DefineToken(RE.Literal("=="));
            ASSIGN      = lex.DefineToken(RE.Symbol('='));
            PLUS        = lex.DefineToken(RE.Symbol('+'));
            MINUS       = lex.DefineToken(RE.Symbol('-'));
            ASTERISK    = lex.DefineToken(RE.Symbol('*'));
            SLASH       = lex.DefineToken(RE.Symbol('/'));
            LEFT_PH     = lex.DefineToken(RE.Symbol('('));
            RIGHT_PH    = lex.DefineToken(RE.Symbol(')'));
            LEFT_BK     = lex.DefineToken(RE.Symbol('['));
            RIGHT_BK    = lex.DefineToken(RE.Symbol(']'));
            LEFT_BR     = lex.DefineToken(RE.Symbol('{'));
            RIGHT_BR    = lex.DefineToken(RE.Symbol('}'));
            COMMA       = lex.DefineToken(RE.Symbol(','));
            COLON       = lex.DefineToken(RE.Symbol(':'));
            SEMICOLON   = lex.DefineToken(RE.Symbol(';'));
            DOT         = lex.DefineToken(RE.Symbol('.'));

            //skips

            WHITESPACE = lex.DefineToken(RE_SpaceChar | RE.CharSet("\u0009\u000B\u000C"));

            LINE_BREAKER = lex.DefineToken(
                RE.CharSet("\u000D\u000A\u0085\u2028\u2029") |
                RE.Literal("\r\n")
                );


            var RE_DelimitedCommentSection = RE.Symbol('/') | (RE.Symbol('*').Many() >> RE_NotSlashOrAsterisk);

            COMMENT = lex.DefineToken(
                (RE.Literal("//") >> RE_InputChar.Many()) |
                (RE.Literal("/*") >> RE_DelimitedCommentSection.Many() >> RE.Symbol('*').Many1() >> RE.Symbol('/')),
                "comment");

            skippedTokens.Add(WHITESPACE);
            skippedTokens.Add(LINE_BREAKER);
            skippedTokens.Add(COMMENT);
        }
Ejemplo n.º 6
0
        public void MultipleLexerParsingTest()
        {
            Lexicon lexicon  = new Lexicon();
            Lexer   global   = lexicon.Lexer;
            Lexer   keywords = global.CreateSubLexer();

            var PROPERTY = global.DefineToken(RE.Literal("property"));
            var ID       = global.DefineToken(RE.Range('a', 'z').Concat(
                                                  (RE.Range('a', 'z') | RE.Range('0', '9')).Many()), "ID");
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1(), "NUM");
            var EQ         = global.DefineToken(RE.Symbol('='));
            var SEMICOLON  = global.DefineToken(RE.Symbol(';'));
            var LB         = global.DefineToken(RE.Symbol('{'));
            var RB         = global.DefineToken(RE.Symbol('}'));
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')), "[ ]");

            var GET = keywords.DefineToken(RE.Literal("get"));

            var assignStatement =
                from id in ID
                from eq in EQ
                from value in NUM
                from st in SEMICOLON
                select id.Value + "=" + value.Value;

            var getDef =
                from _get in GET
                from lb in LB
                from statements in assignStatement.Many()
                from rb in RB
                select new GetDef {
                Statements = statements
            };

            var propDef =
                from _prop in PROPERTY
                from id in ID
                from lb in LB
                from getdef in getDef
                from rb in RB
                select new PropDef {
                PropName = id.Value.Content, GetDef = getdef
            };

            string       source = "property get { get { get = 1; } }";
            SourceReader sr     = new SourceReader(
                new StringReader(source));

            var     info    = lexicon.CreateScannerInfo();
            Scanner scanner = new Scanner(info);

            scanner.SetTriviaTokens(WHITESPACE.Index);
            scanner.SetSource(sr);

            CompilationErrorManager errorManager = new CompilationErrorManager();

            errorManager.DefineError(1, 0, CompilationStage.Parsing, "Unexpected token '{0}'");
            errorManager.DefineError(2, 0, CompilationStage.Parsing, "Missing token '{0}'");
            errorManager.DefineError(3, 0, CompilationStage.Parsing, "Syntax error");

            ProductionInfoManager pim = new ProductionInfoManager(propDef.SuffixedBy(Grammar.Eos()));

            LR0Model lr0 = new LR0Model(pim);

            lr0.BuildModel();

            string dot = lr0.ToString();

            TransitionTable tt = TransitionTable.Create(lr0, info);

            SyntaxErrors errDef = new SyntaxErrors()
            {
                TokenUnexpectedId = 1, TokenMissingId = 2, OtherErrorId = 3
            };

            ParserEngine driver = new ParserEngine(tt, errDef);

            Lexeme r;

            do
            {
                r = scanner.Read();

                driver.Input(r);
            } while (!r.IsEndOfStream);

            var el = errorManager.CreateErrorList();

            var result = (PropDef)driver.GetResult(0, el);

            Assert.AreEqual(0, el.Count);
            Assert.AreEqual("get", result.PropName);
            Assert.AreEqual("get=1", result.GetDef.Statements.First());
        }