Beispiel #1
0
        /// <summary>
        /// Builds the alternation of two parsers for "ID &gt; &gt; NUM" and runs it over two inputs:
        /// "abc &gt;&gt; 123" is expected to produce "A" and "abc &gt; &gt; 123" is expected to produce "B".
        /// The first alternative wraps its second '&gt;' in <c>AsParser</c> with a predicate that
        /// requires the lexeme to carry no prefix trivia.
        /// </summary>
        public void ParserFuncTest()
        {
            var lexicon = new Lexicon();
            var lexer   = lexicon.DefaultLexer;

            // Token definitions.
            var identifier = lexer.DefineToken(RE.Range('a', 'z').Concat(
                                                   (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var number  = lexer.DefineToken(RE.Range('0', '9').Many1());
            var greater = lexer.DefineToken(RE.Symbol('>'));
            var blank   = lexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')));

            // Alternative "A": the second '>' is only accepted when nothing was skipped before it.
            var tightForm = from i in identifier
                            from g in greater
                            from g2 in greater.AsParser(lexeme => lexeme.PrefixTrivia.Count == 0)
                            from n in number
                            select "A";

            // Alternative "B": same token sequence without the predicate.
            var looseForm = from i in identifier
                            from g in greater
                            from g2 in greater
                            from n in number
                            select "B";

            var grammar = tightForm | looseForm;

            // Scanner factory; the blank token is registered as trivia.
            var builder = new ForkableScannerBuilder(lexicon.CreateScannerInfo());
            builder.SetTriviaTokens(blank.Index);

            var errors  = new CompilationErrorManager();
            var context = new ParserContext(errors, 1, 2);
            context.DefineDefaultCompilationErrorInfo(0);

            var runner = new ParserRunner<string>(grammar, context);

            // The two '>' are adjacent.
            ForkableScanner tightScanner = builder.Create(new SourceReader(new StringReader("abc >> 123")));
            var tightResult = runner.Run(tightScanner);

            Assert.AreEqual("A", tightResult);
            Assert.AreEqual(0, errors.Errors.Count);

            // A space separates the two '>'.
            ForkableScanner looseScanner = builder.Create(new SourceReader(new StringReader("abc > > 123")));
            var looseResult = runner.Run(looseScanner);

            Assert.AreEqual("B", looseResult);
            Assert.AreEqual(0, errors.Errors.Count);
        }
Beispiel #2
0
        /// <summary>
        /// Scans "aaa bbb ccc" with <c>ThrowAtReadingAfterEndOfStream</c> enabled and checks the three
        /// ID lexemes, the end-of-stream lexeme, and that one further <c>Read</c> throws
        /// <c>ScannerException</c>.
        /// </summary>
        public void ThrowExceptionAtEosTest()
        {
            Lexicon lexicon = new Lexicon();
            Lexer   global  = lexicon.Lexer;

            var ID = global.DefineToken(RE.Range('a', 'z').Concat(
                                            (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
            var ERROR      = global.DefineToken(RE.Range(Char.MinValue, (char)255));


            ScannerInfo info    = lexicon.CreateScannerInfo();
            Scanner     scanner = new Scanner(info);

            string       source = "aaa bbb ccc";
            StringReader sr     = new StringReader(source);

            scanner.SetSource(new SourceReader(sr));
            // WHITESPACE and ERROR lexemes are registered as trivia.
            scanner.SetTriviaTokens(WHITESPACE.Index, ERROR.Index);
            scanner.ThrowAtReadingAfterEndOfStream = true;

            Lexeme l1 = scanner.Read();

            Assert.AreEqual(ID.Index, l1.TokenIndex);
            Assert.AreEqual("aaa", l1.ToContentString());
            Assert.AreEqual(0, l1.PrefixTrivia.Count);

            Lexeme l2 = scanner.Read();

            Assert.AreEqual(ID.Index, l2.TokenIndex);
            Assert.AreEqual("bbb", l2.ToContentString());
            Assert.AreEqual(1, l2.PrefixTrivia.Count);

            Lexeme l3 = scanner.Read();

            Assert.AreEqual(ID.Index, l3.TokenIndex);
            Assert.AreEqual("ccc", l3.ToContentString());
            Assert.AreEqual(1, l3.PrefixTrivia.Count);

            Lexeme leof = scanner.Read();

            // The end-of-stream lexeme is an empty span located at the end of the input.
            Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
            Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
            Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);

            // Reading once more after end of stream must throw. The outcome is recorded in a flag
            // so that the only assertion runs outside the try block.
            bool threw = false;

            try
            {
                scanner.Read();
            }
            catch (ScannerException)
            {
                threw = true;
            }

            Assert.IsTrue(threw, "The Read above should throw an exception");
        }
Beispiel #3
0
            /// <summary>
            /// Defines this grammar's tokens on the lexicon's lexer: '(', ')', ',' and a
            /// single ASCII letter (registered with the string "ID").
            /// </summary>
            /// <param name="lexicon">Lexicon whose <c>Lexer</c> receives the token definitions.</param>
            /// <param name="triviaTokens">Trivia token collection; nothing is added to it here.</param>
            protected override void OnDefineLexer(Lexicon lexicon, ICollection <Token> triviaTokens)
            {
                var lexer = lexicon.Lexer;

                // punctuation
                LEFTPH  = lexer.DefineToken(RE.Symbol('('));
                RIGHTPH = lexer.DefineToken(RE.Symbol(')'));
                COMMA   = lexer.DefineToken(RE.Symbol(','));

                // exactly one letter, lower or upper case
                var anyLetter = RE.Range('a', 'z') | RE.Range('A', 'Z');
                LETTER = lexer.DefineToken(anyLetter, "ID");
            }
Beispiel #4
0
        /// <summary>
        /// Reads the lower-case alphabet through a forkable scanner and two forks taken at different
        /// positions, checking that every scanner returns the letters following its own position
        /// regardless of how far the other scanners have advanced.
        /// </summary>
        public void ForkableScannerTest()
        {
            const string source = "abcdefghijklmnopqrstuvwxyz";

            // A single token: one lower-case letter.
            var lexicon = new Lexicon();
            lexicon.DefaultLexer.DefineToken(RE.Range('a', 'z'));

            var builder = new ForkableScannerBuilder(lexicon.CreateScannerInfo());
            ForkableScanner root = builder.Create(new SourceReader(new StringReader(source)));

            // The root scanner consumes 'a' and 'b' before any fork exists.
            var first = root.Read();
            Assert.AreEqual("a", first.Value);

            var second = root.Read();
            Assert.AreEqual("b", second.Value);

            // First fork, taken when the next letter is source[2].
            ForkableScanner earlyFork = root.Fork();

            // root reads source[2..4]
            foreach (char expected in source.Substring(2, 3))
            {
                var lexeme = root.Read();
                Assert.AreEqual(expected.ToString(), lexeme.Value);
            }

            // earlyFork reads source[2..5]
            foreach (char expected in source.Substring(2, 4))
            {
                var lexeme = earlyFork.Read();
                Assert.AreEqual(expected.ToString(), lexeme.Value);
            }

            // Second fork, taken from the root when the next letter is source[5].
            ForkableScanner lateFork = root.Fork();

            var fifthFromRoot = root.Read();
            var fifthFromLate = lateFork.Read();

            Assert.AreEqual(source.Substring(5, 1), fifthFromRoot.Value);
            Assert.AreEqual(source.Substring(5, 1), fifthFromLate.Value);

            var sixthFromEarly = earlyFork.Read();
            var sixthFromLate  = lateFork.Read();

            Assert.AreEqual(source.Substring(6, 1), sixthFromLate.Value);
            Assert.AreEqual(source.Substring(6, 1), sixthFromEarly.Value);

            // earlyFork advances once more; the lexeme itself is not inspected.
            earlyFork.Read();

            // lateFork reads source[7..8]
            foreach (char expected in source.Substring(7, 2))
            {
                var lexeme = lateFork.Read();
                Assert.AreEqual(expected.ToString(), lexeme.Value);
            }
        }
Beispiel #5
0
            /// <summary>
            /// Defines the tokens of an arithmetic grammar ('+', '*', parentheses, numbers) and a
            /// run-of-spaces token, which is added to <paramref name="triviaTokens"/>.
            /// </summary>
            /// <param name="lexicon">Lexicon whose <c>Lexer</c> receives the token definitions.</param>
            /// <param name="triviaTokens">Collection that receives the SPACE token.</param>
            protected override void OnDefineLexer(Lexicon lexicon, ICollection <Token> triviaTokens)
            {
                var lex = lexicon.Lexer;

                // operators
                PLUS     = lex.DefineToken(RE.Symbol('+'));
                ASTERISK = lex.DefineToken(RE.Symbol('*'));

                // grouping
                LEFT_PARENTHESIS  = lex.DefineToken(RE.Symbol('('));
                RIGHT_PARENTHESIS = lex.DefineToken(RE.Symbol(')'));

                // one or more decimal digits
                var digits = RE.Range('0', '9').Many1();
                NUMBER = lex.DefineToken(digits, "number");

                // one or more spaces, registered as trivia
                var spaces = RE.Symbol(' ').Many1();
                SPACE = lex.DefineToken(spaces);
                triviaTokens.Add(SPACE);
            }
Beispiel #6
0
            /// <summary>
            /// Defines ID, NUM, '&gt;', '&lt;' and ';' tokens plus a single space-or-tab token that
            /// is added to <paramref name="triviaTokens"/>.
            /// </summary>
            /// <param name="lexicon">Lexicon whose <c>Lexer</c> receives the token definitions.</param>
            /// <param name="triviaTokens">Collection that receives the white-space token.</param>
            protected override void OnDefineLexer(Lexicon lexicon, ICollection <Token> triviaTokens)
            {
                var lexer = lexicon.Lexer;

                // lower-case letter followed by any number of lower-case letters or digits
                var identifierPattern = RE.Range('a', 'z').Concat(
                    (RE.Range('a', 'z') | RE.Range('0', '9')).Many());
                ID = lexer.DefineToken(identifierPattern, "ID");

                // one or more decimal digits
                NUM = lexer.DefineToken(RE.Range('0', '9').Many1(), "NUM");

                // punctuation
                GREATER   = lexer.DefineToken(RE.Symbol('>'));
                LESS      = lexer.DefineToken(RE.Symbol('<'));
                SEMICOLON = lexer.DefineToken(RE.Symbol(';'));

                // a single space or tab, registered as trivia
                var blank = lexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')), "white space");
                triviaTokens.Add(blank);
            }
Beispiel #7
0
        /// <summary>
        /// Scans input that contains characters matched by none of the defined tokens, with
        /// <c>RecoverErrors</c> enabled. The first two reads must succeed without errors; the third
        /// read must still return an ID lexeme while error 101 is recorded in the error list.
        /// </summary>
        public void ErrorRecoveryTest()
        {
            var lexicon = new Lexicon();
            Lexer lexer = lexicon.Lexer;

            var identifier = lexer.DefineToken(RE.Range('a', 'z').Concat(
                                                   (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var number = lexer.DefineToken(RE.Range('0', '9').Many1());
            var blanks = lexer.DefineToken(RE.Symbol(' ').Many());

            var scanner = new Scanner(lexicon.CreateScannerInfo());

            const string source = "asdf04a 1107 !@#$!@ Z if vvv xmlns 772737";

            scanner.SetSource(new SourceReader(new StringReader(source)));
            scanner.SetTriviaTokens(blanks.Index);
            scanner.RecoverErrors = true;

            // Lexical errors are reported under id 101 into this list.
            var errorManager = new CompilationErrorManager();
            errorManager.DefineError(101, 0, CompilationStage.Scanning, "Invalid token: {0}");

            var errorList = errorManager.CreateErrorList();

            scanner.ErrorList      = errorList;
            scanner.LexicalErrorId = 101;

            // "asdf04a"
            Lexeme firstLexeme = scanner.Read();
            Assert.AreEqual(identifier.Index, firstLexeme.TokenIndex);

            // "1107"
            Lexeme secondLexeme = scanner.Read();
            Assert.AreEqual(number.Index, secondLexeme.TokenIndex);

            // Nothing has gone wrong up to this point.
            Assert.AreEqual(0, errorList.Count);

            // The next read has to get past the unmatched characters before it finds an ID.
            Lexeme thirdLexeme = scanner.Read();
            Assert.AreEqual(identifier.Index, thirdLexeme.TokenIndex);

            Assert.IsTrue(errorList.Count > 0);
            Assert.AreEqual(101, errorList[0].Info.Id);
        }
Beispiel #8
0
        /// <summary>
        /// Converts a string-producing parser with <c>TryCast&lt;object&gt;</c>, runs it over
        /// "abc &gt;&gt; 123" and expects the result "hello" with an empty error list.
        /// </summary>
        public void ParserCastTest()
        {
            var lexicon = new Lexicon();
            var lexer   = lexicon.Lexer;

            var identifier = lexer.DefineToken(RE.Range('a', 'z').Concat(
                                                   (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var number  = lexer.DefineToken(RE.Range('0', '9').Many1());
            var greater = lexer.DefineToken(RE.Symbol('>'));
            var blank   = lexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')));

            // ID > > NUM, producing a constant string.
            var typedParser = from i in identifier
                              from g in greater
                              from g2 in greater
                              from n in number
                              select "hello";

            // Same parser, exposed with an object result.
            var objectParser = typedParser.TryCast<object>();

            var builder = new ForkableScannerBuilder(lexicon.CreateScannerInfo());
            builder.SetTriviaTokens(blank.Index);

            var errors  = new CompilationErrorManager();
            var context = new ParserContext(errors, 1, 2);
            context.DefineDefaultCompilationErrorInfo(0);

            var errorList = errors.CreateErrorList();
            context.ErrorList = errorList;

            var runner = new ParserRunner<object>(objectParser, context);

            ForkableScanner scanner = builder.Create(new SourceReader(new StringReader("abc >> 123")));
            var parsed = runner.Run(scanner);

            Assert.AreEqual("hello", parsed);
            Assert.AreEqual(0, errorList.Count);
        }
Beispiel #9
0
        /// <summary>
        /// Builds a DFA model from a lexicon holding a single identifier token and checks the
        /// outgoing edges of states 0 and 1.
        /// </summary>
        public void RegExToDFATest()
        {
            // identifier: a lower-case letter followed by any number of lower-case letters or digits
            var identifierPattern = RE.Range('a', 'z').Concat(
                (RE.Range('a', 'z') | RE.Range('0', '9')).Many());

            var lexicon = new Lexicon();
            lexicon.DefaultLexer.DefineToken(identifierPattern);

            // Constructed as in the DFA setup, but not consulted by the assertions below.
            var converter = new NFAConverter(lexicon.CreateCompactCharSetManager());

            DFAModel dfa = DFAModel.Create(lexicon);

            // State 0: exactly three outgoing edges, each leading back to state 0.
            var stateZero = dfa.States[0];

            Assert.AreEqual(3, stateZero.OutEdges.Count);
            foreach (var edge in stateZero.OutEdges)
            {
                Assert.AreEqual(0, edge.TargetState.Index);
            }

            // State 1 (the initial state): symbol 1 (a..z) must leave state 0,
            // every other symbol must lead to state 0.
            var stateOne = dfa.States[1];

            foreach (var edge in stateOne.OutEdges)
            {
                if (edge.Symbol == 1)
                {
                    Assert.IsTrue(edge.TargetState.Index > 0);
                    continue;
                }

                Assert.AreEqual(0, edge.TargetState.Index);
            }
        }
Beispiel #10
0
        /// <summary>
        /// Feeds "x+x*x" to a parser engine built from an ambiguous expression grammar
        /// (E : E '+' E | E '*' E | T, T : 'x' evaluating to 2) and expects two accepted parses
        /// whose results are 8 and 6.
        /// </summary>
        public void ParserDriverConflictTest()
        {
            var lexicon = new Lexicon();
            var lexer   = lexicon.Lexer;

            var operand    = lexer.DefineToken(RE.Symbol('x'));
            var plusToken  = lexer.DefineToken(RE.Symbol('+'));
            var timesToken = lexer.DefineToken(RE.Symbol('*'));

            var scannerInfo = lexicon.CreateScannerInfo();

            var expression = new Production<object>();
            var term       = new Production<object>();

            // No precedence is declared, so both groupings of "x+x*x" are valid derivations.
            expression.Rule =
                (from e1 in expression
                 from plus in plusToken
                 from e2 in expression
                 select (object)((int)e1 + (int)e2)) |
                (from e1 in expression
                 from mul in timesToken
                 from e2 in expression
                 select (object)((int)e1 * (int)e2)) | term;

            term.Rule =
                from x in operand
                select (object)2;

            var productions = new ProductionInfoManager(expression.SuffixedBy(Grammar.Eos()));

            var model = new LR0Model(productions);
            model.BuildModel();

            // Textual dump of the model; not asserted on.
            string modelDump = model.ToString();

            TransitionTable table  = TransitionTable.Create(model, scannerInfo);
            var             engine = new ParserEngine(table, new SyntaxErrors());

            var scannerBuilder = new ForkableScannerBuilder(scannerInfo);
            var errorManager   = new CompilationErrorManager();

            scannerBuilder.ErrorList = errorManager.CreateErrorList();

            var scanner = scannerBuilder.Create(new SourceReader(new StringReader("x+x*x")));

            // Six lexemes are pushed into the engine: the five tokens of the input
            // plus one further read (presumably the end-of-stream lexeme).
            for (int fed = 0; fed < 6; fed++)
            {
                engine.Input(scanner.Read());
            }

            Assert.AreEqual(0, engine.CurrentStackCount);
            Assert.AreEqual(2, engine.AcceptedCount);

            int firstValue  = (int)engine.GetResult(0, null);
            int secondValue = (int)engine.GetResult(1, null);

            // (2+2)*2 and 2+(2*2), in either order.
            Assert.IsTrue(firstValue == 8 || secondValue == 8);
            Assert.IsTrue(firstValue == 6 || secondValue == 6);
        }
Beispiel #11
0
        /// <summary>
        /// Defines every token of the language on the lexicon's lexer: keywords, identifier and
        /// integer literal, operator and punctuation symbols, and the white-space, line-break and
        /// comment tokens, which are added to <paramref name="skippedTokens"/>.
        /// </summary>
        /// <param name="lexicon">Lexicon whose <c>Lexer</c> receives the token definitions.</param>
        /// <param name="skippedTokens">Collection that receives WHITESPACE, LINE_BREAKER and COMMENT.</param>
        protected override void OnDefineLexer(Compilers.Scanners.Lexicon lexicon, ICollection <Token> skippedTokens)
        {
            // Unicode categories whose characters may appear in an identifier (besides '_').
            var lettersCategories = new HashSet <UnicodeCategory>()
            {
                UnicodeCategory.LetterNumber,
                UnicodeCategory.LowercaseLetter,
                UnicodeCategory.ModifierLetter,
                UnicodeCategory.OtherLetter,
                UnicodeCategory.TitlecaseLetter,
                UnicodeCategory.UppercaseLetter
            };

            // Character-class expressions; assigned by the callbacks registered below.
            RE RE_IdChar             = null;
            RE RE_SpaceChar          = null;
            RE RE_InputChar          = null;
            RE RE_NotSlashOrAsterisk = null;

            CharSetExpressionBuilder charSetBuilder = new CharSetExpressionBuilder();

            // RE_IdChar: any character of the letter categories above, or '_'.
            charSetBuilder.DefineCharSet(c => lettersCategories.Contains(Char.GetUnicodeCategory(c)), re => RE_IdChar          = re | RE.Symbol('_'));
            // RE_SpaceChar: any character of the SpaceSeparator category.
            charSetBuilder.DefineCharSet(c => Char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator, re => RE_SpaceChar = re);
            // RE_InputChar: any character except U+000D, U+000A, U+0085, U+2028 and U+2029.
            charSetBuilder.DefineCharSet(c => "\u000D\u000A\u0085\u2028\u2029".IndexOf(c) < 0, re => RE_InputChar = re);
            // RE_NotSlashOrAsterisk: any character except '/' and '*'.
            charSetBuilder.DefineCharSet(c => "/*".IndexOf(c) < 0, re => RE_NotSlashOrAsterisk = re);

            // Presumably this is where the callbacks above run; the RE_* variables are used
            // only after this call. TODO confirm against CharSetExpressionBuilder.
            charSetBuilder.Build();

            var lex = lexicon.Lexer;

            // Keywords, each matched as a literal string. They are defined before ID.
            K_CLASS     = lex.DefineToken(RE.Literal("class"));
            K_PUBLIC    = lex.DefineToken(RE.Literal("public"));
            K_STATIC    = lex.DefineToken(RE.Literal("static"));
            K_VOID      = lex.DefineToken(RE.Literal("void"));
            K_MAIN      = lex.DefineToken(RE.Literal("Main"));
            K_STRING    = lex.DefineToken(RE.Literal("string"));
            K_RETURN    = lex.DefineToken(RE.Literal("return"));
            K_INT       = lex.DefineToken(RE.Literal("int"));
            K_BOOL      = lex.DefineToken(RE.Literal("bool"));
            K_IF        = lex.DefineToken(RE.Literal("if"));
            K_ELSE      = lex.DefineToken(RE.Literal("else"));
            K_WHILE     = lex.DefineToken(RE.Literal("while"));
            K_SYSTEM    = lex.DefineToken(RE.Literal("System"));
            K_CONSOLE   = lex.DefineToken(RE.Literal("Console"));
            K_WRITELINE = lex.DefineToken(RE.Literal("WriteLine"));
            K_LENGTH    = lex.DefineToken(RE.Literal("Length"));
            K_TRUE      = lex.DefineToken(RE.Literal("true"));
            K_FALSE     = lex.DefineToken(RE.Literal("false"));
            K_THIS      = lex.DefineToken(RE.Literal("this"));
            K_NEW       = lex.DefineToken(RE.Literal("new"));

            // Identifier and integer literal.
            // NOTE(review): '>>' on RE values appears to be concatenation here; confirm against RE.

            ID = lex.DefineToken(RE_IdChar >>
                                 (RE_IdChar | RE.Range('0', '9')).Many(), "identifier");
            INTEGER_LITERAL = lex.DefineToken(RE.Range('0', '9').Many1(), "integer literal");

            // Operators and punctuation.

            LOGICAL_AND = lex.DefineToken(RE.Literal("&&"));
            LOGICAL_OR  = lex.DefineToken(RE.Literal("||"));
            LOGICAL_NOT = lex.DefineToken(RE.Symbol('!'));
            LESS        = lex.DefineToken(RE.Symbol('<'));
            GREATER     = lex.DefineToken(RE.Symbol('>'));
            EQUAL       = lex.DefineToken(RE.Literal("=="));
            ASSIGN      = lex.DefineToken(RE.Symbol('='));
            PLUS        = lex.DefineToken(RE.Symbol('+'));
            MINUS       = lex.DefineToken(RE.Symbol('-'));
            ASTERISK    = lex.DefineToken(RE.Symbol('*'));
            SLASH       = lex.DefineToken(RE.Symbol('/'));
            LEFT_PH     = lex.DefineToken(RE.Symbol('('));
            RIGHT_PH    = lex.DefineToken(RE.Symbol(')'));
            LEFT_BK     = lex.DefineToken(RE.Symbol('['));
            RIGHT_BK    = lex.DefineToken(RE.Symbol(']'));
            LEFT_BR     = lex.DefineToken(RE.Symbol('{'));
            RIGHT_BR    = lex.DefineToken(RE.Symbol('}'));
            COMMA       = lex.DefineToken(RE.Symbol(','));
            COLON       = lex.DefineToken(RE.Symbol(':'));
            SEMICOLON   = lex.DefineToken(RE.Symbol(';'));
            DOT         = lex.DefineToken(RE.Symbol('.'));

            // Tokens that are added to skippedTokens at the end of this method.

            // A space-separator character, or one of U+0009, U+000B, U+000C.
            WHITESPACE = lex.DefineToken(RE_SpaceChar | RE.CharSet("\u0009\u000B\u000C"));

            // A single line-terminator character, or the two-character sequence "\r\n".
            LINE_BREAKER = lex.DefineToken(
                RE.CharSet("\u000D\u000A\u0085\u2028\u2029") |
                RE.Literal("\r\n")
                );


            // One section inside a delimited comment: a '/', or any number of '*' followed by a
            // character that is neither '/' nor '*'.
            var RE_DelimitedCommentSection = RE.Symbol('/') | (RE.Symbol('*').Many() >> RE_NotSlashOrAsterisk);

            // Either "//" followed by input characters, or "/*", any number of sections,
            // one or more '*', and a closing '/'.
            COMMENT = lex.DefineToken(
                (RE.Literal("//") >> RE_InputChar.Many()) |
                (RE.Literal("/*") >> RE_DelimitedCommentSection.Many() >> RE.Symbol('*').Many1() >> RE.Symbol('/')),
                "comment");

            skippedTokens.Add(WHITESPACE);
            skippedTokens.Add(LINE_BREAKER);
            skippedTokens.Add(COMMENT);
        }
Beispiel #12
0
 /// <summary>
 /// Creates a concatenation expression made of this expression followed by
 /// <paramref name="follow"/>.
 /// </summary>
 /// <param name="follow">Expression placed after this one.</param>
 /// <returns>A new <c>ConcatenationExpression</c> built from this expression and <paramref name="follow"/>.</returns>
 public RegularExpression Concat(RegularExpression follow)
 {
     RegularExpression sequence = new ConcatenationExpression(this, follow);

     return sequence;
 }
Beispiel #13
0
        /// <summary>
        /// Exercises a <c>where</c> clause with <c>Grammar.Check</c> inside a grammar rule. The
        /// alternation of a checked rule ("A") and an unchecked rule ("B") is fed two inputs through
        /// separate parser engines: "abc &gt;&gt; 123" must produce "A" and "abc &gt; &gt; 123" must
        /// produce "B", each with one accepted parse and no reported errors.
        /// </summary>
        public void WhereGrammaTest()
        {
            var lexicon = new Lexicon();
            var lexer   = lexicon.Lexer;

            var identifier = lexer.DefineToken(RE.Range('a', 'z').Concat(
                                                   (RE.Range('a', 'z') | RE.Range('0', '9')).Many()), "ID");
            var number  = lexer.DefineToken(RE.Range('0', '9').Many1(), "NUM");
            var greater = lexer.DefineToken(RE.Symbol('>'));
            var blank   = lexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')), "[ ]");

            // Rule "A": checks that the second '>' has no prefix trivia (error id 4).
            var checkedRule = from i in identifier
                              from g in greater
                              from g2 in greater
                              where Grammar.Check(g2.PrefixTrivia.Count == 0, 4, g2.Value.Span)
                              from n in number
                              select "A";

            // Rule "B": the same token sequence without the check.
            var plainRule = from i in identifier
                            from g in greater
                            from g2 in greater
                            from n in number
                            select "B";

            var grammar = checkedRule | plainRule;

            // Of two ambiguous results, "A" is kept when it is the first one; otherwise the second.
            grammar.AmbiguityAggregator = (a, b) => a == "A" ? a : b;

            var scannerInfo = lexicon.CreateScannerInfo();

            var errorManager = new CompilationErrorManager();

            errorManager.DefineError(1, 0, CompilationStage.Parsing, "Unexpected token '{0}'");
            errorManager.DefineError(2, 0, CompilationStage.Parsing, "Missing token '{0}'");
            errorManager.DefineError(3, 0, CompilationStage.Parsing, "Syntax error");
            errorManager.DefineError(4, 0, CompilationStage.Parsing, "White spaces between >> are not allowed");

            var firstErrors = errorManager.CreateErrorList();

            var productions = new ProductionInfoManager(grammar.SuffixedBy(Grammar.Eos()));

            var model = new LR0Model(productions);
            model.BuildModel();

            // Textual dump of the model; not asserted on.
            string modelDump = model.ToString();

            TransitionTable table = TransitionTable.Create(model, scannerInfo);

            var syntaxErrors = new SyntaxErrors();
            syntaxErrors.TokenUnexpectedId = 1;
            syntaxErrors.TokenMissingId    = 2;
            syntaxErrors.OtherErrorId      = 3;

            var firstEngine = new ParserEngine(table, syntaxErrors);

            var scanner = new Scanner(scannerInfo);

            scanner.SetTriviaTokens(blank.Index);
            scanner.SetSource(new SourceReader(new StringReader("abc >> 123")));

            // Push every lexeme, including the end-of-stream one, into the first engine.
            while (true)
            {
                Lexeme lexeme = scanner.Read();

                firstEngine.Input(lexeme);

                if (lexeme.IsEndOfStream)
                {
                    break;
                }
            }

            Assert.AreEqual(1, firstEngine.AcceptedCount);
            Assert.AreEqual("A", firstEngine.GetResult(0, firstErrors));
            Assert.AreEqual(0, firstErrors.Count);

            // Second input: same scanner with a new source, fresh engine.
            var secondEngine = new ParserEngine(table, syntaxErrors);

            scanner.SetSource(new SourceReader(new StringReader("abc > > 123")));

            while (true)
            {
                Lexeme lexeme = scanner.Read();

                secondEngine.Input(lexeme);

                if (lexeme.IsEndOfStream)
                {
                    break;
                }
            }

            var secondErrors = errorManager.CreateErrorList();

            Assert.AreEqual(1, secondEngine.AcceptedCount);
            Assert.AreEqual("B", secondEngine.GetResult(0, secondErrors));
            Assert.AreEqual(0, secondErrors.Count);
        }
Beispiel #14
0
 /// <summary>
 /// Creates a concatenation expression made of <paramref name="left"/> followed by
 /// <paramref name="right"/>.
 /// </summary>
 /// <param name="left">Expression placed first.</param>
 /// <param name="right">Expression placed second.</param>
 /// <returns>A new <c>ConcatenationExpression</c> built from the two operands.</returns>
 public static RegularExpression op_Concatenate(RegularExpression left, RegularExpression right)
 {
     RegularExpression sequence = new ConcatenationExpression(left, right);

     return sequence;
 }
Beispiel #15
0
        /// <summary>
        /// Scans input that mixes ASCII and CJK identifiers with a lexicon whose identifier token is
        /// defined through Unicode letter categories, using the innermost of three nested lexer states,
        /// and checks the token index of each of the six lexemes read.
        /// </summary>
        public void CompactCharSetTest()
        {
            var lexicon = new Lexicon();

            // Three nested lexer states: global, keywords below it, xml below keywords.
            LexerState globalState  = lexicon.DefaultLexer;
            LexerState keywordState = globalState.CreateSubState();
            LexerState xmlState     = keywordState.CreateSubState();

            var letterCategories = new[]
            {
                UnicodeCategory.LetterNumber,
                UnicodeCategory.LowercaseLetter,
                UnicodeCategory.ModifierLetter,
                UnicodeCategory.OtherLetter,
                UnicodeCategory.TitlecaseLetter,
                UnicodeCategory.UppercaseLetter
            };

            // Any character whose Unicode category is one of the letter categories above.
            var identifierChar = RE.CharsOf(c => letterCategories.Contains(Char.GetUnicodeCategory(c)));

            var identifier = globalState.DefineToken(identifierChar.Concat(
                                                         (identifierChar | RE.Range('0', '9')).Many()));
            var number = globalState.DefineToken(RE.Range('0', '9').Many1());
            var blanks = globalState.DefineToken(RE.Symbol(' ').Many());

            var ifKeyword   = keywordState.DefineToken(RE.Literal("if"));
            var elseKeyword = keywordState.DefineToken(RE.Literal("else"));

            var xmlnsKeyword = xmlState.DefineToken(RE.Literal("xmlns"));

            var scannerInfo = lexicon.CreateScannerInfo();

            // Scan in the xml state.
            scannerInfo.LexerStateIndex = xmlState.Index;

            var scanner = new Scanner(scannerInfo);

            string source = "xmlns 你好吗1 123 蘏臦囧綗 ABCD if";

            scanner.SetSource(new SourceReader(new StringReader(source)));
            scanner.SetTriviaTokens(blanks.Index);

            // Expected token index of each successive lexeme, in input order.
            var expectedTokens = new[]
            {
                xmlnsKeyword.Index, // "xmlns"
                identifier.Index,   // CJK letters followed by a digit
                number.Index,       // "123"
                identifier.Index,   // CJK letters
                identifier.Index,   // "ABCD"
                ifKeyword.Index     // "if"
            };

            foreach (var expected in expectedTokens)
            {
                var lexeme = scanner.Read();

                Assert.AreEqual(expected, lexeme.TokenIndex);
            }
        }
Beispiel #16
0
        /// <summary>
        /// Scans "asdf04a 1107 else Z if vvv xmlns 772737" with WHITESPACE and ERROR registered as
        /// trivia, in the innermost (xml) lexer state. Checks token index, value and prefix-trivia
        /// count of the first lexemes, then exercises the peek methods up to end of stream, and
        /// finally checks the remaining lexemes and the end-of-stream lexeme's position.
        /// </summary>
        public void SkipTokenTest()
        {
            Lexicon    lexicon  = new Lexicon();
            LexerState global   = lexicon.DefaultLexer;
            LexerState keywords = global.CreateSubState();
            LexerState xml      = keywords.CreateSubState();

            var ID = global.DefineToken(RE.Range('a', 'z').Concat(
                                            (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
            var ERROR      = global.DefineToken(RE.Range(Char.MinValue, (char)255));

            var IF   = keywords.DefineToken(RE.Literal("if"));
            var ELSE = keywords.DefineToken(RE.Literal("else"));

            var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

            ScannerInfo     info    = lexicon.CreateScannerInfo();
            PeekableScanner scanner = new PeekableScanner(info);

            string       source = "asdf04a 1107 else Z if vvv xmlns 772737";
            StringReader sr     = new StringReader(source);

            scanner.SetSource(new SourceReader(sr));
            scanner.SetTriviaTokens(WHITESPACE.Index, ERROR.Index);
            info.LexerStateIndex = xml.Index;

            // "asdf04a": first lexeme, nothing skipped before it.
            Lexeme l1 = scanner.Read();

            Assert.AreEqual(ID.Index, l1.TokenIndex);
            Assert.AreEqual("asdf04a", l1.Value);
            Assert.AreEqual(0, l1.PrefixTrivia.Count);

            // "1107": preceded by one blank.
            Lexeme l2 = scanner.Read();

            Assert.AreEqual(NUM.Index, l2.TokenIndex);
            Assert.AreEqual("1107", l2.Value);
            Assert.AreEqual(1, l2.PrefixTrivia.Count);

            // "else": preceded by one blank. The trivia count is checked on l3 itself;
            // the assertion previously re-checked l2, leaving l3's trivia untested.
            Lexeme l3 = scanner.Read();

            Assert.AreEqual(ELSE.Index, l3.TokenIndex);
            Assert.AreEqual("else", l3.Value);
            Assert.AreEqual(1, l3.PrefixTrivia.Count);

            // "if": preceded by " ", "Z" and " " (three trivia lexemes).
            Lexeme l4 = scanner.Read();

            Assert.AreEqual(IF.Index, l4.TokenIndex);
            Assert.AreEqual("if", l4.Value);
            Assert.AreEqual(3, l4.PrefixTrivia.Count);


            // Look ahead without consuming; the fourth peek is expected to be end of stream.
            int p1 = scanner.Peek();

            Assert.AreEqual(ID.Index, p1);

            int p2   = scanner.Peek2();
            int p3   = scanner.Peek(3);
            int peof = scanner.Peek(4);

            Assert.AreEqual(info.EndOfStreamTokenIndex, peof);

            // The peeked lexemes are still returned by Read afterwards.
            Lexeme l6 = scanner.Read();
            Lexeme l7 = scanner.Read();

            Assert.AreEqual(XMLNS.Index, l7.TokenIndex);

            Lexeme l8 = scanner.Read();

            Assert.AreEqual(NUM.Index, l8.TokenIndex);

            Lexeme leof = scanner.Read();

            // The end-of-stream lexeme is an empty span located at the end of the input.
            Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
            Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
            Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);
        }
Beispiel #17
0
        /// <summary>
        /// Parses "property get { get { get = 1; } }" with a grammar in which "get" is both a keyword
        /// of a sub-lexer and a valid identifier of the global lexer. Expects no errors, the property
        /// name "get" and "get=1" as the first statement of the getter.
        /// </summary>
        public void MultipleLexerParsingTest()
        {
            var   lexicon      = new Lexicon();
            Lexer globalLexer  = lexicon.Lexer;
            Lexer keywordLexer = globalLexer.CreateSubLexer();

            // Tokens of the global lexer.
            var propertyKeyword = globalLexer.DefineToken(RE.Literal("property"));
            var identifier      = globalLexer.DefineToken(RE.Range('a', 'z').Concat(
                                                              (RE.Range('a', 'z') | RE.Range('0', '9')).Many()), "ID");
            var number     = globalLexer.DefineToken(RE.Range('0', '9').Many1(), "NUM");
            var equals     = globalLexer.DefineToken(RE.Symbol('='));
            var semicolon  = globalLexer.DefineToken(RE.Symbol(';'));
            var leftBrace  = globalLexer.DefineToken(RE.Symbol('{'));
            var rightBrace = globalLexer.DefineToken(RE.Symbol('}'));
            var blank      = globalLexer.DefineToken(RE.Symbol(' ').Union(RE.Symbol('\t')), "[ ]");

            // Token that exists only in the sub-lexer.
            var getKeyword = keywordLexer.DefineToken(RE.Literal("get"));

            // ID '=' NUM ';'
            var assignment =
                from id in identifier
                from eq in equals
                from value in number
                from st in semicolon
                select id.Value + "=" + value.Value;

            // 'get' '{' assignment* '}'
            var getter =
                from _get in getKeyword
                from lb in leftBrace
                from statements in assignment.Many()
                from rb in rightBrace
                select new GetDef { Statements = statements };

            // 'property' ID '{' getter '}'
            var property =
                from _prop in propertyKeyword
                from id in identifier
                from lb in leftBrace
                from getdef in getter
                from rb in rightBrace
                select new PropDef { PropName = id.Value.Content, GetDef = getdef };

            const string source = "property get { get { get = 1; } }";

            var scannerInfo = lexicon.CreateScannerInfo();
            var scanner     = new Scanner(scannerInfo);

            scanner.SetTriviaTokens(blank.Index);
            scanner.SetSource(new SourceReader(new StringReader(source)));

            var errorManager = new CompilationErrorManager();

            errorManager.DefineError(1, 0, CompilationStage.Parsing, "Unexpected token '{0}'");
            errorManager.DefineError(2, 0, CompilationStage.Parsing, "Missing token '{0}'");
            errorManager.DefineError(3, 0, CompilationStage.Parsing, "Syntax error");

            var productions = new ProductionInfoManager(property.SuffixedBy(Grammar.Eos()));

            var model = new LR0Model(productions);
            model.BuildModel();

            // Textual dump of the model; not asserted on.
            string modelDump = model.ToString();

            TransitionTable table = TransitionTable.Create(model, scannerInfo);

            var syntaxErrors = new SyntaxErrors();
            syntaxErrors.TokenUnexpectedId = 1;
            syntaxErrors.TokenMissingId    = 2;
            syntaxErrors.OtherErrorId      = 3;

            var engine = new ParserEngine(table, syntaxErrors);

            // Push every lexeme, including the end-of-stream one, into the engine.
            while (true)
            {
                Lexeme lexeme = scanner.Read();

                engine.Input(lexeme);

                if (lexeme.IsEndOfStream)
                {
                    break;
                }
            }

            var errorList = errorManager.CreateErrorList();

            var parsed = (PropDef)engine.GetResult(0, errorList);

            Assert.AreEqual(0, errorList.Count);
            Assert.AreEqual("get", parsed.PropName);
            Assert.AreEqual("get=1", parsed.GetDef.Statements.First());
        }
Beispiel #18
0
        /// <summary>
        /// Scans a short source string with a <c>PeekableScanner</c> and checks token
        /// indices, lexeme values, column positions, look-ahead (Peek) results and
        /// what is returned at and after the end of the stream.
        /// </summary>
        public void ScannerTest()
        {
            var lexicon = new Lexicon();
            LexerState globalState  = lexicon.DefaultLexer;
            LexerState keywordState = globalState.CreateSubState();
            LexerState xmlState     = keywordState.CreateSubState();

            // Tokens of the global state; the definition order is preserved.
            var identifierPattern = RE.Range('a', 'z').Concat(
                (RE.Range('a', 'z') | RE.Range('0', '9')).Many());

            var ID         = globalState.DefineToken(identifierPattern);
            var NUM        = globalState.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = globalState.DefineToken(RE.Symbol(' ').Many());
            var ERROR      = globalState.DefineToken(RE.Range(Char.MinValue, (char)255));

            // Sub-state tokens are defined but never referenced again; this test
            // does not switch the scanner to another lexer state.
            keywordState.DefineToken(RE.Literal("if"));
            keywordState.DefineToken(RE.Literal("else"));
            xmlState.DefineToken(RE.Literal("xmlns"));

            ScannerInfo info = lexicon.CreateScannerInfo();
            var scanner = new PeekableScanner(info);

            string source = "asdf04a 1107 else Z if vvv xmlns 772737";
            scanner.SetSource(new SourceReader(new StringReader(source)));

            // "asdf04a" occupies columns 0..6.
            Lexeme firstId = scanner.Read();
            Assert.AreEqual(ID.Index, firstId.TokenIndex);
            Assert.AreEqual("asdf04a", firstId.Value);
            Assert.AreEqual(0, firstId.Span.StartLocation.Column);
            Assert.AreEqual(6, firstId.Span.EndLocation.Column);

            Lexeme firstGap = scanner.Read();
            Assert.AreEqual(WHITESPACE.Index, firstGap.TokenIndex);
            Assert.AreEqual(" ", firstGap.Value);

            Lexeme number = scanner.Read();
            Assert.AreEqual(NUM.Index, number.TokenIndex);
            Assert.AreEqual("1107", number.Value);

            Lexeme secondGap = scanner.Read();
            Assert.AreEqual(WHITESPACE.Index, secondGap.TokenIndex);

            // "else" is expected to come back as an ID token here.
            Lexeme elseWord = scanner.Read();
            Assert.AreEqual(ID.Index, elseWord.TokenIndex);

            // Look ahead up to five tokens without consuming them.
            int ahead1 = scanner.Peek();
            Assert.AreEqual(WHITESPACE.Index, ahead1);

            int ahead2 = scanner.Peek2();
            Assert.AreEqual(ERROR.Index, ahead2);

            int ahead3 = scanner.Peek(3);
            Assert.AreEqual(WHITESPACE.Index, ahead3);

            int ahead4 = scanner.Peek(4);
            Assert.AreEqual(ID.Index, ahead4);

            int ahead5 = scanner.Peek(5);
            Assert.AreEqual(WHITESPACE.Index, ahead5);

            // Consume the first two peeked tokens; the second one is the 'Z' error token.
            scanner.Read();
            Lexeme errorLexeme = scanner.Read();
            Assert.AreEqual(ERROR.Index, errorLexeme.TokenIndex);

            // What used to be three tokens ahead must now be the very next token.
            int nextAfterError = scanner.Peek();
            Assert.AreEqual(ahead3, nextAfterError);

            // Skip the remaining eight tokens (per the original notes:
            // whitespace, ID:if, whitespace, ID:vvv, whitespace, ID:xmlns,
            // whitespace, NUM:772737).
            for (int skipped = 0; skipped < 8; skipped++)
            {
                scanner.Read();
            }

            // The end-of-stream lexeme is zero-width and sits at the end of the source.
            Lexeme endOfStream = scanner.Read();
            Assert.AreEqual(info.EndOfStreamTokenIndex, endOfStream.TokenIndex);
            Assert.AreEqual(endOfStream.Span.StartLocation.CharIndex, endOfStream.Span.EndLocation.CharIndex);
            Assert.AreEqual(source.Length, endOfStream.Span.StartLocation.CharIndex);

            // Reading past the end must keep returning end-of-stream at the same position.
            Lexeme afterEnd = scanner.Read();
            Assert.AreEqual(info.EndOfStreamTokenIndex, afterEnd.TokenIndex);
            Assert.AreEqual(endOfStream.Span.StartLocation.CharIndex, afterEnd.Span.StartLocation.CharIndex);
        }
Beispiel #19
0
        /// <summary>
        /// Feeds the parser engine by hand with the lexemes of "x+x+x" for the grammar
        ///   E ::= T '+' E | T
        ///   T ::= 'x'        (every T evaluates to 1)
        /// and expects exactly one accepted parse whose result is 3.
        /// </summary>
        public void ParserDriverSimpleTest()
        {
            var lexicon = new Lexicon();

            var X    = lexicon.Lexer.DefineToken(RE.Symbol('x'));
            var PLUS = lexicon.Lexer.DefineToken(RE.Symbol('+'));

            var scannerInfo = lexicon.CreateScannerInfo();

            var E = new Production<object>();
            var T = new Production<object>();

            // E adds up the values of its operands; a lone T is used as-is.
            E.Rule =
                (from t in T
                 from plus in PLUS
                 from e in E
                 select (object)((int)t + (int)e))
                | T;

            T.Rule = from x in X
                     select (object)1;

            var productions = new ProductionInfoManager(E.SuffixedBy(Grammar.Eos()));

            var lr0 = new LR0Model(productions);
            lr0.BuildModel();

            // Kept from the original: the textual form is produced but not inspected.
            string modelText = lr0.ToString();

            TransitionTable table = TransitionTable.Create(lr0, scannerInfo);

            var syntaxErrors = new SyntaxErrors { TokenUnexpectedId = 1 };
            var driver = new ParserEngine(table, syntaxErrors);

            var scannerBuilder = new ForkableScannerBuilder(scannerInfo);
            scannerBuilder.ErrorList = new CompilationErrorManager().CreateErrorList();

            var scanner = scannerBuilder.Create(new SourceReader(new StringReader("x+x+x")));

            // Six reads: the five single-character tokens of the input plus one more,
            // presumably the end-of-stream lexeme that lets the engine accept.
            for (int step = 0; step < 6; step++)
            {
                driver.Input(scanner.Read());
            }

            Assert.AreEqual(0, driver.CurrentStackCount);
            Assert.AreEqual(1, driver.AcceptedCount);
            Assert.AreEqual(3, driver.GetResult(0, null));
        }
Beispiel #20
0
        /// <summary>
        /// Runs the parser engine over a malformed binary-tree expression and then
        /// requests the parse result together with an error list.
        /// NOTE(review): the test contains no assertions, so it only passes or fails
        /// on whether an exception is thrown - confirm the intended expectations.
        /// </summary>
        public void ParserErrorRecoveryTest()
        {
            var binaryTreeSyntax = new Lexicon();
            var lexer = binaryTreeSyntax.Lexer;

            // Lexical definitions.
            Token LEFTPH  = lexer.DefineToken(RE.Symbol('('));
            Token RIGHTPH = lexer.DefineToken(RE.Symbol(')'));
            Token COMMA   = lexer.DefineToken(RE.Symbol(','));
            Token LETTER  = lexer.DefineToken(RE.Range('a', 'z') | RE.Range('A', 'Z'), "ID");

            // Grammar: Node ::= LETTER '(' Node ',' Node ')' | (empty, yielding null)
            var NodeParser = new Production<Node>();

            NodeParser.Rule =
                (from a in LETTER
                 from _1 in LEFTPH
                 from left in NodeParser
                 from _2 in COMMA
                 from right in NodeParser
                 from _3 in RIGHTPH
                 select new Node(a.Value.Content, left, right))
                | Grammar.Empty<Node>(null);

            // Kept from the original although it is not used afterwards.
            var builder = new ForkableScannerBuilder(binaryTreeSyntax.CreateScannerInfo());

            // The well-formed input, kept for reference only.
            const string correct = "A(B(,),C(,))";

            // Compared with the text above, this input has an extra '(' after 'A'
            // and lacks the final ')'.
            string source = "A((B(,),C(,)";
            var reader = new SourceReader(new StringReader(source));

            var info = binaryTreeSyntax.CreateScannerInfo();
            var scanner = new Scanner(info);
            scanner.SetSource(reader);

            var errorManager = new CompilationErrorManager();

            errorManager.DefineError(1, 0, CompilationStage.Parsing, "Unexpected token '{0}'");
            errorManager.DefineError(2, 0, CompilationStage.Parsing, "Missing token '{0}'");
            errorManager.DefineError(3, 0, CompilationStage.Parsing, "Invalid token found, did you mean '{0}' ?");
            errorManager.DefineError(4, 0, CompilationStage.Parsing, "Syntax error");

            var productions = new ProductionInfoManager(NodeParser.SuffixedBy(Grammar.Eos()));

            var lr0 = new LR0Model(productions);
            lr0.BuildModel();

            // Kept from the original: produced but not inspected.
            string modelText = lr0.ToString();

            TransitionTable table = TransitionTable.Create(lr0, info);

            // Map each kind of syntax error to the ids defined above.
            var errorIds = new SyntaxErrors
            {
                TokenUnexpectedId = 1,
                TokenMissingId    = 2,
                OtherErrorId      = 4,
                TokenMistakeId    = 3
            };

            var driver = new ParserEngine(table, errorIds);

            // Feed every lexeme to the engine, including the end-of-stream one.
            while (true)
            {
                Lexeme lexeme = scanner.Read();

                driver.Input(lexeme);

                if (lexeme.IsEndOfStream)
                {
                    break;
                }
            }

            var result = driver.GetResult(0, errorManager.CreateErrorList());
        }
Beispiel #21
0
        /// <summary>
        /// Checks the nullable flags and the First/Follow sets that
        /// <c>ProductionInfoManager</c> reports for the grammar
        ///   Z ::= d | X Y Z
        ///   Y ::= (empty) | c
        ///   X ::= Y | a
        /// </summary>
        public void ProductionInfoManagerTest()
        {
            Lexicon test = new Lexicon();

            var A = test.Lexer.DefineToken(RE.Symbol('a'));
            var D = test.Lexer.DefineToken(RE.Symbol('d'));
            var C = test.Lexer.DefineToken(RE.Symbol('c'));

            Production <object> X = new Production <object>(), Y = new Production <object>(), Z = new Production <object>();

            Z.Rule =
                (from d in D select d as object) |
                (from x in X
                 from y in Y
                 from z in Z
                 select new { x, y, z } as object);

            Y.Rule =
                Grammar.Empty(new object()) |
                (from c in C select c as object);

            X.Rule =
                Y |
                (from a in A select a as object);

            // Z is the root production; it is analyzed without an end-of-stream suffix.
            ProductionInfoManager pis = new ProductionInfoManager(Z);

            var xInfo = pis.GetInfo(X);
            var yInfo = pis.GetInfo(Y);
            var zInfo = pis.GetInfo(Z);

            Assert.IsTrue(xInfo.IsNullable, "X should be nullable");
            Assert.IsTrue(yInfo.IsNullable, "Y should be nullable");
            Assert.IsFalse(zInfo.IsNullable, "Z should not be nullable");

            // Count assertions take the expected value first so that failure
            // messages report "expected" and "actual" the right way round.
            Assert.AreEqual(2, xInfo.First.Count);
            Assert.AreEqual(3, xInfo.Follow.Count);

            Assert.IsTrue(xInfo.First.Contains(A.AsTerminal()));
            Assert.IsTrue(xInfo.First.Contains(C.AsTerminal()));

            Assert.IsTrue(xInfo.Follow.Contains(A.AsTerminal()));
            Assert.IsTrue(xInfo.Follow.Contains(C.AsTerminal()));
            Assert.IsTrue(xInfo.Follow.Contains(D.AsTerminal()));

            Assert.AreEqual(1, yInfo.First.Count);
            Assert.AreEqual(3, yInfo.Follow.Count);

            Assert.IsTrue(yInfo.First.Contains(C.AsTerminal()));

            Assert.IsTrue(yInfo.Follow.Contains(A.AsTerminal()));
            Assert.IsTrue(yInfo.Follow.Contains(C.AsTerminal()));
            Assert.IsTrue(yInfo.Follow.Contains(D.AsTerminal()));

            Assert.AreEqual(3, zInfo.First.Count);
            Assert.AreEqual(0, zInfo.Follow.Count);

            Assert.IsTrue(zInfo.First.Contains(A.AsTerminal()));
            Assert.IsTrue(zInfo.First.Contains(C.AsTerminal()));
            Assert.IsTrue(zInfo.First.Contains(D.AsTerminal()));
        }
Beispiel #22
0
        /// <summary>
        /// Builds the scanner tables for a lexicon with three nested lexer states
        /// (global, keywords, xml) and checks, by feeding strings to a
        /// <c>FiniteAutomationEngine</c>, which token each input resolves to under
        /// each selected lexer state.
        /// </summary>
        public void LexerStateToDFATest()
        {
            var lexicon = new Lexicon();
            LexerState globalState  = lexicon.DefaultLexer;
            LexerState keywordState = globalState.CreateSubState();
            LexerState xmlState     = keywordState.CreateSubState();

            var identifierPattern = RE.Range('a', 'z').Concat(
                (RE.Range('a', 'z') | RE.Range('0', '9')).Many());

            var ID    = globalState.DefineToken(identifierPattern);
            var NUM   = globalState.DefineToken(RE.Range('0', '9').Many1());
            var ERROR = globalState.DefineToken(RE.Range(Char.MinValue, (char)255));

            var IF   = keywordState.DefineToken(RE.Literal("if"));
            var ELSE = keywordState.DefineToken(RE.Literal("else"));

            var XMLNS = xmlState.DefineToken(RE.Literal("xmlns"));

            // Kept from the original: the DFA is built and compressed, but the
            // result is not inspected.
            CompressedTransitionTable.Compress(DFAModel.Create(lexicon));

            ScannerInfo scannerInfo = lexicon.CreateScannerInfo();

            var automaton = new FiniteAutomationEngine(scannerInfo.TransitionTable, scannerInfo.CharClassTable);

            // --- initial lexer state: "if" is reported as an ID ---
            automaton.InputString("if");
            Assert.AreEqual(ID.Index, scannerInfo.GetTokenIndex(automaton.CurrentState));

            automaton.Reset();
            automaton.InputString("12345");
            Assert.AreEqual(NUM.Index, scannerInfo.GetTokenIndex(automaton.CurrentState));

            automaton.Reset();
            automaton.InputString("asdf12dd");
            Assert.AreEqual(ID.Index, scannerInfo.GetTokenIndex(automaton.CurrentState));

            // A single upper-case letter is covered only by the ERROR token.
            automaton.Reset();
            automaton.InputString("A");
            Assert.AreEqual(ERROR.Index, scannerInfo.GetTokenIndex(automaton.CurrentState));

            // Inputs that no token can match as a whole end in the stopped state.
            automaton.Reset();
            automaton.InputString("AAA");
            Assert.IsTrue(automaton.IsAtStoppedState);

            automaton.Reset();
            automaton.InputString("if ");
            Assert.IsTrue(automaton.IsAtStoppedState);

            // --- keywords state: "if"/"else" are reported as their own tokens ---
            automaton.Reset();
            scannerInfo.LexerStateIndex = keywordState.Index;
            automaton.InputString("if");
            Assert.AreEqual(IF.Index, scannerInfo.GetTokenIndex(automaton.CurrentState));

            automaton.Reset();
            automaton.InputString("else");
            Assert.AreEqual(ELSE.Index, scannerInfo.GetTokenIndex(automaton.CurrentState));

            // "xmlns" is still reported as an ID in the keywords state.
            automaton.Reset();
            automaton.InputString("xmlns");
            Assert.AreEqual(ID.Index, scannerInfo.GetTokenIndex(automaton.CurrentState));

            // --- xml state: "if" is still IF, and "xmlns" is now XMLNS ---
            automaton.Reset();
            scannerInfo.LexerStateIndex = xmlState.Index;
            automaton.InputString("if");
            Assert.AreEqual(IF.Index, scannerInfo.GetTokenIndex(automaton.CurrentState));

            // A proper prefix of "xmlns" must not stop the automaton.
            automaton.Reset();
            automaton.InputString("xml");
            Assert.IsFalse(automaton.IsAtStoppedState);

            automaton.Reset();
            automaton.InputString("xmlns");
            Assert.AreEqual(XMLNS.Index, scannerInfo.GetTokenIndex(automaton.CurrentState));
        }
Beispiel #23
0
 /// <summary>
 /// Defines a token for the given regular expression by forwarding to the
 /// two-argument overload with <c>null</c> as the second argument.
 /// </summary>
 /// <param name="regex">The regular expression that the token matches.</param>
 /// <returns>The token returned by the two-argument overload.</returns>
 public Token DefineToken(RegularExpression regex)
 {
     // NOTE(review): the second argument looks like an optional description - confirm.
     return DefineToken(regex, null);
 }