Esempio n. 1
0
        /// <summary>
        /// Defines tokens in three nested lexer states (default, a sub-state for keywords and a
        /// further sub-state for xml), then feeds strings through a
        /// <see cref="FiniteAutomationEngine"/> and checks which token index the scanner info
        /// reports for the engine's resulting state under each lexer state.
        /// </summary>
        public void LexerStateToDFATest()
        {
            Lexicon lexicon = new Lexicon();
            LexerState globalState = lexicon.DefaultLexer;
            LexerState keywordState = globalState.CreateSubState();
            LexerState xmlState = keywordState.CreateSubState();

            // Identifier: one lowercase letter followed by any number of lowercase letters or digits.
            var idFirstChar = RE.Range('a', 'z');
            var idRestChars = (RE.Range('a', 'z') | RE.Range('0', '9')).Many();
            var idToken = globalState.DefineToken(idFirstChar.Concat(idRestChars));
            var numToken = globalState.DefineToken(RE.Range('0', '9').Many1());
            var errorToken = globalState.DefineToken(RE.Range(Char.MinValue, (char)255));

            var ifToken = keywordState.DefineToken(RE.Literal("if"));
            var elseToken = keywordState.DefineToken(RE.Literal("else"));

            var xmlnsToken = xmlState.DefineToken(RE.Literal("xmlns"));

            // The results of these two calls are not inspected below; the calls only
            // exercise DFA model creation and transition-table compression.
            DFAModel model = DFAModel.Create(lexicon);
            CompressedTransitionTable compressed = CompressedTransitionTable.Compress(model);

            ScannerInfo scannerInfo = lexicon.CreateScannerInfo();
            FiniteAutomationEngine engine =
                new FiniteAutomationEngine(scannerInfo.TransitionTable, scannerInfo.CharClassTable);

            // Resets the engine and then feeds it the given text.
            Action<string> restartWith = text =>
            {
                engine.Reset();
                engine.InputString(text);
            };

            // --- default lexer state: keyword literals are expected to come back as ID ---
            engine.InputString("if");
            Assert.AreEqual(idToken.Index, scannerInfo.GetTokenIndex(engine.CurrentState));

            restartWith("12345");
            Assert.AreEqual(numToken.Index, scannerInfo.GetTokenIndex(engine.CurrentState));

            restartWith("asdf12dd");
            Assert.AreEqual(idToken.Index, scannerInfo.GetTokenIndex(engine.CurrentState));

            restartWith("A");
            Assert.AreEqual(errorToken.Index, scannerInfo.GetTokenIndex(engine.CurrentState));

            // Inputs that no single token covers are expected to stop the engine.
            restartWith("AAA");
            Assert.IsTrue(engine.IsAtStoppedState);

            restartWith("if ");
            Assert.IsTrue(engine.IsAtStoppedState);

            // --- keywords lexer state ---
            engine.Reset();
            scannerInfo.LexerStateIndex = keywordState.Index;
            engine.InputString("if");
            Assert.AreEqual(ifToken.Index, scannerInfo.GetTokenIndex(engine.CurrentState));

            restartWith("else");
            Assert.AreEqual(elseToken.Index, scannerInfo.GetTokenIndex(engine.CurrentState));

            restartWith("xmlns");
            Assert.AreEqual(idToken.Index, scannerInfo.GetTokenIndex(engine.CurrentState));

            // --- xml lexer state ---
            engine.Reset();
            scannerInfo.LexerStateIndex = xmlState.Index;
            engine.InputString("if");
            Assert.AreEqual(ifToken.Index, scannerInfo.GetTokenIndex(engine.CurrentState));

            // A strict prefix of "xmlns" must leave the engine still running.
            restartWith("xml");
            Assert.IsFalse(engine.IsAtStoppedState);

            restartWith("xmlns");
            Assert.AreEqual(xmlnsToken.Index, scannerInfo.GetTokenIndex(engine.CurrentState));
        }
Esempio n. 2
0
        /// <summary>
        /// Checks that a <see cref="PeekableScanner"/> configured with trivia tokens
        /// (WHITESPACE and ERROR) does not return those tokens from Read/Peek, and instead
        /// counts them in the <c>PrefixTrivia</c> of the next returned lexeme. The scanner
        /// runs with the xml lexer state selected.
        /// </summary>
        public void SkipTokenTest()
        {
            Lexicon    lexicon  = new Lexicon();
            LexerState global   = lexicon.DefaultLexer;
            LexerState keywords = global.CreateSubState();
            LexerState xml      = keywords.CreateSubState();

            var ID = global.DefineToken(RE.Range('a', 'z').Concat(
                                            (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
            var NUM        = global.DefineToken(RE.Range('0', '9').Many1());
            var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
            var ERROR      = global.DefineToken(RE.Range(Char.MinValue, (char)255));

            var IF   = keywords.DefineToken(RE.Literal("if"));
            var ELSE = keywords.DefineToken(RE.Literal("else"));

            var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

            ScannerInfo     info    = lexicon.CreateScannerInfo();
            PeekableScanner scanner = new PeekableScanner(info);

            string       source = "asdf04a 1107 else Z if vvv xmlns 772737";
            StringReader sr     = new StringReader(source);

            scanner.SetSource(new SourceReader(sr));
            scanner.SetTriviaTokens(WHITESPACE.Index, ERROR.Index);
            info.LexerStateIndex = xml.Index;

            // "asdf04a": first lexeme, nothing precedes it.
            Lexeme l1 = scanner.Read();

            Assert.AreEqual(ID.Index, l1.TokenIndex);
            Assert.AreEqual("asdf04a", l1.Value);
            Assert.AreEqual(0, l1.PrefixTrivia.Count);

            // "1107": preceded by a single space.
            Lexeme l2 = scanner.Read();

            Assert.AreEqual(NUM.Index, l2.TokenIndex);
            Assert.AreEqual("1107", l2.Value);
            Assert.AreEqual(1, l2.PrefixTrivia.Count);

            // "else": preceded by a single space.
            Lexeme l3 = scanner.Read();

            Assert.AreEqual(ELSE.Index, l3.TokenIndex);
            Assert.AreEqual("else", l3.Value);
            // Checks l3 itself (the previous version re-checked l2 here by mistake).
            Assert.AreEqual(1, l3.PrefixTrivia.Count);

            // "if": preceded by space, "Z" (ERROR) and space, all of which are trivia.
            Lexeme l4 = scanner.Read();

            Assert.AreEqual(IF.Index, l4.TokenIndex);
            Assert.AreEqual("if", l4.Value);
            Assert.AreEqual(3, l4.PrefixTrivia.Count);

            // Look ahead over the remaining non-trivia tokens: vvv, xmlns, 772737, end of stream.
            int p1 = scanner.Peek();

            Assert.AreEqual(ID.Index, p1);

            int p2   = scanner.Peek2();
            int p3   = scanner.Peek(3);
            int peof = scanner.Peek(4);

            // The peeked tokens must match what the subsequent reads return.
            Assert.AreEqual(XMLNS.Index, p2);
            Assert.AreEqual(NUM.Index, p3);
            Assert.AreEqual(info.EndOfStreamTokenIndex, peof);

            Lexeme l6 = scanner.Read();

            Assert.AreEqual(ID.Index, l6.TokenIndex);

            Lexeme l7 = scanner.Read();

            Assert.AreEqual(XMLNS.Index, l7.TokenIndex);

            Lexeme l8 = scanner.Read();

            Assert.AreEqual(NUM.Index, l8.TokenIndex);

            // The end-of-stream lexeme is zero-width and sits at the end of the source text.
            Lexeme leof = scanner.Read();

            Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
            Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
            Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);
        }
Esempio n. 3
0
        /// <summary>
        /// Scans a source string that mixes ASCII and CJK text using an identifier token whose
        /// character set is built from Unicode letter categories, and checks the sequence of
        /// token indices returned while the xml lexer state is selected and whitespace is trivia.
        /// </summary>
        public void CompactCharSetTest()
        {
            Lexicon lexicon = new Lexicon();
            LexerState globalState = lexicon.DefaultLexer;
            LexerState keywordState = globalState.CreateSubState();
            LexerState xmlState = keywordState.CreateSubState();

            // Unicode categories accepted as identifier characters.
            var letterCategories = new[]
            {
                UnicodeCategory.LetterNumber,
                UnicodeCategory.LowercaseLetter,
                UnicodeCategory.ModifierLetter,
                UnicodeCategory.OtherLetter,
                UnicodeCategory.TitlecaseLetter,
                UnicodeCategory.UppercaseLetter
            };

            var idChar = RE.CharsOf(c => letterCategories.Contains(Char.GetUnicodeCategory(c)));

            // Identifier: one identifier char, then any number of identifier chars or digits.
            var idTail = (idChar | RE.Range('0', '9')).Many();
            var idToken = globalState.DefineToken(idChar.Concat(idTail));
            var numToken = globalState.DefineToken(RE.Range('0', '9').Many1());
            var whitespaceToken = globalState.DefineToken(RE.Symbol(' ').Many());

            var ifToken = keywordState.DefineToken(RE.Literal("if"));
            // "else" is defined but never appears in the source text below.
            keywordState.DefineToken(RE.Literal("else"));

            var xmlnsToken = xmlState.DefineToken(RE.Literal("xmlns"));

            var scannerInfo = lexicon.CreateScannerInfo();
            scannerInfo.LexerStateIndex = xmlState.Index;

            Scanner scanner = new Scanner(scannerInfo);

            string source = "xmlns 你好吗1 123 蘏臦囧綗 ABCD if";

            scanner.SetSource(new SourceReader(new StringReader(source)));
            scanner.SetTriviaTokens(whitespaceToken.Index);

            // Expected tokens, in source order: xmlns, 你好吗1, 123, 蘏臦囧綗, ABCD, if.
            var expectedTokens = new[] { xmlnsToken, idToken, numToken, idToken, idToken, ifToken };

            foreach (var expected in expectedTokens)
            {
                var lexeme = scanner.Read();

                Assert.AreEqual(expected.Index, lexeme.TokenIndex);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Exercises <see cref="PeekableScanner"/> Read and Peek without registering any trivia
        /// tokens and without changing the lexer state: the assertions expect whitespace and
        /// error lexemes to be returned like any other token, peeks to agree with later reads,
        /// and reads past the end of the source to keep returning the end-of-stream token.
        /// </summary>
        public void ScannerTest()
        {
            Lexicon lexicon = new Lexicon();
            LexerState globalState = lexicon.DefaultLexer;
            LexerState keywordState = globalState.CreateSubState();
            LexerState xmlState = keywordState.CreateSubState();

            // Identifier: one lowercase letter followed by any number of lowercase letters or digits.
            var idFirstChar = RE.Range('a', 'z');
            var idRestChars = (RE.Range('a', 'z') | RE.Range('0', '9')).Many();
            var idToken = globalState.DefineToken(idFirstChar.Concat(idRestChars));
            var numToken = globalState.DefineToken(RE.Range('0', '9').Many1());
            var whitespaceToken = globalState.DefineToken(RE.Symbol(' ').Many());
            var errorToken = globalState.DefineToken(RE.Range(Char.MinValue, (char)255));

            // Sub-state tokens are defined but never asserted on in this test.
            keywordState.DefineToken(RE.Literal("if"));
            keywordState.DefineToken(RE.Literal("else"));
            xmlState.DefineToken(RE.Literal("xmlns"));

            ScannerInfo info = lexicon.CreateScannerInfo();
            PeekableScanner scanner = new PeekableScanner(info);

            string source = "asdf04a 1107 else Z if vvv xmlns 772737";

            scanner.SetSource(new SourceReader(new StringReader(source)));

            // "asdf04a" spans columns 0 through 6.
            Lexeme first = scanner.Read();

            Assert.AreEqual(idToken.Index, first.TokenIndex);
            Assert.AreEqual("asdf04a", first.Value);
            Assert.AreEqual(0, first.Span.StartLocation.Column);
            Assert.AreEqual(6, first.Span.EndLocation.Column);

            Lexeme firstGap = scanner.Read();

            Assert.AreEqual(whitespaceToken.Index, firstGap.TokenIndex);
            Assert.AreEqual(" ", firstGap.Value);

            Lexeme number = scanner.Read();

            Assert.AreEqual(numToken.Index, number.TokenIndex);
            Assert.AreEqual("1107", number.Value);

            Lexeme secondGap = scanner.Read();

            Assert.AreEqual(whitespaceToken.Index, secondGap.TokenIndex);

            // "else" is expected to come back as ID here.
            Lexeme elseWord = scanner.Read();

            Assert.AreEqual(idToken.Index, elseWord.TokenIndex);

            // Look ahead over " ", "Z", " ", "if", " " without consuming anything.
            int peek1 = scanner.Peek();

            Assert.AreEqual(whitespaceToken.Index, peek1);

            int peek2 = scanner.Peek2();

            Assert.AreEqual(errorToken.Index, peek2);

            int peek3 = scanner.Peek(3);

            Assert.AreEqual(whitespaceToken.Index, peek3);

            int peek4 = scanner.Peek(4);

            Assert.AreEqual(idToken.Index, peek4);

            int peek5 = scanner.Peek(5);

            Assert.AreEqual(whitespaceToken.Index, peek5);

            // Consume the whitespace, then "Z".
            scanner.Read();
            Lexeme errorLexeme = scanner.Read();

            Assert.AreEqual(errorToken.Index, errorLexeme.TokenIndex);

            // After two reads, the next token is the one previously seen at look-ahead 3.
            int peekAfterReads = scanner.Peek();

            Assert.AreEqual(peek3, peekAfterReads);

            // Drain the remaining eight lexemes:
            // whitespace, ID:if, whitespace, ID:vvv, whitespace, ID:xmlns, whitespace, NUM:772737
            for (int remaining = 8; remaining > 0; remaining--)
            {
                scanner.Read();
            }

            // The end-of-stream lexeme is zero-width and sits at the end of the source text.
            Lexeme endOfStream = scanner.Read();

            Assert.AreEqual(info.EndOfStreamTokenIndex, endOfStream.TokenIndex);
            Assert.AreEqual(endOfStream.Span.StartLocation.CharIndex, endOfStream.Span.EndLocation.CharIndex);
            Assert.AreEqual(source.Length, endOfStream.Span.StartLocation.CharIndex);

            // Reading again after end of stream should return end of stream again.
            Lexeme endOfStreamAgain = scanner.Read();

            Assert.AreEqual(info.EndOfStreamTokenIndex, endOfStreamAgain.TokenIndex);
            Assert.AreEqual(endOfStream.Span.StartLocation.CharIndex, endOfStreamAgain.Span.StartLocation.CharIndex);
        }