Ejemplo n.º 1
0
        /// <summary>
        /// Compiles a three-rule lexer (id, int, space), prints its disassembly,
        /// lexes a sample string to the console token by token, and finally reports
        /// the time spent inside <c>Lex.Run</c> averaged over 1000 passes of that string.
        /// </summary>
        static void _RunLexer()
        {
            // rule order: id, int, space
            var program = Lex.CompileLexerRegex(true,
                                                @"[A-Z_a-z][A-Z_a-z0-9]*",
                                                @"0|(\-?[1-9][0-9]*)",
                                                @"( |\t|\r|\n|\v|\f)");

            // show the compiled program
            Console.WriteLine(Lex.Disassemble(program));

            // sample input, 29 characters long
            var input = "fubar bar 123 1foo bar -243 0";
            Console.WriteLine("Lex: " + input);

            // cursor over the input
            // see: https://www.codeproject.com/Articles/5256794/LexContext-A-streamlined-cursor-over-a-text-input
            var context = LexContext.Create(input);

            // print one "<result>: "<capture>"" line per Lex.Run call until the input runs out
            while (LexContext.EndOfInput != context.Current)
            {
                // start each token with an empty capture
                context.ClearCapture();
                var symbol = Lex.Run(program, context);
                Console.WriteLine("{0}: \"{1}\"", symbol, context.GetCapture());
            }

            const int Passes = 1000;
            var timer = new Stopwatch();

            for (var pass = 0; pass < Passes; ++pass)
            {
                context = LexContext.Create(input);
                while (LexContext.EndOfInput != context.Current)
                {
                    context.ClearCapture();
                    // only the Lex.Run call itself is timed
                    timer.Start();
                    Lex.Run(program, context);
                    timer.Stop();
                }
            }

            // accumulated milliseconds divided by the number of passes
            float msecPerPass = timer.ElapsedMilliseconds / (float)Passes;
            Console.WriteLine("Lexed in " + msecPerPass + " msec");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads <c>..\..\Program.cs</c>, echoes it, prints the disassembly of
        /// <c>TestTokenizer.Program</c>, lists every token that is neither an error nor
        /// whitespace, and then reports the time spent in <c>Lex.Run</c> averaged over 1000 passes.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string text;
            using (var reader = new StreamReader(@"..\..\Program.cs"))
            {
                text = reader.ReadToEnd();
            }

            Console.WriteLine("Lex: " + text);

            // generated from Example.lx
            var tokenizer = new TestTokenizer(text);

            Console.WriteLine("Disassembly:");
            Console.WriteLine(Lex.Disassemble(TestTokenizer.Program));
            Console.WriteLine();

            foreach (var tok in tokenizer)
            {
                // Skip errors (symbol id -1) and whitespace. The whitespace symbol id
                // depends on which tokenizer is plugged in, so the token text is
                // inspected instead of the id.
                if (-1 == tok.SymbolId || string.IsNullOrWhiteSpace(tok.Value))
                {
                    continue;
                }

                Console.WriteLine("{0}: {1}", tok.SymbolId, tok.Value);
            }

            const int Passes = 1000;
            var timer = new Stopwatch();

            for (var pass = 0; pass < Passes; ++pass)
            {
                var context = LexContext.Create(text);
                while (LexContext.EndOfInput != context.Current)
                {
                    context.ClearCapture();
                    // only the Lex.Run call itself is timed
                    timer.Start();
                    Lex.Run(TestTokenizer.Program, context);
                    timer.Stop();
                }
            }

            float msecPerPass = timer.ElapsedMilliseconds / (float)Passes;
            Console.WriteLine("Lexed in " + msecPerPass + " msec");
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Renders an execution graph image for each of the id, int and space patterns,
        /// compiles the three into a lexer, then compiles the int pattern on its own,
        /// prints its disassembly and runs it with logging against the input "123".
        /// </summary>
        static void Main2()
        {
            var idPattern    = @"[A-Z_a-z][A-Z_a-z0-9]*";
            var intPattern   = @"0|(\-?[1-9][0-9]*)";
            var spacePattern = @"( |\t|\r|\n|\v|\f)";

            // one graph image per pattern
            Lex.RenderOptimizedExecutionGraph(idPattern, @"..\..\id_nfa.jpg");
            Lex.RenderOptimizedExecutionGraph(intPattern, @"..\..\int_nfa.jpg");
            Lex.RenderOptimizedExecutionGraph(spacePattern, @"..\..\space_nfa.jpg");

            // full lexer: id, int, space
            // NOTE(review): this result is replaced below before anything reads it
            var program = Lex.CompileLexerRegex(true, idPattern, intPattern, spacePattern);

            // the int pattern alone, compiled as a part and then finalized
            program = Lex.CompileRegexPart(intPattern);
            program = Lex.FinalizePart(program);
            Console.WriteLine(Lex.Disassemble(program));

            // run against "123", logging to stdout, and print what the run returned
            var context = LexContext.Create("123");
            var outcome = Lex.RunWithLogging(program, context, Console.Out);
            Console.WriteLine(outcome);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Prints the disassembly of three variants of the same id/int/space lexer
        /// (compiled without optimization, compiled with optimization, and assembled
        /// from <c>..\..\dfa.lasm</c>) and then calls <c>Perf</c> on each variant for ten passes.
        /// </summary>
        static void Main()
        {
            var idPattern    = @"[A-Z_a-z][A-Z_a-z0-9]*";
            var intPattern   = @"0|(\-?[1-9][0-9]*)";
            var spacePattern = @"( |\t|\r|\n|\v|\f)";

            var input = "fubar bar 123 1foo bar -243 0";
            Console.WriteLine("Lex: " + input);

            // first argument false: compile without optimization
            var plain = Lex.CompileLexerRegex(false, idPattern, intPattern, spacePattern);
            Console.WriteLine("Unoptimized dump:");
            Console.WriteLine(Lex.Disassemble(plain));
            Console.WriteLine();

            // first argument true: compile with optimization
            var optimized = Lex.CompileLexerRegex(true, idPattern, intPattern, spacePattern);
            Console.WriteLine("Optimized dump:");
            Console.WriteLine(Lex.Disassemble(optimized));
            Console.WriteLine();

            // program assembled from a file
            var assembled = Lex.AssembleFrom(@"..\..\dfa.lasm");
            Console.WriteLine("DFA dump:");
            Console.WriteLine(Lex.Disassemble(assembled));
            Console.WriteLine();

            for (var pass = 1; pass <= 10; ++pass)
            {
                Console.WriteLine("Pass #" + pass);

                Console.Write("NFA: ");
                Perf(plain, input);

                Console.Write("NFA+DFA (optimized): ");
                Perf(optimized, input);

                Console.Write("DFA: ");
                Perf(assembled, input);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Compiles a regular expression for a double-quoted string literal with
        /// backslash escapes, prints its disassembly, and runs it with logging
        /// (to stderr) against a sample literal, reporting either the captured text
        /// or the position at which matching failed.
        /// </summary>
        static void Test()
        {
            // NOTE(review): neither of these two results is ever read; both variables are
            // reassigned below. The calls are kept only because their side effects
            // (AssembleFrom is handed a file path) are not visible from here - confirm
            // and delete them if they are pure.
            var prog = Lex.AssembleFrom(@"..\..\int.lasm");
            var lc   = LexContext.Create("1000");

            // Unescaped, the pattern is:
            // "((\\(['\\"abfnrtv0]|[0-7]{3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}))|[^\\"])*"
            var pattern = @"""((\\(['\\""abfnrtv0]|[0-7]{3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}))|[^\\""])*""";

            prog = Lex.CompileRegexPart(pattern);
            prog = Lex.FinalizePart(prog);
            Console.WriteLine(Lex.Disassemble(prog));

            // sample input: a quoted string containing escaped quotes and an escaped tab
            var input = "\"\\\"\\tHello World!\\\"\"";

            lc = LexContext.Create(input);

            // -1 is treated as "no match"
            var matched = -1 != Lex.RunWithLogging(prog, lc, Console.Error);

            // the same prefix is printed for both outcomes
            Console.Write("Matched " + input + ": ");
            if (matched)
            {
                Console.WriteLine(lc.GetCapture());
            }
            else
            {
                Console.WriteLine("False - failed at position " + lc.Position);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Loads <c>..\..\Program.cs</c>, echoes it, prints the disassembly of
        /// <c>SlangTokenizer.Program</c>, lists every token the tokenizer yields, and
        /// then reports the time spent in <c>Lex.Run</c> averaged over 1000 passes.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string text;
            using (var reader = new StreamReader(@"..\..\Program.cs"))
            {
                text = reader.ReadToEnd();
            }

            Console.WriteLine("Lex: " + text);

            var tokenizer = new SlangTokenizer(text);

            Console.WriteLine("Disassembly:");
            Console.WriteLine(Lex.Disassemble(SlangTokenizer.Program));
            Console.WriteLine();

            // every token is printed, with no filtering
            foreach (var tok in tokenizer)
            {
                Console.WriteLine("{0}: {1}", tok.SymbolId, tok.Value);
            }

            const int Passes = 1000;
            var timer = new Stopwatch();

            for (var pass = 0; pass < Passes; ++pass)
            {
                var context = LexContext.Create(text);
                while (LexContext.EndOfInput != context.Current)
                {
                    context.ClearCapture();
                    // only the Lex.Run call itself is timed
                    timer.Start();
                    Lex.Run(SlangTokenizer.Program, context);
                    timer.Stop();
                }
            }

            float msecPerPass = timer.ElapsedMilliseconds / (float)Passes;
            Console.WriteLine("Lexed in " + msecPerPass + " msec");
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Compiles an optimized lexer program from a list of rules.
        /// </summary>
        /// <param name="rules">
        /// Rules to compile. Each rule contributes one (Id, expression) pair, in list order:
        /// its Literal when that is non-null, otherwise its Regex parsed into an AST when
        /// that is non-null, otherwise its Part.
        /// </param>
        /// <param name="dump">
        /// When true, a blank line is written to stdout before compiling and the
        /// disassembly of the result is written to stderr afterwards.
        /// </param>
        /// <returns>The program produced by <c>Lex.CompileLexer</c>.</returns>
        static int[][] _BuildLexer(IList <_LexRule> rules, bool dump)
        {
            if (dump)
            {
                Console.WriteLine();
            }

            var expressions = new List<KeyValuePair<int, object>>();

            foreach (var rule in rules)
            {
                // choose the expression source by precedence: Literal, then Regex, then Part
                object expression;
                if (null != rule.Literal)
                {
                    expression = rule.Literal;
                }
                else if (null != rule.Regex)
                {
                    expression = Ast.Parse(LexContext.Create(rule.Regex));
                }
                else
                {
                    expression = rule.Part;
                }

                expressions.Add(new KeyValuePair<int, object>(rule.Id, expression));
            }

            var program = Lex.CompileLexer(true, expressions.ToArray());

            if (dump)
            {
                var log = Console.Error;
                log.WriteLine("Disassembly of optimized lexer:");
                log.WriteLine(Lex.Disassemble(program));
                log.WriteLine();
            }

            return program;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Compares three variants of the same id/int/space lexer: compiled without
        /// optimization, compiled with optimization, and assembled from
        /// <c>..\..\dfa.lasm</c>. For each variant it prints the disassembly, then lexes
        /// the sample input and reports the largest fiber count and the largest
        /// per-token "average character passes" value observed across all tokens.
        /// When RUNPERF is defined it also runs timing passes, including the raw
        /// FA-based NFA and DFA table. Finally it hands the optimized program to
        /// <c>_RunLexer</c>.
        /// </summary>
        static void Main()
        {
            var test = "fubar bar 123 1foo bar -243 0 baz 83";

            Console.WriteLine("Lex: " + test);
            var prog = Lex.CompileLexerRegex(false,
                                             @"[A-Z_a-z][A-Z_a-z0-9]*", // id
                                             @"0|(\-?[1-9][0-9]*)",     // int
                                             @"( |\t|\r|\n|\v|\f)"      // space
                                             );

            Console.WriteLine("Unoptimized dump:");
            Console.WriteLine(Lex.Disassemble(prog));
            Console.WriteLine();

            var progOpt = Lex.CompileLexerRegex(true,
                                                @"[A-Z_a-z][A-Z_a-z0-9]*", // id
                                                @"0|(\-?[1-9][0-9]*)",     // int
                                                @"( |\t|\r|\n|\v|\f)"      // space
                                                );

            Console.WriteLine("Optimized dump:");
            Console.WriteLine(Lex.Disassemble(progOpt));
            Console.WriteLine();
            var progDfa = Lex.AssembleFrom(@"..\..\dfa.lasm");

            Console.WriteLine("DFA dump:");
            Console.WriteLine(Lex.Disassemble(progDfa));
            Console.WriteLine();

            // the same three rules built through the FA API; only the RUNPERF section reads these
            var nfa = FA.ToLexer(new FA[]
            {
                FA.Parse(@"[A-Z_a-z][A-Z_a-z0-9]*", 0),
                FA.Parse(@"0|(\-?[1-9][0-9]*)", 1),
                FA.Parse(@"( |\t|\r|\n|\v|\f)", 2)
            });
            var dfa = nfa.ToDfa();

            dfa.TrimDuplicates();
            var dfaTable = dfa.ToDfaStateTable();

            // receives the out value of each run; not otherwise inspected here
            var        result        = -1;
            var        maxFiberCount = 0;
            var        avgCharPasses = 0f;
            LexContext lc            = LexContext.Create(test);

            // unoptimized program: keep the largest value seen across all tokens
            while (LexContext.EndOfInput != lc.Current)
            {
                var stats = Lex.RunWithLoggingAndStatistics(prog, lc, TextWriter.Null, out result);
                // running maximum; a plain assignment would report only the last token's count
                if (stats.MaxFiberCount > maxFiberCount)
                {
                    maxFiberCount = stats.MaxFiberCount;
                }

                if (stats.AverageCharacterPasses > avgCharPasses)
                {
                    avgCharPasses = stats.AverageCharacterPasses;
                }
            }
            Console.WriteLine("NFA ran with " + maxFiberCount + " max fibers and " + avgCharPasses + " average char passes");

            // optimized program
            maxFiberCount = 0;
            avgCharPasses = 0f;
            lc            = LexContext.Create(test);
            while (LexContext.EndOfInput != lc.Current)
            {
                var stats = Lex.RunWithLoggingAndStatistics(progOpt, lc, TextWriter.Null, out result);
                if (stats.MaxFiberCount > maxFiberCount)
                {
                    maxFiberCount = stats.MaxFiberCount;
                }

                if (stats.AverageCharacterPasses > avgCharPasses)
                {
                    avgCharPasses = stats.AverageCharacterPasses;
                }
            }
            Console.WriteLine("NFA+DFA (optimized) ran with " + maxFiberCount + " max fibers and " + avgCharPasses + " average char passes");

            // program assembled from dfa.lasm
            maxFiberCount = 0;
            avgCharPasses = 0f;
            lc            = LexContext.Create(test);
            while (LexContext.EndOfInput != lc.Current)
            {
                var stats = Lex.RunWithLoggingAndStatistics(progDfa, lc, TextWriter.Null, out result);
                if (stats.MaxFiberCount > maxFiberCount)
                {
                    maxFiberCount = stats.MaxFiberCount;
                }

                if (stats.AverageCharacterPasses > avgCharPasses)
                {
                    avgCharPasses = stats.AverageCharacterPasses;
                }
            }
            Console.WriteLine("DFA ran with " + maxFiberCount + " max fibers and " + avgCharPasses + " average char passes");
#if RUNPERF
            // double the input five times (32x the original length) for the timing runs
            for (var i = 0; i < 5; ++i)
            {
                test = string.Concat(test, test);
            }

            for (var i = 0; i < 10; ++i)
            {
                Console.WriteLine("Pass #" + (i + 1));
                Console.Write("NFA: ");
                _Perf(prog, test);
                Console.WriteLine();
                Console.Write("NFA+DFA (optimized): ");
                _Perf(progOpt, test);
                Console.WriteLine();
                Console.Write("DFA: ");
                _Perf(progDfa, test);
                Console.WriteLine();
                Console.Write("NFA (raw): ");
                _Perf(nfa, test);
                Console.WriteLine();
                Console.Write("DFA (raw): ");
                _Perf(dfaTable, test);
                Console.WriteLine();
                Console.WriteLine();
            }
            Console.WriteLine();
#endif
            _RunLexer(progOpt);
            Console.Error.WriteLine();
        }