static void _RunLexer()
{
    // compile a lexer
    var prog = Lex.CompileLexerRegex(true,
        @"[A-Z_a-z][A-Z_a-z0-9]*", // id
        @"0|(\-?[1-9][0-9]*)",     // int
        @"( |\t|\r|\n|\v|\f)"      // space
    );
    // dump the program to the console
    Console.WriteLine(Lex.Disassemble(prog));
    // our test data - 14 tokens, 29 characters
    var text = "fubar bar 123 1foo bar -243 0";
    Console.WriteLine("Lex: " + text);
    // spin up a lexer context
    // see: https://www.codeproject.com/Articles/5256794/LexContext-A-streamlined-cursor-over-a-text-input
    var lc = LexContext.Create(text);
    // while there's more input to be read
    while (LexContext.EndOfInput != lc.Current)
    {
        // clear any currently captured data
        lc.ClearCapture();
        // lex our next token and dump it
        Console.WriteLine("{0}: \"{1}\"", Lex.Run(prog, lc), lc.GetCapture());
    }
    var sw = new Stopwatch();
    const int ITER = 1000;
    for (var i = 0; i < ITER; ++i)
    {
        lc = LexContext.Create(text);
        while (LexContext.EndOfInput != lc.Current)
        {
            lc.ClearCapture();
            // only the Lex.Run call itself is timed; context creation
            // and capture management are excluded
            sw.Start();
            var acc = Lex.Run(prog, lc);
            sw.Stop();
        }
    }
    Console.WriteLine("Lexed in " + sw.ElapsedMilliseconds / (float)ITER + " msec");
}
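Assuming symbol ids are assigned in the order the expressions were passed (consistent with the FA.Parse(..., 0), FA.Parse(..., 1), FA.Parse(..., 2) calls in the final listing, where id = 0, int = 1, and space = 2), the first loop's output for the test string should look something like this - note how "1foo" splits into an int followed by an id under maximal munch:

// expected console output (illustrative; ids assumed from expression order)
0: "fubar"
2: " "
0: "bar"
2: " "
1: "123"
2: " "
1: "1"
0: "foo"
2: " "
0: "bar"
2: " "
1: "-243"
2: " "
1: "0"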
static void Main(string[] args)
{
    var text = "foo 123 bar";
    using (var sr = new StreamReader(@"..\..\Program.cs"))
        text = sr.ReadToEnd();
    Console.WriteLine("Lex: " + text);
    var tokenizer = new TestTokenizer(text); // generated from Example.lx
    Console.WriteLine("Disassembly:");
    Console.WriteLine(Lex.Disassemble(TestTokenizer.Program));
    Console.WriteLine();
    foreach (var tok in tokenizer)
    {
        // we don't want errors or whitespace, but we don't know the symbol
        // id for whitespace because you can switch tokenizers around,
        // so we check tok.Value instead
        if (-1 != tok.SymbolId && !string.IsNullOrWhiteSpace(tok.Value))
        {
            Console.WriteLine("{0}: {1}", tok.SymbolId, tok.Value);
        }
    }
    var sw = new Stopwatch();
    const int ITER = 1000;
    for (var i = 0; i < ITER; ++i)
    {
        var lc = LexContext.Create(text);
        while (LexContext.EndOfInput != lc.Current)
        {
            lc.ClearCapture();
            sw.Start();
            var acc = Lex.Run(TestTokenizer.Program, lc);
            sw.Stop();
        }
    }
    Console.WriteLine("Lexed in " + sw.ElapsedMilliseconds / (float)ITER + " msec");
}
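The token type the tokenizer yields isn't declared in these listings. From the members used above, a minimal sketch of its shape might be the following - the name and field types are inferred from usage, and the real type likely carries additional members such as position information:

// Hypothetical minimal shape of the yielded token, inferred from usage
struct Token
{
    public int SymbolId;  // -1 indicates an error token
    public string Value;  // the captured text for this token
}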
static void Main2()
{
    var id = @"[A-Z_a-z][A-Z_a-z0-9]*";
    var @int = @"0|(\-?[1-9][0-9]*)";
    var space = @"( |\t|\r|\n|\v|\f)";
    // render the optimized execution graph for each expression
    Lex.RenderOptimizedExecutionGraph(id, @"..\..\id_nfa.jpg");
    Lex.RenderOptimizedExecutionGraph(@int, @"..\..\int_nfa.jpg");
    Lex.RenderOptimizedExecutionGraph(space, @"..\..\space_nfa.jpg");
    var prog = Lex.CompileLexerRegex(true,
        id,    // id
        @int,  // int
        space  // space
    );
    // discard the lexer above and compile just the int expression as a
    // standalone part, then finalize it so it can be run on its own
    prog = Lex.CompileRegexPart(@int);
    prog = Lex.FinalizePart(prog);
    Console.WriteLine(Lex.Disassemble(prog));
    Console.WriteLine(Lex.RunWithLogging(prog, LexContext.Create("123"), Console.Out));
}
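For contrast, running the same finalized part against input that doesn't match should report failure. This is a hedged sketch under the assumption, taken from the Test() listing below, that -1 signals no match; the exact non-negative value returned on success isn't shown in these listings:

// Hypothetical negative test for a finalized regex part.
// Assumes -1 means "no match", per the -1 checks in Test() below.
static void Main2NegativeTest()
{
    var prog = Lex.FinalizePart(Lex.CompileRegexPart(@"0|(\-?[1-9][0-9]*)"));
    Console.WriteLine(Lex.Run(prog, LexContext.Create("abc"))); // expected: -1 (no match)
    Console.WriteLine(Lex.Run(prog, LexContext.Create("123"))); // expected: a non-negative result
}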
static void Main()
{
    var test = "fubar bar 123 1foo bar -243 0";
    Console.WriteLine("Lex: " + test);
    var prog = Lex.CompileLexerRegex(false,
        @"[A-Z_a-z][A-Z_a-z0-9]*", // id
        @"0|(\-?[1-9][0-9]*)",     // int
        @"( |\t|\r|\n|\v|\f)"      // space
    );
    Console.WriteLine("Unoptimized dump:");
    Console.WriteLine(Lex.Disassemble(prog));
    Console.WriteLine();
    var progOpt = Lex.CompileLexerRegex(true,
        @"[A-Z_a-z][A-Z_a-z0-9]*", // id
        @"0|(\-?[1-9][0-9]*)",     // int
        @"( |\t|\r|\n|\v|\f)"      // space
    );
    Console.WriteLine("Optimized dump:");
    Console.WriteLine(Lex.Disassemble(progOpt));
    Console.WriteLine();
    var progDfa = Lex.AssembleFrom(@"..\..\dfa.lasm");
    Console.WriteLine("DFA dump:");
    Console.WriteLine(Lex.Disassemble(progDfa));
    Console.WriteLine();
    for (var i = 0; i < 10; ++i)
    {
        Console.WriteLine("Pass #" + (i + 1));
        Console.Write("NFA: ");
        Perf(prog, test);
        Console.Write("NFA+DFA (optimized): ");
        Perf(progOpt, test);
        Console.Write("DFA: ");
        Perf(progDfa, test);
    }
}
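Lex.AssembleFrom() loads a program from lexer assembly source. The contents of dfa.lasm aren't reproduced in these listings, but the assembly syntax can be seen in the inline test string of the Test() listing below; a fragment in that style looks like the following. Treat it as an illustrative sample of the format, not the actual contents of dfa.lasm:

switch case "a":L0001, case "b":L0002, default: L0004
L0001: char "b"
L0002: char "c"
L0003: match 1
L0004: any
L0005: match -1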
static void Test()
{
    // sample lexer assembly source (unused below - prog is loaded
    // from int.lasm instead)
    var test = "switch case \"a\":L0001, case \"b\":L0002, default: L0004\r\n" +
        "L0001: char \"b\"\r\n" +
        "L0002: char \"c\"\r\n" +
        "L0003: match 1\r\n" +
        "L0004: any\r\n" +
        "L0005: match -1\r\n";
    var prog = Lex.AssembleFrom(@"..\..\int.lasm");
    //Console.WriteLine(Lex.Disassemble(prog));
    var lc = LexContext.Create("1000");
    //Console.WriteLine("{0}: {1}", Lex.Run(prog, lc), lc.GetCapture());

    // a C style quoted string with escapes - the raw pattern is:
    // "((\\(['\\"abfnrtv0]|[0-7]{3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}))|[^\\"])*"
    test = @"""((\\(['\\""abfnrtv0]|[0-7]{3}|x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}))|[^\\""])*""";
    //Lex.RenderGraph(LexContext.Create(test), @"..\..\string_nfa.jpg");
    prog = Lex.CompileRegexPart(test);
    prog = Lex.FinalizePart(prog);
    Console.WriteLine(Lex.Disassemble(prog));
    test = "\"\\\"\\tHello World!\\\"\"";
    lc = LexContext.Create(test);
    if (-1 != Lex.RunWithLogging(prog, lc, Console.Error))
    {
        Console.Write("Matched " + test + ": ");
        Console.WriteLine(lc.GetCapture());
    }
    else
    {
        Console.Write("Matched " + test + ": ");
        Console.WriteLine("False - failed at position " + lc.Position);
    }
    return; // scratch code below is intentionally unreachable
    _RunLexer();
}
static void Main(string[] args)
{
    var text = "foo 123 bar";
    // use this source file itself as the test data
    using (var sr = new StreamReader(@"..\..\Program.cs"))
        text = sr.ReadToEnd();
    Console.WriteLine("Lex: " + text);
    var tokenizer = new SlangTokenizer(text); // generated from Example.lx
    Console.WriteLine("Disassembly:");
    Console.WriteLine(Lex.Disassemble(SlangTokenizer.Program));
    Console.WriteLine();
    foreach (var tok in tokenizer)
    {
        Console.WriteLine("{0}: {1}", tok.SymbolId, tok.Value);
    }
    var sw = new Stopwatch();
    const int ITER = 1000;
    for (var i = 0; i < ITER; ++i)
    {
        var lc = LexContext.Create(text);
        while (LexContext.EndOfInput != lc.Current)
        {
            lc.ClearCapture();
            sw.Start();
            var acc = Lex.Run(SlangTokenizer.Program, lc);
            sw.Stop();
        }
    }
    Console.WriteLine("Lexed in " + sw.ElapsedMilliseconds / (float)ITER + " msec");
}
static int[][] _BuildLexer(IList<_LexRule> rules, bool dump)
{
    if (dump)
    {
        Console.WriteLine();
    }
    // map each rule to an (id, expression) pair - literals stay strings,
    // regexes are parsed to an AST, and precompiled parts pass through
    var exprs = new List<KeyValuePair<int, object>>();
    for (int ic = rules.Count, i = 0; i < ic; ++i)
    {
        var r = rules[i];
        if (null != r.Literal)
        {
            exprs.Add(new KeyValuePair<int, object>(r.Id, r.Literal));
        }
        else if (null != r.Regex)
        {
            exprs.Add(new KeyValuePair<int, object>(r.Id, Ast.Parse(LexContext.Create(r.Regex))));
        }
        else
        {
            exprs.Add(new KeyValuePair<int, object>(r.Id, r.Part));
        }
    }
    var prog = Lex.CompileLexer(true, exprs.ToArray());
    if (dump)
    {
        Console.Error.WriteLine("Disassembly of optimized lexer:");
        Console.Error.WriteLine(Lex.Disassemble(prog));
        Console.Error.WriteLine();
    }
    return prog;
}
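The _LexRule type isn't defined in these listings. Based on the members _BuildLexer touches, a minimal sketch might look like the following; the field types are assumptions inferred from usage, not the actual declaration:

// Hypothetical shape of _LexRule, inferred from _BuildLexer's usage.
// Exactly one of Literal, Regex, or Part is expected to be non-null.
class _LexRule
{
    public int Id;          // the symbol id reported on a match
    public string Literal;  // a literal string to match, or null
    public string Regex;    // a regular expression to match, or null
    public object Part;     // a precompiled program part, or null
}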
static void Main()
{
    var test = "fubar bar 123 1foo bar -243 0 baz 83";
    Console.WriteLine("Lex: " + test);
    var prog = Lex.CompileLexerRegex(false,
        @"[A-Z_a-z][A-Z_a-z0-9]*", // id
        @"0|(\-?[1-9][0-9]*)",     // int
        @"( |\t|\r|\n|\v|\f)"      // space
    );
    Console.WriteLine("Unoptimized dump:");
    Console.WriteLine(Lex.Disassemble(prog));
    Console.WriteLine();
    var progOpt = Lex.CompileLexerRegex(true,
        @"[A-Z_a-z][A-Z_a-z0-9]*", // id
        @"0|(\-?[1-9][0-9]*)",     // int
        @"( |\t|\r|\n|\v|\f)"      // space
    );
    Console.WriteLine("Optimized dump:");
    Console.WriteLine(Lex.Disassemble(progOpt));
    Console.WriteLine();
    var progDfa = Lex.AssembleFrom(@"..\..\dfa.lasm");
    Console.WriteLine("DFA dump:");
    Console.WriteLine(Lex.Disassemble(progDfa));
    Console.WriteLine();
    // build the raw NFA and DFA state machines for comparison
    var nfa = FA.ToLexer(new FA[] {
        FA.Parse(@"[A-Z_a-z][A-Z_a-z0-9]*", 0),
        FA.Parse(@"0|(\-?[1-9][0-9]*)", 1),
        FA.Parse(@"( |\t|\r|\n|\v|\f)", 2)
    });
    var dfa = nfa.ToDfa();
    dfa.TrimDuplicates();
    var dfaTable = dfa.ToDfaStateTable();
    var result = -1;
    var count = 0;
    var maxFiberCount = 0;
    var avgCharPasses = 0f;
    var lc = LexContext.Create(test);
    while (LexContext.EndOfInput != lc.Current)
    {
        var stats = Lex.RunWithLoggingAndStatistics(prog, lc, TextWriter.Null, out result);
        // track the worst case observed across all runs
        if (stats.MaxFiberCount > maxFiberCount)
        {
            maxFiberCount = stats.MaxFiberCount;
        }
        if (stats.AverageCharacterPasses > avgCharPasses)
        {
            avgCharPasses = stats.AverageCharacterPasses;
        }
        ++count;
    }
    Console.WriteLine("NFA ran with " + maxFiberCount + " max fibers and " + avgCharPasses + " average char passes");
    count = 0;
    maxFiberCount = 0;
    avgCharPasses = 0f;
    lc = LexContext.Create(test);
    while (LexContext.EndOfInput != lc.Current)
    {
        var stats = Lex.RunWithLoggingAndStatistics(progOpt, lc, TextWriter.Null, out result);
        if (stats.MaxFiberCount > maxFiberCount)
        {
            maxFiberCount = stats.MaxFiberCount;
        }
        if (stats.AverageCharacterPasses > avgCharPasses)
        {
            avgCharPasses = stats.AverageCharacterPasses;
        }
        ++count;
    }
    Console.WriteLine("NFA+DFA (optimized) ran with " + maxFiberCount + " max fibers and " + avgCharPasses + " average char passes");
    count = 0;
    maxFiberCount = 0;
    avgCharPasses = 0f;
    lc = LexContext.Create(test);
    while (LexContext.EndOfInput != lc.Current)
    {
        var stats = Lex.RunWithLoggingAndStatistics(progDfa, lc, TextWriter.Null, out result);
        if (stats.MaxFiberCount > maxFiberCount)
        {
            maxFiberCount = stats.MaxFiberCount;
        }
        if (stats.AverageCharacterPasses > avgCharPasses)
        {
            avgCharPasses = stats.AverageCharacterPasses;
        }
        ++count;
    }
    Console.WriteLine("DFA ran with " + maxFiberCount + " max fibers and " + avgCharPasses + " average char passes");
#if RUNPERF
    // double the test data five times (32x the original length)
    for (var i = 0; i < 5; ++i)
    {
        test = string.Concat(test, test);
    }
    for (var i = 0; i < 10; ++i)
    {
        Console.WriteLine("Pass #" + (i + 1));
        Console.Write("NFA: ");
        _Perf(prog, test);
        Console.WriteLine();
        Console.Write("NFA+DFA (optimized): ");
        _Perf(progOpt, test);
        Console.WriteLine();
        Console.Write("DFA: ");
        _Perf(progDfa, test);
        Console.WriteLine();
        Console.Write("NFA (raw): ");
        _Perf(nfa, test);
        Console.WriteLine();
        Console.Write("DFA (raw): ");
        _Perf(dfaTable, test);
        Console.WriteLine();
        Console.WriteLine();
    }
    Console.WriteLine();
#endif
    _RunLexer(progOpt);
    Console.Error.WriteLine();
}
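The Perf/_Perf helper called in the benchmark passes isn't shown in these listings. A minimal sketch consistent with the timing loops in the earlier listings might look like the following; the name, signature, and output format are assumptions, and the overloads taking the raw NFA and DFA state table would be analogous:

// Hypothetical _Perf helper, modeled on the timing loops shown earlier:
// times only the Lex.Run calls over a fixed number of iterations.
static void _Perf(int[][] prog, string text)
{
    const int ITER = 1000;
    var sw = new Stopwatch();
    for (var i = 0; i < ITER; ++i)
    {
        var lc = LexContext.Create(text);
        while (LexContext.EndOfInput != lc.Current)
        {
            lc.ClearCapture();
            sw.Start();
            Lex.Run(prog, lc);
            sw.Stop();
        }
    }
    Console.Write("Lexed in " + sw.ElapsedMilliseconds / (float)ITER + " msec");
}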