/// <summary>
/// Builds the non-public <c>NextToken</c> method model and adds it to <paramref name="cls"/>.
/// </summary>
/// <remarks>
/// The generated method body is assembled in this order: local initialisation, an EOF check,
/// a loop advancing <c>Pos</c> over "skip" characters, a second EOF check, branches for
/// "mark" tokens, branches for "word" tokens, branches for every remaining token type, and
/// the final <c>return</c>. A console warning is printed for each of the "skip", "mark" and
/// "word" token types that is absent from <c>Tokens</c>.
/// </remarks>
/// <param name="cls">Class model that receives the generated method.</param>
private void GenerateTokeniser(C.CsClass cls)
{
    const string eofCondition = "Pos >= Input.Length";
    const string eofReturn = "return new Tuple<TokenType, string>(TokenType.TEOF, \"\")";

    // Values of all "skip" and "mark" tokens; passed on to GenerateBranches for the
    // "word" and remaining token types.
    var skipmark = new List<string>();
    var tok = new C.CsMethod
    {
        IsPublic = false,
        Name = "NextToken",
        RetType = "Tuple<TokenType, string>"
    };

    // init phase
    tok.AddCode("TokenType t = TokenType.TUndefined;");
    tok.AddCode("string s = \"\";");

    // EOF phase
    tok.AddCode(new C.IfThenElse(eofCondition, eofReturn));

    // skip
    if (Tokens.ContainsKey("skip"))
    {
        var skipTokens = Tokens["skip"];
        string cond = String.Join(" || ", skipTokens.Select(t => $"Input[Pos] == '{t.Value}'"));
        tok.AddCode(new C.WhileStmt($"Pos < Input.Length && ({cond})", "Pos++"));
        skipmark.AddRange(skipTokens.Select(t => t.Value));
    }
    else
    {
        Console.WriteLine("[IR] It is suspicious that there are no tokens of type 'skip'");
    }

    // EOF after skip; the same statement object is handed to every GenerateBranches call below
    var megaIf = new C.IfThenElse(eofCondition, eofReturn);
    tok.AddCode(megaIf);

    // mark
    if (Tokens.ContainsKey("mark"))
    {
        skipmark.AddRange(Tokens["mark"].Select(t => t.Value));
        GenerateBranches("mark", megaIf, null);
    }
    else
    {
        // Previously this message named 'word' by mistake.
        Console.WriteLine("[IR] It is suspicious that there are no tokens of type 'mark'");
    }

    // word
    if (Tokens.ContainsKey("word"))
    {
        GenerateBranches("word", megaIf, skipmark);
    }
    else
    {
        Console.WriteLine("[IR] It is suspicious that there are no tokens of type 'word'");
    }

    // number etc
    foreach (var tt in Tokens.Keys.Where(tt => tt != "skip" && tt != "word" && tt != "mark"))
    {
        GenerateBranches(tt, megaIf, skipmark);
    }

    tok.AddCode("return new Tuple<TokenType, string>(t, s);");
    cls.AddMethod(tok);
}
/// <summary>
/// Builds the class model for the generated <c>Parser</c> (deriving from <c>BaseParser</c>).
/// </summary>
/// <remarks>
/// The model contains: the usings, optional <c>bool</c>/<c>int</c> flag fields, the nested
/// <c>TokenType</c> enum, a constructor taking the input string, the <c>Parse</c> method with
/// its token-dispatch loop, and the tokeniser added by <c>GenerateTokeniser</c>.
/// A console note is printed for every token type that has no handler.
/// </remarks>
/// <returns>The populated class model.</returns>
public C.CsClass GenerateParser()
{
    var p = new C.CsClass { NS = NS, Name = "Parser", Super = "BaseParser" };
    p.AddUsing("EngageRuntime");
    p.AddUsing("System");
    p.AddUsing("System.Collections.Generic");

    // Flags of one type are declared together as a single comma-separated field, sorted by name.
    if (BoolFlags.Count > 0)
    {
        p.AddField(String.Join(", ", BoolFlags.OrderBy(x => x)), "bool", isPublic: false);
    }

    if (IntFlags.Count > 0)
    {
        p.AddField(String.Join(", ", IntFlags.OrderBy(x => x)), "int", isPublic: false);
    }

    // token types
    var tt = new C.CsEnum { IsPublic = false, Name = "TokenType" };
    tt.Add("TUndefined");
    tt.Add("TEOF");
    tt.Add(Tokens.Keys.Where(name => name != "skip").Select(name => "T" + name));
    p.AddInner(tt);

    // parser constructor
    var pc = new C.CsConstructor { InheritFromBase = true };
    pc.AddArgument("input", "string");
    p.AddConstructor(pc);

    // the parse function
    var pf = new C.CsMethod { Name = "Parse", RetType = "object" };
    pf.AddCode("string ERROR = \"\"");
    pf.AddCode("TokenType type");
    pf.AddCode("string lexeme");

    // NOTE(review): reversed: true presumably emits a post-tested (do/while) loop — confirm in C.WhileStmt.
    var pl = new C.WhileStmt("type != TokenType.TEOF", reversed: true);

    // main parsing loop: begin
    pl.AddCode("var _token = NextToken();");
    pl.AddCode("lexeme = _token.Item2;");
    pl.AddCode("type = _token.Item1;");

    var swType = new C.SwitchCaseStmt { Expression = "type" };
    // "skip" is pre-seeded so it is never reported as unused below.
    var usedTokens = new HashSet<string> { "skip" };

    foreach (var hpk in Handlers.Keys)
    {
        var handlers = Handlers[hpk];
        var branchType = new List<C.CsStmt>();

        if (hpk == "EOF")
        {
            branchType.Add(new C.SimpleStmt("Flush()"));
        }

        if (handlers.Count == 1)
        {
            handlers[0].GenerateAbstractCode(branchType);
        }
        else
        {
            var swLex = new C.SwitchCaseStmt();

            // much faster to switch-case on a char than on a string
            bool matchChar = handlers.Select(hp => hp.ReactOn.Value).All(v => v.Length == 1);
            swLex.Expression = "lexeme" + (matchChar ? "[0]" : "");

            // Need this dance because there may be different actions for the same token with different guards
            HandlerMetaCollection resortedHandlers = new HandlerMetaCollection();
            foreach (var hp in handlers)
            {
                resortedHandlers.Add(hp);
            }

            foreach (var key in resortedHandlers.SortedKeys())
            {
                GenerateLexBranch(
                    swLex,
                    hpk,
                    resortedHandlers.GuardFlags(key),
                    resortedHandlers.Recipes(key),
                    key,
                    matchChar);
            }

            branchType.Add(swLex);
        }

        swType.Branches["TokenType.T" + hpk] = branchType;
        usedTokens.Add(hpk);
    }

    foreach (var t in Tokens.Keys)
    {
        if (!usedTokens.Contains(t))
        {
            Console.WriteLine($"[B2C] unused token {t}");
        }

        foreach (B.TokenPlan tok in Tokens[t])
        {
            if (!tok.Special)
            {
                continue;
            }

            // Special tokens get a branch that pushes a value derived from the lexeme;
            // it is stored under the same key format as the handler branches above.
            var branchType = new List<C.CsStmt>();
            string todo = tok.Value switch
            {
                "number" => "System.Int32.Parse(lexeme)",
                "string" => "lexeme",
                _ => ""
            };
            todo = PossiblyWrap(todo, tok.Value);
            branchType.Add(new C.SimpleStmt($"Push({todo})"));
            swType.Branches["TokenType.T" + t] = branchType;
        }
    }

    pl.AddCode(swType);

    // After each dispatch: if ERROR was set, report it and abort the parse.
    const string cond = "!System.String.IsNullOrEmpty(ERROR)";
    var abend = new C.IfThenElse();
    abend.AddToBranch(cond, "Console.WriteLine(\"Parser error: \" + ERROR);");
    abend.AddToBranch(cond, "return null;");
    pl.AddCode(abend);
    // main parsing loop: end

    pf.AddCode(pl);
    pf.AddCode(new C.IfThenElse($"Main.Peek() is {TopType}", "return Main.Pop()"));
    pf.AddCode("return null"); // TODO!!!
    p.AddMethod(pf);

    // other methods
    GenerateTokeniser(p);
    return p;
}