예제 #1
0
        /// <summary>
        /// Builds the non-public <c>NextToken</c> method model (return type
        /// <c>Tuple&lt;TokenType, string&gt;</c>) and adds it to <paramref name="cls"/>.
        /// </summary>
        /// <remarks>
        /// The generated body is assembled in this order: local initialisation, an end-of-input
        /// check, a loop that advances <c>Pos</c> over 'skip' characters, a second end-of-input
        /// check, then <c>GenerateBranches</c> calls for 'mark', 'word' and every remaining token
        /// type, and finally a return of the <c>(t, s)</c> pair. A diagnostic line is written to
        /// the console when the 'skip', 'mark' or 'word' token type is absent.
        /// </remarks>
        /// <param name="cls">Class model that receives the generated method.</param>
        private void GenerateTokeniser(C.CsClass cls)
        {
            // Values of all 'skip' and 'mark' tokens; handed to GenerateBranches for the later token types.
            var skipmark = new List<string>();

            var tok = new C.CsMethod
            {
                IsPublic = false,
                Name     = "NextToken",
                RetType  = "Tuple<TokenType, string>"
            };

            // init phase
            tok.AddCode("TokenType t = TokenType.TUndefined;");
            tok.AddCode("string s = \"\";");

            // EOF phase
            tok.AddCode(new C.IfThenElse("Pos >= Input.Length",
                                         "return new Tuple<TokenType, string>(TokenType.TEOF, \"\")"));

            // skip
            if (Tokens.TryGetValue("skip", out var skipTokens))
            {
                // NOTE(review): each value is pasted verbatim between single quotes in the generated
                // code; presumably values arrive already escaped for a C# char literal -- confirm.
                string cond = String.Join(" || ", skipTokens.Select(t => $"Input[Pos] == '{t.Value}'"));
                tok.AddCode(new C.WhileStmt($"Pos < Input.Length && ({cond})", "Pos++"));
                skipmark.AddRange(skipTokens.Select(t => t.Value));
            }
            else
            {
                Console.WriteLine("[IR] It is suspicious that there are no tokens of type 'skip'");
            }

            // EOF after skip: this statement is also the one passed to every GenerateBranches call below
            var megaIf = new C.IfThenElse("Pos >= Input.Length",
                                          "return new Tuple<TokenType, string>(TokenType.TEOF, \"\")");
            tok.AddCode(megaIf);

            // mark
            if (Tokens.TryGetValue("mark", out var markTokens))
            {
                skipmark.AddRange(markTokens.Select(t => t.Value));
                GenerateBranches("mark", megaIf, null);
            }
            else
            {
                // The diagnostic names 'mark': it previously said 'word', copied from the branch below.
                Console.WriteLine("[IR] It is suspicious that there are no tokens of type 'mark'");
            }

            // word
            if (Tokens.ContainsKey("word"))
            {
                GenerateBranches("word", megaIf, skipmark);
            }
            else
            {
                Console.WriteLine("[IR] It is suspicious that there are no tokens of type 'word'");
            }

            // number etc: every token type that was not handled explicitly above
            foreach (var tt in Tokens.Keys.Where(k => k != "skip" && k != "word" && k != "mark"))
            {
                GenerateBranches(tt, megaIf, skipmark);
            }

            tok.AddCode("return new Tuple<TokenType, string>(t, s);");

            cls.AddMethod(tok);
        }
예제 #2
0
        /// <summary>
        /// Builds the class model of the generated <c>Parser</c> (deriving from <c>BaseParser</c>):
        /// its usings, flag fields, the <c>TokenType</c> enum, a constructor taking the input string,
        /// the <c>Parse</c> method and, via <c>GenerateTokeniser</c>, the tokeniser method.
        /// </summary>
        /// <remarks>
        /// Writes a "[B2C] unused token" line to the console for every token type that has no
        /// entry in <c>Handlers</c> (except 'skip').
        /// </remarks>
        /// <returns>The assembled class model.</returns>
        public C.CsClass GenerateParser()
        {
            var p = new C.CsClass
            {
                NS    = NS,
                Name  = "Parser",
                Super = "BaseParser"
            };

            p.AddUsing("EngageRuntime");
            p.AddUsing("System");
            p.AddUsing("System.Collections.Generic");

            // All flags of one type are declared together, as one comma-separated, sorted field list.
            if (BoolFlags.Count > 0)
            {
                p.AddField(String.Join(", ", BoolFlags.OrderBy(x => x)), "bool", isPublic: false);
            }
            if (IntFlags.Count > 0)
            {
                p.AddField(String.Join(", ", IntFlags.OrderBy(x => x)), "int", isPublic: false);
            }

            // token types: TUndefined, TEOF and one "T"-prefixed member per token type except 'skip'
            var tt = new C.CsEnum
            {
                IsPublic = false,
                Name     = "TokenType"
            };

            tt.Add("TUndefined");
            tt.Add("TEOF");
            tt.Add(Tokens.Keys.Where(t => t != "skip").Select(t => "T" + t));
            p.AddInner(tt);

            // parser constructor
            var pc = new C.CsConstructor
            {
                InheritFromBase = true
            };

            pc.AddArgument("input", "string");
            p.AddConstructor(pc);

            // the parse function
            var pf = new C.CsMethod
            {
                Name    = "Parse",
                RetType = "object"
            };

            pf.AddCode("string ERROR = \"\"");
            pf.AddCode("TokenType type");
            pf.AddCode("string lexeme");

            // NOTE(review): 'reversed: true' presumably makes this a do-while loop -- confirm in C.WhileStmt.
            var pl = new C.WhileStmt("type != TokenType.TEOF", reversed: true);

            // main parsing loop: begin
            pl.AddCode("var _token = NextToken();");
            pl.AddCode("lexeme = _token.Item2;");
            pl.AddCode("type = _token.Item1;");

            var swType = new C.SwitchCaseStmt
            {
                Expression = "type"
            };

            // 'skip' is pre-seeded so that it is never reported as unused below.
            var usedTokens = new HashSet<string> { "skip" };

            foreach (var hpk in Handlers.Keys)
            {
                // Look the handler list up once instead of on every use.
                var handlers   = Handlers[hpk];
                var branchType = new List<C.CsStmt>();

                if (hpk == "EOF")
                {
                    branchType.Add(new C.SimpleStmt("Flush()"));
                }

                if (handlers.Count == 1)
                {
                    handlers[0].GenerateAbstractCode(branchType);
                }
                else
                {
                    // much faster to switch-case on a char than on a string
                    bool matchChar = handlers.Select(h => h.ReactOn.Value).All(v => v.Length == 1);
                    var  swLex     = new C.SwitchCaseStmt
                    {
                        Expression = "lexeme" + (matchChar ? "[0]" : "")
                    };

                    // Need this dance because there may be different actions for the same token with different guards
                    var resortedHandlers = new HandlerMetaCollection();
                    foreach (var handler in handlers)
                    {
                        resortedHandlers.Add(handler);
                    }
                    foreach (var key in resortedHandlers.SortedKeys())
                    {
                        GenerateLexBranch(swLex, hpk, resortedHandlers.GuardFlags(key), resortedHandlers.Recipes(key),
                                          key, matchChar);
                    }
                    branchType.Add(swLex);
                }

                swType.Branches["TokenType.T" + hpk] = branchType;
                usedTokens.Add(hpk);
            }

            foreach (var t in Tokens.Keys)
            {
                if (!usedTokens.Contains(t))
                {
                    Console.WriteLine($"[B2C] unused token {t}");
                }

                foreach (B.TokenPlan tok in Tokens[t])
                {
                    if (!tok.Special)
                    {
                        continue;
                    }

                    // Special tokens push their own value: a parsed int for "number", the raw lexeme
                    // for "string", and an empty expression for anything else.
                    string todo = tok.Value switch
                    {
                        "number" => "System.Int32.Parse(lexeme)",
                        "string" => "lexeme",
                        _ => ""
                    };
                    todo = PossiblyWrap(todo, tok.Value);

                    var branchType = new List<C.CsStmt>
                    {
                        new C.SimpleStmt($"Push({todo})")
                    };

                    // NOTE(review): this assignment replaces any branch registered for the same
                    // token type by the handler loop above -- confirm that is intended.
                    swType.Branches["TokenType.T" + t] = branchType;
                }
            }

            pl.AddCode(swType);

            // Abort with a console message as soon as the generated code has set ERROR.
            const string cond  = "!System.String.IsNullOrEmpty(ERROR)";
            var          abend = new C.IfThenElse();

            abend.AddToBranch(cond, "Console.WriteLine(\"Parser error: \" + ERROR);");
            abend.AddToBranch(cond, "return null;");
            pl.AddCode(abend);
            // main parsing loop: end

            pf.AddCode(pl);
            pf.AddCode(new C.IfThenElse($"Main.Peek() is {TopType}", "return Main.Pop()"));
            pf.AddCode("return null"); // TODO!!!
            p.AddMethod(pf);

            // other methods
            GenerateTokeniser(p);

            return p;
        }