/** Copy constructor: forwards {@code node} to the base constructor and then
 *  copies its grammar type, error flag and token stream onto this root.
 */
public GrammarRootAST(GrammarRootAST node)
    : base(node)
{
    grammarType = node.grammarType;
    hasErrors = node.hasErrors;
    tokenStream = node.tokenStream;
}
/** Stores the grammar root AST, the rule map and the grammar, and caches
 *  the grammar's tool reference ({@code g.tool}).
 */
public LeftFactoringRuleTransformer(
    [NotNull] GrammarRootAST ast,
    [NotNull] IDictionary<string, Rule> rules,
    [NotNull] Grammar g)
{
    _ast = ast;
    _rules = rules;
    _g = g;
    _tool = g.tool;
}
/** Stores the grammar root AST, the rule collection and the grammar, and
 *  caches the grammar's tool reference ({@code g.tool}).
 */
public LeftRecursiveRuleTransformer(GrammarRootAST ast, ICollection<Rule> rules, Grammar g)
{
    // The parameters shadow the fields of the same name, hence the explicit "this.".
    this.ast = ast;
    this.rules = rules;
    this.g = g;
    tool = g.tool;
}
/**
 * Detects the following errors, which require analysis across the whole
 * grammar for rules according to their base context.
 *
 * <ul>
 * <li><c>ErrorType.RULE_WITH_TOO_FEW_ALT_LABELS_GROUP</c></li>
 * <li><c>ErrorType.BASE_CONTEXT_MUST_BE_RULE_NAME</c></li>
 * <li><c>ErrorType.BASE_CONTEXT_CANNOT_BE_TRANSITIVE</c></li>
 * <li><c>ErrorType.LEXER_RULE_CANNOT_HAVE_BASE_CONTEXT</c></li>
 * </ul>
 */
public override void FinishGrammar(GrammarRootAST root, GrammarAST ID)
{
    // Non-lexer rules grouped by the base context they resolve to.
    var rulesByBaseContext = new Runtime.Misc.MultiMap<string, Rule>();

    foreach (Rule r in ruleCollector.rules.Values)
    {
        GrammarAST optionAST = r.ast.GetOptionAST("baseContext");

        if (r.ast.IsLexerRule())
        {
            // Lexer rules are never grouped; an explicit baseContext option on one is an error.
            if (optionAST != null)
            {
                g.tool.errMgr.GrammarError(ErrorType.LEXER_RULE_CANNOT_HAVE_BASE_CONTEXT, g.fileName, optionAST.Token, r.name);
            }

            continue;
        }

        rulesByBaseContext.Map(r.GetBaseContext(), r);

        if (optionAST != null)
        {
            // The rule named as base context must not itself point at another base context.
            Rule targetRule;
            ruleCollector.rules.TryGetValue(r.GetBaseContext(), out targetRule);

            bool targetSpecifiesBaseContext = false;
            if (targetRule != null && targetRule.ast != null)
            {
                targetSpecifiesBaseContext =
                    targetRule.ast.GetOptionAST("baseContext") != null
                    || !targetRule.name.Equals(targetRule.GetBaseContext());
            }

            if (targetSpecifiesBaseContext)
            {
                g.tool.errMgr.GrammarError(ErrorType.BASE_CONTEXT_CANNOT_BE_TRANSITIVE, g.fileName, optionAST.Token, r.name);
            }
        }

        // It's unlikely for this to occur when optionAST is null, but checking
        // anyway means it can detect certain errors within the logic of the
        // Tool itself.
        if (!ruleCollector.rules.ContainsKey(r.GetBaseContext()))
        {
            // Report at the option when there is one, otherwise at the rule's first child.
            IToken errorToken = optionAST != null
                ? optionAST.Token
                : ((CommonTree)r.ast.GetChild(0)).Token;
            g.tool.errMgr.GrammarError(ErrorType.BASE_CONTEXT_MUST_BE_RULE_NAME, g.fileName, errorToken, r.name);
        }
    }

    foreach (KeyValuePair<string, IList<Rule>> entry in rulesByBaseContext)
    {
        // suppress RULE_WITH_TOO_FEW_ALT_LABELS_GROUP if RULE_WITH_TOO_FEW_ALT_LABELS
        // would already have been reported for at least one rule with this
        // base context.
        bool suppressError = false;
        int altLabelCount = 0;
        int outerAltCount = 0;

        foreach (Rule rule in entry.Value)
        {
            outerAltCount += rule.numberOfAlts;

            IList<GrammarAST> altLabels;
            bool hasAltLabels =
                ruleCollector.ruleToAltLabels.TryGetValue(rule.name, out altLabels)
                && altLabels != null
                && altLabels.Count > 0;
            if (!hasAltLabels)
            {
                continue;
            }

            if (altLabels.Count != rule.numberOfAlts)
            {
                suppressError = true;
                break;
            }

            altLabelCount += altLabels.Count;
        }

        // Nothing to report when suppressed, when no alternative is labeled,
        // or when every outer alternative in the group is labeled.
        if (suppressError || altLabelCount == 0 || altLabelCount == outerAltCount)
        {
            continue;
        }

        Rule errorRule = entry.Value[0];
        g.tool.errMgr.GrammarError(ErrorType.RULE_WITH_TOO_FEW_ALT_LABELS_GROUP, g.fileName, ((CommonTree)errorRule.ast.GetChild(0)).Token, errorRule.name);
    }
}
// Routines to route visitor traffic to the checking routines

/** Passes the token of the grammar's ID node to CheckGrammarName. */
public override void DiscoverGrammar(GrammarRootAST root, GrammarAST ID) => CheckGrammarName(ID.Token);
/** For testing; builds trees, does semantic analysis.
 *
 *  Creates a fresh AntlrTool, registers {@code listener} on it, parses
 *  {@code grammarText}, points every AST node at this grammar, optionally
 *  imports the vocabulary of {@code tokenVocabSource}, and finally runs
 *  {@code tool.Process(this, false)}.
 */
public Grammar(string fileName, string grammarText, Grammar tokenVocabSource, [Nullable] ANTLRToolListener listener)
{
    this.text = grammarText;
    this.fileName = fileName;
    this.tool = new AntlrTool();
    this.tool.AddListener(listener);

    var input = new Antlr.Runtime.ANTLRStringStream(grammarText);
    input.name = fileName;

    this.ast = tool.Parse(fileName, input);
    if (ast == null)
    {
        throw new NotSupportedException();
    }

    if (ast.tokenStream == null)
    {
        throw new InvalidOperationException("expected ast to have a token stream");
    }

    this.tokenStream = ast.tokenStream;
    this.originalTokenStream = this.tokenStream;

    // ensure each node has pointer to surrounding grammar
    var visitor = new Antlr.Runtime.Tree.TreeVisitor(new GrammarASTAdaptor());
    visitor.Visit(ast, new SetPointersAction(this));

    InitTokenSymbolTables();

    if (tokenVocabSource != null)
    {
        ImportVocab(tokenVocabSource);
    }

    tool.Process(this, false);
}
/** Creates a grammar around an already-parsed AST.
 *
 *  Throws ArgumentNullException when {@code ast} is null and
 *  ArgumentException when it has no token stream. The grammar name is the
 *  text of the AST's first child.
 */
public Grammar(AntlrTool tool, [NotNull] GrammarRootAST ast)
{
    if (ast == null)
        throw new ArgumentNullException(nameof(ast));

    if (ast.tokenStream == null)
        throw new ArgumentException("ast must have a token stream", nameof(ast));

    this.tool = tool;
    this.ast = ast;
    this.name = ast.GetChild(0).Text;

    this.tokenStream = ast.tokenStream;
    this.originalTokenStream = this.tokenStream;

    InitTokenSymbolTables();
}
/** Maps ATN state numbers to the token interval of the grammar node that
 *  owns the state, for every node in {@code ast} whose type is in
 *  {@code grammarTokenTypes} and which has an ATN state. Returns an empty
 *  map when {@code ast} is null.
 */
public static IDictionary<int, Interval> GetStateToGrammarRegionMap(GrammarRootAST ast, IntervalSet grammarTokenTypes)
{
    IDictionary<int, Interval> regionsByState = new Dictionary<int, Interval>();
    if (ast == null)
    {
        return regionsByState;
    }

    foreach (GrammarAST node in ast.GetNodesWithType(grammarTokenTypes))
    {
        if (node.atnState == null)
        {
            continue;
        }

        Interval tokenRegion = Interval.Of(node.TokenStartIndex, node.TokenStopIndex);

        // RULEs, BLOCKs of transformed recursive rules point to original token interval
        Antlr.Runtime.Tree.ITree ruleNode;
        int nodeType = node.Type;
        if (nodeType == ANTLRParser.RULE)
        {
            ruleNode = node;
        }
        else if (nodeType == ANTLRParser.BLOCK || nodeType == ANTLRParser.CLOSURE)
        {
            ruleNode = node.GetAncestor(ANTLRParser.RULE);
        }
        else
        {
            ruleNode = null;
        }

        RuleAST ruleAST = ruleNode as RuleAST;
        if (ruleAST != null)
        {
            Rule rule = ast.g.GetRule(ruleAST.GetRuleName());
            LeftRecursiveRule leftRecursiveRule = rule as LeftRecursiveRule;
            if (leftRecursiveRule != null)
            {
                RuleAST originalAST = leftRecursiveRule.GetOriginalAST();
                tokenRegion = Interval.Of(originalAST.TokenStartIndex, originalAST.TokenStopIndex);
            }
        }

        regionsByState[node.atnState.stateNumber] = tokenRegion;
    }

    return regionsByState;
}
/** Return list of (TOKEN_NAME node, 'literal' node) pairs.
 *
 *  Each RULE node whose first child is a TOKEN_REF is tried against a fixed
 *  list of tree patterns; DefAlias is called per pattern until one reports a
 *  match. Returns null when the AST contains no RULE nodes.
 */
public static IList<System.Tuple<GrammarAST, GrammarAST>> GetStringLiteralAliasesFromLexerRules(GrammarRootAST ast)
{
    string[] patterns =
    {
        "(RULE %name:TOKEN_REF (BLOCK (ALT %lit:STRING_LITERAL)))",
        "(RULE %name:TOKEN_REF (BLOCK (ALT %lit:STRING_LITERAL ACTION)))",
        "(RULE %name:TOKEN_REF (BLOCK (ALT %lit:STRING_LITERAL SEMPRED)))",
        "(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) .)))",
        "(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) . .)))",
        "(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) (LEXER_ACTION_CALL . .))))",
        "(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) . (LEXER_ACTION_CALL . .))))",
        "(RULE %name:TOKEN_REF (BLOCK (LEXER_ALT_ACTION (ALT %lit:STRING_LITERAL) (LEXER_ACTION_CALL . .) .)))",
        // TODO: allow doc comment in there
    };

    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(ast.Token.InputStream);
    Antlr.Runtime.Tree.TreeWizard wizard = new Antlr.Runtime.Tree.TreeWizard(adaptor, ANTLRParser.tokenNames);

    IList<System.Tuple<GrammarAST, GrammarAST>> aliases = new List<System.Tuple<GrammarAST, GrammarAST>>();

    IList<GrammarAST> ruleNodes = ast.GetNodesWithType(ANTLRParser.RULE);
    if (ruleNodes == null || ruleNodes.Count == 0)
    {
        return null;
    }

    foreach (GrammarAST ruleNode in ruleNodes)
    {
        Antlr.Runtime.Tree.ITree nameNode = ruleNode.GetChild(0);
        if (nameNode.Type != ANTLRParser.TOKEN_REF)
        {
            continue;
        }

        // check rule against patterns; stop at the first one that matches
        foreach (string pattern in patterns)
        {
            if (DefAlias(ruleNode, pattern, wizard, aliases))
            {
                break;
            }
        }
    }

    return aliases;
}
/** Given the raw AST of a grammar, create a grammar object
 *  associated with the AST: a LexerGrammar when the AST's grammar type is
 *  LEXER, a plain Grammar otherwise. Once we have the grammar object, ensure
 *  that all nodes in the tree refer to this grammar. Later, we will use it
 *  for error handling and generally knowing from where a rule comes from.
 */
public virtual Grammar CreateGrammar(GrammarRootAST ast)
{
    Grammar grammar = ast.grammarType == ANTLRParser.LEXER
        ? new LexerGrammar(this, ast)
        : new Grammar(this, ast);

    // ensure each node has pointer to surrounding grammar
    GrammarTransformPipeline.SetGrammarPtr(grammar, ast);
    return grammar;
}
/** Manually get option node from tree; return null if not defined.
 *
 *  Looks at the first OPTIONS child of {@code root} and returns the second
 *  child of the first ASSIGN node whose first child's text equals
 *  {@code option}.
 */
public static GrammarAST FindOptionValueAST(GrammarRootAST root, string option)
{
    GrammarAST optionsNode = (GrammarAST)root.GetFirstChildWithType(ANTLRParser.OPTIONS);
    if (optionsNode == null || optionsNode.ChildCount <= 0)
    {
        return null;
    }

    foreach (object child in optionsNode.Children)
    {
        GrammarAST assignment = (GrammarAST)child;
        if (assignment.Type != ANTLRParser.ASSIGN)
        {
            continue;
        }

        if (assignment.GetChild(0).Text.Equals(option))
        {
            return (GrammarAST)assignment.GetChild(1);
        }
    }

    return null;
}
/** Return true if successful.
 *
 *  Runs the left-recursion analyzer over the rule's AST; when the rule is
 *  recognized as left recursive, the rule's subtree under RULES is replaced
 *  by a freshly parsed artificial rule, the semantic checks are rerun on it,
 *  and the alternative/label bookkeeping on {@code r} is updated. Returns
 *  false (leaving everything untouched) when the analyzer does not match.
 */
public virtual bool TranslateLeftRecursiveRule(GrammarRootAST ast, LeftRecursiveRule r, string language)
{
    GrammarAST originalRuleAST = r.ast;
    string ruleName = originalRuleAST.GetChild(0).Text;
    LeftRecursiveRuleAnalyzer analyzer =
        new LeftRecursiveRuleAnalyzer(originalRuleAST, tool, ruleName, language);

    bool isLeftRec;
    try
    {
        isLeftRec = analyzer.rec_rule();
    }
    catch (RecognitionException)
    {
        // didn't match; oh well
        isLeftRec = false;
    }

    if (!isLeftRec)
    {
        return false;
    }

    // replace old rule's AST; first create text of altered rule
    GrammarAST rulesRoot = (GrammarAST)ast.GetFirstChildWithType(ANTLRParser.RULES);
    string newRuleText = analyzer.GetArtificialOpPrecRule();

    // now parse within the context of the grammar that originally created
    // the AST we are transforming. This could be an imported grammar so
    // we cannot just reference this.g because the rule might come from
    // the imported grammar and not the root grammar (this.g)
    RuleAST newRuleAST = ParseArtificialRule(originalRuleAST.g, newRuleText);

    // reuse the name token from the original AST since it refers to the
    // proper source location in the original grammar
    GrammarAST newNameNode = (GrammarAST)newRuleAST.GetChild(0);
    GrammarAST originalNameNode = (GrammarAST)originalRuleAST.GetChild(0);
    newNameNode.Token = originalNameNode.Token;

    // update grammar AST and set rule's AST.
    rulesRoot.SetChild(originalRuleAST.ChildIndex, newRuleAST);
    r.ast = newRuleAST;

    // Reduce sets in newly created rule tree
    GrammarTransformPipeline pipeline = new GrammarTransformPipeline(g, g.tool);
    pipeline.ReduceBlocksToSets(r.ast);
    pipeline.ExpandParameterizedLoops(r.ast);

    // Rerun semantic checks on the new rule
    RuleCollector collector = new RuleCollector(g);
    collector.Visit(newRuleAST, "rule");
    BasicSemanticChecks semanticChecks = new BasicSemanticChecks(g, collector);
    // disable the assoc element option checks because they are already
    // handled for the pre-transformed rule.
    semanticChecks.checkAssocElementOption = false;
    semanticChecks.Visit(newRuleAST, "rule");

    // track recursive alt info for codegen
    r.recPrimaryAlts = new List<LeftRecursiveRuleAltInfo>();
    foreach (var primaryAlt in analyzer.prefixAndOtherAlts)
    {
        r.recPrimaryAlts.Add(primaryAlt);
    }

    if (r.recPrimaryAlts.Count == 0)
    {
        tool.errMgr.GrammarError(ErrorType.NO_NON_LR_ALTS, g.fileName, ((GrammarAST)r.ast.GetChild(0)).Token, r.name);
    }

    // Operator alternatives: binary first, then ternary, then suffix.
    r.recOpAlts = new OrderedHashMap<int, LeftRecursiveRuleAltInfo>();
    foreach (var binaryAlt in analyzer.binaryAlts)
    {
        r.recOpAlts[binaryAlt.Key] = binaryAlt.Value;
    }

    foreach (var ternaryAlt in analyzer.ternaryAlts)
    {
        r.recOpAlts[ternaryAlt.Key] = ternaryAlt.Value;
    }

    foreach (var suffixAlt in analyzer.suffixAlts)
    {
        r.recOpAlts[suffixAlt.Key] = suffixAlt.Value;
    }

    // walk alt info records and set their altAST to point to appropriate ALT subtree
    // from freshly created AST
    SetAltASTPointers(r, newRuleAST);

    // update Rule to just one alt and add prec alt
    ActionAST arg = (ActionAST)r.ast.GetFirstChildWithType(ANTLRParser.ARG_ACTION);
    if (arg != null)
    {
        r.args = ScopeParser.ParseTypedArgList(arg, arg.Text, g);
        r.args.type = AttributeDict.DictType.ARG;
        r.args.ast = arg;
        arg.resolver = r.alt[1]; // todo: isn't this Rule or something?
    }

    // define labels on recursive rule refs we delete; they don't point to nodes of course
    // these are so $label in action translation works
    foreach (System.Tuple<GrammarAST, string> labelEntry in analyzer.leftRecursiveRuleRefLabels)
    {
        GrammarAST labelNode = labelEntry.Item1;
        GrammarAST labelOpNode = (GrammarAST)labelNode.Parent;
        GrammarAST elementNode = (GrammarAST)labelOpNode.GetChild(1);
        LabelElementPair labelPair = new LabelElementPair(g, labelNode, elementNode, labelOpNode.Type);
        r.alt[1].labelDefs.Map(labelNode.Text, labelPair);
    }

    // copy to rule from walker
    r.leftRecursiveRuleRefLabels = analyzer.leftRecursiveRuleRefLabels;

    tool.Log("grammar", "added: " + newRuleAST.ToStringTree());
    return true;
}
/** Build lexer grammar from combined grammar that looks like:
 *
 *  (COMBINED_GRAMMAR A
 *      (tokens { X (= Y 'y'))
 *      (OPTIONS (= x 'y'))
 *      (@ members {foo})
 *      (@ lexer header {package jj;})
 *      (RULES (RULE .+)))
 *
 *  Move rules and actions to new tree, don't dup. Split AST apart.
 *  We'll have this Grammar share token symbols later; don't generate
 *  tokenVocab or tokens{} section. Copy over named actions.
 *
 *  Side-effects: it removes children from GRAMMAR and RULES nodes
 *                in combined AST. Anything cut out is dup'd before
 *                adding to lexer to avoid "who's ur daddy" issues.
 *
 *  Returns the lexer AST as built so far when the combined grammar has no
 *  RULES node, and null when the resulting lexer ends up with no rules.
 */
public virtual GrammarRootAST ExtractImplicitLexer(Grammar combinedGrammar)
{
    GrammarRootAST combinedAST = combinedGrammar.ast;
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.Token.InputStream);
    GrammarAST[] elements = combinedAST.GetChildrenAsArray();

    // MAKE A GRAMMAR ROOT and ID
    string lexerName = combinedAST.GetChild(0).Text + "Lexer";
    GrammarRootAST lexerAST = new GrammarRootAST(
        new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR"),
        combinedGrammar.ast.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.Token.InputStream = combinedAST.Token.InputStream;
    lexerAST.AddChild((ITree)adaptor.Create(ANTLRParser.ID, lexerName));

    // COPY OPTIONS
    GrammarAST optionsRoot = (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.OPTIONS);
    if (optionsRoot != null && optionsRoot.ChildCount != 0)
    {
        GrammarAST lexerOptionsRoot = (GrammarAST)adaptor.DupNode(optionsRoot);
        lexerAST.AddChild(lexerOptionsRoot);

        foreach (GrammarAST option in optionsRoot.GetChildrenAsArray())
        {
            string optionName = option.GetChild(0).Text;

            // Only lexer options that are not explicitly excluded get copied.
            if (!Grammar.lexerOptions.Contains(optionName) || Grammar.doNotCopyOptionsToLexer.Contains(optionName))
            {
                continue;
            }

            GrammarAST optionTree = (GrammarAST)adaptor.DupTree(option);
            lexerOptionsRoot.AddChild(optionTree);
            lexerAST.SetOption(optionName, (GrammarAST)optionTree.GetChild(1));
        }
    }

    // COPY all named actions, but only move those with lexer:: scope
    IList<GrammarAST> actionsWeMoved = new List<GrammarAST>();
    foreach (GrammarAST element in elements)
    {
        if (element.Type != ANTLRParser.AT)
        {
            continue;
        }

        lexerAST.AddChild((ITree)adaptor.DupTree(element));
        if (element.GetChild(0).Text.Equals("lexer"))
        {
            actionsWeMoved.Add(element);
        }
    }

    foreach (GrammarAST movedAction in actionsWeMoved)
    {
        combinedAST.DeleteChild(movedAction);
    }

    GrammarAST combinedRulesRoot = (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.RULES);
    if (combinedRulesRoot == null)
    {
        return lexerAST;
    }

    // MOVE lexer rules
    GrammarAST lexerRulesRoot = (GrammarAST)adaptor.Create(ANTLRParser.RULES, "RULES");
    lexerAST.AddChild(lexerRulesRoot);

    IList<GrammarAST> rulesWeMoved = new List<GrammarAST>();
    // Snapshot the children so the RULES node can be modified afterwards.
    GrammarASTWithOptions[] rules = combinedRulesRoot.ChildCount > 0
        ? combinedRulesRoot.Children.Cast<GrammarASTWithOptions>().ToArray()
        : new GrammarASTWithOptions[0];

    foreach (GrammarASTWithOptions rule in rules)
    {
        string ruleName = rule.GetChild(0).Text;
        if (Grammar.IsTokenName(ruleName))
        {
            lexerRulesRoot.AddChild((ITree)adaptor.DupTree(rule));
            rulesWeMoved.Add(rule);
        }
    }

    foreach (GrammarAST movedRule in rulesWeMoved)
    {
        combinedRulesRoot.DeleteChild(movedRule);
    }

    // Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
    IList<System.Tuple<GrammarAST, GrammarAST>> litAliases =
        Grammar.GetStringLiteralAliasesFromLexerRules(lexerAST);

    ISet<string> stringLiterals = combinedGrammar.GetStringLiterals();

    // add strings from combined grammar (and imported grammars) into lexer
    // put them first as they are keywords; must resolve ambigs to these rules
    int insertIndex = 0;
    foreach (string lit in stringLiterals)
    {
        // if lexer already has a rule for literal, continue
        bool alreadyDefined = false;
        if (litAliases != null)
        {
            foreach (System.Tuple<GrammarAST, GrammarAST> alias in litAliases)
            {
                GrammarAST litAST = alias.Item2;
                if (lit.Equals(litAST.Text))
                {
                    alreadyDefined = true;
                    break;
                }
            }
        }

        if (alreadyDefined)
        {
            continue;
        }

        // create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>))
        string rname = combinedGrammar.GetStringLiteralLexerRuleName(lit);

        // can't use wizard; need special node types
        GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
        BlockAST block = new BlockAST(ANTLRParser.BLOCK);
        AltAST alt = new AltAST(ANTLRParser.ALT);
        TerminalAST literalNode = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit));
        alt.AddChild(literalNode);
        block.AddChild(alt);

        CommonToken idToken = new CommonToken(ANTLRParser.TOKEN_REF, rname);
        litRule.AddChild(new TerminalAST(idToken));
        litRule.AddChild(block);

        lexerRulesRoot.InsertChild(insertIndex, litRule);
        // reset indexes and set litRule parent
        lexerRulesRoot.FreshenParentAndChildIndexes();

        // next literal will be added after the one just added
        insertIndex++;
    }

    // TODO: take out after stable if slow
    lexerAST.SanityCheckParentAndChildIndexes();
    combinedAST.SanityCheckParentAndChildIndexes();

    combinedGrammar.tool.Log("grammar", "after extract implicit lexer =" + combinedAST.ToStringTree());
    combinedGrammar.tool.Log("grammar", "lexer =" + lexerAST.ToStringTree());

    return lexerRulesRoot.ChildCount == 0 ? null : lexerAST;
}
/** Creates a lexer grammar by forwarding {@code tool} and {@code ast} to the
 *  base Grammar constructor; no additional initialization is performed here.
 */
public LexerGrammar(AntlrTool tool, GrammarRootAST ast)
    : base(tool, ast)
{
}