/** Create a transformer bound to a grammar AST, the rules to consider and
 *  the owning grammar. The tool reference is taken from the grammar
 *  ({@code g.tool}), so {@code g} must not be null.
 */
public LeftRecursiveRuleTransformer(GrammarRootAST ast, ICollection<Rule> rules, Grammar g)
{
    this.ast = ast;
    this.rules = rules;
    this.g = g;

    // No parameter shadows the tool field, so no qualifier is needed.
    tool = g.tool;
}
/** Create a transformer bound to a grammar AST, a name-to-rule map and the
 *  owning grammar. The tool reference is read from {@code g.tool}.
 */
public LeftFactoringRuleTransformer(
    [NotNull] GrammarRootAST ast,
    [NotNull] IDictionary<string, Rule> rules,
    [NotNull] Grammar g)
{
    // The underscore-prefixed fields cannot clash with the parameters,
    // so the explicit "this." qualifier is unnecessary.
    _ast = ast;
    _rules = rules;
    _g = g;
    _tool = g.tool;
}
/** Convenience method to load and process an ANTLR grammar. Useful
 *  when creating interpreters. If you need access to the lexer
 *  grammar created while processing a combined grammar, use
 *  getImplicitLexer() on the returned grammar.
 *
 *  @param fileName The grammar file to parse; it is also recorded as the
 *         resulting grammar's {@code fileName}.
 *  @return The processed grammar, or null when ParseGrammar produced no AST
 *          (for example because the file could not be read).
 */
public virtual Grammar LoadGrammar(string fileName)
{
    GrammarRootAST grammarRootAST = ParseGrammar(fileName);
    if (grammarRootAST == null)
    {
        // ParseGrammar returns null when the file cannot be opened. Passing
        // that null on to CreateGrammar would dereference it and crash with
        // a NullReferenceException, so hand the failure back to the caller.
        return null;
    }

    Grammar g = CreateGrammar(grammarRootAST);
    g.fileName = fileName;
    Process(g, false);
    return g;
}
/** Build the grammar object that belongs to a raw grammar AST.
 *
 *  A LexerGrammar is created when the AST's grammar type is
 *  ANTLRParser.LEXER; any other type yields a plain Grammar. Afterwards
 *  GrammarTransformPipeline.SetGrammarPtr is run over the tree so that
 *  nodes refer back to the grammar, which is later used for error
 *  handling and for knowing where a rule comes from.
 *
 *  @param ast Root of the parsed grammar; must not be null.
 *  @return The newly created grammar.
 */
public virtual Grammar CreateGrammar(GrammarRootAST ast)
{
    bool isLexerGrammar = ast.grammarType == ANTLRParser.LEXER;

    Grammar created = isLexerGrammar
        ? new LexerGrammar(this, ast)
        : new Grammar(this, ast);

    // ensure each node has pointer to surrounding grammar
    GrammarTransformPipeline.SetGrammarPtr(created, ast);

    return created;
}
/** Run the grammar-level transformations on this pipeline's grammar.
 *
 *  Nothing happens when the grammar has no AST. Otherwise the tree is
 *  logged, imported grammars are integrated, blocks are reduced to sets,
 *  parameterized loops are expanded, and the resulting tree is logged
 *  again.
 */
public virtual void Process()
{
    GrammarRootAST tree = g.ast;
    if (tree == null)
    {
        return;
    }

    string treeBefore = tree.ToStringTree();
    tool.Log("grammar", "before: " + treeBefore);

    IntegrateImportedGrammars(g);
    ReduceBlocksToSets(tree);
    ExpandParameterizedLoops(tree);

    string treeAfter = tree.ToStringTree();
    tool.Log("grammar", "after: " + treeAfter);
}
/** Look an option up directly in the grammar tree.
 *
 *  Scans the children of the first OPTIONS node for an ASSIGN node whose
 *  first child has text equal to {@code option}.
 *
 *  @param root Grammar root to search.
 *  @param option Name of the option to find.
 *  @return The second child of the matching ASSIGN node (the option value),
 *          or null if there is no OPTIONS node, it has no children, or no
 *          assignment matches.
 */
public static GrammarAST FindOptionValueAST(GrammarRootAST root, string option)
{
    GrammarAST optionsNode = (GrammarAST)root.GetFirstChildWithType(ANTLRParser.OPTIONS);
    if (optionsNode == null || optionsNode.ChildCount <= 0)
    {
        return null;
    }

    foreach (object child in optionsNode.Children)
    {
        GrammarAST assignment = (GrammarAST)child;
        if (assignment.Type != ANTLRParser.ASSIGN)
        {
            continue;
        }

        if (assignment.GetChild(0).Text.Equals(option))
        {
            return (GrammarAST)assignment.GetChild(1);
        }
    }

    return null;
}
/**
 * Load a grammar imported by {@code g}, reusing a previously loaded
 * instance when one is cached in {@code importedGrammars}.
 *
 * The file is located through GetImportedGrammarFile, trying the name with
 * each extension in ALL_GRAMMAR_EXTENSIONS in turn (original note: try
 * current dir, then dir of g, then lib dir). When no file is found,
 * CANNOT_FIND_IMPORTED_GRAMMAR is reported and null is returned.
 *
 * @param g The grammar that contains the import.
 * @param nameNode The node associated with the imported grammar name.
 * @return The imported grammar, or null if the file could not be found or
 *         parsing produced no AST.
 */
public virtual Grammar LoadImportedGrammar(Grammar g, GrammarAST nameNode)
{
    string name = nameNode.Text;

    Grammar cached;
    if (importedGrammars.TryGetValue(name, out cached) && cached != null)
    {
        return cached;
    }

    g.tool.Log("grammar", "load " + name + " from " + g.fileName);

    // The first extension that resolves to a file wins.
    string grammarFile = null;
    foreach (string extension in ALL_GRAMMAR_EXTENSIONS)
    {
        grammarFile = GetImportedGrammarFile(g, name + extension);
        if (grammarFile != null)
        {
            break;
        }
    }

    if (grammarFile == null)
    {
        errMgr.GrammarError(ErrorType.CANNOT_FIND_IMPORTED_GRAMMAR, g.fileName, nameNode.Token, name);
        return null;
    }

    string absolutePath = Path.GetFullPath(grammarFile);
    char[] grammarChars = File.ReadAllText(absolutePath, Encoding.GetEncoding(grammarEncoding)).ToCharArray();
    ANTLRStringStream input = new ANTLRStringStream(grammarChars, grammarChars.Length, grammarFile);

    // NOTE(review): Parse receives the importing grammar's file name rather
    // than the imported file's - confirm this is intended.
    GrammarRootAST importedRoot = Parse(g.fileName, input);
    if (importedRoot == null)
    {
        return null;
    }

    Grammar loaded = CreateGrammar(importedRoot);
    loaded.fileName = absolutePath;

    // The cache is keyed by the name declared inside the grammar itself.
    importedGrammars[importedRoot.GetGrammarName()] = loaded;
    return loaded;
}
/** Read a grammar file and parse it into an AST.
 *
 *  A file name that is not rooted is resolved against
 *  {@code inputDirectory}. The file is decoded with the encoding named by
 *  {@code grammarEncoding}. The stream and Parse are given the file name
 *  exactly as passed in, not the resolved path.
 *
 *  @param fileName Grammar file to read.
 *  @return Whatever Parse returns, or null after reporting
 *          CANNOT_OPEN_FILE when an IOException occurs.
 */
public virtual GrammarRootAST ParseGrammar(string fileName)
{
    try
    {
        string path = Path.IsPathRooted(fileName)
            ? fileName
            : Path.Combine(inputDirectory, fileName);

        char[] grammarChars = File.ReadAllText(path, Encoding.GetEncoding(grammarEncoding)).ToCharArray();
        ANTLRStringStream input = new ANTLRStringStream(grammarChars, grammarChars.Length, fileName);
        return Parse(fileName, input);
    }
    catch (IOException ioe)
    {
        errMgr.ToolError(ErrorType.CANNOT_OPEN_FILE, ioe, fileName);
        return null;
    }
}
/** Parse the given grammar files and order their roots by tokenVocab
 *  dependency.
 *
 *  Files whose parse result is null, an error node, or flagged with
 *  {@code hasErrors} are skipped. For every remaining grammar an edge is
 *  added from the grammar name to the name in its {@code tokenVocab}
 *  option (if present), plus a self edge so the grammar appears in the
 *  sorted output even when it has no dependency.
 *
 *  @param fileNames Grammar files to parse.
 *  @return The successfully parsed roots, in the order produced by the
 *          graph sort.
 */
public virtual IList<GrammarRootAST> SortGrammarByTokenVocab(IList<string> fileNames)
{
    Graph<string> g = new Graph<string>();
    IList<GrammarRootAST> roots = new List<GrammarRootAST>();

    foreach (string fileName in fileNames)
    {
        GrammarAST t = ParseGrammar(fileName);
        if (t == null || t is GrammarASTErrorNode)
        {
            continue; // came back as error node
        }

        GrammarRootAST root = (GrammarRootAST)t;
        if (root.hasErrors)
        {
            continue;
        }

        roots.Add(root);
        root.fileName = fileName;
        string grammarName = root.GetChild(0).Text;

        // Make grammars depend on any tokenVocab options
        GrammarAST tokenVocabNode = FindOptionValueAST(root, "tokenVocab");
        if (tokenVocabNode != null)
        {
            string vocabName = tokenVocabNode.Text;

            // Strip quote characters if any. The characters are inspected
            // only after the length check so that an empty option text
            // cannot raise IndexOutOfRangeException.
            int len = vocabName.Length;
            if (len >= 2 && vocabName[0] == '\'' && vocabName[len - 1] == '\'')
            {
                vocabName = vocabName.Substring(1, len - 2);
            }

            // If the name contains a path delimited by forward slashes,
            // use only the part after the last slash as the name
            int lastSlash = vocabName.LastIndexOf('/');
            if (lastSlash >= 0)
            {
                vocabName = vocabName.Substring(lastSlash + 1);
            }

            g.AddEdge(grammarName, vocabName);
        }

        // add cycle to graph so we always process a grammar if no error
        // even if no dependency
        g.AddEdge(grammarName, grammarName);
    }

    IList<string> sortedGrammarNames = g.Sort();

    // Map the sorted names back to roots; the first root with a given
    // grammar name wins.
    IList<GrammarRootAST> sortedRoots = new List<GrammarRootAST>();
    foreach (string grammarName in sortedGrammarNames)
    {
        foreach (GrammarRootAST root in roots)
        {
            if (root.GetGrammarName().Equals(grammarName))
            {
                sortedRoots.Add(root);
                break;
            }
        }
    }

    return sortedRoots;
}
/**
 * Detects the following errors, which require looking at all rules of the
 * grammar grouped by their base context.
 *
 * <ul>
 * <li>{@link ErrorType#RULE_WITH_TOO_FEW_ALT_LABELS_GROUP}</li>
 * <li>{@link ErrorType#BASE_CONTEXT_MUST_BE_RULE_NAME}</li>
 * <li>{@link ErrorType#BASE_CONTEXT_CANNOT_BE_TRANSITIVE}</li>
 * <li>{@link ErrorType#LEXER_RULE_CANNOT_HAVE_BASE_CONTEXT}</li>
 * </ul>
 */
public override void FinishGrammar(GrammarRootAST root, GrammarAST ID)
{
    Runtime.Misc.MultiMap<string, Rule> rulesByBaseContext = new Runtime.Misc.MultiMap<string, Rule>();

    foreach (Rule rule in ruleCollector.rules.Values)
    {
        GrammarAST baseContextOption = rule.ast.GetOptionAST("baseContext");
        bool hasBaseContextOption = baseContextOption != null;

        // Lexer rules may not declare a base context and take no further
        // part in the checks below.
        if (rule.ast.IsLexerRule())
        {
            if (hasBaseContextOption)
            {
                IToken lexerErrorToken = baseContextOption.Token;
                g.tool.errMgr.GrammarError(ErrorType.LEXER_RULE_CANNOT_HAVE_BASE_CONTEXT, g.fileName, lexerErrorToken, rule.name);
            }

            continue;
        }

        rulesByBaseContext.Map(rule.GetBaseContext(), rule);

        if (hasBaseContextOption)
        {
            Rule target;
            ruleCollector.rules.TryGetValue(rule.GetBaseContext(), out target);

            // The target counts as declaring its own base context when it
            // carries the option or reports a base context other than its
            // own name.
            bool targetSpecifiesBaseContext = false;
            if (target != null && target.ast != null)
            {
                targetSpecifiesBaseContext =
                    target.ast.GetOptionAST("baseContext") != null
                    || !target.name.Equals(target.GetBaseContext());
            }

            if (targetSpecifiesBaseContext)
            {
                IToken transitiveErrorToken = baseContextOption.Token;
                g.tool.errMgr.GrammarError(ErrorType.BASE_CONTEXT_CANNOT_BE_TRANSITIVE, g.fileName, transitiveErrorToken, rule.name);
            }
        }

        // It's unlikely for this to occur when the option is absent, but
        // checking anyway means it can detect certain errors within the
        // logic of the Tool itself.
        if (!ruleCollector.rules.ContainsKey(rule.GetBaseContext()))
        {
            IToken missingRuleToken = hasBaseContextOption
                ? baseContextOption.Token
                : ((CommonTree)rule.ast.GetChild(0)).Token;
            g.tool.errMgr.GrammarError(ErrorType.BASE_CONTEXT_MUST_BE_RULE_NAME, g.fileName, missingRuleToken, rule.name);
        }
    }

    foreach (KeyValuePair<string, IList<Rule>> bucket in rulesByBaseContext)
    {
        int labeledAltCount = 0;
        int totalAltCount = 0;

        // Suppress RULE_WITH_TOO_FEW_ALT_LABELS_GROUP if
        // RULE_WITH_TOO_FEW_ALT_LABELS would already have been reported for
        // at least one rule with this base context.
        bool alreadyReported = false;

        foreach (Rule member in bucket.Value)
        {
            totalAltCount += member.numberOfAlts;

            IList<GrammarAST> labels;
            bool hasLabels = ruleCollector.ruleToAltLabels.TryGetValue(member.name, out labels)
                && labels != null
                && labels.Count > 0;
            if (!hasLabels)
            {
                continue;
            }

            if (labels.Count != member.numberOfAlts)
            {
                alreadyReported = true;
                break;
            }

            labeledAltCount += labels.Count;
        }

        if (alreadyReported || labeledAltCount == 0 || labeledAltCount == totalAltCount)
        {
            continue;
        }

        // The error is attached to the first rule of the group.
        Rule firstRule = bucket.Value[0];
        g.tool.errMgr.GrammarError(ErrorType.RULE_WITH_TOO_FEW_ALT_LABELS_GROUP, g.fileName, ((CommonTree)firstRule.ast.GetChild(0)).Token, firstRule.name);
    }
}
// Routines to route visitor traffic to the checking routines

/** Visitor hook for the grammar header: passes the token of the grammar's
 *  ID node to CheckGrammarName. The {@code root} argument is not used.
 */
public override void DiscoverGrammar(GrammarRootAST root, GrammarAST ID)
{
    IToken grammarNameToken = ID.Token;
    CheckGrammarName(grammarNameToken);
}
/** Build lexer grammar from combined grammar that looks like:
 *
 *  (COMBINED_GRAMMAR A
 *      (tokens { X (= Y 'y'))
 *      (OPTIONS (= x 'y'))
 *      (@ members {foo})
 *      (@ lexer header {package jj;})
 *      (RULES (RULE .+)))
 *
 *  Move rules and actions to new tree, don't dup. Split AST apart.
 *  We'll have this Grammar share token symbols later; don't generate
 *  tokenVocab or tokens{} section. Copy over named actions.
 *
 *  Side-effects: it removes children from GRAMMAR &amp; RULES nodes
 *                in combined AST. Anything cut out is dup'd before
 *                adding to lexer to avoid "who's ur daddy" issues.
 *
 *  @param combinedGrammar The combined grammar whose AST is split.
 *  @return The new lexer AST; the partially built lexer AST when the
 *          combined grammar has no RULES node; or null when the lexer
 *          ends up with no rules at all.
 */
public virtual GrammarRootAST ExtractImplicitLexer(Grammar combinedGrammar)
{
    GrammarRootAST combinedAST = combinedGrammar.ast;
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.Token.InputStream);

    // Snapshot of the top-level children taken before anything is removed.
    GrammarAST[] topLevelNodes = combinedAST.GetChildrenAsArray();

    // MAKE A GRAMMAR ROOT and ID
    string lexerName = combinedAST.GetChild(0).Text + "Lexer";
    CommonToken lexerRootToken = new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR");
    GrammarRootAST lexerAST = new GrammarRootAST(lexerRootToken, combinedGrammar.ast.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.Token.InputStream = combinedAST.Token.InputStream;
    lexerAST.AddChild((ITree)adaptor.Create(ANTLRParser.ID, lexerName));

    // COPY OPTIONS
    GrammarAST combinedOptions = (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.OPTIONS);
    if (combinedOptions != null && combinedOptions.ChildCount != 0)
    {
        GrammarAST lexerOptions = (GrammarAST)adaptor.DupNode(combinedOptions);
        lexerAST.AddChild(lexerOptions);

        foreach (GrammarAST option in combinedOptions.GetChildrenAsArray())
        {
            string optionName = option.GetChild(0).Text;
            bool copyToLexer = Grammar.lexerOptions.Contains(optionName)
                && !Grammar.doNotCopyOptionsToLexer.Contains(optionName);
            if (!copyToLexer)
            {
                continue;
            }

            GrammarAST optionCopy = (GrammarAST)adaptor.DupTree(option);
            lexerOptions.AddChild(optionCopy);
            lexerAST.SetOption(optionName, (GrammarAST)optionCopy.GetChild(1));
        }
    }

    // COPY all named actions, but only move those with lexer:: scope
    IList<GrammarAST> lexerScopedActions = new List<GrammarAST>();
    foreach (GrammarAST node in topLevelNodes)
    {
        if (node.Type != ANTLRParser.AT)
        {
            continue;
        }

        lexerAST.AddChild((ITree)adaptor.DupTree(node));
        if (node.GetChild(0).Text.Equals("lexer"))
        {
            lexerScopedActions.Add(node);
        }
    }

    foreach (GrammarAST action in lexerScopedActions)
    {
        combinedAST.DeleteChild(action);
    }

    GrammarAST combinedRulesRoot = (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.RULES);
    if (combinedRulesRoot == null)
    {
        return lexerAST;
    }

    // MOVE lexer rules
    GrammarAST lexerRulesRoot = (GrammarAST)adaptor.Create(ANTLRParser.RULES, "RULES");
    lexerAST.AddChild(lexerRulesRoot);

    GrammarASTWithOptions[] combinedRules = combinedRulesRoot.ChildCount > 0
        ? combinedRulesRoot.Children.Cast<GrammarASTWithOptions>().ToArray()
        : new GrammarASTWithOptions[0];

    IList<GrammarAST> movedRules = new List<GrammarAST>();
    foreach (GrammarASTWithOptions rule in combinedRules)
    {
        string ruleName = rule.GetChild(0).Text;
        if (Grammar.IsTokenName(ruleName))
        {
            lexerRulesRoot.AddChild((ITree)adaptor.DupTree(rule));
            movedRules.Add(rule);
        }
    }

    foreach (GrammarAST moved in movedRules)
    {
        combinedRulesRoot.DeleteChild(moved);
    }

    // Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
    IList<System.Tuple<GrammarAST, GrammarAST>> literalAliases = Grammar.GetStringLiteralAliasesFromLexerRules(lexerAST);
    ISet<string> stringLiterals = combinedGrammar.GetStringLiterals();

    // add strings from combined grammar (and imported grammars) into lexer
    // put them first as they are keywords; must resolve ambigs to these rules
    int insertIndex = 0;
    foreach (string literal in stringLiterals)
    {
        // if lexer already has a rule for literal, continue
        bool alreadyAliased = false;
        if (literalAliases != null)
        {
            foreach (System.Tuple<GrammarAST, GrammarAST> alias in literalAliases)
            {
                GrammarAST aliasLiteral = alias.Item2;
                if (literal.Equals(aliasLiteral.Text))
                {
                    alreadyAliased = true;
                    break;
                }
            }
        }

        if (alreadyAliased)
        {
            continue;
        }

        // create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>))
        string generatedRuleName = combinedGrammar.GetStringLiteralLexerRuleName(literal);

        // can't use wizard; need special node types
        GrammarAST literalRule = new RuleAST(ANTLRParser.RULE);
        BlockAST block = new BlockAST(ANTLRParser.BLOCK);
        AltAST alternative = new AltAST(ANTLRParser.ALT);
        TerminalAST literalNode = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, literal));
        alternative.AddChild(literalNode);
        block.AddChild(alternative);

        CommonToken nameToken = new CommonToken(ANTLRParser.TOKEN_REF, generatedRuleName);
        literalRule.AddChild(new TerminalAST(nameToken));
        literalRule.AddChild(block);

        lexerRulesRoot.InsertChild(insertIndex, literalRule);
        // reset indexes and set literalRule parent
        lexerRulesRoot.FreshenParentAndChildIndexes();

        // next literal will be added after the one just added
        insertIndex++;
    }

    // TODO: take out after stable if slow
    lexerAST.SanityCheckParentAndChildIndexes();
    combinedAST.SanityCheckParentAndChildIndexes();

    combinedGrammar.tool.Log("grammar", "after extract implicit lexer =" + combinedAST.ToStringTree());
    combinedGrammar.tool.Log("grammar", "lexer =" + lexerAST.ToStringTree());

    // A lexer without any rules is reported as "no implicit lexer".
    return lexerRulesRoot.ChildCount == 0 ? null : lexerAST;
}
/** Rewrite one left-recursive rule in place.
 *
 *  The rule's AST is analyzed with a LeftRecursiveRuleAnalyzer. When it
 *  matches, the replacement rule text is generated and parsed, the old
 *  rule subtree under RULES is replaced with the new one, semantic checks
 *  are rerun on it, and the alternative and label bookkeeping on
 *  {@code r} is filled in from the analyzer.
 *
 *  @param ast Root of the grammar that holds the RULES node to update.
 *  @param r The rule to translate; its {@code ast} and recursion info are
 *         modified.
 *  @param language Passed through to the analyzer.
 *  @return true if successful; false when the analyzer does not recognize
 *          the rule as left recursive (including when it throws a
 *          RecognitionException).
 */
public virtual bool TranslateLeftRecursiveRule(GrammarRootAST ast, LeftRecursiveRule r, string language)
{
    GrammarAST originalRuleAST = r.ast;
    string ruleName = originalRuleAST.GetChild(0).Text;
    LeftRecursiveRuleAnalyzer analyzer = new LeftRecursiveRuleAnalyzer(originalRuleAST, tool, ruleName, language);

    try
    {
        if (!analyzer.rec_rule())
        {
            return false;
        }
    }
    catch (RecognitionException)
    {
        // didn't match; oh well
        return false;
    }

    // replace old rule's AST; first create text of altered rule
    GrammarAST rulesRoot = (GrammarAST)ast.GetFirstChildWithType(ANTLRParser.RULES);
    string newRuleText = analyzer.GetArtificialOpPrecRule();

    // Parse within the context of the grammar that originally created the
    // AST being transformed. That could be an imported grammar, so this.g
    // cannot simply be used: the rule might come from the imported grammar
    // and not the root grammar (this.g).
    RuleAST t = ParseArtificialRule(originalRuleAST.g, newRuleText);

    // reuse the name token from the original AST since it refers to the
    // proper source location in the original grammar
    GrammarAST newNameNode = (GrammarAST)t.GetChild(0);
    GrammarAST originalNameNode = (GrammarAST)originalRuleAST.GetChild(0);
    newNameNode.Token = originalNameNode.Token;

    // update grammar AST and set rule's AST.
    rulesRoot.SetChild(originalRuleAST.ChildIndex, t);
    r.ast = t;

    // Reduce sets in newly created rule tree
    GrammarTransformPipeline pipeline = new GrammarTransformPipeline(g, g.tool);
    pipeline.ReduceBlocksToSets(r.ast);
    pipeline.ExpandParameterizedLoops(r.ast);

    // Rerun semantic checks on the new rule
    RuleCollector collector = new RuleCollector(g);
    collector.Visit(t, "rule");
    BasicSemanticChecks semanticChecks = new BasicSemanticChecks(g, collector);
    // disable the assoc element option checks because they are already
    // handled for the pre-transformed rule.
    semanticChecks.checkAssocElementOption = false;
    semanticChecks.Visit(t, "rule");

    // track recursive alt info for codegen
    r.recPrimaryAlts = new List<LeftRecursiveRuleAltInfo>();
    foreach (var primaryAlt in analyzer.prefixAndOtherAlts)
    {
        r.recPrimaryAlts.Add(primaryAlt);
    }

    if (r.recPrimaryAlts.Count == 0)
    {
        tool.errMgr.GrammarError(ErrorType.NO_NON_LR_ALTS, g.fileName, ((GrammarAST)r.ast.GetChild(0)).Token, r.name);
    }

    // Operator alternatives: binary first, then ternary, then suffix, so a
    // later group overwrites an earlier entry with the same key.
    r.recOpAlts = new OrderedHashMap<int, LeftRecursiveRuleAltInfo>();
    foreach (var binary in analyzer.binaryAlts)
    {
        r.recOpAlts[binary.Key] = binary.Value;
    }

    foreach (var ternary in analyzer.ternaryAlts)
    {
        r.recOpAlts[ternary.Key] = ternary.Value;
    }

    foreach (var suffix in analyzer.suffixAlts)
    {
        r.recOpAlts[suffix.Key] = suffix.Value;
    }

    // walk alt info records and set their altAST to point to appropriate
    // ALT subtree from freshly created AST
    SetAltASTPointers(r, t);

    // update Rule to just one alt and add prec alt
    ActionAST arg = (ActionAST)r.ast.GetFirstChildWithType(ANTLRParser.ARG_ACTION);
    if (arg != null)
    {
        r.args = ScopeParser.ParseTypedArgList(arg, arg.Text, g);
        r.args.type = AttributeDict.DictType.ARG;
        r.args.ast = arg;
        arg.resolver = r.alt[1]; // todo: isn't this Rule or something?
    }

    // define labels on recursive rule refs we delete; they don't point to
    // nodes of course. These are so $label in action translation works.
    foreach (System.Tuple<GrammarAST, string> labelEntry in analyzer.leftRecursiveRuleRefLabels)
    {
        GrammarAST labelNode = labelEntry.Item1;
        GrammarAST labelOpNode = (GrammarAST)labelNode.Parent;
        GrammarAST elementNode = (GrammarAST)labelOpNode.GetChild(1);
        LabelElementPair labelPair = new LabelElementPair(g, labelNode, elementNode, labelOpNode.Type);
        r.alt[1].labelDefs.Map(labelNode.Text, labelPair);
    }

    // copy to rule from walker
    r.leftRecursiveRuleRefLabels = analyzer.leftRecursiveRuleRefLabels;

    tool.Log("grammar", "added: " + t.ToStringTree());
    return true;
}
/** Create a lexer grammar for the given tool and grammar AST. All
 *  initialization is delegated to the base class constructor.
 */
public LexerGrammar(AntlrTool tool, GrammarRootAST ast)
    : base(tool, ast)
{
}