/**
 * Build the implicit lexer grammar AST for a combined grammar whose AST
 * looks like:
 *
 *  (COMBINED_GRAMMAR A
 *      (tokens { X (= Y 'y'))
 *      (OPTIONS (= x 'y'))
 *      (@ members {foo})
 *      (@ lexer header {package jj;})
 *      (RULES (RULE .+)))
 *
 * The lexer tree receives duplicates of: the options that are lexer
 * options (minus those in Grammar.doNotCopyOptionsToLexer), every named
 * action, and every rule whose name is a token name. A lexer rule is also
 * synthesized, ahead of the moved rules, for each string literal of the
 * combined grammar that is not already aliased by a lexer rule. No
 * tokens{} section is created; token symbols are meant to be shared with
 * the combined grammar later.
 *
 * Side effects: named actions whose scope is "lexer" and all token-named
 * rules are deleted from the combined AST. Everything placed in the lexer
 * tree is a duplicate, so no node ends up with two parents.
 *
 * Returns the lexer root. If the combined AST has no RULES node the root
 * is returned early (without a RULES child); if the lexer ends up with no
 * rules at all, null is returned.
 */
public virtual GrammarRootAST ExtractImplicitLexer(Grammar combinedGrammar)
{
    GrammarRootAST combinedAST = combinedGrammar.ast;
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.Token.InputStream);

    // Snapshot of the top-level children, taken before anything is deleted.
    GrammarAST[] elements = combinedAST.GetChildrenAsArray();

    // Lexer grammar root and its ID child (combined grammar name + "Lexer").
    string lexerName = combinedAST.GetChild(0).Text + "Lexer";
    CommonToken lexerRootToken = new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR");
    GrammarRootAST lexerAST = new GrammarRootAST(lexerRootToken, combinedGrammar.ast.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.Token.InputStream = combinedAST.Token.InputStream;
    lexerAST.AddChild((ITree)adaptor.Create(ANTLRParser.ID, lexerName));

    // Options: duplicate only the ones that apply to lexers.
    GrammarAST optionsRoot = (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.OPTIONS);
    if (optionsRoot != null && optionsRoot.ChildCount != 0)
    {
        GrammarAST lexerOptionsRoot = (GrammarAST)adaptor.DupNode(optionsRoot);
        lexerAST.AddChild(lexerOptionsRoot);

        foreach (GrammarAST option in optionsRoot.GetChildrenAsArray())
        {
            string optionName = option.GetChild(0).Text;
            if (!Grammar.lexerOptions.Contains(optionName) || Grammar.doNotCopyOptionsToLexer.Contains(optionName))
            {
                continue;
            }

            GrammarAST optionCopy = (GrammarAST)adaptor.DupTree(option);
            lexerOptionsRoot.AddChild(optionCopy);
            lexerAST.SetOption(optionName, (GrammarAST)optionCopy.GetChild(1));
        }
    }

    // Named actions: every one is duplicated into the lexer, but only the
    // ones scoped to "lexer" are afterwards removed from the combined AST.
    IList<GrammarAST> lexerScopedActions = new List<GrammarAST>();
    foreach (GrammarAST element in elements)
    {
        if (element.Type != ANTLRParser.AT)
        {
            continue;
        }

        lexerAST.AddChild((ITree)adaptor.DupTree(element));
        if (element.GetChild(0).Text.Equals("lexer"))
        {
            lexerScopedActions.Add(element);
        }
    }

    foreach (GrammarAST movedAction in lexerScopedActions)
    {
        combinedAST.DeleteChild(movedAction);
    }

    GrammarAST combinedRulesRoot = (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.RULES);
    if (combinedRulesRoot == null)
    {
        return lexerAST;
    }

    // Lexer rules: duplicate token-named rules into the lexer, then cut the
    // originals out of the combined grammar.
    GrammarAST lexerRulesRoot = (GrammarAST)adaptor.Create(ANTLRParser.RULES, "RULES");
    lexerAST.AddChild(lexerRulesRoot);

    GrammarASTWithOptions[] combinedRules = combinedRulesRoot.ChildCount > 0
        ? combinedRulesRoot.Children.Cast<GrammarASTWithOptions>().ToArray()
        : new GrammarASTWithOptions[0];

    IList<GrammarAST> movedRules = new List<GrammarAST>();
    foreach (GrammarASTWithOptions rule in combinedRules)
    {
        string ruleName = rule.GetChild(0).Text;
        if (!Grammar.IsTokenName(ruleName))
        {
            continue;
        }

        lexerRulesRoot.AddChild((ITree)adaptor.DupTree(rule));
        movedRules.Add(rule);
    }

    foreach (GrammarAST movedRule in movedRules)
    {
        combinedRulesRoot.DeleteChild(movedRule);
    }

    // Tracks 'if' from rules like  IF : 'if' ;  so that no second token is
    // defined for a literal that already has a lexer rule.
    IList<System.Tuple<GrammarAST, GrammarAST>> litAliases = Grammar.GetStringLiteralAliasesFromLexerRules(lexerAST);

    // Literals from the combined grammar (and imported grammars) go first in
    // the lexer: they are keywords and ambiguities must resolve to them.
    ISet<string> stringLiterals = combinedGrammar.GetStringLiterals();
    int insertIndex = 0;
    foreach (string literal in stringLiterals)
    {
        bool alreadyHasRule = false;
        if (litAliases != null)
        {
            foreach (System.Tuple<GrammarAST, GrammarAST> alias in litAliases)
            {
                if (literal.Equals(alias.Item2.Text))
                {
                    alreadyHasRule = true;
                    break;
                }
            }
        }

        if (alreadyHasRule)
        {
            continue;
        }

        // Build (RULE uniquename (BLOCK (ALT literal))) by hand; the special
        // node types are required, so the tree wizard cannot be used.
        string literalRuleName = combinedGrammar.GetStringLiteralLexerRuleName(literal);

        TerminalAST literalNode = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, literal));
        AltAST alternative = new AltAST(ANTLRParser.ALT);
        alternative.AddChild(literalNode);

        BlockAST block = new BlockAST(ANTLRParser.BLOCK);
        block.AddChild(alternative);

        GrammarAST literalRule = new RuleAST(ANTLRParser.RULE);
        literalRule.AddChild(new TerminalAST(new CommonToken(ANTLRParser.TOKEN_REF, literalRuleName)));
        literalRule.AddChild(block);

        // The next literal is inserted right after this one.
        lexerRulesRoot.InsertChild(insertIndex, literalRule);
        insertIndex++;

        // Reset the indexes and set the new rule's parent.
        lexerRulesRoot.FreshenParentAndChildIndexes();
    }

    // TODO: take out after stable if slow
    lexerAST.SanityCheckParentAndChildIndexes();
    combinedAST.SanityCheckParentAndChildIndexes();

    combinedGrammar.tool.Log("grammar", "after extract implicit lexer =" + combinedAST.ToStringTree());
    combinedGrammar.tool.Log("grammar", "lexer =" + lexerAST.ToStringTree());

    // A lexer without any rule is reported as "no implicit lexer".
    return lexerRulesRoot.ChildCount == 0 ? null : lexerAST;
}
/**
 * Translate one element of an alternative while references to
 * factoredRule are being left factored out of a decision.
 *
 * element               the node to translate. Rule references other than
 *                       the factored rule may have their text rewritten in
 *                       place to name a rule variant.
 * factoredRule          name of the rule being factored out.
 * variant               only forwarded to recursive calls and to
 *                       TranslateLeftFactoredDecision.
 * mode                  selects which alternatives are wanted, as reported
 *                       by mode.IncludeFactoredAlts() and
 *                       mode.IncludeUnfactoredAlts(); COMBINED_FACTOR is
 *                       rejected.
 * includeFactoredElement whether the returned tree should start with the
 *                       reference to the factored rule; not allowed with
 *                       PARTIAL_UNFACTORED.
 *
 * Returns the translated tree (the element itself, or a nil-rooted list
 * of nodes), or null when the element cannot be translated for the
 * requested mode.
 *
 * Throws ArgumentException for PARTIAL_UNFACTORED combined with
 * includeFactoredElement, InvalidOperationException for COMBINED_FACTOR or
 * an unknown RuleVariants value, and NotImplementedException when the
 * factored rule has arguments and a reference to it must be emitted.
 */
protected virtual GrammarAST TranslateLeftFactoredElement(GrammarAST element, string factoredRule, bool variant, DecisionFactorMode mode, bool includeFactoredElement)
{
    if (mode == DecisionFactorMode.PARTIAL_UNFACTORED && includeFactoredElement)
    {
        throw new ArgumentException("Cannot include the factored element in unfactored alternatives.");
    }

    if (mode == DecisionFactorMode.COMBINED_FACTOR)
    {
        throw new InvalidOperationException("Cannot return a combined answer.");
    }

    Debug.Assert(!mode.IncludeFactoredAlts() || !mode.IncludeUnfactoredAlts());

    switch (element.Type)
    {
        case ANTLRParser.ASSIGN:
        case ANTLRParser.PLUS_ASSIGN:
        {
            /* label=a
             *
             * ==>
             *
             * factoredElement label=a_factored
             *
             * TODO: labeled rule references are not yet supported, so this
             * branch always gives up. The labeled child is still translated
             * because that call can have side effects (it may rename a rule
             * reference and create rule variants); only its result is
             * discarded.
             * NOTE(review): confirm callers rely on those side effects
             * before dropping the recursive call.
             */
            TranslateLeftFactoredElement((GrammarAST)element.GetChild(1), factoredRule, variant, mode, includeFactoredElement);
            return null;
        }

        case ANTLRParser.RULE_REF:
        {
            if (factoredRule.Equals(element.Token.Text))
            {
                if (!mode.IncludeFactoredAlts())
                {
                    return null;
                }

                if (includeFactoredElement)
                {
                    // this element is already left factored
                    return element;
                }

                // The factored reference itself is dropped: hand back a nil
                // root from which the only child was added and removed again.
                GrammarAST emptyRoot = (GrammarAST)adaptor.Nil();
                emptyRoot.AddChild((ITree)adaptor.Create(TokenConstants.Epsilon, "EPSILON"));
                emptyRoot.DeleteChild(0);
                return emptyRoot;
            }

            Rule targetRule;
            if (!_rules.TryGetValue(element.Token.Text, out targetRule))
            {
                return null;
            }

            RuleVariants ruleVariants = CreateLeftFactoredRuleVariant(targetRule, factoredRule);
            switch (ruleVariants)
            {
                case RuleVariants.NONE:
                    if (!mode.IncludeUnfactoredAlts())
                    {
                        return null;
                    }

                    // just call the original rule (leave the element unchanged)
                    return element;

                case RuleVariants.FULLY_FACTORED:
                    if (!mode.IncludeFactoredAlts())
                    {
                        return null;
                    }

                    break;

                case RuleVariants.PARTIALLY_FACTORED:
                    break;

                default:
                    throw new InvalidOperationException();
            }

            // Redirect the reference to the matching variant of the target rule.
            string marker = mode.IncludeFactoredAlts() ? ATNSimulator.RuleLfVariantMarker : ATNSimulator.RuleNolfVariantMarker;
            element.SetText(element.Text + marker + factoredRule);

            GrammarAST root = (GrammarAST)adaptor.Nil();
            if (includeFactoredElement)
            {
                Debug.Assert(mode.IncludeFactoredAlts());

                RuleRefAST factoredRuleRef = new RuleRefAST(adaptor.CreateToken(ANTLRParser.RULE_REF, factoredRule));
                factoredRuleRef.SetOption(SUPPRESS_ACCESSOR, (GrammarAST)adaptor.Create(ANTLRParser.ID, "true"));

                Rule factoredRuleDef = _rules[factoredRule];
                if (factoredRuleDef is LeftRecursiveRule)
                {
                    factoredRuleRef.SetOption(LeftRecursiveRuleTransformer.PRECEDENCE_OPTION_NAME, (GrammarAST)adaptor.Create(ANTLRParser.INT, "0"));
                }

                if (factoredRuleDef.args != null && factoredRuleDef.args.Size() > 0)
                {
                    throw new NotImplementedException("Cannot left-factor rules with arguments yet.");
                }

                adaptor.AddChild(root, factoredRuleRef);
            }

            adaptor.AddChild(root, element);
            return root;
        }

        case ANTLRParser.BLOCK:
        {
            // Work on a copy; the block is only usable if factoring leaves
            // exactly one alternative, whose elements are then hoisted.
            GrammarAST cloned = element.DupTree();
            if (!TranslateLeftFactoredDecision(cloned, factoredRule, variant, mode, includeFactoredElement))
            {
                return null;
            }

            if (cloned.ChildCount != 1)
            {
                return null;
            }

            GrammarAST root = (GrammarAST)adaptor.Nil();
            for (int i = 0; i < cloned.GetChild(0).ChildCount; i++)
            {
                adaptor.AddChild(root, cloned.GetChild(0).GetChild(i));
            }

            return root;
        }

        case ANTLRParser.POSITIVE_CLOSURE:
        {
            /* a+
             *
             * =>
             *
             * factoredElement a_factored a*
             */
            GrammarAST originalChildElement = (GrammarAST)element.GetChild(0);
            GrammarAST translatedElement = TranslateLeftFactoredElement(originalChildElement.DupTree(), factoredRule, variant, mode, includeFactoredElement);
            if (translatedElement == null)
            {
                return null;
            }

            GrammarAST closure = new StarBlockAST(ANTLRParser.CLOSURE, adaptor.CreateToken(ANTLRParser.CLOSURE, "CLOSURE"), null);
            adaptor.AddChild(closure, originalChildElement);

            GrammarAST root = (GrammarAST)adaptor.Nil();
            if (mode.IncludeFactoredAlts() && includeFactoredElement)
            {
                // Pull the factored reference to the front of the result.
                object factoredElement = adaptor.DeleteChild(translatedElement, 0);
                adaptor.AddChild(root, factoredElement);
            }

            adaptor.AddChild(root, translatedElement);
            adaptor.AddChild(root, closure);
            return root;
        }

        // All remaining known element kinds are passed through untouched when
        // unfactored alternatives are wanted, and rejected otherwise.
        case ANTLRParser.CLOSURE:
        case ANTLRParser.OPTIONAL:
            // not yet supported
        case ANTLRParser.DOT:
            // ref to imported grammar, not yet supported
        case ANTLRParser.ACTION:
        case ANTLRParser.SEMPRED:
        case ANTLRParser.WILDCARD:
        case ANTLRParser.STRING_LITERAL:
        case ANTLRParser.TOKEN_REF:
        case ANTLRParser.NOT:
            // terminals
        case ANTLRParser.EPSILON:
            // empty tree
            return mode.IncludeUnfactoredAlts() ? element : null;

        default:
            // unknown
            return null;
    }
}
/**
 * Merge all the rules, token definitions, and named actions from
 * imported grammars into the root grammar tree. Perform:
 *
 *  (tokens { X (= Y 'y')) + (tokens { Z )  ->  (tokens { X (= Y 'y') Z)
 *
 *  (@ members {foo}) + (@ members {bar})  ->  (@ members {foobar})
 *
 *  (RULES (RULE x y)) + (RULES (RULE z))  ->  (RULES (RULE x y z))
 *
 * Rules in root prevent same rule from being appended to RULES node.
 *
 * The goal is a complete combined grammar so we can ignore subordinate
 * grammars.
 *
 * Does nothing when the root grammar reports no imported grammars (null).
 * Imported nodes are attached to the root tree directly, not duplicated.
 * An OPTIONS_IN_DELEGATE error is reported for an imported grammar that
 * sets an option to a value different from the root grammar's, and an
 * ACTION_REDEFINITION error when one grammar defines the same named
 * action twice.
 */
public virtual void IntegrateImportedGrammars(Grammar rootGrammar)
{
    IList<Grammar> imports = rootGrammar.GetAllImportedGrammars();
    if (imports == null)
    {
        return;
    }

    GrammarAST root = rootGrammar.ast;
    GrammarAST id = (GrammarAST)root.GetChild(0);
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(id.Token.InputStream);

    GrammarAST tokensRoot = (GrammarAST)root.GetFirstChildWithType(ANTLRParser.TOKENS_SPEC);

    IList<GrammarAST> actionRoots = root.GetNodesWithType(ANTLRParser.AT);

    // Collect the names of the rules already defined in the root grammar.
    // NOTE(review): this assumes the root AST always has a RULES node; a
    // missing one would fail on the next statement.
    GrammarAST RULES = (GrammarAST)root.GetFirstChildWithType(ANTLRParser.RULES);
    ISet<string> rootRuleNames = new HashSet<string>();
    IList<GrammarAST> rootRules = RULES.GetNodesWithType(ANTLRParser.RULE);
    foreach (GrammarAST r in rootRules)
    {
        rootRuleNames.Add(r.GetChild(0).Text);
    }

    foreach (Grammar imp in imports)
    {
        // COPY TOKENS
        GrammarAST imp_tokensRoot = (GrammarAST)imp.ast.GetFirstChildWithType(ANTLRParser.TOKENS_SPEC);
        if (imp_tokensRoot != null)
        {
            rootGrammar.tool.Log("grammar", "imported tokens: " + imp_tokensRoot.Children);
            if (tokensRoot == null)
            {
                tokensRoot = (GrammarAST)adaptor.Create(ANTLRParser.TOKENS_SPEC, "TOKENS");
                tokensRoot.g = rootGrammar;
                root.InsertChild(1, tokensRoot); // ^(GRAMMAR ID TOKENS...)
            }

            tokensRoot.AddChildren(imp_tokensRoot.Children);
        }

        // COPY ACTIONS
        IList<GrammarAST> imp_actionRoots = imp.ast.GetAllChildrenWithType(ANTLRParser.AT);
        if (imp_actionRoots != null)
        {
            // The null check has to come before the imported list is
            // enumerated; root actions go first so they win on merge order.
            IList<GrammarAST> all_actionRoots = new List<GrammarAST>();
            if (actionRoots != null)
            {
                foreach (var actionRoot in actionRoots)
                {
                    all_actionRoots.Add(actionRoot);
                }
            }

            foreach (var actionRoot in imp_actionRoots)
            {
                all_actionRoots.Add(actionRoot);
            }

            IDictionary<System.Tuple<string, string>, GrammarAST> namedActions =
                new Dictionary<System.Tuple<string, string>, GrammarAST>();

            rootGrammar.tool.Log("grammar", "imported actions: " + imp_actionRoots);
            foreach (GrammarAST at in all_actionRoots)
            {
                string scopeName = rootGrammar.GetDefaultActionScope();
                GrammarAST name;
                GrammarAST action;
                if (at.ChildCount > 2)
                {
                    // must have a scope: (@ scope name {action})
                    GrammarAST scope = (GrammarAST)at.GetChild(0);
                    scopeName = scope.Text;
                    name = (GrammarAST)at.GetChild(1);
                    action = (GrammarAST)at.GetChild(2);
                }
                else
                {
                    // (@ name {action}) uses the grammar's default scope
                    name = (GrammarAST)at.GetChild(0);
                    action = (GrammarAST)at.GetChild(1);
                }

                System.Tuple<string, string> key = Tuple.Create(scopeName, name.Text);
                GrammarAST prevAction;
                if (!namedActions.TryGetValue(key, out prevAction) || prevAction == null)
                {
                    namedActions[key] = action;
                }
                else if (prevAction.g == at.g)
                {
                    rootGrammar.tool.errMgr.GrammarError(ErrorType.ACTION_REDEFINITION,
                                                         at.g.fileName, name.Token, name.Text);
                }
                else
                {
                    // Same action from two grammars: strip the surrounding
                    // braces of both bodies and concatenate them into the
                    // action seen first.
                    string s1 = prevAction.Text;
                    s1 = s1.Substring(1, s1.Length - 2);
                    string s2 = action.Text;
                    s2 = s2.Substring(1, s2.Length - 2);
                    string combinedAction = "{" + s1 + '\n' + s2 + "}";
                    prevAction.Token.Text = combinedAction;
                }
            }

            // at this point, we have complete list of combined actions,
            // some of which are already living in root grammar.
            // Merge in any actions not in root grammar into root's tree.
            // GroupBy keeps scopes in first-seen order and entries in
            // dictionary order, matching a scope-by-scope walk of the keys.
            foreach (var scopeGroup in namedActions.GroupBy(kv => kv.Key.Item1))
            {
                foreach (var entry in scopeGroup)
                {
                    GrammarAST action = entry.Value;
                    rootGrammar.tool.Log("grammar", action.g.name + " " + scopeGroup.Key + ":" + entry.Key.Item2 + "=" + action.Text);
                    if (action.g != rootGrammar)
                    {
                        root.InsertChild(1, action.Parent);
                    }
                }
            }
        }

        // COPY RULES
        IList<GrammarAST> rules = imp.ast.GetNodesWithType(ANTLRParser.RULE);
        if (rules != null)
        {
            foreach (GrammarAST r in rules)
            {
                rootGrammar.tool.Log("grammar", "imported rule: " + r.ToStringTree());
                string name = r.GetChild(0).Text;
                bool rootAlreadyHasRule = rootRuleNames.Contains(name);
                if (!rootAlreadyHasRule)
                {
                    RULES.AddChild(r); // merge in if not overridden
                    rootRuleNames.Add(name);
                }
            }
        }

        GrammarAST optionsRoot = (GrammarAST)imp.ast.GetFirstChildWithType(ANTLRParser.OPTIONS);
        if (optionsRoot != null)
        {
            // suppress the warning if the options match the options specified
            // in the root grammar
            // https://github.com/antlr/antlr4/issues/707
            bool hasNewOption = false;
            foreach (KeyValuePair<string, GrammarAST> option in imp.ast.GetOptions())
            {
                string importOption = imp.ast.GetOptionString(option.Key);
                if (importOption == null)
                {
                    continue;
                }

                string rootOption = rootGrammar.ast.GetOptionString(option.Key);
                if (!importOption.Equals(rootOption))
                {
                    hasNewOption = true;
                    break;
                }
            }

            if (hasNewOption)
            {
                rootGrammar.tool.errMgr.GrammarError(ErrorType.OPTIONS_IN_DELEGATE,
                                                     optionsRoot.g.fileName, optionsRoot.Token, imp.name);
            }
        }
    }

    rootGrammar.tool.Log("grammar", "Grammar: " + rootGrammar.ast.ToStringTree());
}