/**
 * Strip lexer rules out of a COMBINED grammar tree; they have already been
 * handed to the lexer. Grammars of any other type are left untouched.
 */
protected void TrimGrammar()
{
    if (grammar.type != GrammarType.Combined)
    {
        return;
    }

    // Tree shape: (header ... ) ( grammar ID (scope ...) ... ( rule ... ) ( rule ... ) ... )
    // Walk right across siblings until the "grammar" node is reached.
    GrammarAST grammarSpec = root;
    while (!grammarSpec.Text.Equals("grammar"))
    {
        grammarSpec = (GrammarAST)grammarSpec.Parent.GetChild(grammarSpec.ChildIndex + 1);
    }

    // Only advance the cursor when nothing was removed, because a deletion
    // shifts the following siblings down into the current slot.
    int cursor = 0;
    while (cursor < grammarSpec.ChildCount)
    {
        bool isLexerRule = grammarSpec.GetChild(cursor).Type == RULE
            && Rule.GetRuleType(grammarSpec.GetChild(cursor).GetChild(0).Text) == RuleType.Lexer;

        if (isLexerRule)
        {
            grammarSpec.DeleteChild(cursor);
        }
        else
        {
            cursor++;
        }
    }
}
// TODO: this strips the tree properly, but the original author notes that
// text() works from the start/stop token indexes, so the printed text
// may not reflect what this routine removed.
/**
 * Remove the leading self-reference (optionally labeled) from an alternative.
 *
 * Returns the label node when the removed element had the form label=rule or
 * label+=rule; returns null when there was no label or when the alternative
 * does not start with a reference to ruleName (in which case the tree is not
 * modified).
 */
public virtual GrammarAST StripLeftRecursion(GrammarAST altAST)
{
    // Element options, when present, occupy slot 0 and push the first
    // real element to slot 1.
    int recursiveRefIndex = 0;
    GrammarAST leading = (GrammarAST)altAST.GetChild(recursiveRefIndex);
    if (leading.Type == ELEMENT_OPTIONS)
    {
        recursiveRefIndex = 1;
        leading = (GrammarAST)altAST.GetChild(recursiveRefIndex);
    }

    // For label=rule / label+=rule the rule reference is the second child.
    ITree labeledTarget = leading.GetChild(1);

    bool startsWithSelfReference =
        (leading.Type == RULE_REF && leading.Text.Equals(ruleName))
        || (labeledTarget != null && labeledTarget.Type == RULE_REF && labeledTarget.Text.Equals(ruleName));
    if (!startsWithSelfReference)
    {
        return null;
    }

    GrammarAST label = null;
    if (leading.Type == ASSIGN || leading.Type == PLUS_ASSIGN)
    {
        label = (GrammarAST)leading.GetChild(0);
    }

    // Drop the recursive reference, then move the ALT's token start to the
    // element that now occupies the vacated slot so the range begins to the
    // right of what was deleted.
    altAST.DeleteChild(recursiveRefIndex);
    GrammarAST newLeading = (GrammarAST)altAST.GetChild(recursiveRefIndex);
    altAST.TokenStartIndex = newLeading.TokenStartIndex;

    return label;
}
/**
 * Expand a leading optional (x?) or closure (x*) element of an alternative
 * into two alternatives: one that keeps the element (x, resp. x+) and one
 * that omits it.
 *
 * Returns null for an alternative with no children, the alternative itself
 * when its first element is neither OPTIONAL nor CLOSURE, and otherwise a
 * nil-rooted list holding the two resulting alternatives.
 */
protected virtual GrammarAST ExpandOptionalQuantifiersForAlt(GrammarAST alt)
{
    if (alt.ChildCount == 0)
    {
        return null;
    }

    int leadingType = alt.GetChild(0).Type;
    if (leadingType != ANTLRParser.OPTIONAL && leadingType != ANTLRParser.CLOSURE)
    {
        return alt;
    }

    GrammarAST expansion = (GrammarAST)adaptor.Nil();

    // Second alternative: a copy with the quantified element removed
    // (EPSILON if that leaves nothing behind).
    GrammarAST withoutLeading = alt.DupTree();
    withoutLeading.DeleteChild(0);
    if (withoutLeading.ChildCount == 0)
    {
        adaptor.AddChild(withoutLeading, adaptor.Create(ANTLRParser.EPSILON, "EPSILON"));
    }

    if (leadingType == ANTLRParser.OPTIONAL)
    {
        // Replace x? by x ...
        alt.SetChild(0, alt.GetChild(0).GetChild(0));

        // ... and when x is a block with exactly one ALT, splice that
        // ALT's elements directly into this alternative.
        bool isSingleAltBlock = alt.GetChild(0).Type == ANTLRParser.BLOCK
            && alt.GetChild(0).ChildCount == 1
            && alt.GetChild(0).GetChild(0).Type == ANTLRParser.ALT;
        if (isSingleAltBlock)
        {
            GrammarAST spliced = (GrammarAST)adaptor.Nil();
            foreach (object element in ((GrammarAST)alt.GetChild(0).GetChild(0)).Children)
            {
                adaptor.AddChild(spliced, element);
            }

            adaptor.ReplaceChildren(alt, 0, 0, spliced);
        }
    }
    else
    {
        // Replace x* by x+, reusing the closure's children.
        PlusBlockAST positiveClosure = new PlusBlockAST(
            ANTLRParser.POSITIVE_CLOSURE,
            adaptor.CreateToken(ANTLRParser.POSITIVE_CLOSURE, "+"),
            null);

        int position = 0;
        while (position < alt.GetChild(0).ChildCount)
        {
            positiveClosure.AddChild(alt.GetChild(0).GetChild(position));
            position++;
        }

        alt.SetChild(0, positiveClosure);
    }

    adaptor.AddChild(expansion, alt);
    adaptor.AddChild(expansion, withoutLeading);
    return expansion;
}
/**
 * Look up an option's value node directly in the grammar tree.
 * Returns null when the tree has no OPTIONS section, the section is empty,
 * or no assignment in it names the requested option.
 */
public static GrammarAST FindOptionValueAST(GrammarRootAST root, string option)
{
    GrammarAST optionsNode = (GrammarAST)root.GetFirstChildWithType(ANTLRParser.OPTIONS);
    if (optionsNode == null || optionsNode.ChildCount <= 0)
    {
        return null;
    }

    foreach (object entry in optionsNode.Children)
    {
        GrammarAST assignment = (GrammarAST)entry;
        if (assignment.Type != ANTLRParser.ASSIGN)
        {
            continue;
        }

        if (!assignment.GetChild(0).Text.Equals(option))
        {
            continue;
        }

        // (= name value): the value is the second child.
        return (GrammarAST)assignment.GetChild(1);
    }

    return null;
}
/**
 * Report a V3_LEXER_LABEL error for a labeled lexer element, using the
 * label (the element's first child) as both error location and argument.
 */
protected override void EnterLabeledLexerElement(GrammarAST tree)
{
    GrammarAST labelNode = (GrammarAST)tree.GetChild(0);
    IToken labelToken = labelNode.Token;

    g.tool.errMgr.GrammarError(
        ErrorType.V3_LEXER_LABEL,
        g.fileName,
        labelToken,
        labelToken.Text);
}
/**
 * Report NO_RULES when the given RULES node has no children. The error
 * carries the text of the first child of the RULES node's parent.
 */
internal virtual void CheckNumRules(GrammarAST rulesNode)
{
    if (rulesNode.ChildCount != 0)
    {
        return;
    }

    GrammarAST grammarRoot = (GrammarAST)rulesNode.Parent;
    GrammarAST nameNode = (GrammarAST)grammarRoot.GetChild(0);
    g.tool.errMgr.GrammarError(ErrorType.NO_RULES, g.fileName, null, nameNode.Text, g);
}
/**
 * Match (RULE RULE_REF (BLOCK (ALT .*) (ALT RULE_REF[self] .*) (ALT .*)))
 * Match (RULE RULE_REF (BLOCK (ALT .*) (ALT (ASSIGN ID RULE_REF[self]) .*) (ALT .*)))
 *
 * Returns true when at least one child of the rule's BLOCK starts (after an
 * optional ELEMENT_OPTIONS node) with a reference to ruleName, either
 * directly or as the second child of its first element. Returns false when
 * t is null, when t has no BLOCK child, or when the BLOCK has no children.
 */
public static bool HasImmediateRecursiveRuleRefs(GrammarAST t, string ruleName)
{
    if (t == null)
    {
        return false;
    }

    GrammarAST blk = (GrammarAST)t.GetFirstChildWithType(BLOCK);
    if (blk == null)
    {
        return false;
    }

    // Go through ChildCount/GetChild instead of dereferencing Children:
    // a childless BLOCK then simply yields zero iterations.
    int n = blk.ChildCount;
    for (int i = 0; i < n; i++)
    {
        ITree alt = blk.GetChild(i);
        if (alt == null)
        {
            continue;
        }

        ITree first = alt.GetChild(0);
        if (first == null)
        {
            continue;
        }

        if (first.Type == ELEMENT_OPTIONS)
        {
            // Skip the options node; the real first element follows it.
            first = alt.GetChild(1);
            if (first == null)
            {
                continue;
            }
        }

        // Direct form: rule ...
        if (first.Type == RULE_REF && first.Text.Equals(ruleName))
        {
            return true;
        }

        // Labeled form: (ASSIGN ID rule) ...
        ITree rref = first.GetChild(1);
        if (rref != null && rref.Type == RULE_REF && rref.Text.Equals(ruleName))
        {
            return true;
        }
    }

    return false;
}
/**
 * On leaving a mode, report MODE_WITHOUT_RULES if no non-fragment rule was
 * counted. The error points at the mode's first child when there is one
 * (falling back to the mode node's own token) and names the mode "?" when
 * no non-empty name is available.
 */
protected override void ExitMode(GrammarAST tree)
{
    if (nonFragmentRuleCount != 0)
    {
        return;
    }

    IToken location = tree.Token;
    string modeName = "?";

    if (tree.ChildCount > 0)
    {
        string declaredName = tree.GetChild(0).Text;
        if (!string.IsNullOrEmpty(declaredName))
        {
            modeName = declaredName;
        }

        location = ((GrammarAST)tree.GetChild(0)).Token;
    }

    g.tool.errMgr.GrammarError(ErrorType.MODE_WITHOUT_RULES, g.fileName, location, modeName, g);
}
/**
 * Translate the first element of an alternative for left factoring and
 * splice the result back into the alternative in place.
 *
 * Returns the (modified) alternative, or null when the alternative cannot be
 * translated for the requested mode. An EPSILON alternative is returned
 * unchanged only in PARTIAL_UNFACTORED mode.
 *
 * Throws ArgumentException for the combinations PARTIAL_UNFACTORED with
 * includeFactoredElement, and COMBINED_FACTOR without it.
 */
protected virtual GrammarAST TranslateLeftFactoredAlternative(GrammarAST alternative, string factoredRule, bool variant, DecisionFactorMode mode, bool includeFactoredElement)
{
    if (includeFactoredElement)
    {
        if (mode == DecisionFactorMode.PARTIAL_UNFACTORED)
        {
            throw new ArgumentException("Cannot include the factored element in unfactored alternatives.");
        }
    }
    else if (mode == DecisionFactorMode.COMBINED_FACTOR)
    {
        throw new ArgumentException("Cannot return a combined answer without the factored element.");
    }

    Debug.Assert(alternative.ChildCount > 0);

    if (alternative.GetChild(0).Type == ANTLRParser.EPSILON)
    {
        return mode == DecisionFactorMode.PARTIAL_UNFACTORED ? alternative : null;
    }

    GrammarAST leadingElement = (GrammarAST)alternative.GetChild(0);
    GrammarAST translated = TranslateLeftFactoredElement(leadingElement, factoredRule, variant, mode, includeFactoredElement);
    if (translated == null)
    {
        return null;
    }

    // Swap the first element for its translation; if that empties the
    // alternative, make the emptiness explicit with EPSILON.
    alternative.ReplaceChildren(0, 0, translated);
    if (alternative.ChildCount == 0)
    {
        adaptor.AddChild(alternative, adaptor.Create(ANTLRParser.EPSILON, "EPSILON"));
    }

    Debug.Assert(alternative.ChildCount > 0);
    return alternative;
}
/** Count the direct children of t whose type is ALT. */
public int CountAltsForBlock(GrammarAST t)
{
    int altCount = 0;
    int childTotal = t.ChildCount;

    int position = 0;
    while (position < childTotal)
    {
        if (t.GetChild(position).Type == ALT)
        {
            altCount++;
        }

        position++;
    }

    return altCount;
}
/**
 * Configure the block under a .* / .+ loop: always sets greedy=false and,
 * outside lexer grammars, also sets k=1.
 */
private void HandleDotLoop(GrammarAST start)
{
    GrammarAST loopBlock = (GrammarAST)start.GetChild(0);

    IDictionary<string, object> loopOptions = new Dictionary<string, object>();
    loopOptions["greedy"] = "false";

    bool isLexerGrammar = grammar.type == GrammarType.Lexer;
    if (!isLexerGrammar)
    {
        // parser grammars assume k=1 for .* loops; otherwise they
        // (analysis?) look until EOF
        loopOptions["k"] = 1;
    }

    loopBlock.SetOptions(grammar, loopOptions);
}
/**
 * Build the output-model node for a (...)+ block. State numbers and the
 * decision are read from the ATN states attached to the block under
 * plusRoot: the block start state and its loop-back state.
 */
public PlusBlock(OutputModelFactory factory, GrammarAST plusRoot, IList<CodeBlockForAlt> alts)
    : base(factory, plusRoot, alts)
{
    BlockAST innerBlock = (BlockAST)plusRoot.GetChild(0);
    PlusBlockStartState startState = (PlusBlockStartState)innerBlock.atnState;
    PlusLoopbackState loopBack = startState.loopBackState;

    // The node's own state number is that of the loop-back state.
    stateNumber = loopBack.stateNumber;
    blockStartStateNumber = startState.stateNumber;
    loopBackStateNumber = loopBack.stateNumber;

    this.error = GetThrowNoViableAlt(factory, plusRoot, null);
    decision = loopBack.decision;
}
/**
 * Build the output-model node for a single-alternative (...)+ block.
 * The loop test expression is generated from the first lookahead set
 * recorded for the loop-back decision.
 */
public LL1PlusBlockSingleAlt(OutputModelFactory factory, GrammarAST plusRoot, IList<CodeBlockForAlt> alts)
    : base(factory, plusRoot, alts)
{
    BlockAST innerBlock = (BlockAST)plusRoot.GetChild(0);
    PlusBlockStartState startState = (PlusBlockStartState)innerBlock.atnState;

    stateNumber = startState.loopBackState.stateNumber;
    blockStartStateNumber = startState.stateNumber;
    this.decision = startState.loopBackState.decision;

    // Index 0 of the decision's lookahead sets drives the loop condition.
    IntervalSet[] lookaheadPerAlt = factory.GetGrammar().decisionLOOK[decision];
    loopExpr = AddCodeForLoopLookaheadTempVar(lookaheadPerAlt[0]);
}
/**
 * Run ExpandOptionalQuantifiersForAlt over every ALT child of a block and
 * replace the block's children with the results (non-ALT children are kept
 * as they are).
 *
 * Returns false, leaving the block untouched, as soon as one alternative
 * cannot be expanded. When variant is false and the block's parent is a
 * RuleAST, the matching Rule's alternative table is rebuilt from the
 * block's new ALT children.
 */
protected virtual bool ExpandOptionalQuantifiersForBlock(GrammarAST block, bool variant)
{
    IList<GrammarAST> rebuilt = new List<GrammarAST>();
    for (int position = 0; position < block.ChildCount; position++)
    {
        GrammarAST current = (GrammarAST)block.GetChild(position);
        GrammarAST replacement = current;

        if (current.Type == ANTLRParser.ALT)
        {
            replacement = ExpandOptionalQuantifiersForAlt(current);
            if (replacement == null)
            {
                return false;
            }
        }

        rebuilt.Add(replacement);
    }

    // Swap all existing children for the rebuilt list in one step.
    GrammarAST holder = (GrammarAST)adaptor.Nil();
    holder.AddChildren(rebuilt);
    block.ReplaceChildren(0, block.ChildCount - 1, holder);
    block.FreshenParentAndChildIndexesDeeply();

    if (variant || !(block.Parent is RuleAST))
    {
        return true;
    }

    RuleAST owningRule = (RuleAST)block.Parent;
    string owningRuleName = owningRule.GetChild(0).Text;
    Rule rule = _rules[owningRuleName];

    IList<GrammarAST> altNodes = block.GetAllChildrenWithType(ANTLRParser.ALT);
    int altTotal = altNodes.Count;
    rule.numberOfAlts = altTotal;

    // Alternatives are numbered from 1; slot 0 stays unused.
    rule.alt = new Alternative[altTotal + 1];
    for (int altNumber = 1; altNumber <= altTotal; altNumber++)
    {
        rule.alt[altNumber] = new Alternative(rule, altNumber);
        rule.alt[altNumber].ast = (AltAST)altNodes[altNumber - 1];
    }

    return true;
}
/**
 * Wrap an already-built block handle in the ATN states for a (...)* loop.
 *
 * Creates, in this order, a loop entry state (registered as a decision
 * state), a loop end state and a loop-back state, then wires:
 *   entry -> block start and entry -> end (order depends on greediness),
 *   block end -> loop-back, loop-back -> entry.
 * The entry state is stored on starAST.atnState.
 *
 * starAST is cast to QuantifierAST to read greediness; elem.left and
 * elem.right are cast to StarBlockStartState and BlockEndState.
 * Returns a handle running from the entry state to the loop end state.
 */
public virtual Handle Star([NotNull] GrammarAST starAST, [NotNull] Handle elem)
{
    StarBlockStartState blkStart = (StarBlockStartState)elem.left;
    BlockEndState blkEnd = (BlockEndState)elem.right;
    // remember this (rule, start, end) triple in preventEpsilonClosureBlocks
    preventEpsilonClosureBlocks.Add(Tuple.Create <Rule, ATNState, ATNState>(currentRule, blkStart, blkEnd));

    StarLoopEntryState entry = NewState <StarLoopEntryState>(starAST);
    entry.nonGreedy = !((QuantifierAST)starAST).GetGreedy();
    entry.sll = false; // no way to express SLL restriction
    atn.DefineDecisionState(entry);
    LoopEndState end = NewState <LoopEndState>(starAST);
    StarLoopbackState loop = NewState <StarLoopbackState>(starAST);
    // both the entry and the end state point at the shared loop-back state
    entry.loopBackState = loop;
    end.loopBackState = loop;

    BlockAST blkAST = (BlockAST)starAST.GetChild(0);
    if (((QuantifierAST)starAST).GetGreedy())
    {
        // a greedy loop over a block for which ExpectNonGreedy is true is reported
        if (ExpectNonGreedy(blkAST))
        {
            g.tool.errMgr.GrammarError(ErrorType.EXPECTED_NON_GREEDY_WILDCARD_BLOCK, g.fileName, starAST.Token, starAST.Token.Text);
        }

        Epsilon(entry, blkStart); // loop enter edge (alt 1)
        Epsilon(entry, end); // bypass loop edge (alt 2)
    }
    else
    {
        // if not greedy, priority to exit branch; make it first
        Epsilon(entry, end); // bypass loop edge (alt 1)
        Epsilon(entry, blkStart); // loop enter edge (alt 2)
    }

    Epsilon(blkEnd, loop); // block end hits loop back
    Epsilon(loop, entry); // loop back to entry/exit decision

    starAST.atnState = entry; // decision is to enter/exit; blk is its own decision
    return(new Handle(entry, end));
}
/**
 * Wrap an already-built block handle in the ATN states for a (...)+ loop.
 *
 * Creates a loop-back state (registered as a decision state) and a loop end
 * state, then wires block end -> loop-back, and from the loop-back state
 * one edge to the block start and one to the end (order depends on
 * greediness). The loop-back state is stored on plusAST.atnState.
 *
 * plusAST is cast to QuantifierAST to read greediness; blk.left and
 * blk.right are cast to PlusBlockStartState and BlockEndState.
 * Returns a handle running from the block start state to the loop end state.
 */
public virtual Handle Plus([NotNull] GrammarAST plusAST, [NotNull] Handle blk)
{
    PlusBlockStartState blkStart = (PlusBlockStartState)blk.left;
    BlockEndState blkEnd = (BlockEndState)blk.right;
    // remember this (rule, start, end) triple in preventEpsilonClosureBlocks
    preventEpsilonClosureBlocks.Add(Tuple.Create <Rule, ATNState, ATNState>(currentRule, blkStart, blkEnd));

    PlusLoopbackState loop = NewState <PlusLoopbackState>(plusAST);
    loop.nonGreedy = !((QuantifierAST)plusAST).GetGreedy();
    loop.sll = false; // no way to express SLL restriction
    atn.DefineDecisionState(loop);
    LoopEndState end = NewState <LoopEndState>(plusAST);
    // both the block start and the end state point at the loop-back state
    blkStart.loopBackState = loop;
    end.loopBackState = loop;

    plusAST.atnState = loop;
    Epsilon(blkEnd, loop); // blk can see loop back

    BlockAST blkAST = (BlockAST)plusAST.GetChild(0);
    if (((QuantifierAST)plusAST).GetGreedy())
    {
        // a greedy loop over a block for which ExpectNonGreedy is true is reported
        if (ExpectNonGreedy(blkAST))
        {
            g.tool.errMgr.GrammarError(ErrorType.EXPECTED_NON_GREEDY_WILDCARD_BLOCK, g.fileName, plusAST.Token, plusAST.Token.Text);
        }

        Epsilon(loop, blkStart); // loop back to start
        Epsilon(loop, end); // or exit
    }
    else
    {
        // if not greedy, priority to exit branch; make it first
        Epsilon(loop, end); // exit
        Epsilon(loop, blkStart); // loop back to start
    }

    return(new Handle(blkStart, end));
}
/**
 * Register a list label (id += element) with the grammar. A ROOT or BANG
 * wrapper around the element is looked through first. Rule references
 * become rule list labels, wildcards in tree parser grammars become
 * wildcard tree list labels, and everything else becomes a token list label.
 */
private void HandleElementPlusAssign(GrammarAST id, GrammarAST elementStart)
{
    bool isWrapped = elementStart.Type == ANTLRParser.ROOT || elementStart.Type == ANTLRParser.BANG;
    GrammarAST labeled = isWrapped ? (GrammarAST)elementStart.GetChild(0) : elementStart;

    if (labeled.Type == RULE_REF)
    {
        grammar.DefineRuleListLabel(currentRuleName, id.Token, labeled);
        return;
    }

    if (labeled.Type == WILDCARD && grammar.type == GrammarType.TreeParser)
    {
        grammar.DefineWildcardTreeListLabel(currentRuleName, id.Token, labeled);
        return;
    }

    grammar.DefineTokenListLabel(currentRuleName, id.Token, labeled);
}
/**
 * Tell whether the first LEXER_ALT_ACTION found under the rule's tree
 * carries a type(...) command call or a more command. Returns false when
 * the rule has no tree or no LEXER_ALT_ACTION descendant.
 */
internal virtual bool HasTypeOrMoreCommand([NotNull] Rule r)
{
    GrammarAST ruleTree = r.ast;
    if (ruleTree == null)
    {
        return false;
    }

    GrammarAST commands = (GrammarAST)ruleTree.GetFirstDescendantWithType(ANTLRParser.LEXER_ALT_ACTION);
    if (commands == null)
    {
        // no commands follow the rule
        return false;
    }

    // Child 0 is the alternative itself; the commands start at index 1.
    int commandTotal = commands.ChildCount;
    for (int position = 1; position < commandTotal; position++)
    {
        GrammarAST command = (GrammarAST)commands.GetChild(position);

        bool matches = command.Type == ANTLRParser.LEXER_ACTION_CALL
            ? "type".Equals(command.GetChild(0).Text)
            : "more".Equals(command.Text);
        if (matches)
        {
            return true;
        }
    }

    return false;
}
/**
 * Rewrite one left-recursive rule into its generated replacement rule and
 * update both the grammar tree and the Rule object.
 *
 * ast      - grammar root; its RULES child receives the replacement rule tree
 * r        - the rule being rewritten; r.ast, r.recPrimaryAlts, r.recOpAlts,
 *            r.args (when an ARG_ACTION is present) and
 *            r.leftRecursiveRuleRefLabels are overwritten
 * language - forwarded to the LeftRecursiveRuleAnalyzer
 *
 * Return true if successful. Returns false, without modifying anything, when
 * the analyzer's rec_rule() reports no match or throws RecognitionException.
 */
public virtual bool TranslateLeftRecursiveRule(GrammarRootAST ast, LeftRecursiveRule r, string language)
{
    //tool.log("grammar", ruleAST.toStringTree());
    GrammarAST prevRuleAST = r.ast;
    string ruleName = prevRuleAST.GetChild(0).Text;
    LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker = new LeftRecursiveRuleAnalyzer(prevRuleAST, tool, ruleName, language);
    bool isLeftRec;
    try
    {
        //System.Console.WriteLine("TESTING ---------------\n" +
        //                   leftRecursiveRuleWalker.Text(ruleAST));
        isLeftRec = leftRecursiveRuleWalker.rec_rule();
    }
    catch (RecognitionException)
    {
        isLeftRec = false; // didn't match; oh well
    }

    if (!isLeftRec)
    {
        return(false);
    }

    // replace old rule's AST; first create text of altered rule
    GrammarAST RULES = (GrammarAST)ast.GetFirstChildWithType(ANTLRParser.RULES);
    string newRuleText = leftRecursiveRuleWalker.GetArtificialOpPrecRule();
    //System.Console.WriteLine("created: " + newRuleText);
    // now parse within the context of the grammar that originally created
    // the AST we are transforming. This could be an imported grammar so
    // we cannot just reference this.g because the rule might come from
    // the imported grammar and not the root grammar (this.g)
    RuleAST t = ParseArtificialRule(prevRuleAST.g, newRuleText);

    // reuse the name token from the original AST since it refers to the proper source location in the original grammar
    ((GrammarAST)t.GetChild(0)).Token = ((GrammarAST)prevRuleAST.GetChild(0)).Token;

    // update grammar AST and set rule's AST.
    // The new tree takes the old rule's slot under RULES.
    RULES.SetChild(prevRuleAST.ChildIndex, t);
    r.ast = t;

    // Reduce sets in newly created rule tree
    GrammarTransformPipeline transform = new GrammarTransformPipeline(g, g.tool);
    transform.ReduceBlocksToSets(r.ast);
    transform.ExpandParameterizedLoops(r.ast);

    // Rerun semantic checks on the new rule
    RuleCollector ruleCollector = new RuleCollector(g);
    ruleCollector.Visit(t, "rule");
    BasicSemanticChecks basics = new BasicSemanticChecks(g, ruleCollector);
    // disable the assoc element option checks because they are already
    // handled for the pre-transformed rule.
    basics.checkAssocElementOption = false;
    basics.Visit(t, "rule");

    // track recursive alt info for codegen
    r.recPrimaryAlts = new List <LeftRecursiveRuleAltInfo>();
    foreach (var altInfo in leftRecursiveRuleWalker.prefixAndOtherAlts)
    {
        r.recPrimaryAlts.Add(altInfo);
    }

    // a left-recursive rule with no primary (non-recursive) alternative is reported
    if (r.recPrimaryAlts.Count == 0)
    {
        tool.errMgr.GrammarError(ErrorType.NO_NON_LR_ALTS, g.fileName, ((GrammarAST)r.ast.GetChild(0)).Token, r.name);
    }

    // binary, ternary and suffix alternatives are merged into one map, in that order
    r.recOpAlts = new OrderedHashMap <int, LeftRecursiveRuleAltInfo>();
    foreach (var pair in leftRecursiveRuleWalker.binaryAlts)
    {
        r.recOpAlts[pair.Key] = pair.Value;
    }

    foreach (var pair in leftRecursiveRuleWalker.ternaryAlts)
    {
        r.recOpAlts[pair.Key] = pair.Value;
    }

    foreach (var pair in leftRecursiveRuleWalker.suffixAlts)
    {
        r.recOpAlts[pair.Key] = pair.Value;
    }

    // walk alt info records and set their altAST to point to appropriate ALT subtree
    // from freshly created AST
    SetAltASTPointers(r, t);

    // update Rule to just one alt and add prec alt
    ActionAST arg = (ActionAST)r.ast.GetFirstChildWithType(ANTLRParser.ARG_ACTION);
    if (arg != null)
    {
        r.args = ScopeParser.ParseTypedArgList(arg, arg.Text, g);
        r.args.type = AttributeDict.DictType.ARG;
        r.args.ast = arg;
        arg.resolver = r.alt[1]; // todo: isn't this Rule or something?
    }

    // define labels on recursive rule refs we delete; they don't point to nodes of course
    // these are so $label in action translation works
    foreach (System.Tuple <GrammarAST, string> pair in leftRecursiveRuleWalker.leftRecursiveRuleRefLabels)
    {
        GrammarAST labelNode = pair.Item1;
        GrammarAST labelOpNode = (GrammarAST)labelNode.Parent;
        GrammarAST elementNode = (GrammarAST)labelOpNode.GetChild(1);
        LabelElementPair lp = new LabelElementPair(g, labelNode, elementNode, labelOpNode.Type);
        r.alt[1].labelDefs.Map(labelNode.Text, lp);
    }

    // copy to rule from walker
    r.leftRecursiveRuleRefLabels = leftRecursiveRuleWalker.leftRecursiveRuleRefLabels;

    tool.Log("grammar", "added: " + t.ToStringTree());
    return(true);
}
/** Merge all the rules, token definitions, and named actions from
 *  imported grammars into the root grammar tree.  Perform:
 *
 *  (tokens { X (= Y 'y')) + (tokens { Z )	-&gt;	(tokens { X (= Y 'y') Z)
 *
 *  (@ members {foo}) + (@ members {bar})	-&gt;	(@ members {foobar})
 *
 *  (RULES (RULE x y)) + (RULES (RULE z))	-&gt;	(RULES (RULE x y z))
 *
 *  Rules in root prevent same rule from being appended to RULES node.
 *
 *  The goal is a complete combined grammar so we can ignore subordinate
 *  grammars.
 *
 *  Does nothing when GetAllImportedGrammars() returns null. Also reports
 *  ACTION_REDEFINITION when two same-named actions belong to the same
 *  grammar, and OPTIONS_IN_DELEGATE when an imported grammar sets an option
 *  to a value that differs from the root grammar's.
 */
public virtual void IntegrateImportedGrammars(Grammar rootGrammar)
{
    IList <Grammar> imports = rootGrammar.GetAllImportedGrammars();
    if (imports == null)
    {
        return;
    }

    GrammarAST root = rootGrammar.ast;
    GrammarAST id = (GrammarAST)root.GetChild(0);
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(id.Token.InputStream);

    GrammarAST tokensRoot = (GrammarAST)root.GetFirstChildWithType(ANTLRParser.TOKENS_SPEC);

    IList <GrammarAST> actionRoots = root.GetNodesWithType(ANTLRParser.AT);

    // Compute list of rules in root grammar and ensure we have a RULES node
    // NOTE(review): RULES is dereferenced below without a null check; this
    // presumably relies on the root tree always having a RULES child — confirm.
    GrammarAST RULES = (GrammarAST)root.GetFirstChildWithType(ANTLRParser.RULES);
    ISet <string> rootRuleNames = new HashSet <string>();
    // make list of rules we have in root grammar
    IList <GrammarAST> rootRules = RULES.GetNodesWithType(ANTLRParser.RULE);
    foreach (GrammarAST r in rootRules)
    {
        rootRuleNames.Add(r.GetChild(0).Text);
    }

    foreach (Grammar imp in imports)
    {
        // COPY TOKENS
        GrammarAST imp_tokensRoot = (GrammarAST)imp.ast.GetFirstChildWithType(ANTLRParser.TOKENS_SPEC);
        if (imp_tokensRoot != null)
        {
            rootGrammar.tool.Log("grammar", "imported tokens: " + imp_tokensRoot.Children);
            // create the root's tokens section on first use
            if (tokensRoot == null)
            {
                tokensRoot = (GrammarAST)adaptor.Create(ANTLRParser.TOKENS_SPEC, "TOKENS");
                tokensRoot.g = rootGrammar;
                root.InsertChild(1, tokensRoot); // ^(GRAMMAR ID TOKENS...)
            }

            tokensRoot.AddChildren(imp_tokensRoot.Children);
        }

        // root actions first, then this import's actions
        IList <GrammarAST> all_actionRoots = new List <GrammarAST>();
        IList <GrammarAST> imp_actionRoots = imp.ast.GetAllChildrenWithType(ANTLRParser.AT);
        if (actionRoots != null)
        {
            foreach (var actionRoot in actionRoots)
            {
                all_actionRoots.Add(actionRoot);
            }
        }

        // NOTE(review): imp_actionRoots is enumerated here before the null
        // check below, so that check can only matter if this never throws.
        foreach (var actionRoot in imp_actionRoots)
        {
            all_actionRoots.Add(actionRoot);
        }

        // COPY ACTIONS
        if (imp_actionRoots != null)
        {
            // (scope name, action name) -> action node seen first for that pair
            IDictionary <System.Tuple <string, string>, GrammarAST> namedActions =
                new Dictionary <System.Tuple <string, string>, GrammarAST>();

            rootGrammar.tool.Log("grammar", "imported actions: " + imp_actionRoots);
            foreach (GrammarAST at in all_actionRoots)
            {
                string scopeName = rootGrammar.GetDefaultActionScope();
                GrammarAST scope, name, action;
                if (at.ChildCount > 2)
                {
                    // must have a scope
                    scope = (GrammarAST)at.GetChild(0);
                    scopeName = scope.Text;
                    name = (GrammarAST)at.GetChild(1);
                    action = (GrammarAST)at.GetChild(2);
                }
                else
                {
                    name = (GrammarAST)at.GetChild(0);
                    action = (GrammarAST)at.GetChild(1);
                }

                GrammarAST prevAction;
                if (!namedActions.TryGetValue(Tuple.Create(scopeName, name.Text), out prevAction) || prevAction == null)
                {
                    namedActions[Tuple.Create(scopeName, name.Text)] = action;
                }
                else
                {
                    if (prevAction.g == at.g)
                    {
                        rootGrammar.tool.errMgr.GrammarError(ErrorType.ACTION_REDEFINITION,
                                            at.g.fileName, name.Token, name.Text);
                    }
                    else
                    {
                        // strip the first and last character of each action's
                        // text and join the two bodies with a newline inside
                        // fresh braces
                        string s1 = prevAction.Text;
                        s1 = s1.Substring(1, s1.Length - 2);
                        string s2 = action.Text;
                        s2 = s2.Substring(1, s2.Length - 2);
                        string combinedAction = "{" + s1 + '\n' + s2 + "}";
                        prevAction.Token.Text = combinedAction;
                    }
                }
            }

            // at this point, we have complete list of combined actions,
            // some of which are already living in root grammar.
            // Merge in any actions not in root grammar into root's tree.
            foreach (string scopeName in namedActions.Keys.Select(i => i.Item1).Distinct())
            {
                foreach (string name in namedActions.Keys.Where(i => i.Item1 == scopeName).Select(i => i.Item2))
                {
                    GrammarAST action = namedActions[Tuple.Create(scopeName, name)];
                    rootGrammar.tool.Log("grammar", action.g.name + " " + scopeName + ":" + name + "=" + action.Text);
                    // the action's parent (the whole @-node) is what gets inserted
                    if (action.g != rootGrammar)
                    {
                        root.InsertChild(1, action.Parent);
                    }
                }
            }
        }

        // COPY RULES
        IList <GrammarAST> rules = imp.ast.GetNodesWithType(ANTLRParser.RULE);
        if (rules != null)
        {
            foreach (GrammarAST r in rules)
            {
                rootGrammar.tool.Log("grammar", "imported rule: " + r.ToStringTree());
                string name = r.GetChild(0).Text;
                bool rootAlreadyHasRule = rootRuleNames.Contains(name);
                if (!rootAlreadyHasRule)
                {
                    RULES.AddChild(r); // merge in if not overridden
                    rootRuleNames.Add(name);
                }
            }
        }

        GrammarAST optionsRoot = (GrammarAST)imp.ast.GetFirstChildWithType(ANTLRParser.OPTIONS);
        if (optionsRoot != null)
        {
            // suppress the warning if the options match the options specified
            // in the root grammar
            // https://github.com/antlr/antlr4/issues/707

            bool hasNewOption = false;
            foreach (KeyValuePair <string, GrammarAST> option in imp.ast.GetOptions())
            {
                string importOption = imp.ast.GetOptionString(option.Key);
                if (importOption == null)
                {
                    continue;
                }

                string rootOption = rootGrammar.ast.GetOptionString(option.Key);
                if (!importOption.Equals(rootOption))
                {
                    hasNewOption = true;
                    break;
                }
            }

            if (hasNewOption)
            {
                rootGrammar.tool.errMgr.GrammarError(ErrorType.OPTIONS_IN_DELEGATE,
                                    optionsRoot.g.fileName, optionsRoot.Token, imp.name);
            }
        }
    }

    rootGrammar.tool.Log("grammar", "Grammar: " + rootGrammar.ast.ToStringTree());
}
/** Build lexer grammar from combined grammar that looks like:
 *
 *  (COMBINED_GRAMMAR A
 *      (tokens { X (= Y 'y'))
 *      (OPTIONS (= x 'y'))
 *      (@ members {foo})
 *      (@ lexer header {package jj;})
 *      (RULES (RULE .+)))
 *
 *  Move rules and actions to new tree, don't dup. Split AST apart.
 *  We'll have this Grammar share token symbols later; don't generate
 *  tokenVocab or tokens{} section.  Copy over named actions.
 *
 *  Side-effects: it removes children from GRAMMAR &amp; RULES nodes
 *                in combined AST.  Anything cut out is dup'd before
 *                adding to lexer to avoid "who's ur daddy" issues
 *
 *  Returns the new lexer tree; returns it early (without a RULES node)
 *  when the combined tree has no RULES child, and returns null when the
 *  lexer ends up with no rules at all.
 */
public virtual GrammarRootAST ExtractImplicitLexer(Grammar combinedGrammar)
{
    GrammarRootAST combinedAST = combinedGrammar.ast;
    //tool.log("grammar", "before="+combinedAST.toStringTree());
    GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.Token.InputStream);
    // snapshot of the children taken before anything is deleted
    GrammarAST[] elements = combinedAST.GetChildrenAsArray();

    // MAKE A GRAMMAR ROOT and ID
    string lexerName = combinedAST.GetChild(0).Text + "Lexer";
    GrammarRootAST lexerAST =
        new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR"), combinedGrammar.ast.tokenStream);
    lexerAST.grammarType = ANTLRParser.LEXER;
    lexerAST.Token.InputStream = combinedAST.Token.InputStream;
    lexerAST.AddChild((ITree)adaptor.Create(ANTLRParser.ID, lexerName));

    // COPY OPTIONS
    GrammarAST optionsRoot =
        (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.OPTIONS);
    if (optionsRoot != null && optionsRoot.ChildCount != 0)
    {
        GrammarAST lexerOptionsRoot = (GrammarAST)adaptor.DupNode(optionsRoot);
        lexerAST.AddChild(lexerOptionsRoot);
        GrammarAST[] options = optionsRoot.GetChildrenAsArray();
        foreach (GrammarAST o in options)
        {
            string optionName = o.GetChild(0).Text;
            // only options listed in Grammar.lexerOptions and not excluded
            // by Grammar.doNotCopyOptionsToLexer are carried over
            if (Grammar.lexerOptions.Contains(optionName) &&
                 !Grammar.doNotCopyOptionsToLexer.Contains(optionName))
            {
                GrammarAST optionTree = (GrammarAST)adaptor.DupTree(o);
                lexerOptionsRoot.AddChild(optionTree);
                lexerAST.SetOption(optionName, (GrammarAST)optionTree.GetChild(1));
            }
        }
    }

    // COPY all named actions, but only move those with lexer:: scope
    IList <GrammarAST> actionsWeMoved = new List <GrammarAST>();
    foreach (GrammarAST e in elements)
    {
        if (e.Type == ANTLRParser.AT)
        {
            lexerAST.AddChild((ITree)adaptor.DupTree(e));
            if (e.GetChild(0).Text.Equals("lexer"))
            {
                actionsWeMoved.Add(e);
            }
        }
    }

    // deletion is deferred until after the scan above has finished
    foreach (GrammarAST r in actionsWeMoved)
    {
        combinedAST.DeleteChild(r);
    }

    GrammarAST combinedRulesRoot =
        (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.RULES);
    if (combinedRulesRoot == null)
    {
        return(lexerAST);
    }

    // MOVE lexer rules

    GrammarAST lexerRulesRoot = (GrammarAST)adaptor.Create(ANTLRParser.RULES, "RULES");
    lexerAST.AddChild(lexerRulesRoot);
    IList <GrammarAST> rulesWeMoved = new List <GrammarAST>();
    GrammarASTWithOptions[] rules;
    // Children is only touched when ChildCount reports at least one child
    if (combinedRulesRoot.ChildCount > 0)
    {
        rules = combinedRulesRoot.Children.Cast <GrammarASTWithOptions>().ToArray();
    }
    else
    {
        rules = new GrammarASTWithOptions[0];
    }

    foreach (GrammarASTWithOptions r in rules)
    {
        string ruleName = r.GetChild(0).Text;
        // rules whose name Grammar.IsTokenName accepts are dup'd into the
        // lexer and queued for removal from the combined tree
        if (Grammar.IsTokenName(ruleName))
        {
            lexerRulesRoot.AddChild((ITree)adaptor.DupTree(r));
            rulesWeMoved.Add(r);
        }
    }

    foreach (GrammarAST r in rulesWeMoved)
    {
        combinedRulesRoot.DeleteChild(r);
    }

    // Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
    IList <System.Tuple <GrammarAST, GrammarAST> > litAliases =
        Grammar.GetStringLiteralAliasesFromLexerRules(lexerAST);

    ISet <string> stringLiterals = combinedGrammar.GetStringLiterals();
    // add strings from combined grammar (and imported grammars) into lexer
    // put them first as they are keywords; must resolve ambigs to these rules
    //		tool.log("grammar", "strings from parser: "+stringLiterals);
    int insertIndex = 0;
    foreach (string lit in stringLiterals)
    {
        // if lexer already has a rule for literal, continue
        if (litAliases != null)
        {
            foreach (System.Tuple <GrammarAST, GrammarAST> pair in litAliases)
            {
                GrammarAST litAST = pair.Item2;
                if (lit.Equals(litAST.Text))
                {
                    // jump to the end of the outer loop body (next literal)
                    goto continueNextLit;
                }
            }
        }

        // create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>))
        string rname = combinedGrammar.GetStringLiteralLexerRuleName(lit);
        // can't use wizard; need special node types
        GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
        BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
        AltAST alt = new AltAST(ANTLRParser.ALT);
        TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit));
        alt.AddChild(slit);
        blk.AddChild(alt);
        CommonToken idToken = new CommonToken(ANTLRParser.TOKEN_REF, rname);
        litRule.AddChild(new TerminalAST(idToken));
        litRule.AddChild(blk);
        lexerRulesRoot.InsertChild(insertIndex, litRule);
        //			lexerRulesRoot.getChildren().add(0, litRule);
        lexerRulesRoot.FreshenParentAndChildIndexes(); // reset indexes and set litRule parent

        // next literal will be added after the one just added
        insertIndex++;

        continueNextLit:
        ;
    }

    // TODO: take out after stable if slow
    lexerAST.SanityCheckParentAndChildIndexes();
    combinedAST.SanityCheckParentAndChildIndexes();
    //		tool.log("grammar", combinedAST.toTokenString());

    combinedGrammar.tool.Log("grammar", "after extract implicit lexer =" + combinedAST.ToStringTree());
    combinedGrammar.tool.Log("grammar", "lexer =" + lexerAST.ToStringTree());

    // no moved rules and no literal rules: there is no implicit lexer
    if (lexerRulesRoot.ChildCount == 0)
    {
        return(null);
    }

    return(lexerAST);
}
/**
 * Translate a single grammar element for left factoring of factoredRule.
 *
 * Returns a tree to splice in place of the element (possibly a nil-rooted
 * list, possibly the element itself), or null when the element cannot be
 * translated for the requested mode. Dispatches on element.Type; labeled
 * elements (ASSIGN / PLUS_ASSIGN) currently always yield null.
 *
 * Throws ArgumentException for PARTIAL_UNFACTORED combined with
 * includeFactoredElement, InvalidOperationException for COMBINED_FACTOR or
 * an unexpected RuleVariants value, and NotImplementedException when the
 * factored rule declares arguments.
 */
protected virtual GrammarAST TranslateLeftFactoredElement(GrammarAST element, string factoredRule, bool variant, DecisionFactorMode mode, bool includeFactoredElement)
{
    if (mode == DecisionFactorMode.PARTIAL_UNFACTORED && includeFactoredElement)
    {
        throw new ArgumentException("Cannot include the factored element in unfactored alternatives.");
    }

    if (mode == DecisionFactorMode.COMBINED_FACTOR)
    {
        throw new InvalidOperationException("Cannot return a combined answer.");
    }

    // past this point the mode asks for factored or unfactored alts, never both
    Debug.Assert(!mode.IncludeFactoredAlts() || !mode.IncludeUnfactoredAlts());

    switch (element.Type)
    {
    case ANTLRParser.ASSIGN:
    case ANTLRParser.PLUS_ASSIGN:
    {
        /* label=a
         *
         * ==>
         *
         * factoredElement label=a_factored
         */

        GrammarAST translatedChildElement = TranslateLeftFactoredElement((GrammarAST)element.GetChild(1), factoredRule, variant, mode, includeFactoredElement);
        if (translatedChildElement == null)
        {
            return(null);
        }

        // ruleAST is only referenced by the disabled logging below
        RuleAST ruleAST = (RuleAST)element.GetAncestor(ANTLRParser.RULE);
#if false
        LOGGER.log(Level.WARNING, "Could not left factor ''{0}'' out of decision in rule ''{1}'': labeled rule references are not yet supported.", new object[] { factoredRule, ruleAST.GetChild(0).Text });
#endif
        // labeled elements are rejected even when the child translated successfully
        return(null);

        //if (!translatedChildElement.IsNil)
        //{
        //    GrammarAST root = (GrammarAST)adaptor.Nil();
        //    object factoredElement = translatedChildElement;
        //    if (outerRule)
        //    {
        //        adaptor.AddChild(root, factoredElement);
        //    }

        //    string action = string.Format("_localctx.{0} = (ContextType)_localctx.getParent().getChild(_localctx.getParent().getChildCount() - 1);", element.GetChild(0).Text);
        //    adaptor.AddChild(root, new ActionAST(adaptor.CreateToken(ANTLRParser.ACTION, action)));
        //    return root;
        //}
        //else
        //{
        //    GrammarAST root = (GrammarAST)adaptor.Nil();
        //    object factoredElement = adaptor.DeleteChild(translatedChildElement, 0);
        //    if (outerRule)
        //    {
        //        adaptor.AddChild(root, factoredElement);
        //    }

        //    adaptor.AddChild(root, element);
        //    adaptor.ReplaceChildren(element, 1, 1, translatedChildElement);
        //    return root;
        //}
    }

    case ANTLRParser.RULE_REF:
    {
        // direct reference to the rule being factored out
        if (factoredRule.Equals(element.Token.Text))
        {
            if (!mode.IncludeFactoredAlts())
            {
                return(null);
            }

            if (includeFactoredElement)
            {
                // this element is already left factored
                return(element);
            }

            // add a child and delete it again, then return the nil node;
            // NOTE(review): presumably this leaves an empty but initialized
            // child list that callers can tell apart from null — confirm
            GrammarAST root1 = (GrammarAST)adaptor.Nil();
            root1.AddChild((ITree)adaptor.Create(TokenConstants.Epsilon, "EPSILON"));
            root1.DeleteChild(0);
            return(root1);
        }

        // reference to some other rule: unknown rules cannot be translated
        Rule targetRule;
        if (!_rules.TryGetValue(element.Token.Text, out targetRule))
        {
            return(null);
        }

        RuleVariants ruleVariants = CreateLeftFactoredRuleVariant(targetRule, factoredRule);
        switch (ruleVariants)
        {
        case RuleVariants.NONE:
            if (!mode.IncludeUnfactoredAlts())
            {
                return(null);
            }

            // just call the original rule (leave the element unchanged)
            return(element);

        case RuleVariants.FULLY_FACTORED:
            if (!mode.IncludeFactoredAlts())
            {
                return(null);
            }

            break;

        case RuleVariants.PARTIALLY_FACTORED:
            break;

        default:
            throw new InvalidOperationException();
        }

        // rename the reference in place to <rule><marker><factoredRule>
        string marker = mode.IncludeFactoredAlts() ? ATNSimulator.RuleLfVariantMarker : ATNSimulator.RuleNolfVariantMarker;
        element.SetText(element.Text + marker + factoredRule);

        GrammarAST root = (GrammarAST)adaptor.Nil();

        if (includeFactoredElement)
        {
            Debug.Assert(mode.IncludeFactoredAlts());
            // put an explicit reference to the factored rule in front of the renamed element
            RuleRefAST factoredRuleRef = new RuleRefAST(adaptor.CreateToken(ANTLRParser.RULE_REF, factoredRule));
            factoredRuleRef.SetOption(SUPPRESS_ACCESSOR, (GrammarAST)adaptor.Create(ANTLRParser.ID, "true"));
            Rule factoredRuleDef = _rules[factoredRule];
            if (factoredRuleDef is LeftRecursiveRule)
            {
                factoredRuleRef.SetOption(LeftRecursiveRuleTransformer.PRECEDENCE_OPTION_NAME, (GrammarAST)adaptor.Create(ANTLRParser.INT, "0"));
            }

            if (factoredRuleDef.args != null && factoredRuleDef.args.Size() > 0)
            {
                throw new NotImplementedException("Cannot left-factor rules with arguments yet.");
            }

            adaptor.AddChild(root, factoredRuleRef);
        }

        adaptor.AddChild(root, element);

        return(root);
    }

    case ANTLRParser.BLOCK:
    {
        // work on a copy so a failed translation leaves the element untouched
        GrammarAST cloned = element.DupTree();
        if (!TranslateLeftFactoredDecision(cloned, factoredRule, variant, mode, includeFactoredElement))
        {
            return(null);
        }

        // only a block that ends up with exactly one child can be inlined
        if (cloned.ChildCount != 1)
        {
            return(null);
        }

        GrammarAST root = (GrammarAST)adaptor.Nil();
        for (int i = 0; i < cloned.GetChild(0).ChildCount; i++)
        {
            adaptor.AddChild(root, cloned.GetChild(0).GetChild(i));
        }

        return(root);
    }

    case ANTLRParser.POSITIVE_CLOSURE:
    {
        /* a+
         *
         * =>
         *
         * factoredElement a_factored a*
         */

        GrammarAST originalChildElement = (GrammarAST)element.GetChild(0);
        GrammarAST translatedElement = TranslateLeftFactoredElement(originalChildElement.DupTree(), factoredRule, variant, mode, includeFactoredElement);
        if (translatedElement == null)
        {
            return(null);
        }

        // the untranslated original child becomes the body of the trailing a*
        GrammarAST closure = new StarBlockAST(ANTLRParser.CLOSURE, adaptor.CreateToken(ANTLRParser.CLOSURE, "CLOSURE"), null);
        adaptor.AddChild(closure, originalChildElement);

        GrammarAST root = (GrammarAST)adaptor.Nil();
        if (mode.IncludeFactoredAlts())
        {
            if (includeFactoredElement)
            {
                // move the first child of the translation to the front of the result
                object factoredElement = adaptor.DeleteChild(translatedElement, 0);
                adaptor.AddChild(root, factoredElement);
            }
        }
        adaptor.AddChild(root, translatedElement);
        adaptor.AddChild(root, closure);
        return(root);
    }

    case ANTLRParser.CLOSURE:
    case ANTLRParser.OPTIONAL:
        // not yet supported
        if (mode.IncludeUnfactoredAlts())
        {
            return(element);
        }

        return(null);

    case ANTLRParser.DOT:
        // ref to imported grammar, not yet supported
        if (mode.IncludeUnfactoredAlts())
        {
            return(element);
        }

        return(null);

    case ANTLRParser.ACTION:
    case ANTLRParser.SEMPRED:
        if (mode.IncludeUnfactoredAlts())
        {
            return(element);
        }

        return(null);

    case ANTLRParser.WILDCARD:
    case ANTLRParser.STRING_LITERAL:
    case ANTLRParser.TOKEN_REF:
    case ANTLRParser.NOT:
        // terminals
        if (mode.IncludeUnfactoredAlts())
        {
            return(element);
        }

        return(null);

    case ANTLRParser.EPSILON:
        // empty tree
        if (mode.IncludeUnfactoredAlts())
        {
            return(element);
        }

        return(null);

    default:
        // unknown
        return(null);
    }
}
/**
 * Rewrites the ALT children of a block in place so that the alternatives are
 * expressed in terms of the factored rule, according to the requested mode.
 *
 * Every ALT child of the block is translated through
 * TranslateLeftFactoredAlternative, as an unfactored alternative, a factored
 * alternative, or both, depending on what the mode includes. The children of
 * the block are then replaced by the translated alternatives; consecutive
 * factored alternatives are merged into a single alternative of the form
 * "factoredElement (a | b | ...)".
 *
 * When variant is false and the block hangs directly under a RuleAST, the
 * alternative table of the corresponding entry in _rules is rebuilt to match
 * the new ALT children.
 *
 * Parameters:
 *   block                  - the block whose alternatives are rewritten
 *   factoredRule           - name of the rule being factored out
 *   variant                - forwarded to the helper translations; when false,
 *                            the owning rule's alternative table is updated
 *   mode                   - selects factored and/or unfactored alternatives
 *   includeFactoredElement - forwarded to the factored translation
 *
 * Returns true when the block was rewritten, and false when the optional
 * quantifiers could not be expanded or the alternatives could not be
 * translated for the requested mode.
 *
 * Throws ArgumentException when mode is PARTIAL_UNFACTORED together with
 * includeFactoredElement, or COMBINED_FACTOR without it.
 */
protected virtual bool TranslateLeftFactoredDecision(GrammarAST block, string factoredRule, bool variant, DecisionFactorMode mode, bool includeFactoredElement)
{
    if (mode == DecisionFactorMode.PARTIAL_UNFACTORED && includeFactoredElement)
    {
        throw new ArgumentException("Cannot include the factored element in unfactored alternatives.");
    }
    else if (mode == DecisionFactorMode.COMBINED_FACTOR && !includeFactoredElement)
    {
        throw new ArgumentException("Cannot return a combined answer without the factored element.");
    }

    if (!ExpandOptionalQuantifiersForBlock(block, variant))
    {
        return false;
    }

    bool includeFactoredAlts = mode.IncludeFactoredAlts();
    bool includeUnfactoredAlts = mode.IncludeUnfactoredAlts();

    // A combined decision factors each alternative partially; every other
    // mode that includes factored alternatives requires them fully factored.
    DecisionFactorMode factoredAltMode = mode == DecisionFactorMode.COMBINED_FACTOR
        ? DecisionFactorMode.PARTIAL_FACTORED
        : DecisionFactorMode.FULL_FACTOR;

    IList<GrammarAST> alternatives = block.GetAllChildrenWithType(ANTLRParser.ALT);
    GrammarAST[] factoredAlternatives = new GrammarAST[alternatives.Count];
    GrammarAST[] unfactoredAlternatives = new GrammarAST[alternatives.Count];
    IntervalSet factoredIntervals = new IntervalSet();
    IntervalSet unfactoredIntervals = new IntervalSet();

    for (int i = 0; i < alternatives.Count; i++)
    {
        GrammarAST alternative = alternatives[i];

        if (includeUnfactoredAlts)
        {
            // A copy is translated here; the original tree is handed to the
            // factored translation below.
            GrammarAST unfactoredAlt = TranslateLeftFactoredAlternative(alternative.DupTree(), factoredRule, variant, DecisionFactorMode.PARTIAL_UNFACTORED, false);
            unfactoredAlternatives[i] = unfactoredAlt;
            if (unfactoredAlt != null)
            {
                unfactoredIntervals.Add(i);
            }
        }

        if (includeFactoredAlts)
        {
            GrammarAST factoredAlt = TranslateLeftFactoredAlternative(alternative, factoredRule, variant, factoredAltMode, includeFactoredElement);
            factoredAlternatives[i] = factoredAlt;
            if (factoredAlt != null)
            {
                // Record the position within `alternatives` (not the child
                // index inside the block): both interval sets are queried
                // below with the same index that addresses the two arrays.
                factoredIntervals.Add(i);
            }
        }
    }

    if (factoredIntervals.IsNil && !includeUnfactoredAlts)
    {
        return false;
    }
    else if (unfactoredIntervals.IsNil && !includeFactoredAlts)
    {
        return false;
    }

    if (unfactoredIntervals.IsNil && factoredIntervals.Count == alternatives.Count && includeFactoredAlts && !includeFactoredElement)
    {
        // Every alternative was factored and none has an unfactored form:
        // swap each alternative for its factored translation.
        for (int i = 0; i < factoredAlternatives.Length; i++)
        {
            GrammarAST translatedAlt = factoredAlternatives[i];
            AddEpsilonIfEmpty(translatedAlt);
            adaptor.SetChild(block, i, translatedAlt);
        }

        return true;
    }
    else if (factoredIntervals.IsNil && unfactoredIntervals.Count == alternatives.Count && includeUnfactoredAlts)
    {
        // No alternative was factored and every one has an unfactored form:
        // swap each alternative for its unfactored translation.
        for (int i = 0; i < unfactoredAlternatives.Length; i++)
        {
            GrammarAST translatedAlt = unfactoredAlternatives[i];
            AddEpsilonIfEmpty(translatedAlt);
            adaptor.SetChild(block, i, translatedAlt);
        }

        return true;
    }

    if (mode == DecisionFactorMode.FULL_FACTOR)
    {
        // Neither uniform case applied, so the block cannot be fully factored.
        return false;
    }

    /* for a, b, c being arbitrary `element` trees, this block performs
     * this transformation:
     *
     *   factoredElement a
     *   | factoredElement b
     *   | factoredElement c
     *   | ...
     *
     * ==>
     *
     *   factoredElement (a | b | c | ...)
     */
    GrammarAST newChildren = (GrammarAST)adaptor.Nil();
    for (int i = 0; i < alternatives.Count; i++)
    {
        if (includeFactoredAlts && factoredIntervals.Contains(i))
        {
            GrammarAST translatedAlt = factoredAlternatives[i];
            AddEpsilonIfEmpty(translatedAlt);

            // Merge only when the previous alternative was factored and did
            // not also emit an unfactored alternative after it; in that case
            // the last entry of newChildren is the previous factored one.
            bool combineWithPrevious = i > 0
                && factoredIntervals.Contains(i - 1)
                && (!includeUnfactoredAlts || !unfactoredIntervals.Contains(i - 1));

            if (combineWithPrevious)
            {
                GrammarAST previous = (GrammarAST)newChildren.GetChild(newChildren.ChildCount - 1);

                // NOTE(review): the wrapping check only inspects the type of
                // child 1, so an alternative whose own second element is a
                // BLOCK is treated as already wrapped, and only the first
                // child of that block is merged below - confirm that this
                // case cannot occur for translated alternatives.
                WrapTrailingElementsInBlock(previous);
                WrapTrailingElementsInBlock(translatedAlt);

                GrammarAST combinedBlock = (GrammarAST)previous.GetChild(1);
                adaptor.AddChild(combinedBlock, translatedAlt.GetChild(1).GetChild(0));
            }
            else
            {
                adaptor.AddChild(newChildren, translatedAlt);
            }
        }

        if (includeUnfactoredAlts && unfactoredIntervals.Contains(i))
        {
            GrammarAST translatedAlt = unfactoredAlternatives[i];
            AddEpsilonIfEmpty(translatedAlt);
            adaptor.AddChild(newChildren, translatedAlt);
        }
    }

    adaptor.ReplaceChildren(block, 0, block.ChildCount - 1, newChildren);

    if (!variant && block.Parent is RuleAST)
    {
        // The number of alternatives may have changed, so rebuild the
        // 1-based alternative table of the owning rule.
        RuleAST ruleAST = (RuleAST)block.Parent;
        string ruleName = ruleAST.GetChild(0).Text;
        Rule r = _rules[ruleName];
        IList<GrammarAST> blockAlts = block.GetAllChildrenWithType(ANTLRParser.ALT);
        r.numberOfAlts = blockAlts.Count;
        r.alt = new Alternative[blockAlts.Count + 1];
        for (int i = 0; i < blockAlts.Count; i++)
        {
            r.alt[i + 1] = new Alternative(r, i + 1);
            r.alt[i + 1].ast = (AltAST)blockAlts[i];
        }
    }

    return true;
}

// Gives a childless alternative a single EPSILON child.
private void AddEpsilonIfEmpty(GrammarAST alt)
{
    if (alt.ChildCount == 0)
    {
        adaptor.AddChild(alt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON"));
    }
}

// Moves every child after the first into a new "BLOCK (ALT ...)" subtree appended
// to the alternative, unless the second child already is a BLOCK.
private void WrapTrailingElementsInBlock(GrammarAST alt)
{
    if (alt.ChildCount == 1 || alt.GetChild(1).Type != ANTLRParser.BLOCK)
    {
        GrammarAST newBlock = new BlockAST(adaptor.CreateToken(ANTLRParser.BLOCK, "BLOCK"));
        GrammarAST newAlt = new AltAST(adaptor.CreateToken(ANTLRParser.ALT, "ALT"));
        adaptor.AddChild(newBlock, newAlt);

        // Deleting child 1 shifts the remaining children left, so repeatedly
        // removing index 1 drains everything after the first child in order.
        while (alt.ChildCount > 1)
        {
            adaptor.AddChild(newAlt, alt.DeleteChild(1));
        }

        AddEpsilonIfEmpty(newAlt);
        adaptor.AddChild(alt, newBlock);
    }
}