/** * <pre> * (RULE e int _p (returns int v) * (BLOCK * (ALT * (BLOCK * (ALT INT {$v = $INT.int;}) * (ALT '(' (= x e) ')' {$v = $x.v;}) * (ALT ID)) * (* (BLOCK * (OPTIONS ...) * (ALT {7 >= $_p}? '*' (= b e) {$v = $a.v * $b.v;}) * (ALT {6 >= $_p}? '+' (= b e) {$v = $a.v + $b.v;}) * (ALT {3 >= $_p}? '++') (ALT {2 >= $_p}? '--')))))) * </pre> */ public virtual void SetAltASTPointers(LeftRecursiveRule r, RuleAST t) { //System.Console.WriteLine("RULE: " + t.ToStringTree()); BlockAST ruleBlk = (BlockAST)t.GetFirstChildWithType(ANTLRParser.BLOCK); AltAST mainAlt = (AltAST)ruleBlk.GetChild(0); BlockAST primaryBlk = (BlockAST)mainAlt.GetChild(0); BlockAST opsBlk = (BlockAST)mainAlt.GetChild(1).GetChild(0); // (* BLOCK ...) for (int i = 0; i < r.recPrimaryAlts.Count; i++) { LeftRecursiveRuleAltInfo altInfo = r.recPrimaryAlts[i]; altInfo.altAST = (AltAST)primaryBlk.GetChild(i); altInfo.altAST.leftRecursiveAltInfo = altInfo; altInfo.originalAltAST.leftRecursiveAltInfo = altInfo; //altInfo.originalAltAST.Parent = altInfo.altAST.Parent; //System.Console.WriteLine(altInfo.altAST.ToStringTree()); } for (int i = 0; i < r.recOpAlts.Count; i++) { LeftRecursiveRuleAltInfo altInfo = r.recOpAlts.GetElement(i); altInfo.altAST = (AltAST)opsBlk.GetChild(i); altInfo.altAST.leftRecursiveAltInfo = altInfo; altInfo.originalAltAST.leftRecursiveAltInfo = altInfo; //altInfo.originalAltAST.Parent = altInfo.altAST.Parent; //System.Console.WriteLine(altInfo.altAST.ToStringTree()); } }
public virtual RuleAST ParseArtificialRule(Grammar g, string ruleText) { ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText)); GrammarASTAdaptor adaptor = new GrammarASTAdaptor(lexer.CharStream); CommonTokenStream tokens = new CommonTokenStream(lexer); lexer.tokens = tokens; ToolANTLRParser p = new ToolANTLRParser(tokens, tool); p.TreeAdaptor = adaptor; IToken ruleStart = null; try { Antlr.Runtime.AstParserRuleReturnScope <GrammarAST, IToken> r = p.rule(); RuleAST tree = (RuleAST)r.Tree; ruleStart = r.Start; GrammarTransformPipeline.SetGrammarPtr(g, tree); GrammarTransformPipeline.AugmentTokensWithOriginalPosition(g, tree); return(tree); } catch (Exception e) { tool.errMgr.ToolError(ErrorType.INTERNAL_ERROR, e, ruleStart, "error parsing rule created during left-recursion detection: " + ruleText); } return(null); }
public override void DiscoverRule(RuleAST rule, GrammarAST ID, IList <GrammarAST> modifiers, ActionAST arg, ActionAST returns, GrammarAST thrws, GrammarAST options, ActionAST locals, IList <GrammarAST> actions, GrammarAST block) { currentRule = g.GetRule(ID.Text); }
public LeftRecursiveRule(Grammar g, string name, RuleAST ast) : base(g, name, ast, 1) { originalAST = ast; alt = new Alternative[numberOfAlts + 1]; // always just one for (int i = 1; i <= numberOfAlts; i++) { alt[i] = new Alternative(this, i); } }
public override void DiscoverRule(RuleAST rule, GrammarAST ID, IList <GrammarAST> modifiers, ActionAST arg, ActionAST returns, GrammarAST thrws, GrammarAST options, ActionAST locals, IList <GrammarAST> actions, GrammarAST block) { // TODO: chk that all or no alts have "# label" CheckInvalidRuleDef(ID.Token); }
public int actionIndex = -1; // if lexer; 0..n-1 for n actions in a rule public Rule(Grammar g, string name, RuleAST ast, int numberOfAlts) { this.g = g; this.name = name; this.ast = ast; this.numberOfAlts = numberOfAlts; alt = new Alternative[numberOfAlts + 1]; // 1..n for (int i = 1; i <= numberOfAlts; i++) { alt[i] = new Alternative(this, i); } }
public override void FinishRule(RuleAST rule, GrammarAST ID, GrammarAST block) { if (rule.IsLexerRule()) { return; } BlockAST blk = (BlockAST)rule.GetFirstChildWithType(BLOCK); int nalts = blk.ChildCount; GrammarAST idAST = (GrammarAST)rule.GetChild(0); for (int i = 0; i < nalts; i++) { AltAST altAST = (AltAST)blk.GetChild(i); if (altAST.altLabel != null) { string altLabel = altAST.altLabel.Text; // first check that label doesn't conflict with a rule // label X or x can't be rule x. Rule r; if (ruleCollector.rules.TryGetValue(Utils.Decapitalize(altLabel), out r) && r != null) { g.tool.errMgr.GrammarError(ErrorType.ALT_LABEL_CONFLICTS_WITH_RULE, g.fileName, altAST.altLabel.Token, altLabel, r.name); } // Now verify that label X or x doesn't conflict with label // in another rule. altLabelToRuleName has both X and x mapped. string prevRuleForLabel; if (ruleCollector.altLabelToRuleName.TryGetValue(altLabel, out prevRuleForLabel) && prevRuleForLabel != null && !prevRuleForLabel.Equals(rule.GetRuleName())) { g.tool.errMgr.GrammarError(ErrorType.ALT_LABEL_REDEF, g.fileName, altAST.altLabel.Token, altLabel, rule.GetRuleName(), prevRuleForLabel); } } } IList <GrammarAST> altLabels; int numAltLabels = 0; if (ruleCollector.ruleToAltLabels.TryGetValue(rule.GetRuleName(), out altLabels) && altLabels != null) { numAltLabels = altLabels.Count; } if (numAltLabels > 0 && nalts != numAltLabels) { g.tool.errMgr.GrammarError(ErrorType.RULE_WITH_TOO_FEW_ALT_LABELS, g.fileName, idAST.Token, rule.GetRuleName()); } }
public override void DiscoverLexerRule(RuleAST rule, GrammarAST ID, IList <GrammarAST> modifiers, GrammarAST block) { int numAlts = block.ChildCount; Rule r = new Rule(g, ID.Text, rule, numAlts); r.mode = currentModeName; if (modifiers.Count > 0) { r.modifiers = modifiers; } rules[r.name] = r; }
public override void DiscoverRule(RuleAST rule, GrammarAST ID, IList <GrammarAST> modifiers, ActionAST arg, ActionAST returns, GrammarAST thrws, GrammarAST options, ActionAST locals, IList <GrammarAST> actions, GrammarAST block) { int numAlts = block.ChildCount; Rule r; if (LeftRecursiveRuleAnalyzer.HasImmediateRecursiveRuleRefs(rule, ID.Text)) { r = new LeftRecursiveRule(g, ID.Text, rule); } else { r = new Rule(g, ID.Text, rule, numAlts); } rules[r.name] = r; if (arg != null) { r.args = ScopeParser.ParseTypedArgList(arg, arg.Text, g); r.args.type = AttributeDict.DictType.ARG; r.args.ast = arg; arg.resolver = r.alt[currentOuterAltNumber]; } if (returns != null) { r.retvals = ScopeParser.ParseTypedArgList(returns, returns.Text, g); r.retvals.type = AttributeDict.DictType.RET; r.retvals.ast = returns; } if (locals != null) { r.locals = ScopeParser.ParseTypedArgList(locals, locals.Text, g); r.locals.type = AttributeDict.DictType.LOCAL; r.locals.ast = locals; } foreach (GrammarAST a in actions) { // a = ^(AT ID ACTION) ActionAST action = (ActionAST)a.GetChild(1); r.namedActions[a.GetChild(0).Text] = action; action.resolver = r; } }
protected virtual bool ExpandOptionalQuantifiersForBlock(GrammarAST block, bool variant) { IList <GrammarAST> children = new List <GrammarAST>(); for (int i = 0; i < block.ChildCount; i++) { GrammarAST child = (GrammarAST)block.GetChild(i); if (child.Type != ANTLRParser.ALT) { children.Add(child); continue; } GrammarAST expandedAlt = ExpandOptionalQuantifiersForAlt(child); if (expandedAlt == null) { return(false); } children.Add(expandedAlt); } GrammarAST newChildren = (GrammarAST)adaptor.Nil(); newChildren.AddChildren(children); block.ReplaceChildren(0, block.ChildCount - 1, newChildren); block.FreshenParentAndChildIndexesDeeply(); if (!variant && block.Parent is RuleAST) { RuleAST ruleAST = (RuleAST)block.Parent; string ruleName = ruleAST.GetChild(0).Text; Rule r = _rules[ruleName]; IList <GrammarAST> blockAlts = block.GetAllChildrenWithType(ANTLRParser.ALT); r.numberOfAlts = blockAlts.Count; r.alt = new Alternative[blockAlts.Count + 1]; for (int i = 0; i < blockAlts.Count; i++) { r.alt[i + 1] = new Alternative(r, i + 1); r.alt[i + 1].ast = (AltAST)blockAlts[i]; } } return(true); }
/** * Important enough to avoid multiple definitions that we do very early, * right after AST construction. Also check for undefined rules in * parser/lexer to avoid exceptions later. Return true if we find multiple * definitions of the same rule or a reference to an undefined rule or * parser rule ref in lexer rule. */ public virtual bool CheckForRuleIssues(Grammar g) { // check for redefined rules GrammarAST RULES = (GrammarAST)g.ast.GetFirstChildWithType(ANTLRParser.RULES); IList <GrammarAST> rules = new List <GrammarAST>(RULES.GetAllChildrenWithType(ANTLRParser.RULE)); foreach (GrammarAST mode in g.ast.GetAllChildrenWithType(ANTLRParser.MODE)) { foreach (GrammarAST child in mode.GetAllChildrenWithType(ANTLRParser.RULE)) { rules.Add(child); } } bool redefinition = false; IDictionary <string, RuleAST> ruleToAST = new Dictionary <string, RuleAST>(); foreach (GrammarAST r in rules) { RuleAST ruleAST = (RuleAST)r; GrammarAST ID = (GrammarAST)ruleAST.GetChild(0); string ruleName = ID.Text; RuleAST prev; if (ruleToAST.TryGetValue(ruleName, out prev) && prev != null) { GrammarAST prevChild = (GrammarAST)prev.GetChild(0); g.tool.errMgr.GrammarError(ErrorType.RULE_REDEFINITION, g.fileName, ID.Token, ruleName, prevChild.Token.Line); redefinition = true; continue; } ruleToAST[ruleName] = ruleAST; } // check for undefined rules UndefChecker chk = new UndefChecker(this, g, ruleToAST); chk.VisitGrammar(g.ast); return(redefinition || chk.badref); }
public override void DiscoverLexerRule(RuleAST rule, GrammarAST ID, IList <GrammarAST> modifiers, GrammarAST block) { CheckInvalidRuleDef(ID.Token); if (modifiers != null) { foreach (GrammarAST tree in modifiers) { if (tree.Type == ANTLRParser.FRAGMENT) { inFragmentRule = true; } } } if (!inFragmentRule) { nonFragmentRuleCount++; } }
/** Make sure even imaginary nodes know the input stream */ public override object Create(int tokenType, string text) { GrammarAST t; if (tokenType == ANTLRParser.RULE) { // needed by TreeWizard to make RULE tree t = new RuleAST(new CommonToken(tokenType, text)); } else if (tokenType == ANTLRParser.STRING_LITERAL) { // implicit lexer construction done with wizard; needs this node type // whereas grammar ANTLRParser.g can use token option to spec node type t = new TerminalAST(new CommonToken(tokenType, text)); } else { t = (GrammarAST)base.Create(tokenType, text); } t.Token.InputStream = input; return(t); }
protected virtual bool TranslateLeftFactoredDecision(GrammarAST block, string factoredRule, bool variant, DecisionFactorMode mode, bool includeFactoredElement) { if (mode == DecisionFactorMode.PARTIAL_UNFACTORED && includeFactoredElement) { throw new ArgumentException("Cannot include the factored element in unfactored alternatives."); } else if (mode == DecisionFactorMode.COMBINED_FACTOR && !includeFactoredElement) { throw new ArgumentException("Cannot return a combined answer without the factored element."); } if (!ExpandOptionalQuantifiersForBlock(block, variant)) { return(false); } IList <GrammarAST> alternatives = block.GetAllChildrenWithType(ANTLRParser.ALT); GrammarAST[] factoredAlternatives = new GrammarAST[alternatives.Count]; GrammarAST[] unfactoredAlternatives = new GrammarAST[alternatives.Count]; IntervalSet factoredIntervals = new IntervalSet(); IntervalSet unfactoredIntervals = new IntervalSet(); for (int i = 0; i < alternatives.Count; i++) { GrammarAST alternative = alternatives[i]; if (mode.IncludeUnfactoredAlts()) { GrammarAST unfactoredAlt = TranslateLeftFactoredAlternative(alternative.DupTree(), factoredRule, variant, DecisionFactorMode.PARTIAL_UNFACTORED, false); unfactoredAlternatives[i] = unfactoredAlt; if (unfactoredAlt != null) { unfactoredIntervals.Add(i); } } if (mode.IncludeFactoredAlts()) { GrammarAST factoredAlt = TranslateLeftFactoredAlternative(alternative, factoredRule, variant, mode == DecisionFactorMode.COMBINED_FACTOR ? DecisionFactorMode.PARTIAL_FACTORED : DecisionFactorMode.FULL_FACTOR, includeFactoredElement); factoredAlternatives[i] = factoredAlt; if (factoredAlt != null) { factoredIntervals.Add(alternative.ChildIndex); } } } if (factoredIntervals.IsNil && !mode.IncludeUnfactoredAlts()) { return(false); } else if (unfactoredIntervals.IsNil && !mode.IncludeFactoredAlts()) { return(false); } if (unfactoredIntervals.IsNil && factoredIntervals.Count == alternatives.Count && mode.IncludeFactoredAlts() && !includeFactoredElement) { for (int i = 0; i < factoredAlternatives.Length; i++) { GrammarAST translatedAlt = factoredAlternatives[i]; if (translatedAlt.ChildCount == 0) { adaptor.AddChild(translatedAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.SetChild(block, i, translatedAlt); } return(true); } else if (factoredIntervals.IsNil && unfactoredIntervals.Count == alternatives.Count && mode.IncludeUnfactoredAlts()) { for (int i = 0; i < unfactoredAlternatives.Length; i++) { GrammarAST translatedAlt = unfactoredAlternatives[i]; if (translatedAlt.ChildCount == 0) { adaptor.AddChild(translatedAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.SetChild(block, i, translatedAlt); } return(true); } if (mode == DecisionFactorMode.FULL_FACTOR) { return(false); } /* for a, b, c being arbitrary `element` trees, this block performs * this transformation: * * factoredElement a * | factoredElement b * | factoredElement c * | ... * * ==> * * factoredElement (a | b | c | ...) */ GrammarAST newChildren = (GrammarAST)adaptor.Nil(); for (int i = 0; i < alternatives.Count; i++) { if (mode.IncludeFactoredAlts() && factoredIntervals.Contains(i)) { bool combineWithPrevious = i > 0 && factoredIntervals.Contains(i - 1) && (!mode.IncludeUnfactoredAlts() || !unfactoredIntervals.Contains(i - 1)); if (combineWithPrevious) { GrammarAST translatedAlt = factoredAlternatives[i]; if (translatedAlt.ChildCount == 0) { adaptor.AddChild(translatedAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } GrammarAST previous = (GrammarAST)newChildren.GetChild(newChildren.ChildCount - 1); #if false if (LOGGER.isLoggable(Level.FINE)) { LOGGER.log(Level.FINE, previous.ToStringTree()); LOGGER.log(Level.FINE, translatedAlt.ToStringTree()); } #endif if (previous.ChildCount == 1 || previous.GetChild(1).Type != ANTLRParser.BLOCK) { GrammarAST newBlock = new BlockAST(adaptor.CreateToken(ANTLRParser.BLOCK, "BLOCK")); GrammarAST newAlt = new AltAST(adaptor.CreateToken(ANTLRParser.ALT, "ALT")); adaptor.AddChild(newBlock, newAlt); while (previous.ChildCount > 1) { adaptor.AddChild(newAlt, previous.DeleteChild(1)); } if (newAlt.ChildCount == 0) { adaptor.AddChild(newAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.AddChild(previous, newBlock); } if (translatedAlt.ChildCount == 1 || translatedAlt.GetChild(1).Type != ANTLRParser.BLOCK) { GrammarAST newBlock = new BlockAST(adaptor.CreateToken(ANTLRParser.BLOCK, "BLOCK")); GrammarAST newAlt = new AltAST(adaptor.CreateToken(ANTLRParser.ALT, "ALT")); adaptor.AddChild(newBlock, newAlt); while (translatedAlt.ChildCount > 1) { adaptor.AddChild(newAlt, translatedAlt.DeleteChild(1)); } if (newAlt.ChildCount == 0) { adaptor.AddChild(newAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.AddChild(translatedAlt, newBlock); } GrammarAST combinedBlock = (GrammarAST)previous.GetChild(1); adaptor.AddChild(combinedBlock, translatedAlt.GetChild(1).GetChild(0)); #if false if (LOGGER.isLoggable(Level.FINE)) { LOGGER.log(Level.FINE, previous.ToStringTree()); } #endif } else { GrammarAST translatedAlt = factoredAlternatives[i]; if (translatedAlt.ChildCount == 0) { adaptor.AddChild(translatedAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.AddChild(newChildren, translatedAlt); } } if (mode.IncludeUnfactoredAlts() && unfactoredIntervals.Contains(i)) { GrammarAST translatedAlt = unfactoredAlternatives[i]; if (translatedAlt.ChildCount == 0) { adaptor.AddChild(translatedAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.AddChild(newChildren, translatedAlt); } } adaptor.ReplaceChildren(block, 0, block.ChildCount - 1, newChildren); if (!variant && block.Parent is RuleAST) { RuleAST ruleAST = (RuleAST)block.Parent; string ruleName = ruleAST.GetChild(0).Text; Rule r = _rules[ruleName]; IList <GrammarAST> blockAlts = block.GetAllChildrenWithType(ANTLRParser.ALT); r.numberOfAlts = blockAlts.Count; r.alt = new Alternative[blockAlts.Count + 1]; for (int i = 0; i < blockAlts.Count; i++) { r.alt[i + 1] = new Alternative(r, i + 1); r.alt[i + 1].ast = (AltAST)blockAlts[i]; } } return(true); }
/** Return true if successful */ public virtual bool TranslateLeftRecursiveRule(GrammarRootAST ast, LeftRecursiveRule r, string language) { //tool.log("grammar", ruleAST.toStringTree()); GrammarAST prevRuleAST = r.ast; string ruleName = prevRuleAST.GetChild(0).Text; LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker = new LeftRecursiveRuleAnalyzer(prevRuleAST, tool, ruleName, language); bool isLeftRec; try { //System.Console.WriteLine("TESTING ---------------\n" + // leftRecursiveRuleWalker.Text(ruleAST)); isLeftRec = leftRecursiveRuleWalker.rec_rule(); } catch (RecognitionException) { isLeftRec = false; // didn't match; oh well } if (!isLeftRec) { return(false); } // replace old rule's AST; first create text of altered rule GrammarAST RULES = (GrammarAST)ast.GetFirstChildWithType(ANTLRParser.RULES); string newRuleText = leftRecursiveRuleWalker.GetArtificialOpPrecRule(); //System.Console.WriteLine("created: " + newRuleText); // now parse within the context of the grammar that originally created // the AST we are transforming. This could be an imported grammar so // we cannot just reference this.g because the role might come from // the imported grammar and not the root grammar (this.g) RuleAST t = ParseArtificialRule(prevRuleAST.g, newRuleText); // reuse the name token from the original AST since it refers to the proper source location in the original grammar ((GrammarAST)t.GetChild(0)).Token = ((GrammarAST)prevRuleAST.GetChild(0)).Token; // update grammar AST and set rule's AST. RULES.SetChild(prevRuleAST.ChildIndex, t); r.ast = t; // Reduce sets in newly created rule tree GrammarTransformPipeline transform = new GrammarTransformPipeline(g, g.tool); transform.ReduceBlocksToSets(r.ast); transform.ExpandParameterizedLoops(r.ast); // Rerun semantic checks on the new rule RuleCollector ruleCollector = new RuleCollector(g); ruleCollector.Visit(t, "rule"); BasicSemanticChecks basics = new BasicSemanticChecks(g, ruleCollector); // disable the assoc element option checks because they are already // handled for the pre-transformed rule. basics.checkAssocElementOption = false; basics.Visit(t, "rule"); // track recursive alt info for codegen r.recPrimaryAlts = new List <LeftRecursiveRuleAltInfo>(); foreach (var altInfo in leftRecursiveRuleWalker.prefixAndOtherAlts) { r.recPrimaryAlts.Add(altInfo); } if (r.recPrimaryAlts.Count == 0) { tool.errMgr.GrammarError(ErrorType.NO_NON_LR_ALTS, g.fileName, ((GrammarAST)r.ast.GetChild(0)).Token, r.name); } r.recOpAlts = new OrderedHashMap <int, LeftRecursiveRuleAltInfo>(); foreach (var pair in leftRecursiveRuleWalker.binaryAlts) { r.recOpAlts[pair.Key] = pair.Value; } foreach (var pair in leftRecursiveRuleWalker.ternaryAlts) { r.recOpAlts[pair.Key] = pair.Value; } foreach (var pair in leftRecursiveRuleWalker.suffixAlts) { r.recOpAlts[pair.Key] = pair.Value; } // walk alt info records and set their altAST to point to appropriate ALT subtree // from freshly created AST SetAltASTPointers(r, t); // update Rule to just one alt and add prec alt ActionAST arg = (ActionAST)r.ast.GetFirstChildWithType(ANTLRParser.ARG_ACTION); if (arg != null) { r.args = ScopeParser.ParseTypedArgList(arg, arg.Text, g); r.args.type = AttributeDict.DictType.ARG; r.args.ast = arg; arg.resolver = r.alt[1]; // todo: isn't this Rule or something? } // define labels on recursive rule refs we delete; they don't point to nodes of course // these are so $label in action translation works foreach (System.Tuple <GrammarAST, string> pair in leftRecursiveRuleWalker.leftRecursiveRuleRefLabels) { GrammarAST labelNode = pair.Item1; GrammarAST labelOpNode = (GrammarAST)labelNode.Parent; GrammarAST elementNode = (GrammarAST)labelOpNode.GetChild(1); LabelElementPair lp = new LabelElementPair(g, labelNode, elementNode, labelOpNode.Type); r.alt[1].labelDefs.Map(labelNode.Text, lp); } // copy to rule from walker r.leftRecursiveRuleRefLabels = leftRecursiveRuleWalker.leftRecursiveRuleRefLabels; tool.Log("grammar", "added: " + t.ToStringTree()); return(true); }
public override void DiscoverLexerRule(RuleAST rule, GrammarAST ID, IList <GrammarAST> modifiers, GrammarAST block) { currentRule = g.GetRule(ID.Text); }
/** Build lexer grammar from combined grammar that looks like: * * (COMBINED_GRAMMAR A * (tokens { X (= Y 'y')) * (OPTIONS (= x 'y')) * (@ members {foo}) * (@ lexer header {package jj;}) * (RULES (RULE .+))) * * Move rules and actions to new tree, don't dup. Split AST apart. * We'll have this Grammar share token symbols later; don't generate * tokenVocab or tokens{} section. Copy over named actions. * * Side-effects: it removes children from GRAMMAR & RULES nodes * in combined AST. Anything cut out is dup'd before * adding to lexer to avoid "who's ur daddy" issues */ public virtual GrammarRootAST ExtractImplicitLexer(Grammar combinedGrammar) { GrammarRootAST combinedAST = combinedGrammar.ast; //tool.log("grammar", "before="+combinedAST.toStringTree()); GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.Token.InputStream); GrammarAST[] elements = combinedAST.GetChildrenAsArray(); // MAKE A GRAMMAR ROOT and ID string lexerName = combinedAST.GetChild(0).Text + "Lexer"; GrammarRootAST lexerAST = new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR"), combinedGrammar.ast.tokenStream); lexerAST.grammarType = ANTLRParser.LEXER; lexerAST.Token.InputStream = combinedAST.Token.InputStream; lexerAST.AddChild((ITree)adaptor.Create(ANTLRParser.ID, lexerName)); // COPY OPTIONS GrammarAST optionsRoot = (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.OPTIONS); if (optionsRoot != null && optionsRoot.ChildCount != 0) { GrammarAST lexerOptionsRoot = (GrammarAST)adaptor.DupNode(optionsRoot); lexerAST.AddChild(lexerOptionsRoot); GrammarAST[] options = optionsRoot.GetChildrenAsArray(); foreach (GrammarAST o in options) { string optionName = o.GetChild(0).Text; if (Grammar.lexerOptions.Contains(optionName) && !Grammar.doNotCopyOptionsToLexer.Contains(optionName)) { GrammarAST optionTree = (GrammarAST)adaptor.DupTree(o); lexerOptionsRoot.AddChild(optionTree); lexerAST.SetOption(optionName, (GrammarAST)optionTree.GetChild(1)); } } } // COPY all named actions, but only move those with lexer:: scope IList <GrammarAST> actionsWeMoved = new List <GrammarAST>(); foreach (GrammarAST e in elements) { if (e.Type == ANTLRParser.AT) { lexerAST.AddChild((ITree)adaptor.DupTree(e)); if (e.GetChild(0).Text.Equals("lexer")) { actionsWeMoved.Add(e); } } } foreach (GrammarAST r in actionsWeMoved) { combinedAST.DeleteChild(r); } GrammarAST combinedRulesRoot = (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.RULES); if (combinedRulesRoot == null) { return(lexerAST); } // MOVE lexer rules GrammarAST lexerRulesRoot = (GrammarAST)adaptor.Create(ANTLRParser.RULES, "RULES"); lexerAST.AddChild(lexerRulesRoot); IList <GrammarAST> rulesWeMoved = new List <GrammarAST>(); GrammarASTWithOptions[] rules; if (combinedRulesRoot.ChildCount > 0) { rules = combinedRulesRoot.Children.Cast <GrammarASTWithOptions>().ToArray(); } else { rules = new GrammarASTWithOptions[0]; } foreach (GrammarASTWithOptions r in rules) { string ruleName = r.GetChild(0).Text; if (Grammar.IsTokenName(ruleName)) { lexerRulesRoot.AddChild((ITree)adaptor.DupTree(r)); rulesWeMoved.Add(r); } } foreach (GrammarAST r in rulesWeMoved) { combinedRulesRoot.DeleteChild(r); } // Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if' IList <System.Tuple <GrammarAST, GrammarAST> > litAliases = Grammar.GetStringLiteralAliasesFromLexerRules(lexerAST); ISet <string> stringLiterals = combinedGrammar.GetStringLiterals(); // add strings from combined grammar (and imported grammars) into lexer // put them first as they are keywords; must resolve ambigs to these rules // tool.log("grammar", "strings from parser: "+stringLiterals); int insertIndex = 0; foreach (string lit in stringLiterals) { // if lexer already has a rule for literal, continue if (litAliases != null) { foreach (System.Tuple <GrammarAST, GrammarAST> pair in litAliases) { GrammarAST litAST = pair.Item2; if (lit.Equals(litAST.Text)) { goto continueNextLit; } } } // create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>)) string rname = combinedGrammar.GetStringLiteralLexerRuleName(lit); // can't use wizard; need special node types GrammarAST litRule = new RuleAST(ANTLRParser.RULE); BlockAST blk = new BlockAST(ANTLRParser.BLOCK); AltAST alt = new AltAST(ANTLRParser.ALT); TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit)); alt.AddChild(slit); blk.AddChild(alt); CommonToken idToken = new CommonToken(ANTLRParser.TOKEN_REF, rname); litRule.AddChild(new TerminalAST(idToken)); litRule.AddChild(blk); lexerRulesRoot.InsertChild(insertIndex, litRule); // lexerRulesRoot.getChildren().add(0, litRule); lexerRulesRoot.FreshenParentAndChildIndexes(); // reset indexes and set litRule parent // next literal will be added after the one just added insertIndex++; continueNextLit: ; } // TODO: take out after stable if slow lexerAST.SanityCheckParentAndChildIndexes(); combinedAST.SanityCheckParentAndChildIndexes(); // tool.log("grammar", combinedAST.toTokenString()); combinedGrammar.tool.Log("grammar", "after extract implicit lexer =" + combinedAST.ToStringTree()); combinedGrammar.tool.Log("grammar", "lexer =" + lexerAST.ToStringTree()); if (lexerRulesRoot.ChildCount == 0) { return(null); } return(lexerAST); }
protected virtual GrammarAST TranslateLeftFactoredElement(GrammarAST element, string factoredRule, bool variant, DecisionFactorMode mode, bool includeFactoredElement) { if (mode == DecisionFactorMode.PARTIAL_UNFACTORED && includeFactoredElement) { throw new ArgumentException("Cannot include the factored element in unfactored alternatives."); } if (mode == DecisionFactorMode.COMBINED_FACTOR) { throw new InvalidOperationException("Cannot return a combined answer."); } Debug.Assert(!mode.IncludeFactoredAlts() || !mode.IncludeUnfactoredAlts()); switch (element.Type) { case ANTLRParser.ASSIGN: case ANTLRParser.PLUS_ASSIGN: { /* label=a * * ==> * * factoredElement label=a_factored */ GrammarAST translatedChildElement = TranslateLeftFactoredElement((GrammarAST)element.GetChild(1), factoredRule, variant, mode, includeFactoredElement); if (translatedChildElement == null) { return(null); } RuleAST ruleAST = (RuleAST)element.GetAncestor(ANTLRParser.RULE); #if false LOGGER.log(Level.WARNING, "Could not left factor ''{0}'' out of decision in rule ''{1}'': labeled rule references are not yet supported.", new object[] { factoredRule, ruleAST.GetChild(0).Text }); #endif return(null); //if (!translatedChildElement.IsNil) //{ // GrammarAST root = (GrammarAST)adaptor.Nil(); // object factoredElement = translatedChildElement; // if (outerRule) // { // adaptor.AddChild(root, factoredElement); // } // string action = string.Format("_localctx.{0} = (ContextType)_localctx.getParent().getChild(_localctx.getParent().getChildCount() - 1);", element.GetChild(0).Text); // adaptor.AddChild(root, new ActionAST(adaptor.CreateToken(ANTLRParser.ACTION, action))); // return root; //} //else //{ // GrammarAST root = (GrammarAST)adaptor.Nil(); // object factoredElement = adaptor.DeleteChild(translatedChildElement, 0); // if (outerRule) // { // adaptor.AddChild(root, factoredElement); // } // adaptor.AddChild(root, element); // adaptor.ReplaceChildren(element, 1, 1, translatedChildElement); // return root; //} } case ANTLRParser.RULE_REF: { if (factoredRule.Equals(element.Token.Text)) { if (!mode.IncludeFactoredAlts()) { return(null); } if (includeFactoredElement) { // this element is already left factored return(element); } GrammarAST root1 = (GrammarAST)adaptor.Nil(); root1.AddChild((ITree)adaptor.Create(TokenConstants.Epsilon, "EPSILON")); root1.DeleteChild(0); return(root1); } Rule targetRule; if (!_rules.TryGetValue(element.Token.Text, out targetRule)) { return(null); } RuleVariants ruleVariants = CreateLeftFactoredRuleVariant(targetRule, factoredRule); switch (ruleVariants) { case RuleVariants.NONE: if (!mode.IncludeUnfactoredAlts()) { return(null); } // just call the original rule (leave the element unchanged) return(element); case RuleVariants.FULLY_FACTORED: if (!mode.IncludeFactoredAlts()) { return(null); } break; case RuleVariants.PARTIALLY_FACTORED: break; default: throw new InvalidOperationException(); } string marker = mode.IncludeFactoredAlts() ? ATNSimulator.RuleLfVariantMarker : ATNSimulator.RuleNolfVariantMarker; element.SetText(element.Text + marker + factoredRule); GrammarAST root = (GrammarAST)adaptor.Nil(); if (includeFactoredElement) { Debug.Assert(mode.IncludeFactoredAlts()); RuleRefAST factoredRuleRef = new RuleRefAST(adaptor.CreateToken(ANTLRParser.RULE_REF, factoredRule)); factoredRuleRef.SetOption(SUPPRESS_ACCESSOR, (GrammarAST)adaptor.Create(ANTLRParser.ID, "true")); Rule factoredRuleDef = _rules[factoredRule]; if (factoredRuleDef is LeftRecursiveRule) { factoredRuleRef.SetOption(LeftRecursiveRuleTransformer.PRECEDENCE_OPTION_NAME, (GrammarAST)adaptor.Create(ANTLRParser.INT, "0")); } if (factoredRuleDef.args != null && factoredRuleDef.args.Size() > 0) { throw new NotImplementedException("Cannot left-factor rules with arguments yet."); } adaptor.AddChild(root, factoredRuleRef); } adaptor.AddChild(root, element); return(root); } case ANTLRParser.BLOCK: { GrammarAST cloned = element.DupTree(); if (!TranslateLeftFactoredDecision(cloned, factoredRule, variant, mode, includeFactoredElement)) { return(null); } if (cloned.ChildCount != 1) { return(null); } GrammarAST root = (GrammarAST)adaptor.Nil(); for (int i = 0; i < cloned.GetChild(0).ChildCount; i++) { adaptor.AddChild(root, cloned.GetChild(0).GetChild(i)); } return(root); } case ANTLRParser.POSITIVE_CLOSURE: { /* a+ * * => * * factoredElement a_factored a* */ GrammarAST originalChildElement = (GrammarAST)element.GetChild(0); GrammarAST translatedElement = TranslateLeftFactoredElement(originalChildElement.DupTree(), factoredRule, variant, mode, includeFactoredElement); if (translatedElement == null) { return(null); } GrammarAST closure = new StarBlockAST(ANTLRParser.CLOSURE, adaptor.CreateToken(ANTLRParser.CLOSURE, "CLOSURE"), null); adaptor.AddChild(closure, originalChildElement); GrammarAST root = (GrammarAST)adaptor.Nil(); if (mode.IncludeFactoredAlts()) { if (includeFactoredElement) { object factoredElement = adaptor.DeleteChild(translatedElement, 0); adaptor.AddChild(root, factoredElement); } } adaptor.AddChild(root, translatedElement); adaptor.AddChild(root, closure); return(root); } case ANTLRParser.CLOSURE: case ANTLRParser.OPTIONAL: // not yet supported if (mode.IncludeUnfactoredAlts()) { return(element); } return(null); case ANTLRParser.DOT: // ref to imported grammar, not yet supported if (mode.IncludeUnfactoredAlts()) { return(element); } return(null); case ANTLRParser.ACTION: case ANTLRParser.SEMPRED: if (mode.IncludeUnfactoredAlts()) { return(element); } return(null); case ANTLRParser.WILDCARD: case ANTLRParser.STRING_LITERAL: case ANTLRParser.TOKEN_REF: case ANTLRParser.NOT: // terminals if (mode.IncludeUnfactoredAlts()) { return(element); } return(null); case ANTLRParser.EPSILON: // empty tree if (mode.IncludeUnfactoredAlts()) { return(element); } return(null); default: // unknown return(null); } }
protected virtual RuleVariants CreateLeftFactoredRuleVariant(Rule rule, string factoredElement) { RuleAST ast = (RuleAST)rule.ast.DupTree(); BlockAST block = (BlockAST)ast.GetFirstChildWithType(ANTLRParser.BLOCK); RuleAST unfactoredAst = null; BlockAST unfactoredBlock = null; if (TranslateLeftFactoredDecision(block, factoredElement, true, DecisionFactorMode.FULL_FACTOR, false)) { // all alternatives factored } else { ast = (RuleAST)rule.ast.DupTree(); block = (BlockAST)ast.GetFirstChildWithType(ANTLRParser.BLOCK); if (!TranslateLeftFactoredDecision(block, factoredElement, true, DecisionFactorMode.PARTIAL_FACTORED, false)) { // no left factored alts return(RuleVariants.NONE); } unfactoredAst = (RuleAST)rule.ast.DupTree(); unfactoredBlock = (BlockAST)unfactoredAst.GetFirstChildWithType(ANTLRParser.BLOCK); if (!TranslateLeftFactoredDecision(unfactoredBlock, factoredElement, true, DecisionFactorMode.PARTIAL_UNFACTORED, false)) { throw new InvalidOperationException("expected unfactored alts for partial factorization"); } } /* * factored elements */ { string variantName = ast.GetChild(0).Text + ATNSimulator.RuleLfVariantMarker + factoredElement; ((GrammarAST)ast.GetChild(0)).Token = adaptor.CreateToken(ast.GetChild(0).Type, variantName); GrammarAST ruleParent = (GrammarAST)rule.ast.Parent; ruleParent.InsertChild(rule.ast.ChildIndex + 1, ast); ruleParent.FreshenParentAndChildIndexes(rule.ast.ChildIndex); IList <GrammarAST> alts = block.GetAllChildrenWithType(ANTLRParser.ALT); Rule variant = new Rule(_g, ast.GetChild(0).Text, ast, alts.Count); _g.DefineRule(variant); for (int i = 0; i < alts.Count; i++) { variant.alt[i + 1].ast = (AltAST)alts[i]; } } /* * unfactored elements */ if (unfactoredAst != null) { string variantName = unfactoredAst.GetChild(0).Text + ATNSimulator.RuleNolfVariantMarker + factoredElement; ((GrammarAST)unfactoredAst.GetChild(0)).Token = adaptor.CreateToken(unfactoredAst.GetChild(0).Type, variantName); GrammarAST ruleParent = (GrammarAST)rule.ast.Parent; ruleParent.InsertChild(rule.ast.ChildIndex + 1, unfactoredAst); ruleParent.FreshenParentAndChildIndexes(rule.ast.ChildIndex); IList <GrammarAST> alts = unfactoredBlock.GetAllChildrenWithType(ANTLRParser.ALT); Rule variant = new Rule(_g, unfactoredAst.GetChild(0).Text, unfactoredAst, alts.Count); _g.DefineRule(variant); for (int i = 0; i < alts.Count; i++) { variant.alt[i + 1].ast = (AltAST)alts[i]; } } /* * result */ return(unfactoredAst == null ? RuleVariants.FULLY_FACTORED : RuleVariants.PARTIALLY_FACTORED); }