/** Remove any lexer rules from a COMBINED; already passed to lexer */ protected void TrimGrammar() { if (grammar.type != GrammarType.Combined) { return; } // form is (header ... ) ( grammar ID (scope ...) ... ( rule ... ) ( rule ... ) ... ) GrammarAST p = root; // find the grammar spec while (!p.Text.Equals("grammar")) { p = (GrammarAST)p.Parent.GetChild(p.ChildIndex + 1); } for (int i = 0; i < p.ChildCount; i++) { if (p.GetChild(i).Type != RULE) { continue; } string ruleName = p.GetChild(i).GetChild(0).Text; //Console.Out.WriteLine( "rule " + ruleName + " prev=" + prev.getText() ); if (Rule.GetRuleType(ruleName) == RuleType.Lexer) { // remove lexer rule p.DeleteChild(i); i--; } } //Console.Out.WriteLine( "root after removal is: " + root.ToStringList() ); }
protected virtual GrammarAST ExpandOptionalQuantifiersForAlt(GrammarAST alt) { if (alt.ChildCount == 0) { return(null); } if (alt.GetChild(0).Type == ANTLRParser.OPTIONAL) { GrammarAST root = (GrammarAST)adaptor.Nil(); GrammarAST alt2 = alt.DupTree(); alt2.DeleteChild(0); if (alt2.ChildCount == 0) { adaptor.AddChild(alt2, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } alt.SetChild(0, alt.GetChild(0).GetChild(0)); if (alt.GetChild(0).Type == ANTLRParser.BLOCK && alt.GetChild(0).ChildCount == 1 && alt.GetChild(0).GetChild(0).Type == ANTLRParser.ALT) { GrammarAST list = (GrammarAST)adaptor.Nil(); foreach (object tree in ((GrammarAST)alt.GetChild(0).GetChild(0)).Children) { adaptor.AddChild(list, tree); } adaptor.ReplaceChildren(alt, 0, 0, list); } adaptor.AddChild(root, alt); adaptor.AddChild(root, alt2); return(root); } else if (alt.GetChild(0).Type == ANTLRParser.CLOSURE) { GrammarAST root = (GrammarAST)adaptor.Nil(); GrammarAST alt2 = alt.DupTree(); alt2.DeleteChild(0); if (alt2.ChildCount == 0) { adaptor.AddChild(alt2, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } PlusBlockAST plusBlockAST = new PlusBlockAST(ANTLRParser.POSITIVE_CLOSURE, adaptor.CreateToken(ANTLRParser.POSITIVE_CLOSURE, "+"), null); for (int i = 0; i < alt.GetChild(0).ChildCount; i++) { plusBlockAST.AddChild(alt.GetChild(0).GetChild(i)); } alt.SetChild(0, plusBlockAST); adaptor.AddChild(root, alt); adaptor.AddChild(root, alt2); return(root); } return(alt); }
// TODO: this strips the tree properly, but since text() // uses the start of stop token index and gets text from that // ineffectively ignores this routine. public virtual GrammarAST StripLeftRecursion(GrammarAST altAST) { GrammarAST lrlabel = null; GrammarAST first = (GrammarAST)altAST.GetChild(0); int leftRecurRuleIndex = 0; if (first.Type == ELEMENT_OPTIONS) { first = (GrammarAST)altAST.GetChild(1); leftRecurRuleIndex = 1; } ITree rref = first.GetChild(1); // if label=rule if ((first.Type == RULE_REF && first.Text.Equals(ruleName)) || (rref != null && rref.Type == RULE_REF && rref.Text.Equals(ruleName))) { if (first.Type == ASSIGN || first.Type == PLUS_ASSIGN) { lrlabel = (GrammarAST)first.GetChild(0); } // remove rule ref (first child unless options present) altAST.DeleteChild(leftRecurRuleIndex); // reset index so it prints properly (sets token range of // ALT to start to right of left recur rule we deleted) GrammarAST newFirstChild = (GrammarAST)altAST.GetChild(leftRecurRuleIndex); altAST.TokenStartIndex = newFirstChild.TokenStartIndex; } return(lrlabel); }
protected virtual GrammarAST TranslateLeftFactoredElement(GrammarAST element, string factoredRule, bool variant, DecisionFactorMode mode, bool includeFactoredElement) { if (mode == DecisionFactorMode.PARTIAL_UNFACTORED && includeFactoredElement) { throw new ArgumentException("Cannot include the factored element in unfactored alternatives."); } if (mode == DecisionFactorMode.COMBINED_FACTOR) { throw new InvalidOperationException("Cannot return a combined answer."); } Debug.Assert(!mode.IncludeFactoredAlts() || !mode.IncludeUnfactoredAlts()); switch (element.Type) { case ANTLRParser.ASSIGN: case ANTLRParser.PLUS_ASSIGN: { /* label=a * * ==> * * factoredElement label=a_factored */ GrammarAST translatedChildElement = TranslateLeftFactoredElement((GrammarAST)element.GetChild(1), factoredRule, variant, mode, includeFactoredElement); if (translatedChildElement == null) { return(null); } RuleAST ruleAST = (RuleAST)element.GetAncestor(ANTLRParser.RULE); #if false LOGGER.log(Level.WARNING, "Could not left factor ''{0}'' out of decision in rule ''{1}'': labeled rule references are not yet supported.", new object[] { factoredRule, ruleAST.GetChild(0).Text }); #endif return(null); //if (!translatedChildElement.IsNil) //{ // GrammarAST root = (GrammarAST)adaptor.Nil(); // object factoredElement = translatedChildElement; // if (outerRule) // { // adaptor.AddChild(root, factoredElement); // } // string action = string.Format("_localctx.{0} = (ContextType)_localctx.getParent().getChild(_localctx.getParent().getChildCount() - 1);", element.GetChild(0).Text); // adaptor.AddChild(root, new ActionAST(adaptor.CreateToken(ANTLRParser.ACTION, action))); // return root; //} //else //{ // GrammarAST root = (GrammarAST)adaptor.Nil(); // object factoredElement = adaptor.DeleteChild(translatedChildElement, 0); // if (outerRule) // { // adaptor.AddChild(root, factoredElement); // } // adaptor.AddChild(root, element); // adaptor.ReplaceChildren(element, 1, 1, translatedChildElement); // return root; //} } case ANTLRParser.RULE_REF: { if (factoredRule.Equals(element.Token.Text)) { if (!mode.IncludeFactoredAlts()) { return(null); } if (includeFactoredElement) { // this element is already left factored return(element); } GrammarAST root1 = (GrammarAST)adaptor.Nil(); root1.AddChild((ITree)adaptor.Create(TokenConstants.Epsilon, "EPSILON")); root1.DeleteChild(0); return(root1); } Rule targetRule; if (!_rules.TryGetValue(element.Token.Text, out targetRule)) { return(null); } RuleVariants ruleVariants = CreateLeftFactoredRuleVariant(targetRule, factoredRule); switch (ruleVariants) { case RuleVariants.NONE: if (!mode.IncludeUnfactoredAlts()) { return(null); } // just call the original rule (leave the element unchanged) return(element); case RuleVariants.FULLY_FACTORED: if (!mode.IncludeFactoredAlts()) { return(null); } break; case RuleVariants.PARTIALLY_FACTORED: break; default: throw new InvalidOperationException(); } string marker = mode.IncludeFactoredAlts() ? ATNSimulator.RuleLfVariantMarker : ATNSimulator.RuleNolfVariantMarker; element.SetText(element.Text + marker + factoredRule); GrammarAST root = (GrammarAST)adaptor.Nil(); if (includeFactoredElement) { Debug.Assert(mode.IncludeFactoredAlts()); RuleRefAST factoredRuleRef = new RuleRefAST(adaptor.CreateToken(ANTLRParser.RULE_REF, factoredRule)); factoredRuleRef.SetOption(SUPPRESS_ACCESSOR, (GrammarAST)adaptor.Create(ANTLRParser.ID, "true")); Rule factoredRuleDef = _rules[factoredRule]; if (factoredRuleDef is LeftRecursiveRule) { factoredRuleRef.SetOption(LeftRecursiveRuleTransformer.PRECEDENCE_OPTION_NAME, (GrammarAST)adaptor.Create(ANTLRParser.INT, "0")); } if (factoredRuleDef.args != null && factoredRuleDef.args.Size() > 0) { throw new NotImplementedException("Cannot left-factor rules with arguments yet."); } adaptor.AddChild(root, factoredRuleRef); } adaptor.AddChild(root, element); return(root); } case ANTLRParser.BLOCK: { GrammarAST cloned = element.DupTree(); if (!TranslateLeftFactoredDecision(cloned, factoredRule, variant, mode, includeFactoredElement)) { return(null); } if (cloned.ChildCount != 1) { return(null); } GrammarAST root = (GrammarAST)adaptor.Nil(); for (int i = 0; i < cloned.GetChild(0).ChildCount; i++) { adaptor.AddChild(root, cloned.GetChild(0).GetChild(i)); } return(root); } case ANTLRParser.POSITIVE_CLOSURE: { /* a+ * * => * * factoredElement a_factored a* */ GrammarAST originalChildElement = (GrammarAST)element.GetChild(0); GrammarAST translatedElement = TranslateLeftFactoredElement(originalChildElement.DupTree(), factoredRule, variant, mode, includeFactoredElement); if (translatedElement == null) { return(null); } GrammarAST closure = new StarBlockAST(ANTLRParser.CLOSURE, adaptor.CreateToken(ANTLRParser.CLOSURE, "CLOSURE"), null); adaptor.AddChild(closure, originalChildElement); GrammarAST root = (GrammarAST)adaptor.Nil(); if (mode.IncludeFactoredAlts()) { if (includeFactoredElement) { object factoredElement = adaptor.DeleteChild(translatedElement, 0); adaptor.AddChild(root, factoredElement); } } adaptor.AddChild(root, translatedElement); adaptor.AddChild(root, closure); return(root); } case ANTLRParser.CLOSURE: case ANTLRParser.OPTIONAL: // not yet supported if (mode.IncludeUnfactoredAlts()) { return(element); } return(null); case ANTLRParser.DOT: // ref to imported grammar, not yet supported if (mode.IncludeUnfactoredAlts()) { return(element); } return(null); case ANTLRParser.ACTION: case ANTLRParser.SEMPRED: if (mode.IncludeUnfactoredAlts()) { return(element); } return(null); case ANTLRParser.WILDCARD: case ANTLRParser.STRING_LITERAL: case ANTLRParser.TOKEN_REF: case ANTLRParser.NOT: // terminals if (mode.IncludeUnfactoredAlts()) { return(element); } return(null); case ANTLRParser.EPSILON: // empty tree if (mode.IncludeUnfactoredAlts()) { return(element); } return(null); default: // unknown return(null); } }
protected virtual bool TranslateLeftFactoredDecision(GrammarAST block, string factoredRule, bool variant, DecisionFactorMode mode, bool includeFactoredElement) { if (mode == DecisionFactorMode.PARTIAL_UNFACTORED && includeFactoredElement) { throw new ArgumentException("Cannot include the factored element in unfactored alternatives."); } else if (mode == DecisionFactorMode.COMBINED_FACTOR && !includeFactoredElement) { throw new ArgumentException("Cannot return a combined answer without the factored element."); } if (!ExpandOptionalQuantifiersForBlock(block, variant)) { return(false); } IList <GrammarAST> alternatives = block.GetAllChildrenWithType(ANTLRParser.ALT); GrammarAST[] factoredAlternatives = new GrammarAST[alternatives.Count]; GrammarAST[] unfactoredAlternatives = new GrammarAST[alternatives.Count]; IntervalSet factoredIntervals = new IntervalSet(); IntervalSet unfactoredIntervals = new IntervalSet(); for (int i = 0; i < alternatives.Count; i++) { GrammarAST alternative = alternatives[i]; if (mode.IncludeUnfactoredAlts()) { GrammarAST unfactoredAlt = TranslateLeftFactoredAlternative(alternative.DupTree(), factoredRule, variant, DecisionFactorMode.PARTIAL_UNFACTORED, false); unfactoredAlternatives[i] = unfactoredAlt; if (unfactoredAlt != null) { unfactoredIntervals.Add(i); } } if (mode.IncludeFactoredAlts()) { GrammarAST factoredAlt = TranslateLeftFactoredAlternative(alternative, factoredRule, variant, mode == DecisionFactorMode.COMBINED_FACTOR ? DecisionFactorMode.PARTIAL_FACTORED : DecisionFactorMode.FULL_FACTOR, includeFactoredElement); factoredAlternatives[i] = factoredAlt; if (factoredAlt != null) { factoredIntervals.Add(alternative.ChildIndex); } } } if (factoredIntervals.IsNil && !mode.IncludeUnfactoredAlts()) { return(false); } else if (unfactoredIntervals.IsNil && !mode.IncludeFactoredAlts()) { return(false); } if (unfactoredIntervals.IsNil && factoredIntervals.Count == alternatives.Count && mode.IncludeFactoredAlts() && !includeFactoredElement) { for (int i = 0; i < factoredAlternatives.Length; i++) { GrammarAST translatedAlt = factoredAlternatives[i]; if (translatedAlt.ChildCount == 0) { adaptor.AddChild(translatedAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.SetChild(block, i, translatedAlt); } return(true); } else if (factoredIntervals.IsNil && unfactoredIntervals.Count == alternatives.Count && mode.IncludeUnfactoredAlts()) { for (int i = 0; i < unfactoredAlternatives.Length; i++) { GrammarAST translatedAlt = unfactoredAlternatives[i]; if (translatedAlt.ChildCount == 0) { adaptor.AddChild(translatedAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.SetChild(block, i, translatedAlt); } return(true); } if (mode == DecisionFactorMode.FULL_FACTOR) { return(false); } /* for a, b, c being arbitrary `element` trees, this block performs * this transformation: * * factoredElement a * | factoredElement b * | factoredElement c * | ... * * ==> * * factoredElement (a | b | c | ...) */ GrammarAST newChildren = (GrammarAST)adaptor.Nil(); for (int i = 0; i < alternatives.Count; i++) { if (mode.IncludeFactoredAlts() && factoredIntervals.Contains(i)) { bool combineWithPrevious = i > 0 && factoredIntervals.Contains(i - 1) && (!mode.IncludeUnfactoredAlts() || !unfactoredIntervals.Contains(i - 1)); if (combineWithPrevious) { GrammarAST translatedAlt = factoredAlternatives[i]; if (translatedAlt.ChildCount == 0) { adaptor.AddChild(translatedAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } GrammarAST previous = (GrammarAST)newChildren.GetChild(newChildren.ChildCount - 1); #if false if (LOGGER.isLoggable(Level.FINE)) { LOGGER.log(Level.FINE, previous.ToStringTree()); LOGGER.log(Level.FINE, translatedAlt.ToStringTree()); } #endif if (previous.ChildCount == 1 || previous.GetChild(1).Type != ANTLRParser.BLOCK) { GrammarAST newBlock = new BlockAST(adaptor.CreateToken(ANTLRParser.BLOCK, "BLOCK")); GrammarAST newAlt = new AltAST(adaptor.CreateToken(ANTLRParser.ALT, "ALT")); adaptor.AddChild(newBlock, newAlt); while (previous.ChildCount > 1) { adaptor.AddChild(newAlt, previous.DeleteChild(1)); } if (newAlt.ChildCount == 0) { adaptor.AddChild(newAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.AddChild(previous, newBlock); } if (translatedAlt.ChildCount == 1 || translatedAlt.GetChild(1).Type != ANTLRParser.BLOCK) { GrammarAST newBlock = new BlockAST(adaptor.CreateToken(ANTLRParser.BLOCK, "BLOCK")); GrammarAST newAlt = new AltAST(adaptor.CreateToken(ANTLRParser.ALT, "ALT")); adaptor.AddChild(newBlock, newAlt); while (translatedAlt.ChildCount > 1) { adaptor.AddChild(newAlt, translatedAlt.DeleteChild(1)); } if (newAlt.ChildCount == 0) { adaptor.AddChild(newAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.AddChild(translatedAlt, newBlock); } GrammarAST combinedBlock = (GrammarAST)previous.GetChild(1); adaptor.AddChild(combinedBlock, translatedAlt.GetChild(1).GetChild(0)); #if false if (LOGGER.isLoggable(Level.FINE)) { LOGGER.log(Level.FINE, previous.ToStringTree()); } #endif } else { GrammarAST translatedAlt = factoredAlternatives[i]; if (translatedAlt.ChildCount == 0) { adaptor.AddChild(translatedAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.AddChild(newChildren, translatedAlt); } } if (mode.IncludeUnfactoredAlts() && unfactoredIntervals.Contains(i)) { GrammarAST translatedAlt = unfactoredAlternatives[i]; if (translatedAlt.ChildCount == 0) { adaptor.AddChild(translatedAlt, adaptor.Create(ANTLRParser.EPSILON, "EPSILON")); } adaptor.AddChild(newChildren, translatedAlt); } } adaptor.ReplaceChildren(block, 0, block.ChildCount - 1, newChildren); if (!variant && block.Parent is RuleAST) { RuleAST ruleAST = (RuleAST)block.Parent; string ruleName = ruleAST.GetChild(0).Text; Rule r = _rules[ruleName]; IList <GrammarAST> blockAlts = block.GetAllChildrenWithType(ANTLRParser.ALT); r.numberOfAlts = blockAlts.Count; r.alt = new Alternative[blockAlts.Count + 1]; for (int i = 0; i < blockAlts.Count; i++) { r.alt[i + 1] = new Alternative(r, i + 1); r.alt[i + 1].ast = (AltAST)blockAlts[i]; } } return(true); }
/** Build lexer grammar from combined grammar that looks like: * * (COMBINED_GRAMMAR A * (tokens { X (= Y 'y')) * (OPTIONS (= x 'y')) * (@ members {foo}) * (@ lexer header {package jj;}) * (RULES (RULE .+))) * * Move rules and actions to new tree, don't dup. Split AST apart. * We'll have this Grammar share token symbols later; don't generate * tokenVocab or tokens{} section. Copy over named actions. * * Side-effects: it removes children from GRAMMAR & RULES nodes * in combined AST. Anything cut out is dup'd before * adding to lexer to avoid "who's ur daddy" issues */ public virtual GrammarRootAST ExtractImplicitLexer(Grammar combinedGrammar) { GrammarRootAST combinedAST = combinedGrammar.ast; //tool.log("grammar", "before="+combinedAST.toStringTree()); GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.Token.InputStream); GrammarAST[] elements = combinedAST.GetChildrenAsArray(); // MAKE A GRAMMAR ROOT and ID string lexerName = combinedAST.GetChild(0).Text + "Lexer"; GrammarRootAST lexerAST = new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR, "LEXER_GRAMMAR"), combinedGrammar.ast.tokenStream); lexerAST.grammarType = ANTLRParser.LEXER; lexerAST.Token.InputStream = combinedAST.Token.InputStream; lexerAST.AddChild((ITree)adaptor.Create(ANTLRParser.ID, lexerName)); // COPY OPTIONS GrammarAST optionsRoot = (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.OPTIONS); if (optionsRoot != null && optionsRoot.ChildCount != 0) { GrammarAST lexerOptionsRoot = (GrammarAST)adaptor.DupNode(optionsRoot); lexerAST.AddChild(lexerOptionsRoot); GrammarAST[] options = optionsRoot.GetChildrenAsArray(); foreach (GrammarAST o in options) { string optionName = o.GetChild(0).Text; if (Grammar.lexerOptions.Contains(optionName) && !Grammar.doNotCopyOptionsToLexer.Contains(optionName)) { GrammarAST optionTree = (GrammarAST)adaptor.DupTree(o); lexerOptionsRoot.AddChild(optionTree); lexerAST.SetOption(optionName, (GrammarAST)optionTree.GetChild(1)); } } } // COPY all named actions, but only move those with lexer:: scope IList <GrammarAST> actionsWeMoved = new List <GrammarAST>(); foreach (GrammarAST e in elements) { if (e.Type == ANTLRParser.AT) { lexerAST.AddChild((ITree)adaptor.DupTree(e)); if (e.GetChild(0).Text.Equals("lexer")) { actionsWeMoved.Add(e); } } } foreach (GrammarAST r in actionsWeMoved) { combinedAST.DeleteChild(r); } GrammarAST combinedRulesRoot = (GrammarAST)combinedAST.GetFirstChildWithType(ANTLRParser.RULES); if (combinedRulesRoot == null) { return(lexerAST); } // MOVE lexer rules GrammarAST lexerRulesRoot = (GrammarAST)adaptor.Create(ANTLRParser.RULES, "RULES"); lexerAST.AddChild(lexerRulesRoot); IList <GrammarAST> rulesWeMoved = new List <GrammarAST>(); GrammarASTWithOptions[] rules; if (combinedRulesRoot.ChildCount > 0) { rules = combinedRulesRoot.Children.Cast <GrammarASTWithOptions>().ToArray(); } else { rules = new GrammarASTWithOptions[0]; } foreach (GrammarASTWithOptions r in rules) { string ruleName = r.GetChild(0).Text; if (Grammar.IsTokenName(ruleName)) { lexerRulesRoot.AddChild((ITree)adaptor.DupTree(r)); rulesWeMoved.Add(r); } } foreach (GrammarAST r in rulesWeMoved) { combinedRulesRoot.DeleteChild(r); } // Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if' IList <System.Tuple <GrammarAST, GrammarAST> > litAliases = Grammar.GetStringLiteralAliasesFromLexerRules(lexerAST); ISet <string> stringLiterals = combinedGrammar.GetStringLiterals(); // add strings from combined grammar (and imported grammars) into lexer // put them first as they are keywords; must resolve ambigs to these rules // tool.log("grammar", "strings from parser: "+stringLiterals); int insertIndex = 0; foreach (string lit in stringLiterals) { // if lexer already has a rule for literal, continue if (litAliases != null) { foreach (System.Tuple <GrammarAST, GrammarAST> pair in litAliases) { GrammarAST litAST = pair.Item2; if (lit.Equals(litAST.Text)) { goto continueNextLit; } } } // create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>)) string rname = combinedGrammar.GetStringLiteralLexerRuleName(lit); // can't use wizard; need special node types GrammarAST litRule = new RuleAST(ANTLRParser.RULE); BlockAST blk = new BlockAST(ANTLRParser.BLOCK); AltAST alt = new AltAST(ANTLRParser.ALT); TerminalAST slit = new TerminalAST(new CommonToken(ANTLRParser.STRING_LITERAL, lit)); alt.AddChild(slit); blk.AddChild(alt); CommonToken idToken = new CommonToken(ANTLRParser.TOKEN_REF, rname); litRule.AddChild(new TerminalAST(idToken)); litRule.AddChild(blk); lexerRulesRoot.InsertChild(insertIndex, litRule); // lexerRulesRoot.getChildren().add(0, litRule); lexerRulesRoot.FreshenParentAndChildIndexes(); // reset indexes and set litRule parent // next literal will be added after the one just added insertIndex++; continueNextLit: ; } // TODO: take out after stable if slow lexerAST.SanityCheckParentAndChildIndexes(); combinedAST.SanityCheckParentAndChildIndexes(); // tool.log("grammar", combinedAST.toTokenString()); combinedGrammar.tool.Log("grammar", "after extract implicit lexer =" + combinedAST.ToStringTree()); combinedGrammar.tool.Log("grammar", "lexer =" + lexerAST.ToStringTree()); if (lexerRulesRoot.ChildCount == 0) { return(null); } return(lexerAST); }