/** Compute the lookahead set for NFA state s, calling FirstCore with
 *  chaseFollowTransitions set to true.
 *
 *  For non-lexer grammars, when the computed set contains EOR_TOKEN_TYPE
 *  the result is FOLLOW(s.enclosingRule) combined with the computed set,
 *  with EOR_TOKEN_TYPE removed. For lexer grammars, when the computed set
 *  contains EOT the result is the complete set (any char can follow).
 *
 *  @param s the NFA state to start from
 *  @return the lookahead set for s
 */
public LookaheadSet Look(NFAState s)
{
    if (NFAToDFAConverter.debug)
    {
        Console.Out.WriteLine("> LOOK(" + s + ")");
    }

    _lookBusy.Clear();
    LookaheadSet look = FirstCore(s, true);

    // FOLLOW makes no sense (at the moment!) for lexical rules.
    if (_grammar.type != GrammarType.Lexer && look.Member(Label.EOR_TOKEN_TYPE))
    {
        // Follow() hands back the very instance it keeps in its cache, so
        // merge into a private copy. OR-ing and removing in place on the
        // returned object would permanently alter the cached FOLLOW set
        // for every later lookup of the same rule. 'look' is left untouched
        // as well because it may be a cached FIRST set.
        LookaheadSet f = new LookaheadSet(Follow(s.enclosingRule));
        f.OrInPlace(look);
        f.Remove(Label.EOR_TOKEN_TYPE);
        look = f;
    }
    else if (_grammar.type == GrammarType.Lexer && look.Member(Label.EOT))
    {
        // if this has EOT, lookahead is all char (all char can follow rule)
        look = new LookaheadSet(IntervalSet.COMPLETE_SET);
    }

    if (NFAToDFAConverter.debug)
    {
        Console.Out.WriteLine("< LOOK(" + s + ")=" + look.ToString(_grammar));
    }
    return look;
}
/** Return a new LookaheadSet wrapping the result of And() applied to this
 *  set's token types and those of s. Neither operand is assigned to.
 */
public virtual LookaheadSet Intersection(LookaheadSet s)
{
    IIntSet common = tokenTypeSet.And(s.tokenTypeSet);
    return new LookaheadSet(common);
}
/** From an NFA state, s, find the set of all labels reachable from s.
 *  Used to compute follow sets for error recovery. Never computes
 *  a FOLLOW operation. FIRST stops at end of rules, returning EOR, unless
 *  invoked from another rule. I.e., the routine properly handles
 *
 *     a : b A ;
 *
 *  where b is nullable.
 *
 *  EOR_TOKEN_TYPE is recorded when the end of a rule is hit so that, at
 *  runtime (when these sets are used), we know to start walking up the
 *  follow chain to compute the real, correct follow set (as opposed to
 *  the FOLLOW, which is a superset).
 *
 *  This routine will only be used on parser and tree parser grammars.
 */
public LookaheadSet First(NFAState s)
{
    // Start every top-level computation with an empty busy set.
    _lookBusy.Clear();

    // false: do not chase transitions that leave the rule.
    return FirstCore(s, false);
}
/** Return the FOLLOW set of rule r. The set is computed once, by running
 *  FirstCore from the rule's stop state with follow transitions chased,
 *  and is then stored in _followCache. The stored instance itself is
 *  returned, not a copy.
 */
public LookaheadSet Follow(Rule r)
{
    LookaheadSet follow = _followCache.get(r);
    if (follow == null)
    {
        follow = FirstCore(r.stopState, true);
        _followCache[r] = follow;
    }
    return follow;
}
/** From list of lookahead sets (one per alt in decision), create
 *  an LL(1) DFA.  One edge per set.
 *
 *  s0-{alt1}->:o=>1
 *  | \
 *  |  -{alt2}->:o=>2
 *  |
 *  ...
 *
 *  Entry 0 of altLook is never read; alternatives are numbered from 1.
 */
public LL1DFA( int decisionNumber, NFAState decisionStartState, LookaheadSet[] altLook )
    : base( decisionNumber, decisionStartState )
{
    DFAState start = NewState();
    StartState = start;
    UnreachableAlts.Clear();

    int alt = 1;
    while ( alt < altLook.Length )
    {
        // one accept state per alternative, reached on that alt's lookahead
        DFAState accept = NewState();
        accept.IsAcceptState = true;
        SetAcceptState( alt, accept );
        accept.LookaheadDepth = 1;
        accept.CachedUniquelyPredicatedAlt = alt;

        Label edgeLabel = GetLabelForSet( altLook[alt].TokenTypeSet );
        start.AddTransition( accept, edgeLabel );

        alt++;
    }
}
/** Print "FIRST(name)=set" to the console for every rule that is not a
 *  syntactic predicate. NFAs are created first when NumberOfDecisions is
 *  zero.
 */
public virtual void ComputeRuleFIRSTSets()
{
    if (NumberOfDecisions == 0)
    {
        createNFAs();
    }

    for (Iterator it = getRules().iterator(); it.hasNext();)
    {
        Rule r = (Rule)it.next();
        if (r.isSynPred)
        {
            // syntactic predicate rules are skipped
            continue;
        }

        LookaheadSet s = FIRST(r);
        // The original statement's receiver was garbled into an
        // e-mail-like token; write to the console explicitly instead.
        System.Console.Out.WriteLine("FIRST(" + r.name + ")=" + s);
    }
}
/** From list of lookahead sets (one per alt in decision), create
 *  an LL(1) DFA.  One edge per set.
 *
 *  s0-{alt1}->:o=>1
 *  | \
 *  |  -{alt2}->:o=>2
 *  |
 *  ...
 *
 *  Entry 0 of altLook is never read; alternatives are numbered from 1.
 */
public LL1DFA( int decisionNumber, NFAState decisionStartState, LookaheadSet[] altLook )
{
    // Initialisation order is kept exactly as written: the start state is
    // created before the NFA/decision fields are filled in.
    DFAState start = NewState();
    startState = start;
    nfa = decisionStartState.nfa;
    NumberOfAlts = nfa.grammar.GetNumberOfAltsForDecisionNFA( decisionStartState );
    this.decisionNumber = decisionNumber;
    this.NFADecisionStartState = decisionStartState;
    InitAltRelatedInfo();
    UnreachableAlts = null;

    int alt = 1;
    while ( alt < altLook.Length )
    {
        // one accept state per alternative, reached on that alt's lookahead
        DFAState accept = NewState();
        accept.acceptState = true;
        SetAcceptState( alt, accept );
        accept.LookaheadDepth = 1;
        accept.cachedUniquelyPredicatedAlt = alt;

        Label edgeLabel = GetLabelForSet( altLook[alt].tokenTypeSet );
        start.AddTransition( accept, edgeLabel );

        alt++;
    }
}
/** Build a new LookaheadSet from And() of this set's token types with the
 *  token types of s.
 */
public virtual LookaheadSet Intersection( LookaheadSet s )
{
    return new LookaheadSet( tokenTypeSet.And( s.tokenTypeSet ) );
}
/** Return a new LookaheadSet wrapping the result of Subtract() applied to
 *  this set's token types and those of other.
 */
public virtual LookaheadSet Subtract(LookaheadSet other)
{
    LookaheadSet difference = new LookaheadSet(tokenTypeSet.Subtract(other.tokenTypeSet));
    return difference;
}
/** Return a new LookaheadSet wrapping the result of Or() applied to this
 *  set's token types and those of other.
 */
public virtual LookaheadSet Or(LookaheadSet other)
{
    LookaheadSet union = new LookaheadSet(this.tokenTypeSet.Or(other.tokenTypeSet));
    return union;
}
/** Add every token type of other to this set, modifying this set. */
public virtual void OrInPlace(LookaheadSet other)
{
    tokenTypeSet.AddAll(other.tokenTypeSet);
}
/** Copy constructor: run the parameterless constructor, then add all token
 *  types of other to the new set.
 */
public LookaheadSet(LookaheadSet other)
    : this()
{
    tokenTypeSet.AddAll(other.tokenTypeSet);
}
/** Try to build an LL(1) lookahead DFA for a decision.
 *
 *  The lookahead set of every alternative is computed and the sets are
 *  compared pairwise. When they are disjoint and no confounding predicate
 *  is detected, an LL1DFA with one edge per alternative is created,
 *  registered for the decision and returned. Otherwise, provided the user
 *  forced k=1, auto-backtracking is on and no confounding predicate was
 *  found, the edge sets are made disjoint and an LL1DFA is built from a
 *  map of disjoint set -> alternatives.
 *
 *  @param decision the decision number
 *  @return the DFA, or null when the decision belongs to a syntactic
 *          predicate rule whose name is not in synPredNamesUsedInDFA, or
 *          when neither construction applies
 */
public virtual DFA CreateLL_1_LookaheadDFA( int decision )
{
    Decision decisionInfo = GetDecision( decision );
    string ruleName = decisionInfo.startState.enclosingRule.Name;
    Rule rule = decisionInfo.startState.enclosingRule;
    NFAState decisionStartState = GetDecisionNFAStartState( decision );

    if ( composite.WatchNFAConversion )
    {
        Console.Out.WriteLine( "--------------------\nattempting LL(1) DFA (d="
                               + decisionStartState.DecisionNumber + ") for " +
                               decisionStartState.Description );
    }

    if ( rule.IsSynPred && !synPredNamesUsedInDFA.Contains( ruleName ) )
    {
        return null;
    }

    // compute lookahead for each alt (slot 0 stays unused)
    int numAlts = GetNumberOfAltsForDecisionNFA( decisionStartState );
    LookaheadSet[] altLook = new LookaheadSet[numAlts + 1];
    for ( int alt = 1; alt <= numAlts; alt++ )
    {
        int walkAlt = decisionStartState.TranslateDisplayAltToWalkAlt( alt );
        NFAState altLeftEdge = GetNFAStateForAltOfDecision( decisionStartState, walkAlt );
        NFAState altStartState = (NFAState)altLeftEdge.transition[0].Target;
        altLook[alt] = ll1Analyzer.Look( altStartState );
    }

    // compare alt i with alt j for disjointness; stop at the first collision
    bool decisionIsLL_1 = true;
    for ( int i = 1; decisionIsLL_1 && i <= numAlts; i++ )
    {
        for ( int j = i + 1; j <= numAlts; j++ )
        {
            LookaheadSet collision = altLook[i].Intersection( altLook[j] );
            if ( !collision.IsNil )
            {
                // collision means the decision is not LL(1)
                decisionIsLL_1 = false;
                break;
            }
        }
    }

    bool foundConfoundingPredicate =
        ll1Analyzer.DetectConfoundingPredicates( decisionStartState );

    if ( decisionIsLL_1 && !foundConfoundingPredicate )
    {
        // build an LL(1) optimized DFA with edge for each altLook[i]
        if ( NFAToDFAConverter.debug )
        {
            Console.Out.WriteLine( "decision " + decision + " is simple LL(1)" );
        }
        DFA simpleDFA = new LL1DFA( decision, decisionStartState, altLook );
        SetLookaheadDFA( decision, simpleDFA );
        UpdateLineColumnToLookaheadDFAMap( simpleDFA );
        return simpleDFA;
    }

    // not LL(1) but perhaps we can solve with simplified predicate search
    // even if k=1 set manually, only resolve here if we have preds; i.e.,
    // don't resolve etc...

    // Continue only if k=1 was forced manually, auto-backtracking is on, and
    // no predicate situation we can't handle was found (predicates in rules
    // invoked from this decision).
    bool canResolveHere = GetUserMaxLookahead( decision ) == 1 &&
                          GetAutoBacktrackMode( decision ) &&
                          !foundConfoundingPredicate;
    if ( !canResolveHere )
    {
        return null;
    }

    IList<IIntSet> edges = new List<IIntSet>();
    for ( int i = 1; i < altLook.Length; i++ )
    {
        edges.Add( (IntervalSet)altLook[i].TokenTypeSet );
    }

    IList<IIntSet> disjoint = MakeEdgeSetsDisjoint( edges );

    // map each disjoint edge set to every alt whose lookahead overlaps it
    MultiMap<IntervalSet, int> edgeMap = new MultiMap<IntervalSet, int>();
    for ( int k = 0; k < disjoint.Count; k++ )
    {
        IntervalSet ds = (IntervalSet)disjoint[k];
        for ( int alt = 1; alt < altLook.Length; alt++ )
        {
            bool overlaps = !ds.And( altLook[alt].TokenTypeSet ).IsNil;
            if ( overlaps )
            {
                edgeMap.Map( ds, alt );
            }
        }
    }

    // TODO: how do we know we covered stuff?

    // build an LL(1) optimized DFA with edge for each altLook[i]
    DFA lookaheadDFA = new LL1DFA( decision, decisionStartState, edgeMap );
    SetLookaheadDFA( decision, lookaheadDFA );

    // create map from line:col to decision DFA (for ANTLRWorks)
    UpdateLineColumnToLookaheadDFAMap( lookaheadDFA );

    return lookaheadDFA;
}
/** Copy constructor: start from the parameterless constructor and add all
 *  token types of other.
 */
public LookaheadSet( LookaheadSet other )
    : this()
{
    tokenTypeSet.AddAll( other.tokenTypeSet );
}
/** Build a new LookaheadSet from Subtract() of this set's token types and
 *  the token types of other.
 */
public virtual LookaheadSet Subtract( LookaheadSet other )
{
    LookaheadSet difference = new LookaheadSet( tokenTypeSet.Subtract( other.tokenTypeSet ) );
    return difference;
}
/** Add all token types of other to this set; this set is modified. */
public virtual void OrInPlace( LookaheadSet other )
{
    tokenTypeSet.AddAll( other.tokenTypeSet );
}
/** Build a new LookaheadSet from Or() of this set's token types and the
 *  token types of other.
 */
public virtual LookaheadSet Or( LookaheadSet other )
{
    LookaheadSet union = new LookaheadSet( this.tokenTypeSet.Or( other.tokenTypeSet ) );
    return union;
}
/** Recursive worker behind the lookahead computations: collect the labels
 *  reachable from NFA state s.
 *
 *  @param s the state to start from
 *  @param chaseFollowTransitions when false, an accept state ends the walk
 *         (EOR_TOKEN_TYPE, or the complete set for lexers) and FIRST sets
 *         of invoked rules are read from / stored into _firstCache; when
 *         true, neither of those happens
 *  @return the set found; an empty set when s is already in _lookBusy;
 *          null when s has no transition 0
 */
protected virtual LookaheadSet FirstCore(NFAState s, bool chaseFollowTransitions)
{
    if (!chaseFollowTransitions && s.IsAcceptState)
    {
        if (_grammar.type == GrammarType.Lexer)
        {
            // FOLLOW makes no sense (at the moment!) for lexical rules.
            // assume all char can follow
            return(new LookaheadSet(IntervalSet.COMPLETE_SET));
        }
        return(new LookaheadSet(Label.EOR_TOKEN_TYPE));
    }

    if (_lookBusy.Contains(s))
    {
        // state already visited during this computation:
        // return a copy of an empty set; we may modify set inline
        return(new LookaheadSet());
    }
    _lookBusy.Add(s);

    Transition transition0 = s.transition[0];
    if (transition0 == null)
    {
        // NOTE(review): callers in this method dereference the result of
        // the recursive call without a null check - confirm this cannot
        // be reached through recursion.
        return(null);
    }

    // a single atom or a set label ends the walk with exactly that label
    if (transition0.label.IsAtom)
    {
        int atom = transition0.label.Atom;
        return(new LookaheadSet(atom));
    }
    if (transition0.label.IsSet)
    {
        IIntSet sl = transition0.label.Set;
        return(new LookaheadSet(sl));
    }

    // compute FIRST of transition 0
    LookaheadSet tset = null;
    // if transition 0 is a rule call and we don't want FOLLOW, check cache
    if (!chaseFollowTransitions && transition0 is RuleClosureTransition)
    {
        LookaheadSet prev = _firstCache.get((NFAState)transition0.target);
        if (prev != null)
        {
            // work on a copy of the cached set
            tset = new LookaheadSet(prev);
        }
    }

    // if not in cache, must compute
    if (tset == null)
    {
        tset = FirstCore((NFAState)transition0.target, chaseFollowTransitions);
        // save FIRST cache for transition 0 if rule call
        if (!chaseFollowTransitions && transition0 is RuleClosureTransition)
        {
            _firstCache[(NFAState)transition0.target] = tset;
        }
    }

    // did we fall off the end?
    if (_grammar.type != GrammarType.Lexer && tset.Member(Label.EOR_TOKEN_TYPE))
    {
        if (transition0 is RuleClosureTransition)
        {
            // we called a rule that found the end of the rule.
            // That means the rule is nullable and we need to
            // keep looking at what follows the rule ref.  E.g.,
            // a : b A ; where b is nullable means that LOOK(a)
            // should include A.
            RuleClosureTransition ruleInvocationTrans = (RuleClosureTransition)transition0;
            // remove the EOR and get what follows
            NFAState following = (NFAState)ruleInvocationTrans.followState;
            LookaheadSet fset = FirstCore(following, chaseFollowTransitions);
            // tset may be the instance stored in _firstCache, so merge it
            // into fset rather than modifying tset
            fset.OrInPlace(tset);
            fset.Remove(Label.EOR_TOKEN_TYPE);
            tset = fset;
        }
    }

    // a second transition, when present, contributes its labels too
    Transition transition1 = s.transition[1];
    if (transition1 != null)
    {
        LookaheadSet tset1 = FirstCore((NFAState)transition1.target, chaseFollowTransitions);
        // same as above: merge into the freshly computed set, not into tset
        tset1.OrInPlace(tset);
        tset = tset1;
    }

    return(tset);
}
/** Compute the lookahead set for NFA state s, calling FirstCore with
 *  chaseFollowTransitions set to true.
 *
 *  For non-lexer grammars, when the computed set contains EOR_TOKEN_TYPE
 *  the result is FOLLOW(s.enclosingRule) combined with the computed set,
 *  with EOR_TOKEN_TYPE removed. For lexer grammars, when the computed set
 *  contains EOT the result is the complete set (any char can follow).
 *
 *  @param s the NFA state to start from
 *  @return the lookahead set for s
 */
public LookaheadSet Look( NFAState s )
{
    if ( NFAToDFAConverter.debug )
    {
        Console.Out.WriteLine( "> LOOK(" + s + ")" );
    }

    _lookBusy.Clear();
    LookaheadSet look = FirstCore( s, true );

    // FOLLOW makes no sense (at the moment!) for lexical rules.
    if ( _grammar.type != GrammarType.Lexer && look.Member( Label.EOR_TOKEN_TYPE ) )
    {
        // Follow() hands back the very instance it keeps in its cache, so
        // merge into a private copy. OR-ing and removing in place on the
        // returned object would permanently alter the cached FOLLOW set
        // for every later lookup of the same rule. 'look' is left untouched
        // as well because it may be a cached FIRST set.
        LookaheadSet f = new LookaheadSet( Follow( s.enclosingRule ) );
        f.OrInPlace( look );
        f.Remove( Label.EOR_TOKEN_TYPE );
        look = f;
    }
    else if ( _grammar.type == GrammarType.Lexer && look.Member( Label.EOT ) )
    {
        // if this has EOT, lookahead is all char (all char can follow rule)
        look = new LookaheadSet( IntervalSet.COMPLETE_SET );
    }

    if ( NFAToDFAConverter.debug )
    {
        Console.Out.WriteLine( "< LOOK(" + s + ")=" + look.ToString( _grammar ) );
    }
    return look;
}
/** Recursive worker behind the lookahead computations: collect the labels
 *  reachable from NFA state s.
 *
 *  @param s the state to start from
 *  @param chaseFollowTransitions when false, an accept state ends the walk
 *         (EOR_TOKEN_TYPE, or the complete set for lexers) and FIRST sets
 *         of invoked rules are read from / stored into _firstCache; when
 *         true, neither of those happens
 *  @return the set found; an empty set when s is already in _lookBusy;
 *          null when s has no transition 0
 */
protected virtual LookaheadSet FirstCore( NFAState s, bool chaseFollowTransitions )
{
    if ( !chaseFollowTransitions && s.IsAcceptState )
    {
        if ( _grammar.type == GrammarType.Lexer )
        {
            // FOLLOW makes no sense (at the moment!) for lexical rules.
            // assume all char can follow
            return new LookaheadSet( IntervalSet.COMPLETE_SET );
        }
        return new LookaheadSet( Label.EOR_TOKEN_TYPE );
    }

    if ( _lookBusy.Contains( s ) )
    {
        // state already visited during this computation:
        // return a copy of an empty set; we may modify set inline
        return new LookaheadSet();
    }
    _lookBusy.Add( s );

    Transition transition0 = s.transition[0];
    if ( transition0 == null )
    {
        // NOTE(review): callers in this method dereference the result of
        // the recursive call without a null check - confirm this cannot
        // be reached through recursion.
        return null;
    }

    // a single atom or a set label ends the walk with exactly that label
    if ( transition0.label.IsAtom )
    {
        int atom = transition0.label.Atom;
        return new LookaheadSet( atom );
    }
    if ( transition0.label.IsSet )
    {
        IIntSet sl = transition0.label.Set;
        return new LookaheadSet( sl );
    }

    // compute FIRST of transition 0
    LookaheadSet tset = null;
    // if transition 0 is a rule call and we don't want FOLLOW, check cache
    if ( !chaseFollowTransitions && transition0 is RuleClosureTransition )
    {
        LookaheadSet prev = _firstCache.get( (NFAState)transition0.target );
        if ( prev != null )
        {
            // work on a copy of the cached set
            tset = new LookaheadSet( prev );
        }
    }

    // if not in cache, must compute
    if ( tset == null )
    {
        tset = FirstCore( (NFAState)transition0.target, chaseFollowTransitions );
        // save FIRST cache for transition 0 if rule call
        if ( !chaseFollowTransitions && transition0 is RuleClosureTransition )
        {
            _firstCache[(NFAState)transition0.target] = tset;
        }
    }

    // did we fall off the end?
    if ( _grammar.type != GrammarType.Lexer && tset.Member( Label.EOR_TOKEN_TYPE ) )
    {
        if ( transition0 is RuleClosureTransition )
        {
            // we called a rule that found the end of the rule.
            // That means the rule is nullable and we need to
            // keep looking at what follows the rule ref.  E.g.,
            // a : b A ; where b is nullable means that LOOK(a)
            // should include A.
            RuleClosureTransition ruleInvocationTrans = (RuleClosureTransition)transition0;
            // remove the EOR and get what follows
            NFAState following = (NFAState)ruleInvocationTrans.followState;
            LookaheadSet fset = FirstCore( following, chaseFollowTransitions );
            // tset may be the instance stored in _firstCache, so merge it
            // into fset rather than modifying tset
            fset.OrInPlace( tset );
            fset.Remove( Label.EOR_TOKEN_TYPE );
            tset = fset;
        }
    }

    // a second transition, when present, contributes its labels too
    Transition transition1 = s.transition[1];
    if ( transition1 != null )
    {
        LookaheadSet tset1 = FirstCore( (NFAState)transition1.target, chaseFollowTransitions );
        // same as above: merge into the freshly computed set, not into tset
        tset1.OrInPlace( tset );
        tset = tset1;
    }

    return tset;
}
/** Error recovery in ANTLR recognizers.
 *
 *  Based upon original ideas:
 *
 *  Algorithms + Data Structures = Programs by Niklaus Wirth
 *
 *  and
 *
 *  A note on error recovery in recursive descent parsers:
 *  http://portal.acm.org/citation.cfm?id=947902.947905
 *
 *  Later, Josef Grosch had some good ideas:
 *  Efficient and Comfortable Error Recovery in Recursive Descent Parsers:
 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
 *
 *  Like Grosch I implemented local FOLLOW sets that are combined at run-time
 *  upon error to avoid parsing overhead.
 *
 *  The set computed for the state following referencedElementNode is
 *  packed into 64-bit words, converted to target-specific strings and
 *  added as a "bitsets" attribute to the recognizer, output-file and
 *  header-file templates.
 *
 *  Throws ArgumentOutOfRangeException when elementIndex is negative.
 */
public virtual void GenerateLocalFollow( GrammarAST referencedElementNode,
                                         string referencedElementName,
                                         string enclosingRuleName,
                                         int elementIndex )
{
    if ( elementIndex < 0 )
    {
        throw new ArgumentOutOfRangeException( "elementIndex", "elementIndex cannot be less than zero." );
    }

    // compute follow for this element and, as side-effect, track
    // the rule LOOK sensitivity.
    NFAState followingNFAState = referencedElementNode.followingNFAState;
    LookaheadSet follow = ( followingNFAState != null )
        ? grammar.First( followingNFAState )
        : null;

    if ( follow == null )
    {
        ErrorManager.InternalError( "no follow state or cannot compute follow" );
        follow = new LookaheadSet();
    }

    if ( follow.Member( Label.EOF ) )
    {
        // TODO: can we just remove?  Seems needed here:
        // compilation_unit : global_statement* EOF
        // Actually i guess we resync to EOF regardless
        follow.Remove( Label.EOF );
    }

    ulong[] words;
    IList tokenTypeList;
    if ( follow.tokenTypeSet != null )
    {
        BitSet bits = BitSet.Of( follow.tokenTypeSet );
        words = bits.ToPackedArray();
        tokenTypeList = follow.tokenTypeSet.ToList();
    }
    else
    {
        // no set at all: emit a single zero word and an empty type list
        words = new ulong[1];
        tokenTypeList = new List<object>();
    }

    // use the target to convert to hex strings (typically)
    string[] wordStrings = new string[words.Length];
    for ( int index = 0; index < wordStrings.Length; index++ )
    {
        wordStrings[index] = target.GetTarget64BitStringFromValue( words[index] );
    }

    // the same aggregate attribute goes to all three templates, in this order
    const string bitsetAttribute = "bitsets.{name,inName,bits,tokenTypes,tokenIndex}";
    recognizerST.SetAttribute( bitsetAttribute,
                               referencedElementName, enclosingRuleName,
                               wordStrings, tokenTypeList, elementIndex );
    outputFileST.SetAttribute( bitsetAttribute,
                               referencedElementName, enclosingRuleName,
                               wordStrings, tokenTypeList, elementIndex );
    headerFileST.SetAttribute( bitsetAttribute,
                               referencedElementName, enclosingRuleName,
                               wordStrings, tokenTypeList, elementIndex );
}