/**
 * Wraps cluster A in the NFA structure for the closure (A)*:
 *
 *     |---|
 *     v   |
 *  o->o-A-o--o   (loopback: transition 1 exits, transition 2 loops to A's left)
 *  |         ^
 *  o---------|   (bypass state's second transition: the epsilon-only path)
 *
 * Three states are always created (bypass decision, epsilon alt, block
 * end). A fourth is spliced in after A.Right when A.Right is already marked
 * as the right edge of another block, so that the inner block's end state
 * is not reused as this loop's loopback state.
 *
 * A itself is modified: A.Right may be replaced, A.Left becomes a
 * BLOCK_START whose end-of-block marker is the loopback state, and A.Right
 * becomes the LOOPBACK state.
 *
 * The bypass sits outside the loop because (A|B)* is not the same thing
 * as (A|B|)+.
 *
 * @param A cluster to be repeated zero or more times
 * @return cluster spanning the bypass decision state to the new block end state
 */
public virtual StateCluster BuildAstar( StateCluster A )
{
    // Creation order of the states is kept exactly as before.
    NFAState bypass = NewState();
    bypass.Description = "enter loop path of ()* block";
    NFAState epsilonAlt = NewState();
    epsilonAlt.Description = "epsilon path of ()* block";
    NFAState blockEnd = NewState();
    blockEnd.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

    // Nested loop: A.Right is the end of an inner block, so hang a fresh
    // tail node off it and use that as the loopback instead.
    bool rightIsInnerBlockEnd = A.Right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK;
    if ( rightIsInnerBlockEnd )
    {
        NFAState tail = NewState();
        TransitionBetweenStates( A.Right, tail, Label.EPSILON );
        A.Right = tail;
    }

    NFAState loopEntry = A.Left;
    NFAState loopback = A.Right;
    loopback.Description = "()* loopback";

    // bypass: transition 1 enters the block, transition 2 takes the epsilon path
    TransitionBetweenStates( bypass, loopEntry, Label.EPSILON );
    TransitionBetweenStates( bypass, epsilonAlt, Label.EPSILON );
    TransitionBetweenStates( epsilonAlt, blockEnd, Label.EPSILON );

    // loopback: transition 1 exits, transition 2 goes around again
    TransitionBetweenStates( loopback, blockEnd, Label.EPSILON );
    TransitionBetweenStates( loopback, loopEntry, Label.EPSILON );

    bypass.decisionStateType = NFAState.BYPASS;
    loopEntry.decisionStateType = NFAState.BLOCK_START;
    loopback.decisionStateType = NFAState.LOOPBACK;

    // end-of-block markers
    loopEntry.endOfBlockStateNumber = loopback.StateNumber;
    bypass.endOfBlockStateNumber = blockEnd.StateNumber;

    return new StateCluster( bypass, blockEnd );
}
/**
 * Registers the left edge of a plain EBNF block as a decision point when
 * the grammar reports more than one alternative for it; otherwise the
 * cluster is passed through untouched.
 *
 * @param blk AST node of the block, used for the description and as the decision's AST
 * @param g   cluster built for the block
 * @return the same cluster g
 */
private StateCluster HandleEbnfBlock(GrammarAST blk, StateCluster g)
{
    bool needsDecision = grammar.GetNumberOfAltsForDecisionNFA(g.Left) > 1;
    if (!needsDecision)
    {
        return g;
    }

    g.Left.Description = grammar.GrammarTreeToString(blk, false);
    g.Left.SetDecisionASTNode(blk);

    int decision = grammar.AssignDecisionNumber(g.Left);
    grammar.SetDecisionNFA(decision, g.Left);
    grammar.SetDecisionBlockAST(decision, blk);
    return g;
}
/**
 * Wraps cluster A in the NFA structure for the positive closure (A)+:
 *
 *     |---|      (transition 2 from A.Right loops back to A.Left)
 *     v   |      (transition 1 from A.Right is the follow/exit edge)
 *  o->o-A-o->o
 *
 * A new entry state and a new block end state are created. If A.Right is
 * already marked as the right edge of another block, an extra tail state
 * is appended and becomes A.Right, so the inner block's end state is not
 * reused as this loop's loopback.
 *
 * A is modified: A.Right becomes the LOOPBACK state and A.Left becomes a
 * BLOCK_START whose end-of-block marker is the loopback state.
 *
 * @param A cluster to be repeated one or more times
 * @return cluster spanning the new entry state to the new block end state
 */
public virtual StateCluster BuildAplus( StateCluster A )
{
    NFAState entry = NewState();
    NFAState blockEnd = NewState();
    blockEnd.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

    // nested loop: give this loop its own tail node to serve as loopback
    bool rightIsInnerBlockEnd = A.Right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK;
    if ( rightIsInnerBlockEnd )
    {
        NFAState tail = NewState();
        TransitionBetweenStates( A.Right, tail, Label.EPSILON );
        A.Right = tail;
    }

    NFAState loopEntry = A.Left;
    NFAState loopback = A.Right;

    // order matters: exit edge first (transition 1), loop edge second (transition 2)
    TransitionBetweenStates( loopback, blockEnd, Label.EPSILON );
    TransitionBetweenStates( loopback, loopEntry, Label.EPSILON );
    TransitionBetweenStates( entry, loopEntry, Label.EPSILON );

    loopback.decisionStateType = NFAState.LOOPBACK;
    loopEntry.decisionStateType = NFAState.BLOCK_START;

    // end-of-block marker
    loopEntry.endOfBlockStateNumber = loopback.StateNumber;

    return new StateCluster( entry, blockEnd );
}
/**
 * Builds a two-state cluster whose single edge is labelled with the
 * interval set made from the grammar's token types (chars or tokens).
 *
 * @param associatedAST AST node recorded on both new states
 * @return cluster from the new source state to the new target state
 */
public virtual StateCluster BuildWildcard( GrammarAST associatedAST )
{
    NFAState source = NewState();
    NFAState target = NewState();
    source.associatedASTNode = associatedAST;
    target.associatedASTNode = associatedAST;

    Label anyLabel = new Label(IntervalSet.Of( _nfa.Grammar.TokenTypes ));
    source.AddTransition( new Transition( anyLabel, target ) );

    return new StateCluster( source, target );
}
/**
 * Complements a block that consists of simple alts only, i.e. one that
 * can be collapsed into a set (the kind of result BuildSet() produces).
 *
 * The block's left state is handed to GetCollapsedBlockAsSet(). When that
 * yields a set, the set is complemented through the grammar and written
 * into the label found on the first transition of the state reached by
 * the left state's first transition. When no set is available the block
 * is left untouched.
 *
 * @param blk block cluster to complement in place
 * @return the same cluster blk
 */
public virtual StateCluster BuildAlternativeBlockComplement( StateCluster blk )
{
    State s0 = blk.Left;
    IIntSet set = GetCollapsedBlockAsSet( s0 );
    if ( set != null )
    {
        // a set is available, so the structure is known: o->o-set->o
        set = _nfa.Grammar.Complement( set );
        Label label = s0.GetTransition( 0 ).Target.GetTransition( 0 ).Label;
        label.Set = set;
    }
    return blk;
}
/**
 * Builds a two-state cluster joined by one edge labelled with the
 * interval set IntervalSet.Of(a, b).
 *
 * @param a first bound passed to IntervalSet.Of
 * @param b second bound passed to IntervalSet.Of
 * @return cluster from the new source state to the new target state
 */
public virtual StateCluster BuildRange( int a, int b )
{
    NFAState source = NewState();
    NFAState target = NewState();

    Label rangeLabel = new Label( IntervalSet.Of( a, b ) );
    source.AddTransition( new Transition( rangeLabel, target ) );

    return new StateCluster( source, target );
}
/**
 * Builds a two-state cluster whose single edge carries a PredicateLabel
 * wrapping the given predicate AST node.
 *
 * The grammar's numberOfSemanticPredicates counter is incremented unless
 * the predicate text starts with Grammar.SynpredRulePrefix (compared
 * ordinally, ignoring case), so syntactic predicates are not counted.
 *
 * @param pred AST node of the predicate
 * @return cluster from the new source state to the new target state
 */
public virtual StateCluster BuildSemanticPredicate( GrammarAST pred )
{
    bool isSyntacticPredicate =
        pred.Text.StartsWith( Grammar.SynpredRulePrefix, StringComparison.OrdinalIgnoreCase );
    if ( !isSyntacticPredicate )
    {
        _nfa.Grammar.numberOfSemanticPredicates++;
    }

    NFAState source = NewState();
    NFAState target = NewState();
    source.AddTransition( new Transition( new PredicateLabel( pred ), target ) );

    return new StateCluster( source, target );
}
/**
 * Adds to elements every member of the set found on the label of the
 * first transition leaving g's left state.
 *
 * @param elements set being accumulated; modified in place
 * @param g        cluster whose left state's first transition carries the set label
 */
private void HandleSetElementSet(IIntSet elements, StateCluster g)
{
    elements.AddAll(g.Left.GetTransition(0).Label.Set);
}
/**
 * Finishes a tree pattern: appends an atom for Label.UP (associated with
 * the last element's AST node) to g, and records the left state of the
 * DOWN cluster on the tree root node.
 *
 * @param start            tree root AST node; its NFATreeDownState is set to down.Left
 * @param g                cluster built so far for the tree
 * @param lastElementStart AST node associated with the UP atom
 * @param down             cluster previously built for the DOWN atom
 * @return g followed by the UP atom
 */
private StateCluster HandleTreeAfterLastElement(GrammarAST start, StateCluster g, GrammarAST lastElementStart, StateCluster down)
{
    StateCluster upAtom = factory.BuildAtom(Label.UP, lastElementStart);
    StateCluster closedTree = factory.BuildAB(g, upAtom);

    // remember where the DOWN edge starts, for LOOK computation later
    start.NFATreeDownState = down.Left;

    return closedTree;
}
/**
 * Records the right state of g as the NFA state that follows the given
 * "not" AST node.
 *
 * @param notNode AST node whose followingNFAState is assigned
 * @param g       cluster built for the node
 */
private void HandleNotAtomEnd(GrammarAST notNode, StateCluster g)
{
    NFAState following = g.Right;
    notNode.followingNFAState = following;
}
/**
 * Hooks the cluster built for a rule body into the rule's start and stop
 * states.
 *
 * If the block came back as a set (blockStart.SetValue is non-null) it is
 * first wrapped as a single-alt block. Nothing further happens unless the
 * current rule is a parser rule or the grammar is a lexer. In that case
 * the rule's start state gets an epsilon edge to the block, the block's
 * left state is registered as a decision when it has more than one alt,
 * and the block's right state gets an epsilon edge to the rule's stop
 * state.
 *
 * @param start      AST node used for the decision description
 * @param g          cluster built for the rule's block
 * @param blockStart AST node of the block; used as the decision's AST node
 * @param id         AST node recorded on the rule's start state
 */
private void HandleRule(GrammarAST start, StateCluster g, GrammarAST blockStart, GrammarAST id)
{
    if (blockStart.SetValue != null)
    {
        // a set rather than a BLOCK: turn it into a single ALT block
        g = factory.BuildAlternativeBlockFromSet(g);
    }

    bool isParserRule = Rule.GetRuleType(currentRuleName) == RuleType.Parser;
    if (!isParserRule && grammar.type != GrammarType.Lexer)
    {
        return;
    }

    // attach the rule's start node to the block
    Rule rule = grammar.GetLocallyDefinedRule(currentRuleName);
    NFAState ruleStart = rule.StartState;
    ruleStart.associatedASTNode = id;
    ruleStart.AddTransition(new Transition(Label.EPSILON, g.Left));

    // more than one alt means the block entry is a decision
    if (grammar.GetNumberOfAltsForDecisionNFA(g.Left) > 1)
    {
        g.Left.Description = grammar.GrammarTreeToString(start, false);
        g.Left.SetDecisionASTNode(blockStart);
        int decision = grammar.AssignDecisionNumber(g.Left);
        grammar.SetDecisionNFA(decision, g.Left);
        grammar.SetDecisionBlockAST(decision, blockStart);
    }

    // hook the block's end to the end-of-rule node
    NFAState ruleStop = rule.StopState;
    g.Right.AddTransition(new Transition(Label.EPSILON, ruleStop));
}
/** Returns the given cluster unchanged; no states or transitions are added and no decision is registered here. */
private StateCluster HandleEbnfSet(StateCluster g) { return g; }
/**
 * Builds the (...)+ structure around a block and registers its loopback
 * state as the decision point.
 *
 * No decision is created on the left edge; the loop-end decision is
 * reused. The block entry state (the target of the new cluster's first
 * transition) is given the same decision number.
 *
 * @param start AST node used for the loopback description and as the entry state's decision AST
 * @param blk   AST node of the block; its last child becomes the loopback's decision AST
 * @param bg    cluster built for the block
 * @return the cluster produced by BuildAplus
 */
private StateCluster HandleEbnfPositiveClosureBlock(GrammarAST start, GrammarAST blk, StateCluster bg)
{
    GrammarAST endOfBlock = blk.LastChild;
    if (blk.SetValue != null)
    {
        bg = factory.BuildAlternativeBlockFromSet(bg);
    }

    StateCluster loop = factory.BuildAplus(bg);

    // bg.Right is read only after BuildAplus, which may have replaced it
    NFAState loopback = bg.Right;
    loopback.Description = "()+ loopback of " + grammar.GrammarTreeToString(start, false);
    int decision = grammar.AssignDecisionNumber(loopback);
    grammar.SetDecisionNFA(decision, loopback);
    grammar.SetDecisionBlockAST(decision, blk);
    loopback.SetDecisionASTNode(endOfBlock);

    // block entry state shares the decision, for interpreting the grammar
    NFAState blockEntry = (NFAState)loop.Left.GetTransition(0).Target;
    blockEntry.SetDecisionASTNode(start);
    blockEntry.DecisionNumber = decision;

    return loop;
}
/**
 * Builds the (...)? structure around a block and registers the left
 * state of the result as a decision point. A decision is always assigned
 * here, even when the block has just one alt.
 *
 * @param start AST node used for the description and as the decision's AST node
 * @param blk   AST node of the block, recorded as the decision's block AST
 * @param bg    cluster built for the block
 * @return the cluster produced by BuildAoptional
 */
private StateCluster HandleEbnfOptionalBlock(GrammarAST start, GrammarAST blk, StateCluster bg)
{
    // a SET rather than a BLOCK is first wrapped as a single ALT block
    StateCluster block = bg;
    if (blk.SetValue != null)
    {
        block = factory.BuildAlternativeBlockFromSet(bg);
    }

    StateCluster optional = factory.BuildAoptional(block);
    optional.Left.Description = grammar.GrammarTreeToString(start, false);

    int decision = grammar.AssignDecisionNumber(optional.Left);
    grammar.SetDecisionNFA(decision, optional.Left);
    grammar.SetDecisionBlockAST(decision, blk);
    optional.Left.SetDecisionASTNode(start);

    return optional;
}
/**
 * Builds the graph o-A->o for a single label: two new states joined by
 * one transition on that label.
 *
 * @param label         label value for the connecting transition
 * @param associatedAST AST node recorded on both new states
 * @return cluster from the new source state to the new target state
 */
public virtual StateCluster BuildAtom( int label, GrammarAST associatedAST )
{
    NFAState source = NewState();
    NFAState target = NewState();

    source.associatedASTNode = associatedAST;
    target.associatedASTNode = associatedAST;
    TransitionBetweenStates( source, target, label );

    return new StateCluster( source, target );
}
/**
 * Appends one more tree element to the cluster built so far.
 *
 * @param g       cluster built so far
 * @param element cluster of the element to append
 * @return the result of factory.BuildAB(g, element)
 */
private StateCluster HandleTreeElement(StateCluster g, StateCluster element)
{
    StateCluster extended = factory.BuildAB(g, element);
    return extended;
}
/**
 * Builds the cluster o-e->o for an empty alternative: two new states
 * joined by a single epsilon transition.
 *
 * @return cluster from the new source state to the new target state
 */
public virtual StateCluster BuildEpsilon()
{
    NFAState source = NewState();
    NFAState target = NewState();
    TransitionBetweenStates( source, target, Label.EPSILON );
    return new StateCluster( source, target );
}
/**
 * Handles the first (root) element of a tree pattern: builds an atom for
 * Label.DOWN and appends it after the element's cluster.
 *
 * @param firstElementStart AST node associated with the DOWN atom
 * @param element           cluster built for the first element
 * @param down              receives the cluster built for the DOWN atom
 * @return the element cluster followed by the DOWN atom
 */
private StateCluster HandleTreeFirstElement(GrammarAST firstElementStart, StateCluster element, out StateCluster down)
{
    StateCluster downAtom = factory.BuildAtom(Label.DOWN, firstElementStart);
    down = downAtom;

    // TODO set following states for imaginary nodes?
    //el.followingNFAState = down.Right;
    StateCluster rootThenDown = factory.BuildAB(element, downAtom);
    return rootThenDown;
}
/**
 * Builds the cluster for a reference to rule r:
 *
 *  o-e->(r)  o
 *
 * where (r) is the start state of rule r. The trailing state is not
 * linked directly from the rule-ref state; it is reachable only as the
 * follow state handed to the RuleClosureTransition placed on the left
 * state.
 *
 * If rule r is just a list of tokens its block would be a set on an edge
 * and could be inlined instead of referenced; that is not done here.
 *
 * TODO add to codegen: collapse alt blks that are sets into single matchSet
 *
 * @param refDef    rule being referenced
 * @param ruleStart start state of the referenced rule
 * @return cluster from the rule-ref state to the follow state
 */
public virtual StateCluster BuildRuleRef( Rule refDef, NFAState ruleStart )
{
    NFAState refState = NewState();
    NFAState followState = NewState();

    refState.AddTransition( new RuleClosureTransition( refDef, ruleStart, followState ) );

    return new StateCluster( refState, followState );
}
/**
 * Builds the NFA for an alternative block A|B|..|Z:
 *
 *  o->o-A->o->o   (the last state is the shared block end state)
 *  |          ^
 *  o->o-B->o--|
 *  |          |
 *  ...        |
 *  |          |
 *  o->o-Z->o--|
 *
 * Each alternative gets its own begin state connected to it by epsilon,
 * each alternative's right side gets an epsilon edge to the shared block
 * end state, and each begin state after the first is linked from the
 * previous begin state. Begin states are described as "alt N of ()"
 * with N counting from 1.
 *
 * Special cases: a null or empty collection yields null; a single
 * alternative gets only a new start state in front of it and no block
 * end state.
 *
 * @param alternativeStateClusters clusters of the alternatives, in order
 * @return cluster for the whole block, or null when there are no alternatives
 */
public virtual StateCluster BuildAlternativeBlock( ICollection<StateCluster> alternativeStateClusters )
{
    if ( alternativeStateClusters == null || alternativeStateClusters.Count == 0 )
    {
        return null;
    }

    if ( alternativeStateClusters.Count == 1 )
    {
        // single alt, no decision; it still gets a start state of its own
        StateCluster onlyAlt = alternativeStateClusters.First();
        NFAState startOfAlt = NewState();
        TransitionBetweenStates( startOfAlt, onlyAlt.Left, Label.EPSILON );
        return new StateCluster( startOfAlt, onlyAlt.Right );
    }

    // Even if the block could be collapsed for lookahead purposes, the
    // alts of this subrule still have to be predicted (actions etc.), so
    // a full decision structure is built.
    NFAState blockEnd = NewState();
    blockEnd.Description = "end block";

    NFAState firstAltStart = null;
    NFAState previousAltStart = null; // previous begin state, to chain the next one from
    int altNumber = 1;
    foreach ( StateCluster alt in alternativeStateClusters )
    {
        NFAState altStart = NewState();
        altStart.Description = "alt " + altNumber + " of ()";
        TransitionBetweenStates( altStart, alt.Left, Label.EPSILON );
        TransitionBetweenStates( alt.Right, blockEnd, Label.EPSILON );

        if ( firstAltStart != null )
        {
            // not the first alternative: chain from the previous begin state
            TransitionBetweenStates( previousAltStart, altStart, Label.EPSILON );
        }
        else
        {
            // first alternative: this is the extreme left node of the block
            firstAltStart = altStart;
        }

        previousAltStart = altStart;
        altNumber++;
    }

    // first alt's begin state on the left, block end on the right
    StateCluster block = new StateCluster( firstAltStart, blockEnd );
    firstAltStart.decisionStateType = NFAState.BLOCK_START;

    // end-of-block marker
    firstAltStart.endOfBlockStateNumber = blockEnd.StateNumber;

    return block;
}
/**
 * Builds a two-state cluster whose single edge is labelled with the
 * given set.
 *
 * @param set           set placed on the edge's label
 * @param associatedAST AST node recorded on both new states
 * @return cluster from the new source state to the new target state
 */
public virtual StateCluster BuildSet( IIntSet set, GrammarAST associatedAST )
{
    NFAState source = NewState();
    NFAState target = NewState();
    source.associatedASTNode = associatedAST;
    target.associatedASTNode = associatedAST;

    source.AddTransition( new Transition( new Label( set ), target ) );

    return new StateCluster( source, target );
}
/**
 * Wraps a set cluster, e.g. one built from ('a'|'b'), so that it looks
 * like a single-alternative block: a new start state is placed in front
 * of it and linked to the set's left state by epsilon.
 *
 * @param set cluster built for the set; may be null
 * @return cluster from the new start state to set.Right, or null when set is null
 */
public virtual StateCluster BuildAlternativeBlockFromSet( StateCluster set )
{
    if ( set != null )
    {
        // single alt, no decision; the start state is always required
        NFAState altStart = NewState();
        TransitionBetweenStates( altStart, set.Left, Label.EPSILON );
        return new StateCluster( altStart, set.Right );
    }

    return null;
}
/**
 * Optimizes an alternative (a list of grammar elements) by walking its
 * chain of states from alt.Left to alt.Right and dropping epsilon
 * transitions that merely link up simple elements.
 *
 * While walking:
 *  - a state with an end-of-block marker is a block element; the walk
 *    jumps straight to its end-of-block state;
 *  - a rule reference is stepped over via its follow state;
 *  - a state whose only transition is a non-action epsilon has that
 *    transition replaced by the epsilon target's first transition,
 *    provided the target is not itself a block start and has a first
 *    transition (so epsilons pointing at the last node are kept).
 *
 * @param alt cluster of the alternative; its states are modified in place
 */
public virtual void OptimizeAlternative( StateCluster alt )
{
    NFAState current = alt.Left;
    while ( current != alt.Right )
    {
        // block element: skip to its end-of-block state
        if ( current.endOfBlockStateNumber != State.INVALID_STATE_NUMBER )
        {
            current = _nfa.GetState( current.endOfBlockStateNumber );
            continue;
        }

        Transition first = current.transition[0];

        // rule reference: continue from the state following the reference
        RuleClosureTransition ruleRef = first as RuleClosureTransition;
        if ( ruleRef != null )
        {
            current = ruleRef.FollowState;
            continue;
        }

        bool isLinkingEpsilon =
            first.Label.IsEpsilon && !first.Label.IsAction && current.NumberOfTransitions == 1;
        if ( isLinkingEpsilon )
        {
            // Point past the epsilon unless it leads to a block/loop or to
            // a node without an outgoing transition. Action edges were
            // already excluded above.
            NFAState epsilonTarget = (NFAState)first.Target;
            bool targetIsPlain = epsilonTarget.endOfBlockStateNumber == State.INVALID_STATE_NUMBER;
            if ( targetIsPlain && epsilonTarget.transition[0] != null )
            {
                current.SetTransition0( epsilonTarget.transition[0] );
            }
        }

        // advance along the transition read at the top of this iteration
        current = (NFAState)first.Target;
    }
}
/**
 * Builds the NFA for the optional (A)?. Either
 *
 *  o--A->o
 *  |     ^
 *  o---->|
 *
 * when the grammar reports exactly one alt for A's left state, or, when
 * A is already a decision block, the same block with an empty alt
 * chained after its last real alt.
 *
 * In the single-alt case A.Left is reused as the decision state and a new
 * block end state is created. In the block case A itself is returned. In
 * both cases the left state of the result is marked OPTIONAL_BLOCK_START
 * and the right state RIGHT_EDGE_OF_BLOCK.
 *
 * @param A cluster to make optional; modified in place
 * @return cluster for the optional construct
 */
public virtual StateCluster BuildAoptional( StateCluster A )
{
    StateCluster optional;
    int altCount = _nfa.Grammar.GetNumberOfAltsForDecisionNFA( A.Left );

    if ( altCount != 1 )
    {
        // already a decision block: add an empty alt after the last real one
        NFAState lastRealAlt = _nfa.Grammar.GetNFAStateForAltOfDecision( A.Left, altCount );
        NFAState emptyAlt = NewState();
        emptyAlt.Description = "epsilon path of ()? block";
        TransitionBetweenStates( lastRealAlt, emptyAlt, Label.EPSILON );
        TransitionBetweenStates( emptyAlt, A.Right, Label.EPSILON );

        // end-of-block marker (possibly redundant here)
        A.Left.endOfBlockStateNumber = A.Right.StateNumber;
        A.Right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

        optional = A; // same block, now with an optional last path
    }
    else
    {
        // no decision yet: wrap A in an optional path, reusing its left edge
        NFAState decisionState = A.Left;
        decisionState.Description = "only alt of ()? block";
        NFAState emptyAlt = NewState();
        emptyAlt.Description = "epsilon path of ()? block";
        NFAState blockEnd = NewState();
        TransitionBetweenStates( A.Right, blockEnd, Label.EPSILON );
        blockEnd.Description = "end ()? block";
        TransitionBetweenStates( decisionState, emptyAlt, Label.EPSILON );
        TransitionBetweenStates( emptyAlt, blockEnd, Label.EPSILON );

        // end-of-block marker
        decisionState.endOfBlockStateNumber = blockEnd.StateNumber;
        blockEnd.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

        optional = new StateCluster( decisionState, blockEnd );
    }

    optional.Left.decisionStateType = NFAState.OPTIONAL_BLOCK_START;
    return optional;
}
/**
 * Concatenates two clusters: from A B builds A-e->B by adding an epsilon
 * arc from the right of A to the left of B.
 *
 * As a convenience, when either argument is null the other one is
 * returned as-is (which is null when both are null).
 *
 * @param A first cluster, may be null
 * @param B second cluster, may be null
 * @return cluster from A.Left to B.Right, or the non-null argument
 */
public virtual StateCluster BuildAB( StateCluster A, StateCluster B )
{
    if ( A == null || B == null )
    {
        return A ?? B;
    }

    TransitionBetweenStates( A.Right, B.Left, Label.EPSILON );
    return new StateCluster( A.Left, B.Right );
}
/**
 * Finishes an alternative. A null cluster (the alt was a list of actions
 * or otherwise produced nothing) is replaced by an epsilon cluster; any
 * other cluster is run through the factory's alternative optimizer.
 *
 * @param g cluster built for the alternative, or null
 * @return g after optimization, or a new epsilon cluster when g is null
 */
private StateCluster HandleAlternativeEnd(StateCluster g)
{
    if (g != null)
    {
        factory.OptimizeAlternative(g);
        return g;
    }

    return factory.BuildEpsilon();
}