/** Finish NFA construction for the current rule.
 *
 *  If blockStart carries a set value, the cluster is first wrapped through
 *  factory.BuildAlternativeBlockFromSet. Then, only when the current rule is
 *  a parser rule or the grammar is a lexer grammar, the rule's start state
 *  is linked to the block, the block is registered as a decision if it has
 *  more than one alternative, and the block's right edge is linked to the
 *  rule's stop state.
 */
private void HandleRule(GrammarAST start, StateCluster g, GrammarAST blockStart, GrammarAST id)
{
    // a block that came back as a set rather than a BLOCK becomes a single-alt block
    if (blockStart.SetValue != null)
    {
        g = factory.BuildAlternativeBlockFromSet(g);
    }

    bool isParserRule = Rule.GetRuleType(currentRuleName) == RuleType.Parser;
    if (!isParserRule && grammar.type != GrammarType.Lexer)
    {
        return;
    }

    // attach the rule's start node to the block
    Rule rule = grammar.GetLocallyDefinedRule(currentRuleName);
    NFAState ruleStart = rule.StartState;
    ruleStart.associatedASTNode = id;
    ruleStart.AddTransition(new Transition(Label.EPSILON, g.Left));

    // only blocks with more than one alternative are tracked as decisions
    if (grammar.GetNumberOfAltsForDecisionNFA(g.Left) > 1)
    {
        g.Left.Description = grammar.GrammarTreeToString(start, false);
        g.Left.SetDecisionASTNode(blockStart);
        int decision = grammar.AssignDecisionNumber(g.Left);
        grammar.SetDecisionNFA(decision, g.Left);
        grammar.SetDecisionBlockAST(decision, blockStart);
    }

    // hook the block's right edge to the end-of-rule node
    NFAState ruleStop = rule.StopState;
    g.Right.AddTransition(new Transition(Label.EPSILON, ruleStop));
}
/** Build a subrule matching any tree or any single node.
 *
 *  The block has two alternatives:
 *    alt 1:  . DOWN .+ UP   (a wildcard root with one or more wildcard children)
 *    alt 2:  .              (a lone wildcard node)
 *
 *  Every state created here is associated with associatedAST.
 */
public StateCluster BuildWildcardTree(GrammarAST associatedAST)
{
    // alt 1, built left to right: .  then DOWN  then .+  then UP
    StateCluster tree = BuildWildcard(associatedAST);
    tree = BuildAB(tree, BuildAtom(Label.DOWN, associatedAST));
    tree = BuildAB(tree, BuildAplus(BuildWildcard(associatedAST)));
    tree = BuildAB(tree, BuildAtom(Label.UP, associatedAST));

    // alt 2: a single wildcard node
    StateCluster singleNode = BuildWildcard(associatedAST);

    List<StateCluster> alternatives = new List<StateCluster> { tree, singleNode };
    return BuildAlternativeBlock(alternatives);
}
/** Build the NFA for a (...)+ subrule and register its loopback decision.
 *
 *  start is the subrule's AST root, blk its block node and bg the cluster
 *  already built for the block. Returns the cluster produced by
 *  factory.BuildAplus.
 */
private StateCluster HandleEbnfPositiveClosureBlock(GrammarAST start, GrammarAST blk, StateCluster bg)
{
    GrammarAST endOfBlock = blk.LastChild;

    // a block that came back as a set is first wrapped as a single-alt block
    if (blk.SetValue != null)
    {
        bg = factory.BuildAlternativeBlockFromSet(bg);
    }

    StateCluster loop = factory.BuildAplus(bg);

    // No decision is made on the left edge; the loopback / exit decision is
    // tracked on bg.Right, which is read only after BuildAplus has returned.
    var loopback = bg.Right;
    loopback.Description = "()+ loopback of " + grammar.GrammarTreeToString(start, false);
    int decision = grammar.AssignDecisionNumber(loopback);
    grammar.SetDecisionNFA(decision, loopback);
    grammar.SetDecisionBlockAST(decision, blk);
    loopback.SetDecisionASTNode(endOfBlock);

    // give the block entry state the same decision, for interpreting grammars
    NFAState blockEntry = (NFAState)loop.Left.GetTransition(0).Target;
    blockEntry.SetDecisionASTNode(start);
    blockEntry.DecisionNumber = decision;

    return loop;
}
/** Append a DOWN atom after the first element of a tree pattern.
 *
 *  The DOWN cluster is handed back through the out parameter; the return
 *  value is element followed by DOWN (joined via factory.BuildAB).
 */
private StateCluster HandleTreeFirstElement(GrammarAST firstElementStart, StateCluster element, out StateCluster down)
{
    StateCluster downCluster = factory.BuildAtom(Label.DOWN, firstElementStart);
    down = downCluster;

    // TODO set following states for imaginary nodes?
    StateCluster rootThenDown = factory.BuildAB(element, downCluster);
    return rootThenDown;
}
/** Optimize one alternative (a chain of grammar elements).
 *
 *  Walks the states from alt.left to alt.right and drops epsilon
 *  transitions that merely link simple elements together. Block elements
 *  are skipped via their end-of-block state, and rule references via their
 *  follow state.
 *
 *  The saving is small (the original author measured 195 states on
 *  java.g), but every bit helps.
 */
public virtual void OptimizeAlternative(StateCluster alt)
{
    NFAState current = alt.left;
    while (current != alt.right)
    {
        // a block element: jump straight to its end-of-block state
        if (current.endOfBlockStateNumber != State.INVALID_STATE_NUMBER)
        {
            current = nfa.GetState(current.endOfBlockStateNumber);
            continue;
        }

        Transition first = current.transition[0];

        // a rule reference: continue from the state following the reference
        RuleClosureTransition ruleRef = first as RuleClosureTransition;
        if (ruleRef != null)
        {
            current = ruleRef.followState;
            continue;
        }

        // Only a lone, non-action epsilon edge is a candidate for bypassing.
        bool isPlainEpsilon = first.label.IsEpsilon
            && !first.label.IsAction
            && current.NumberOfTransitions == 1;
        if (isPlainEpsilon)
        {
            // Point at whatever the epsilon's target points to, unless that
            // target starts a block/loop or has no outgoing transition
            // (e.g. it is the last node of the alt).
            NFAState epsilonTarget = (NFAState)first.target;
            if (epsilonTarget.endOfBlockStateNumber == State.INVALID_STATE_NUMBER)
            {
                Transition bypass = epsilonTarget.transition[0];
                if (bypass != null)
                {
                    current.SetTransition0(bypass);
                }
            }
        }

        // advance along the ORIGINAL first transition, not the replacement
        current = (NFAState)first.target;
    }
}
/** From (A)+ build
 *
 *     |---|    (Transition 2 from A.right points at alt 1)
 *     v   |    (follow of loop is Transition 1)
 *  o->o-A-o->o
 *
 *  A's right state gets two epsilon edges: first the exit edge to a new
 *  block-end state, then the loop-back edge to A's left state. A new entry
 *  state is added in front of A. A may have one or several alternatives.
 *
 *  During analysis the follow link (transition 1) is treated as alt n+1 of
 *  an n-alt A block.
 */
public virtual StateCluster BuildAplus(StateCluster A)
{
    NFAState entry = NewState();
    NFAState blockEnd = NewState();
    blockEnd.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

    // If A.right already closes another block (nested loop), it cannot be
    // reused as the loopback; append a fresh tail state and use that.
    if (A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK)
    {
        NFAState freshTail = NewState();
        TransitionBetweenStates(A.right, freshTail, Label.EPSILON);
        A.right = freshTail;
    }

    // Transition 1 of the loopback state: follow (exit the loop)
    TransitionBetweenStates(A.right, blockEnd, Label.EPSILON);
    // Transition 2 of the loopback state: loop back (acts like alt 2)
    TransitionBetweenStates(A.right, A.left, Label.EPSILON);
    // new entry state leads into the block
    TransitionBetweenStates(entry, A.left, Label.EPSILON);

    A.right.decisionStateType = NFAState.LOOPBACK;
    A.left.decisionStateType = NFAState.BLOCK_START;

    // end-of-block marker on the block start
    A.left.endOfBlockStateNumber = A.right.stateNumber;

    return new StateCluster(entry, blockEnd);
}
/** Close a tree pattern: append an UP atom after the last element and
 *  record the DOWN cluster's left state on the tree root AST node.
 *
 *  Returns g followed by UP (joined via factory.BuildAB).
 */
private StateCluster HandleTreeAfterLastElement(GrammarAST start, StateCluster g, GrammarAST lastElementStart, StateCluster down)
{
    StateCluster upCluster = factory.BuildAtom(Label.UP, lastElementStart);
    StateCluster closedTree = factory.BuildAB(g, upCluster);

    // the tree root remembers the left state of the DOWN cluster
    // (used for LOOK computation later)
    start.NFATreeDownState = down.Left;

    return closedTree;
}
/** From an empty alternative build the cluster o-e->o:
 *  two new states joined by a single epsilon transition.
 */
public virtual StateCluster BuildEpsilon()
{
    NFAState from = NewState();
    NFAState to = NewState();

    TransitionBetweenStates(from, to, Label.EPSILON);

    return new StateCluster(from, to);
}
/** Build o-(a..b)->o: two new states joined by one transition whose label
 *  is the interval set IntervalSet.Of(a, b).
 */
public virtual StateCluster BuildRange(int a, int b)
{
    NFAState from = NewState();
    NFAState to = NewState();

    Label rangeLabel = new Label(IntervalSet.Of(a, b));
    from.AddTransition(new Transition(rangeLabel, to));

    return new StateCluster(from, to);
}
/** From label A build the cluster o-A->o.
 *
 *  Both new states are associated with associatedAST.
 */
public virtual StateCluster BuildAtom(int label, GrammarAST associatedAST)
{
    NFAState from = NewState();
    NFAState to = NewState();

    from.associatedASTNode = associatedAST;
    to.associatedASTNode = associatedAST;

    TransitionBetweenStates(from, to, label);

    return new StateCluster(from, to);
}
/** Build the cluster for a character literal.
 *
 *  In a lexer grammar the literal is built with
 *  factory.BuildCharLiteralAtom. Otherwise it is built as a plain atom and
 *  the atom's right state is recorded as the literal's following NFA state.
 */
private StateCluster HandleAtomCharLiteral(GrammarAST charLiteral)
{
    if (grammar.type != GrammarType.Lexer)
    {
        StateCluster atom = factory.BuildAtom(charLiteral);
        charLiteral.followingNFAState = atom.Right;
        return atom;
    }

    return factory.BuildCharLiteralAtom(charLiteral);
}
/** Build the cluster for a string literal.
 *
 *  In a lexer grammar the literal is built with
 *  factory.BuildStringLiteralAtom. Otherwise it is built as a plain atom
 *  and the atom's right state is recorded as the literal's following NFA
 *  state.
 */
private StateCluster HandleAtomStringLiteral(GrammarAST stringLiteral)
{
    if (grammar.type != GrammarType.Lexer)
    {
        StateCluster atom = factory.BuildAtom(stringLiteral);
        stringLiteral.followingNFAState = atom.Right;
        return atom;
    }

    return factory.BuildStringLiteralAtom(stringLiteral);
}
/** Finish an alternative.
 *
 *  A null cluster (the alt was a list of actions or the like) is replaced
 *  by an epsilon cluster; any other cluster is passed through
 *  factory.OptimizeAlternative and returned.
 */
private StateCluster HandleAlternativeEnd(StateCluster g)
{
    if (g != null)
    {
        factory.OptimizeAlternative(g);
        return g;
    }

    // nothing was built for this alt, so represent it as an empty path
    return factory.BuildEpsilon();
}
/** From a set ('a'|'b') build
 *
 *  o->o-'a'..'b'->o
 *
 *  i.e. put one new state, linked by epsilon, in front of the set cluster.
 *  Returns null when set is null.
 */
public virtual StateCluster BuildAlternativeBlockFromSet(StateCluster set)
{
    if (set == null)
    {
        return null;
    }

    // Single alt, no decision. The extra alt-start state is always required.
    NFAState altStart = NewState();
    TransitionBetweenStates(altStart, set.left, Label.EPSILON);

    StateCluster block = new StateCluster(altStart, set.right);
    return block;
}
/** For a reference to rule r, build
 *
 *  o-e->(r)  o
 *
 *  where (r) is the start of rule r. The trailing o is not linked from the
 *  reference state directly; it is reachable only as the follow state held
 *  by the RuleClosureTransition stored on the left state.
 *
 *  If rule r is just a list of tokens its block is a set on an edge and
 *  could be inlined instead of referenced; that is not done here because
 *  it is unclear whether it would help NFA->DFA construction.
 *
 *  TODO add to codegen: collapse alt blks that are sets into single matchSet
 */
public virtual StateCluster BuildRuleRef(Rule refDef, NFAState ruleStart)
{
    NFAState refState = NewState();
    NFAState followState = NewState();

    // the closure transition carries the rule, its start state and the follow state
    Transition ruleEdge = new RuleClosureTransition(refDef, ruleStart, followState);
    refState.AddTransition(ruleEdge);

    return new StateCluster(refState, followState);
}
/** Complement a block of simple alternatives in place.
 *
 *  Only blocks that collapse to a set can be complemented. When
 *  getCollapsedBlockAsSet yields a set, the label two transitions in from
 *  the block's left state is given the complemented set. When it yields
 *  null, the block is returned untouched.
 */
public StateCluster BuildAlternativeBlockComplement(StateCluster blk)
{
    State s0 = blk.left;
    IIntSet collapsed = getCollapsedBlockAsSet(s0);

    if (collapsed == null)
    {
        // not a plain set block: nothing to complement
        return blk;
    }

    // a set is available, so the structure is known: o->o-set->o
    IIntSet complemented = nfa.grammar.complement(collapsed);
    Label setLabel = s0.getTransition(0).target.getTransition(0).label;
    setLabel.Set = complemented;

    return blk;
}
/** Register a plain (...) block as a decision when it has more than one
 *  alternative. The cluster is returned unchanged either way.
 */
private StateCluster HandleEbnfBlock(GrammarAST blk, StateCluster g)
{
    // a single-alt block needs no decision
    if (grammar.GetNumberOfAltsForDecisionNFA(g.Left) <= 1)
    {
        return g;
    }

    g.Left.Description = grammar.GrammarTreeToString(blk, false);
    g.Left.SetDecisionASTNode(blk);

    int decision = grammar.AssignDecisionNumber(g.Left);
    grammar.SetDecisionNFA(decision, g.Left);
    grammar.SetDecisionBlockAST(decision, blk);

    return g;
}
/** Build an atom whose label holds every possible value: the grammar's
 *  token types (chars or tokens). Both new states are associated with
 *  associatedAST.
 */
public virtual StateCluster BuildWildcard(GrammarAST associatedAST)
{
    NFAState from = NewState();
    NFAState to = NewState();

    from.associatedASTNode = associatedAST;
    to.associatedASTNode = associatedAST;

    Label anything = new Label(nfa.grammar.TokenTypes);
    from.AddTransition(new Transition(anything, to));

    return new StateCluster(from, to);
}
/** From A B build A-e->B: an epsilon arc from the right of A to the left
 *  of B. The result spans A.left .. B.right.
 *
 *  As a convenience, B is returned when A is null and A is returned when
 *  B is null (so null is returned when both are null).
 */
public virtual StateCluster BuildAB(StateCluster A, StateCluster B)
{
    if (A == null)
    {
        return B;
    }

    if (B == null)
    {
        return A;
    }

    TransitionBetweenStates(A.right, B.left, Label.EPSILON);

    return new StateCluster(A.left, B.right);
}
/** From a set build a single-edge cluster o-set->o.
 *
 *  Both new states are associated with associatedAST. The original note
 *  says an alt block needs an extra state on the left; that state is not
 *  added here (see BuildAlternativeBlockFromSet).
 */
public virtual StateCluster BuildSet(IIntSet set, GrammarAST associatedAST)
{
    NFAState from = NewState();
    NFAState to = NewState();

    from.associatedASTNode = associatedAST;
    to.associatedASTNode = associatedAST;

    Label setLabel = new Label(set);
    from.AddTransition(new Transition(setLabel, to));

    return new StateCluster(from, to);
}
/** Build what amounts to an epsilon transition guarded by a semantic
 *  predicate. pred points into the AST at the SEMPRED token.
 *
 *  The grammar's semantic predicate counter is incremented unless the
 *  predicate text starts with Grammar.SynpredRulePrefix (compared
 *  case-insensitively), i.e. syntactic predicates are not counted.
 */
public virtual StateCluster BuildSemanticPredicate(GrammarAST pred)
{
    bool isSyntacticPredicate =
        pred.Text.StartsWith(Grammar.SynpredRulePrefix, StringComparison.OrdinalIgnoreCase);
    if (!isSyntacticPredicate)
    {
        nfa.grammar.numberOfSemanticPredicates++;
    }

    NFAState from = NewState();
    NFAState to = NewState();

    Transition predicateEdge = new Transition(new PredicateLabel(pred), to);
    from.AddTransition(predicateEdge);

    return new StateCluster(from, to);
}
/** From (A)? build either:
 *
 *  o--A->o
 *  |     ^
 *  o---->|
 *
 *  or, if A is already a block with more than one alternative, just add an
 *  empty alt to the end of that block.
 *
 *  In both cases the left state of the returned cluster is marked
 *  OPTIONAL_BLOCK_START.
 */
public virtual StateCluster BuildAoptional(StateCluster A)
{
    StateCluster optional;
    int altCount = nfa.grammar.GetNumberOfAltsForDecisionNFA(A.left);

    if (altCount == 1)
    {
        // No decision yet: reuse A's left edge as the decision state and
        // wrap A in an optional path.
        NFAState decision = A.left;
        decision.Description = "only alt of ()? block";

        NFAState bypass = NewState();
        bypass.Description = "epsilon path of ()? block";

        NFAState blockEnd = NewState();
        TransitionBetweenStates(A.right, blockEnd, Label.EPSILON);
        blockEnd.Description = "end ()? block";

        TransitionBetweenStates(decision, bypass, Label.EPSILON);
        TransitionBetweenStates(bypass, blockEnd, Label.EPSILON);

        // end-of-block marker on the decision state
        decision.endOfBlockStateNumber = blockEnd.stateNumber;
        blockEnd.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

        optional = new StateCluster(decision, blockEnd);
    }
    else
    {
        // Already a decision block: chain an empty alt after the last real one.
        NFAState lastAlt = nfa.grammar.GetNFAStateForAltOfDecision(A.left, altCount);

        NFAState bypass = NewState();
        bypass.Description = "epsilon path of ()? block";

        TransitionBetweenStates(lastAlt, bypass, Label.EPSILON);
        TransitionBetweenStates(bypass, A.right, Label.EPSILON);

        // end-of-block marker (the original author suspects this is redundant here)
        A.left.endOfBlockStateNumber = A.right.stateNumber;
        A.right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

        // same block, now with an optional last path
        optional = A;
    }

    optional.left.decisionStateType = NFAState.OPTIONAL_BLOCK_START;
    return optional;
}
/** From (A)* build
 *
 *     |---|
 *     v   |
 *  o->o-A-o--o   (Transition 2 from block end points at alt 1; follow is Transition 1)
 *  |         ^
 *  o---------|   (optional branch is 2nd alt of optional block containing A+)
 *
 *  The end state of A loops back to A's left state, and three new states
 *  are added: a bypass decision state, the state on the bypass path, and a
 *  block-end state. The bypass is built like an optional subrule; see
 *  BuildAplus for more on the loop-back transition.
 *
 *  The new block-end state is marked RIGHT_EDGE_OF_BLOCK. When A's own
 *  right state already carries that mark (nested loops), an extra tail
 *  state is inserted so that two blocks do not share one end node.
 *
 *  An A* has two or three decision points: the optional bypass and the
 *  loopback, plus A's own decision if A is a block of alternatives.
 *
 *  The bypass must sit outside the loop because (A|B)* is not the same
 *  thing as (A|B|)+.
 *
 *  This is an accurate NFA for (A)*. Per the original author, code
 *  generation does not need a DFA for the optional branch: the
 *  exit/loopback decision is enough to choose between enter, exit and
 *  loop. See codegen.g
 */
public virtual StateCluster BuildAstar(StateCluster A)
{
    NFAState bypassDecision = NewState();
    bypassDecision.Description = "enter loop path of ()* block";

    NFAState bypassAlt = NewState();
    bypassAlt.Description = "epsilon path of ()* block";

    NFAState blockEnd = NewState();
    blockEnd.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;

    // If A.right already closes another block (nested loop), it cannot be
    // reused as the loopback; append a fresh tail state and use that.
    if (A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK)
    {
        NFAState freshTail = NewState();
        TransitionBetweenStates(A.right, freshTail, Label.EPSILON);
        A.right = freshTail;
    }

    // A's end state becomes the loopback
    A.right.Description = "()* loopback";

    // bypass decision, Transition 1: into the actual block
    TransitionBetweenStates(bypassDecision, A.left, Label.EPSILON);
    // bypass decision, Transition 2: the optional path around the block
    TransitionBetweenStates(bypassDecision, bypassAlt, Label.EPSILON);
    TransitionBetweenStates(bypassAlt, blockEnd, Label.EPSILON);

    // loopback, Transition 1: exit
    TransitionBetweenStates(A.right, blockEnd, Label.EPSILON);
    // loopback, Transition 2: loop
    TransitionBetweenStates(A.right, A.left, Label.EPSILON);

    bypassDecision.decisionStateType = NFAState.BYPASS;
    A.left.decisionStateType = NFAState.BLOCK_START;
    A.right.decisionStateType = NFAState.LOOPBACK;

    // end-of-block markers
    A.left.endOfBlockStateNumber = A.right.stateNumber;
    bypassDecision.endOfBlockStateNumber = blockEnd.stateNumber;

    return new StateCluster(bypassDecision, blockEnd);
}
/** Build the NFA for a (...)? subrule and register its decision.
 *
 *  start is the subrule's AST root, blk its block node and bg the cluster
 *  already built for the block. Returns the cluster produced by
 *  factory.BuildAoptional.
 */
private StateCluster HandleEbnfOptionalBlock(GrammarAST start, GrammarAST blk, StateCluster bg)
{
    // a block that came back as a SET rather than a BLOCK becomes a single-alt block
    if (blk.SetValue != null)
    {
        bg = factory.BuildAlternativeBlockFromSet(bg);
    }

    StateCluster optional = factory.BuildAoptional(bg);
    optional.Left.Description = grammar.GrammarTreeToString(start, false);

    // A decision is always registered here, even for a single-alt block.
    int decision = grammar.AssignDecisionNumber(optional.Left);
    grammar.SetDecisionNFA(decision, optional.Left);
    grammar.SetDecisionBlockAST(decision, blk);
    optional.Left.SetDecisionASTNode(start);

    return optional;
}
/** Build the cluster for a reference to a rule.
 *
 *  Returns null when the grammar has no start state for the referenced
 *  rule. Otherwise the reference's following and start NFA states are
 *  recorded on the AST node. Outside lexer grammars, a FOLLOW transition
 *  is also added when the first transition is a RuleClosureTransition.
 */
private StateCluster HandleAtomRuleReference(string scopeName, GrammarAST ruleReference)
{
    NFAState ruleStart = grammar.GetRuleStartState(scopeName, ruleReference.Text);
    if (ruleStart == null)
    {
        return null;
    }

    Rule referenced = grammar.GetRule(scopeName, ruleReference.Text);
    StateCluster reference = factory.BuildRuleRef(referenced, ruleStart);

    ruleReference.followingNFAState = reference.Right;
    ruleReference._nfaStartState = reference.Left;

    // if the first edge is not a rule closure, the rule ref got inlined to a set
    bool isClosure = reference.Left.GetTransition(0) is RuleClosureTransition;
    if (isClosure && grammar.type != GrammarType.Lexer)
    {
        AddFollowTransition(ruleReference.Text, reference.Right);
    }

    return reference;
}
/** Build the cluster for a token reference.
 *
 *  Outside lexer grammars the token is a plain atom, and the atom's right
 *  state is recorded as the token's following NFA state. In a lexer
 *  grammar the token is a rule reference; null is returned when that rule
 *  has no start state.
 */
private StateCluster HandleAtomTokenReference(string scopeName, GrammarAST tokenReference)
{
    if (grammar.type != GrammarType.Lexer)
    {
        StateCluster atom = factory.BuildAtom(tokenReference);
        tokenReference.followingNFAState = atom.Right;
        return atom;
    }

    NFAState ruleStart = grammar.GetRuleStartState(scopeName, tokenReference.Text);
    if (ruleStart == null)
    {
        return null;
    }

    Rule referenced = grammar.GetRule(scopeName, tokenReference.Text);
    StateCluster reference = factory.BuildRuleRef(referenced, ruleStart);
    tokenReference._nfaStartState = reference.Left;

    // No FOLLOW transitions are added in the lexer;
    // only exact context should be used.
    return reference;
}
/** Add to elements every member of the set that labels the first
 *  transition out of g's left state.
 */
private void HandleSetElementSet(IIntSet elements, StateCluster g)
{
    elements.AddAll(g.Left.GetTransition(0).Label.Set);
}
/** From an A|B|..|Z alternative block build
 *
 *  o->o-A->o->o (last NFAState is the block end pointed to by all alts)
 *  |          ^
 *  o->o-B->o--|
 *  |          |
 *  ...        |
 *  |          |
 *  o->o-Z->o--|
 *
 *  Every alternative gets a new begin state connected by epsilon, every
 *  alternative's right side points at one shared block-end state, and each
 *  begin state is chained by epsilon to the next alternative's begin state.
 *
 *  Special case: with exactly one alternative no block end is created;
 *  only a single begin state is put in front of it.
 *
 *  Returns null when the collection is null or empty.
 */
public virtual StateCluster BuildAlternativeBlock(ICollection <StateCluster> alternativeStateClusters)
{
    if (alternativeStateClusters == null || alternativeStateClusters.Count == 0)
    {
        return null;
    }

    // Single alt: no decision, but the alt-start state is always required.
    if (alternativeStateClusters.Count == 1)
    {
        StateCluster onlyAlt = alternativeStateClusters.First();
        NFAState altStart = NewState();
        TransitionBetweenStates(altStart, onlyAlt.left, Label.EPSILON);
        return new StateCluster(altStart, onlyAlt.right);
    }

    // Even if the block could be collapsed for lookahead purposes, its
    // alts must still be predicted in case there are actions etc. This is
    // the decision that is (always) pointed to from the AST node.
    NFAState previousAltStart = null; // lets each alt be linked from the one before it
    NFAState firstAltStart = null;    // extreme left node of the block

    NFAState blockEnd = NewState();
    blockEnd.Description = "end block";

    int altNumber = 1;
    foreach (StateCluster alt in alternativeStateClusters)
    {
        // begin state for this alt, connected by epsilon
        NFAState altStart = NewState();
        altStart.Description = "alt " + altNumber + " of ()";
        TransitionBetweenStates(altStart, alt.left, Label.EPSILON);
        TransitionBetweenStates(alt.right, blockEnd, Label.EPSILON);

        if (previousAltStart != null)
        {
            // not the first alternative: link to it from the previous one
            TransitionBetweenStates(previousAltStart, altStart, Label.EPSILON);
        }
        else
        {
            firstAltStart = altStart;
        }

        previousAltStart = altStart;
        altNumber++;
    }

    // The cluster spans the first alt's begin state to the block end.
    StateCluster block = new StateCluster(firstAltStart, blockEnd);
    firstAltStart.decisionStateType = NFAState.BLOCK_START;

    // end-of-block marker on the block start
    firstAltStart.endOfBlockStateNumber = blockEnd.stateNumber;

    return block;
}
/** Record the right state of g as the NFA state that follows the
 *  given not-node.
 */
private void HandleNotAtomEnd(GrammarAST notNode, StateCluster g)
{
    var stateAfterNot = g.Right;
    notNode.followingNFAState = stateAfterNot;
}
/** Append the next tree element to the cluster built so far
 *  (joined via factory.BuildAB).
 */
private StateCluster HandleTreeElement(StateCluster g, StateCluster element)
{
    StateCluster extended = factory.BuildAB(g, element);
    return extended;
}