/** Build the single-edge graph o-set->o for a set label.  A dedicated
 *  left state is created so the cluster has the same shape as an alt
 *  block.
 *
 *  Both new states are tagged with associatedAST; the returned cluster
 *  runs from the left state to the right state.
 */
public virtual StateCluster BuildSet( IIntSet set, GrammarAST associatedAST )
{
    // allocation order matters: the left state is always created first
    NFAState start = NewState();
    NFAState stop = NewState();

    start.associatedASTNode = associatedAST;
    stop.associatedASTNode = associatedAST;

    // the only edge in the cluster: start --set--> stop
    start.AddTransition( new Transition( new Label( set ), stop ) );

    return new StateCluster( start, stop );
}
/** Build an atom whose label holds every possible value, i.e. the set
 *  obtained from the grammar's token types (chars or tokens).
 *
 *  Both new states are tagged with associatedAST; the returned cluster
 *  runs from the left state to the right state.
 */
public virtual StateCluster BuildWildcard( GrammarAST associatedAST )
{
    // allocation order matters: the left state is always created first
    NFAState start = NewState();
    NFAState stop = NewState();

    start.associatedASTNode = associatedAST;
    stop.associatedASTNode = associatedAST;

    // char or tokens
    Label anyValue = new Label( IntervalSet.Of( _nfa.Grammar.TokenTypes ) );
    start.AddTransition( new Transition( anyValue, stop ) );

    return new StateCluster( start, stop );
}
/** EOT (end of token) is a label that indicates when the DFA conversion
 *  algorithm would "fall off the end of a lexer rule".  It normally
 *  means the default clause.  So for ('a'..'z')+ you would see a DFA
 *  with a state that has a..z and EOT emanating from it.  a..z would
 *  jump to a state predicting alt 1 and EOT would jump to a state
 *  predicting alt 2 (the exit loop branch).  EOT implies anything other
 *  than a..z.  If for some reason the set is "all char", such as with
 *  the wildcard '.', then EOT cannot match anything.  For example,
 *
 *     BLOCK : '{' (.)* '}'
 *
 *  consumes all char until EOF when greedy=true.  When all edges are
 *  combined for the DFA state after matching '}', you will find that
 *  it is all char.  The EOT transition has nothing to match and is
 *  unreachable.  The findNewDFAStatesAndAddDFATransitions() method
 *  must know to ignore the EOT, so it is simply removed from the
 *  reachable labels.  Later analysis will find that the exit branch
 *  is not predicted by anything.  For greedy=false, only the EOT label
 *  is left, indicating that the DFA should stop immediately and
 *  predict the exit branch.  The reachable labels are often a
 *  set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}]
 *  due to DFA conversion, so a pure set must be constructed to see if
 *  it is the same as Label.ALLCHAR.
 *
 *  Only do this for lexers.
 *
 *  This method only reports the condition; it never modifies labels.
 *  Per the design notes above, a caller that gets true is expected to:
 *    1. if not greedy, reduce the labels to just EOT;
 *    2. if greedy, remove EOT from the labels.
 *
 *  Returns true when labels contains EOT and the union of every other
 *  label equals Label.ALLCHAR; returns false otherwise (including when
 *  labels is null).
 */
protected bool ReachableLabelsEOTCoexistsWithAllChar( OrderedHashSet<Label> labels )
{
    if ( labels == null )
    {
        // no reachable labels at all, so EOT cannot be among them
        return false;
    }

    Label eot = new Label( Label.EOT );
    if ( !labels.Contains( eot ) )
    {
        return false;
    }

    // The reachable labels are disjoint fragments; union everything
    // except EOT to recover the complete vocabulary they cover.
    IntervalSet completeVocab = new IntervalSet();
    int n = labels.Size();
    for ( int i = 0; i < n; i++ )
    {
        Label rl = labels.Get( i );
        if ( !rl.Equals( eot ) )
        {
            completeVocab.AddAll( rl.Set );
        }
    }

    return completeVocab.Equals( Label.ALLCHAR );
}
/** Build the single-edge graph o-(a..b)->o whose label is the set
 *  produced by IntervalSet.Of( a, b ).
 *
 *  Unlike the other atom builders, no AST node is attached to the two
 *  new states.  The returned cluster runs from the left state to the
 *  right state.
 */
public virtual StateCluster BuildRange( int a, int b )
{
    // allocation order matters: the left state is always created first
    NFAState start = NewState();
    NFAState stop = NewState();

    Label rangeLabel = new Label( IntervalSet.Of( a, b ) );
    start.AddTransition( new Transition( rangeLabel, stop ) );

    return new StateCluster( start, stop );
}
/** Append a transition from this state to target, labelled with label.
 *  Returns the index of the new transition, which is the position of
 *  the last element of the transition list after the append (0..n-1).
 */
public virtual int AddTransition( DFAState target, Label label )
{
    Transition edge = new Transition( label, target );
    _transitions.Add( edge );

    // the edge just added is the last one in the list
    int lastIndex = _transitions.Count - 1;
    return lastIndex;
}
/** Add label uniquely and disjointly; intersection with
 *  another set or int/char forces breaking up the set(s).
 *
 *  Example: if the reachable list of labels is [a..z, {k,9}, 0..9],
 *  the disjoint list will be [{a..j,l..z}, k, 9, 0..8].
 *
 *  As we add NFA configurations to a DFA state, we might as well track
 *  the set of all possible transition labels to make the DFA conversion
 *  more efficient.  Without the reachable labels, we'd need to check the
 *  whole vocabulary space (could be 0..\uFFFF)!  The problem is that
 *  labels can be sets, which may overlap with int labels or other sets.
 *  As we need a deterministic set of transitions from any
 *  state in the DFA, we must make the reachable labels set disjoint.
 *  This operation amounts to finding the character classes for this
 *  DFA state, whereas tools like flex, which need to generate a
 *  homogeneous DFA, must compute char classes across all states.
 *  We are going to generate DFAs with heterogeneous states, so we
 *  only care that the set of transitions out of a single state is
 *  unique.
 *
 *  The idea for adding a new set, t, is to look for overlap with the
 *  elements of the existing list s.  Upon overlap, replace the
 *  existing set s[i] with two new disjoint sets, s[i]-t and s[i]&t
 *  (if s[i]-t is nil, don't add it).  The remainder is t-s[i], which is
 *  what you want to add to the set minus what was already there.  The
 *  remainder must then be compared against the i+1..n elements in s
 *  looking for another collision.  Each collision results in a smaller
 *  and smaller remainder.  Stop when you run out of s elements or the
 *  remainder goes to nil.  If the remainder is non-nil when you run out
 *  of s elements, then add the remainder to the end.
 *
 *  Single element labels are treated as sets to make the code uniform.
 *
 *  The reachable-label collection is created on first use.  If an equal
 *  label is already present the method returns without changing
 *  anything.
 */
protected virtual void AddReachableLabel( Label label )
{
    // lazily create the collection the first time a label is added
    if ( _reachableLabels == null )
    {
        _reachableLabels = new OrderedHashSet<Label>();
    }

    if ( _reachableLabels.Contains( label ) )
    {
        // exact label present
        return;
    }

    IIntSet t = label.Set;
    IIntSet remainder = t; // remainder starts out as whole set to add
    int n = _reachableLabels.Size(); // only look at initial elements

    // walk the existing list looking for the collision
    for ( int i = 0; i < n; i++ )
    {
        Label rl = _reachableLabels.Get( i );

        // NOTE: the collision test uses the full incoming label, not the
        // shrinking set t
        if ( !Label.Intersect( label, rl ) )
        {
            continue;
        }

        // For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t)
        // (ignoring s_i-t if nil; don't put in list)

        // Replace existing s_i with intersection since we
        // know that will always be a non nil character class
        IIntSet s_i = rl.Set;
        IIntSet intersection = s_i.And( t );
        _reachableLabels.Set( i, new Label( intersection ) );

        // Compute s_i-t to see what is in current set and not in incoming
        IIntSet existingMinusNewElements = s_i.Subtract( t );
        if ( !existingMinusNewElements.IsNil )
        {
            // found a new character class, add to the end (doesn't affect
            // outer loop duration because n was computed a priori).
            Label newLabel = new Label( existingMinusNewElements );
            _reachableLabels.Add( newLabel );
        }

        // anything left to add to the reachableLabels?
        remainder = t.Subtract( s_i );
        if ( remainder.IsNil )
        {
            break; // nothing left to add to set.  done!
        }
        // keep matching the rest of the list against what is still uncovered
        t = remainder;
    }

    // whatever part of the incoming label collided with nothing becomes
    // a new label at the end of the list
    if ( !remainder.IsNil )
    {
        Label newLabel = new Label( remainder );
        _reachableLabels.Add( newLabel );
    }
}
/** Create the label for an edge set.  When edgeSet.GetSingleElement()
 *  yields something other than Label.INVALID, the label is built from
 *  that single value; otherwise it is built from the whole set.
 */
protected virtual Label GetLabelForSet( IntervalSet edgeSet )
{
    int singleElement = edgeSet.GetSingleElement();
    if ( singleElement == Label.INVALID )
    {
        // no single element available: keep the label as a set
        return new Label( edgeSet );
    }

    return new Label( singleElement );
}
/** Test this label against another label by dispatching on the other
 *  label's kind: a set label is forwarded to the set overload of
 *  Matches, an atom label to the atom overload.  Any other kind of
 *  label never matches.
 */
public virtual bool Matches( Label other )
{
    // the set check comes first, then the atom check
    return other.IsSet
        ? Matches( other.Set )
        : ( other.IsAtom && Matches( other.Atom ) );
}
/** Create a copy of this label.  The copy is constructed from this
 *  label's value, has its label value assigned again explicitly, and
 *  receives its own new IntervalSet filled from this label's set, so
 *  the two labels do not share a set instance.
 */
public virtual object Clone()
{
    Label copy = new Label( label )
    {
        label = this.label,
        _labelSet = new IntervalSet()
    };

    // populate the copy's private set from ours
    copy._labelSet.AddAll( this._labelSet );

    return copy;
}
/** Merge the atom or set carried by a into this label.
 *
 *  If this label is an atom it is first turned into a set label that
 *  holds its single value (this happens before a is examined).  If it
 *  is neither an atom nor a set, nothing can be added.
 *
 *  Throws InvalidOperationException when this label is neither an atom
 *  nor a set, or when a is neither an atom nor a set.
 */
public virtual void Add( Label a )
{
    if ( IsAtom )
    {
        // promote the single atom to a one-element set before merging
        _labelSet = IntervalSet.Of( label );
        label = SET;
    }
    else if ( !IsSet )
    {
        throw new InvalidOperationException( "can't add element to Label of type " + label );
    }

    // from here on this label is a set; fold in whatever a carries
    if ( a.IsAtom )
    {
        _labelSet.Add( a.Atom );
    }
    else if ( a.IsSet )
    {
        _labelSet.AddAll( a.Set );
    }
    else
    {
        throw new InvalidOperationException( "can't add element to Label of type " + label );
    }
}
/** Report whether two labels have at least one value in common.
 *
 *  The four combinations are handled as follows:
 *    - neither is a set: the two label values must be equal;
 *    - both are sets:    their intersection must be non-nil;
 *    - only one is a set: that set must contain the other label's value.
 */
public static bool Intersect( Label label, Label edgeLabel )
{
    bool labelIsSet = label.IsSet;
    bool edgeIsSet = edgeLabel.IsSet;

    if ( labelIsSet )
    {
        if ( edgeIsSet )
        {
            // set vs. set
            return !edgeLabel.Set.And( label.Set ).IsNil;
        }

        // set vs. single value
        return label.Set.Contains( edgeLabel.label );
    }

    if ( edgeIsSet )
    {
        // single value vs. set
        return edgeLabel.Set.Contains( label.label );
    }

    // single value vs. single value
    return edgeLabel.label == label.label;
}