/** EOT (end of token) is a label that indicates when the DFA conversion
 *  algorithm would "fall off the end of a lexer rule".  It normally
 *  means the default clause.  So for ('a'..'z')+ you would see a DFA
 *  with a state that has a..z and EOT emanating from it.  a..z would
 *  jump to a state predicting alt 1 and EOT would jump to a state
 *  predicting alt 2 (the exit loop branch).  EOT implies anything other
 *  than a..z.  If for some reason, the set is "all char" such as with
 *  the wildcard '.', then EOT cannot match anything.  For example,
 *
 *     BLOCK : '{' (.)* '}'
 *
 *  consumes all char until EOF when greedy=true.  When all edges are
 *  combined for the DFA state after matching '}', you will find that
 *  it is all char.  The EOT transition has nothing to match and is
 *  unreachable.  The findNewDFAStatesAndAddDFATransitions() method
 *  must know to ignore the EOT, so we simply remove it from the
 *  reachable labels.  Later analysis will find that the exit branch
 *  is not predicted by anything.  For greedy=false, we leave only
 *  the EOT label indicating that the DFA should stop immediately
 *  and predict the exit branch.  The reachable labels are often a
 *  set of disjoint values like: [<EOT>, 42, {0..41, 43..65534}]
 *  due to DFA conversion so must construct a pure set to see if
 *  it is same as Label.ALLCHAR.
 *
 *  Only do this for Lexers.
 *
 *  This method only detects the situation: it returns true when labels
 *  contains EOT and the union of all the other labels equals
 *  Label.ALLCHAR, and false otherwise.  It never modifies labels.
 *  What is intended to happen when EOT coexists with ALLCHAR:
 *    1. If not greedy, the labels are reduced to just EOT
 *    2. If greedy, EOT is removed from the labels set
 */
protected boolean ReachableLabelsEOTCoexistsWithAllChar(OrderedHashSet labels)
{
    Label eot = new Label(Label.EOT);
    if ( !labels.containsKey(eot) )
    {
        // without EOT there is nothing that could coexist with ALLCHAR
        return false;
    }

    // Merge every non-EOT label into a single set; the individual labels
    // are fragments, so only their union can be compared with ALLCHAR.
    IntervalSet completeVocab = new IntervalSet();
    int n = labels.size();
    for (int i = 0; i < n; i++)
    {
        Label rl = (Label)labels.get(i);
        if ( !rl.Equals(eot) )
        {
            completeVocab.addAll(rl.Set());
        }
    }

    return completeVocab.Equals(Label.ALLCHAR);
}
/** Build a new set holding the intersection of this set and other.
 *  Both interval lists are walked in step with one cursor each (this
 *  relies on the lists being sorted), and the walk stops as soon as
 *  either list has been exhausted.
 *
 *  Returns null when other is null.  other is cast to IntervalSet
 *  without a type check.
 */
public IIntSet And(IIntSet other)
{
    if (other == null)
    {
        // nothing in common with null set
        return null;
    }

    var ownList = this.intervals;
    var otherList = ((IntervalSet)other).intervals;
    IntervalSet result = new IntervalSet();
    int ownCount = ownList.Count;
    int otherCount = otherList.Count;
    int ownIndex = 0;
    int otherIndex = 0;

    while (ownIndex < ownCount && otherIndex < otherCount)
    {
        Interval mine = ownList[ownIndex];
        Interval theirs = otherList[otherIndex];

        if (mine.StartsBeforeDisjoint(theirs))
        {
            // mine cannot overlap theirs; try the next interval of this set
            ownIndex++;
            continue;
        }

        if (theirs.StartsBeforeDisjoint(mine))
        {
            // theirs cannot overlap mine; try the next interval of other
            otherIndex++;
            continue;
        }

        if (mine.ProperlyContains(theirs))
        {
            // theirs is the whole overlap; mine may still cover later ones
            result.Intervals.Add(theirs);
            otherIndex++;
            continue;
        }

        if (theirs.ProperlyContains(mine))
        {
            // mine is the whole overlap; theirs may still cover later ones
            result.Intervals.Add(mine);
            ownIndex++;
            continue;
        }

        if (!mine.Disjoint(theirs))
        {
            // partial overlap: keep only the shared part
            result.Add(mine.Intersection(theirs));

            // Step past only one of the two intervals; the other one may
            // still collide with the next interval on the opposite side.
            // E.g. mine=[0..115], theirs=[115..200]: the shared part is
            // 115 and only mine is advanced.
            if (mine.StartsAfterNonDisjoint(theirs))
            {
                otherIndex++;
            }
            else if (theirs.StartsAfterNonDisjoint(mine))
            {
                ownIndex++;
            }
        }
    }

    return result;
}
/** Produce a new set with the elements of this set that are not in other.
 *  An interval of this set is split when an interval of other falls
 *  inside it.  other is assumed to be a subset of this; whatever other
 *  holds outside of this set is ignored.
 *
 *  Returns null when other is null or is not an IntervalSet.
 *
 *  Kept around, but as of 10-20-2005 complement works without subtract,
 *  so subtract can simply be a&~b.
 */
public IIntSet Subtract(IIntSet other)
{
    // "null is IntervalSet" is false, so this also rejects a null argument
    if (!(other is IntervalSet))
    {
        return null;
    }

    IntervalSet result = new IntervalSet();

    // one cursor per interval list; a null interval means "list exhausted"
    var ownIter = this.intervals.GetEnumerator();
    var cutIter = ((IntervalSet)other).intervals.GetEnumerator();
    Interval remaining = ownIter.MoveNext() ? (Interval)ownIter.Current : null;
    Interval cut = cutIter.MoveNext() ? (Interval)cutIter.Current : null;

    while (remaining != null)
    {
        // other is used up: everything still left in this set survives
        if (cut == null)
        {
            result.add(remaining);
            remaining = ownIter.MoveNext() ? (Interval)ownIter.Current : null;
            continue;
        }

        // no overlap between the two current intervals
        if (remaining.disjoint(cut))
        {
            if (remaining.startsBeforeDisjoint(cut))
            {
                // remaining lies entirely left of cut: keep it, move on
                result.add(remaining);
                remaining = ownIter.MoveNext() ? (Interval)ownIter.Current : null;
            }
            else
            {
                // cut lies entirely left of remaining: look at the next cut
                cut = cutIter.MoveNext() ? (Interval)cutIter.Current : null;
            }
            continue;
        }

        // cut falls inside remaining and splits it in two
        if (remaining.properlyContains(cut))
        {
            // the left piece is final
            result.add(remaining.a, cut.a - 1);
            // the right piece becomes the interval under inspection because
            // the next cut might overlap it as well
            remaining = new Interval(cut.b + 1, remaining.b);
            cut = cutIter.MoveNext() ? (Interval)cutIter.Current : null;
            continue;
        }

        // cut swallows remaining completely: nothing survives
        if (cut.properlyContains(remaining))
        {
            remaining = ownIter.MoveNext() ? (Interval)ownIter.Current : null;
            continue;
        }

        // overlap without containment in either direction
        result.add(remaining.differenceNotProperlyContained(cut));

        // cut is held back when it reaches past the right end of remaining
        // (or remaining starts before it), since it could then overlap the
        // next interval of this set too; decide before remaining changes
        bool holdCut = remaining.startsBeforeNonDisjoint(cut) || cut.b > remaining.b;

        // this set's cursor always advances here
        remaining = ownIter.MoveNext() ? (Interval)ownIter.Current : null;
        if (!holdCut)
        {
            cut = cutIter.MoveNext() ? (Interval)cutIter.Current : null;
        }
    }

    return result;
}
/** Compute (vocabulary - this): a new set with every element of the
 *  given vocabulary range (rather than, say, UNICODE or MAXINT) that is
 *  not in this set.
 *
 *  'this' is assumed to be either a subset of or equal to vocabulary.
 *
 *  Returns null when vocabulary lies entirely below MinElement or
 *  entirely above MaxElement.  When this set has no intervals the result
 *  is IntervalSet.Of(vocabulary).
 */
public virtual IIntSet Complement(Interval vocabulary)
{
    if (vocabulary.b < MinElement || vocabulary.a > MaxElement)
    {
        // nothing in common with this set
        return null;
    }

    int count = intervals.Count;
    if (count == 0)
    {
        return IntervalSet.Of(vocabulary);
    }

    IntervalSet result = new IntervalSet();

    // leading gap: from the start of the vocabulary up to the first interval
    Interval first = intervals[0];
    if (first.a > vocabulary.a)
    {
        result.Intervals.Add(Interval.FromBounds(vocabulary.a, first.a - 1));
    }

    // inner gaps: between each pair of neighbouring intervals
    for (int i = 1; i < count; i++)
    {
        Interval previous = intervals[i - 1];
        if (previous.b >= vocabulary.b)
        {
            // already at or past the end of the vocabulary
            break;
        }

        Interval current = intervals[i];
        if (current.a <= vocabulary.a || previous.b == current.a - 1)
        {
            // gap ends at or before the vocabulary start, or the two
            // intervals touch and leave no gap at all
            continue;
        }

        // clamp the gap to the vocabulary bounds
        var gapStart = Math.Max(vocabulary.a, previous.b + 1);
        var gapEnd = Math.Min(vocabulary.b, current.a - 1);
        result.Intervals.Add(Interval.FromBounds(gapStart, gapEnd));
    }

    // trailing gap: from the last interval up to the end of the vocabulary
    Interval last = intervals[count - 1];
    if (last.b < vocabulary.b)
    {
        result.Intervals.Add(Interval.FromBounds(last.b + 1, vocabulary.b));
    }

    return result;
}
/** Create a BitSet from an IntervalSet by viewing it as an IIntSet and
 *  forwarding to the Of overload chosen for that type.
 */
public static BitSet Of(IntervalSet set)
{
    IIntSet source = (IIntSet)set;
    return Of(source);
}
/** Complement this set against a vocabulary given by two bounds: the
 *  bounds are wrapped with IntervalSet.Of(minElement, maxElement) and the
 *  result is passed to the Complement overload that accepts it.
 */
public virtual IIntSet Complement(int minElement, int maxElement)
{
    var vocabulary = IntervalSet.Of(minElement, maxElement);
    return this.Complement(vocabulary);
}