/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Look up an already-constructed state by item Set.
 *  A null argument is answered with null without consulting the table;
 *  otherwise the result is whatever the global _all table holds under
 *  that key, cast to lalr_state.
 *  NOTE(review): older comments call the key the "kernel" item Set, while
 *  the visible constructor registers a state under the item Set it was
 *  built from -- confirm which key callers are expected to pass.
 * @param itms the item Set identifying the state we are looking for.
 * @return the matching state, or null if itms is null or nothing is found.
 */
public static lalr_state find_state(lalr_item_set itms)
{
    return (itms == null) ? null : (lalr_state)_all[itms];
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Set subtraction: take every member of another Set out of this one.
 *  The argument is first passed through not_null() (defined elsewhere;
 *  presumably it rejects null -- confirm).
 * @param other the Set whose members are to be removed from this Set.
 */
public void remove(lalr_item_set other)
{
    not_null(other);

    /* delegate to the single-item remove for each member of the other Set */
    for (IEnumerator members = other.all(); members.MoveNext(); )
    {
        lalr_item doomed = (lalr_item)members.Current;
        remove(doomed);
    }
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Test whether this Set is an (improper) subset of another, i.e. whether
 *  every one of our items is contained in the other Set.
 * @param other the Set we are being compared against.
 * @return false as soon as one of our items is missing from other,
 *         true if none is missing.
 */
public bool is_subset_of(lalr_item_set other)
{
    not_null(other);

    for (IEnumerator mine = all(); mine.MoveNext(); )
    {
        lalr_item candidate = (lalr_item)mine.Current;
        if (other.contains(candidate))
        {
            continue;
        }

        /* one missing item is enough to decide */
        return false;
    }

    /* nothing was missing */
    return true;
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Equality comparison against another item Set.
 *  Two Sets are considered equal when they have the same size and this
 *  Set is a subset of the other.
 * @param other the Set to compare with (null is never equal).
 * @return true if both Sets hold the same items, false otherwise.
 */
public bool Equals(lalr_item_set other)
{
    if (other == null)
    {
        return false;
    }
    if (other.size() != size())
    {
        return false;
    }

    /* with equal sizes, the improper subset test decides equality */
    try
    {
        return is_subset_of(other);
    }
    catch (internal_error e)
    {
        /* an equality test is not supposed to throw, so crash instead */
        e.crash();
        return false;
    }
}
/*-----------------------------------------------------------*/
/*--- Constructor(s) ----------------------------------------*/
/*-----------------------------------------------------------*/

/** Build a state from a Set of items and register it globally.
 *  The new state takes the next free index and is entered into the _all
 *  table under the item Set it was given.
 * @param itms the Set of items that makes up this state.
 * @throws internal_error if itms is null, or if find_state() already
 *         reports a state for itms.
 */
public lalr_state(lalr_item_set itms)
{
    /* a state cannot exist without an item Set ... */
    if (itms == null)
    {
        throw new internal_error(
            "Attempt to construct an LALR state from a null item Set");
    }

    /* ... and no two states may share one */
    if (find_state(itms) != null)
    {
        throw new internal_error(
            "Attempt to construct a duplicate LALR state");
    }

    /* remember the items */
    _items = itms;

    /* claim the next unused index */
    _index = next_index;
    next_index++;

    /* make the state findable through its item Set */
    _all.Add(itms, this);
}
/*-----------------------------------------------------------*/
/*--- Constructor(s) ----------------------------------------*/
/*-----------------------------------------------------------*/

/** Create a state out of a Set of items.
 *  On success the state has a fresh index (taken from next_index, which
 *  is then advanced) and is stored in the global _all table keyed by its
 *  item Set.
 * @param itms the Set of items that makes up this state.
 * @throws internal_error when itms is null or when a state for itms is
 *         already registered.
 */
public lalr_state(lalr_item_set itms)
{
    /* null item sets are rejected outright */
    if (itms == null)
    {
        throw new internal_error(
            "Attempt to construct an LALR state from a null item Set");
    }

    /* so are item sets that already have a state */
    lalr_state clash = find_state(itms);
    if (clash != null)
    {
        throw new internal_error(
            "Attempt to construct a duplicate LALR state");
    }

    /* hand out a unique index */
    _index = next_index;
    next_index += 1;

    /* keep the items and publish the state under them */
    _items = itms;
    _all.Add(_items, this);
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Test whether this Set is an (improper) superset of another.
 *  Implemented by asking the other Set whether it is a subset of us.
 * @param other the Set we are being compared against.
 * @return the result of other.is_subset_of(this).
 */
public bool is_superset_of(lalr_item_set other)
{
    not_null(other);

    bool contained = other.is_subset_of(this);
    return contained;
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Find and return the state registered under a given item Set.
 *  The lookup goes through the global _all table; a null argument yields
 *  null immediately.
 *  NOTE(review): older comments describe the key as the "kernel" item
 *  Set, but the visible constructor files a state under the item Set it
 *  was constructed with -- confirm the intended key.
 * @param itms the item Set identifying the state we are looking for.
 * @return the state found, or null if there is none (or itms is null).
 */
public static lalr_state find_state(lalr_item_set itms)
{
    /* no key, no state */
    if (itms == null)
    {
        return null;
    }

    object registered = _all[itms];
    return (lalr_state)registered;
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Build an LALR viable prefix recognition machine from a start production.
 *  A start state is made from a single item (dot at the beginning of the
 *  start production, EOF as lookahead) and closed.  Then, for every state
 *  taken off the work stack, one transition is created for each symbol
 *  that appears immediately after the dot in some item of that state.
 *  For example, given the items: <pre>
 *    [A ::= a b * X c, {d,e}]
 *    [B ::= a b * X Y, {a,b}]
 *  </pre>
 *  a transition under X is made to a state whose "kernel" is obtained by
 *  moving the dot past the X: <pre>
 *    [A ::= a b X * c, {d,e}]
 *    [B ::= a b X * Y, {a,b}]
 *  </pre>
 *  If _all_kernels has no state for that kernel, the kernel is closed
 *  (see lalr_item_set.compute_closure()), a new state is built from it and
 *  pushed on the work stack.  Otherwise the propagate links of the items
 *  that were shifted are redirected to the matching items of the existing
 *  state.  When the work stack is empty, propagate_all_lookaheads() is
 *  called to spread the lookahead sets over the finished machine. <p>
 *
 * @param start_prod the start production of the grammar.
 * @return the start state of the machine.
 * @throws internal_error if start_prod is null.
 */
public static lalr_state build_machine(production start_prod)
{
    /* sanity check */
    if (start_prod == null)
    {
        throw new internal_error(
            "Attempt to build viable prefix recognizer using a null production");
    }

    /* seed item: dot at front of the start production, EOF lookahead */
    lalr_item_set start_items = new lalr_item_set();
    lalr_item seed = new lalr_item(start_prod);
    seed.lookahead().add(terminal.EOF);
    start_items.add(seed);

    /* copy the items before closure: that copy is the kernel (table key) */
    lalr_item_set start_kernel = new lalr_item_set(start_items);

    /* close the item Set and turn it into the start state */
    start_items.compute_closure();
    lalr_state start_state = new lalr_state(start_items);

    /* the start state is the first piece of work */
    Stack pending = new Stack();
    pending.Push(start_state);
    _all_kernels.Add(start_kernel, start_state);

    /* keep going until no state is left to extend */
    while (pending.Count > 0)
    {
        lalr_state st = (lalr_state)pending.Pop();

        /* collect every symbol that sits immediately after a dot */
        symbol_set outgoing = new symbol_set();
        for (IEnumerator scan = st.items().all(); scan.MoveNext(); )
        {
            lalr_item scanned = (lalr_item)scan.Current;
            symbol after_dot = scanned.symbol_after_dot();
            if (after_dot != null)
            {
                outgoing.add(after_dot);
            }
        }

        /* one transition per collected symbol */
        for (IEnumerator syms = outgoing.all(); syms.MoveNext(); )
        {
            symbol sym = (symbol)syms.Current;

            /* items of st that get shifted (they own the propagate links) */
            lalr_item_set linked_items = new lalr_item_set();

            /* shifted versions of those items: kernel of the target state */
            lalr_item_set new_items = new lalr_item_set();

            for (IEnumerator src = st.items().all(); src.MoveNext(); )
            {
                lalr_item itm = (lalr_item)src.Current;
                symbol sym2 = itm.symbol_after_dot();
                if (sym.Equals(sym2))
                {
                    new_items.add(itm.shift());
                    linked_items.add(itm);
                }
            }

            /* an independent copy serves as lookup key */
            lalr_item_set target_kernel = new lalr_item_set(new_items);

            /* have we seen this kernel already? */
            lalr_state new_st = (lalr_state)_all_kernels[target_kernel];
            if (new_st != null)
            {
                /* yes: point the propagate links at the existing state's items */
                for (IEnumerator fix = linked_items.all(); fix.MoveNext(); )
                {
                    lalr_item fix_itm = (lalr_item)fix.Current;

                    /* look at each propagate link out of that item */
                    for (int l = 0; l < fix_itm.propagate_items().Count; l++)
                    {
                        /* item currently linked to */
                        lalr_item new_itm =
                            (lalr_item)fix_itm.propagate_items().ToArray()[l];

                        /* its counterpart in the existing state, if any */
                        lalr_item existing = new_st.items().find(new_itm);
                        if (existing != null)
                        {
                            fix_itm.set_propagate_item(existing, l);
                        }
                    }
                }
            }
            else
            {
                /* no: close the kernel, build the state, queue and record it */
                new_items.compute_closure();
                new_st = new lalr_state(new_items);
                pending.Push(new_st);
                _all_kernels.Add(target_kernel, new_st);
            }

            /* either way the current state gets a transition to it */
            st.add_transition(sym, new_st);
        }
    }

    /* all states exist now; spread complete lookahead sets through them */
    propagate_all_lookaheads();

    return start_state;
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Copy constructor: build a new Set from the contents of another.
 *  The backing table is obtained with Clone(), so the new Set has its own
 *  table.  NOTE(review): if _all is a System.Collections.Hashtable the
 *  clone is shallow, i.e. both Sets refer to the same item objects --
 *  confirm against the field declaration.
 * @param other the Set we should copy from.
 */
public lalr_item_set(lalr_item_set other)
{
    not_null(other);

    Hashtable copied = (Hashtable)other._all.Clone();
    _all = copied;
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Is this Set an (improper) superset of another?
 *  The question is turned around: we are a superset of other exactly
 *  when other is a subset of us.
 * @param other the other Set in question.
 * @return true if other.is_subset_of(this) holds, false otherwise.
 */
public bool is_superset_of(lalr_item_set other)
{
    not_null(other);

    /* let the other Set do the element-by-element check */
    bool covers_other = other.is_subset_of(this);
    return covers_other;
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Compute the closure of this Set using the LALR closure rules.
 *  For every item of the form: <pre>
 *    [L ::= a *N alpha, l]
 *  </pre>
 *  (N a non terminal, alpha a string of symbols) the Set is made to
 *  contain an item of the form: <pre>
 *    [N ::= *beta, first(alpha l)]
 *  </pre>
 *  for each production of N.  New items go through add(), which hands
 *  back the item actually stored in the Set; only when that is the item
 *  we just created is it scheduled for further closure.  When the
 *  lookahead of the originating item is visible, a propagate link to the
 *  stored item is recorded.  The cached hashcode flag is cleared first.
 *  Nullability and first sets are assumed to have been computed for all
 *  productions before this is called.
 *  NOTE(review): the loop relies on get_one() taking its result out of
 *  the work Set -- confirm in lalr_item_set.get_one().
 */
public void compute_closure()
{
    /* the contents are about to change: cached hashcode is stale */
    is_cached = false;

    /* work Set, initially every current element */
    lalr_item_set pending = new lalr_item_set(this);

    while (pending.size() > 0)
    {
        lalr_item current = pending.get_one();

        /* only items with the dot in front of a non terminal expand */
        non_terminal next_nt = current.dot_before_nt();
        if (next_nt == null)
        {
            continue;
        }

        /* lookahead for the new items, based on what follows the dot */
        terminal_set derived = current.calc_lookahead(current.lookahead());

        /* must our own lookahead be propagated to the new items? */
        bool propagate = current.lookahead_visible();

        /* one new item per production of the non terminal */
        for (IEnumerator rules = next_nt.productions(); rules.MoveNext(); )
        {
            production rule = (production)rules.Current;

            /* dot at the start, private copy of the lookahead Set */
            lalr_item fresh = new lalr_item(rule, new terminal_set(derived));

            /* add/merge; the result is the item that lives in the Set */
            lalr_item stored = add(fresh);

            if (propagate)
            {
                current.add_propagate(stored);
            }

            /* a genuinely new item may need closing itself */
            if (stored == fresh)
            {
                pending.add(fresh);
            }
        }
    }
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Remove (Set subtract) a complete Set from this one.
 *  Each member of the other Set is handed to the single-item remove().
 *  The argument is checked with not_null() first (defined elsewhere;
 *  presumably it rejects null -- confirm).
 * @param other the Set to remove.
 */
public void remove(lalr_item_set other)
{
    not_null(other);

    /* visit the other Set's members and drop them individually */
    IEnumerator cursor = other.all();
    while (cursor.MoveNext())
    {
        lalr_item victim = (lalr_item)cursor.Current;
        remove(victim);
    }
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Close this Set under the LALR closure rules.
 *  Whenever the Set holds an item: <pre>
 *    [L ::= a *N alpha, l]
 *  </pre>
 *  with N a non terminal and alpha a string of symbols, it must also
 *  hold, for each production of N, an item: <pre>
 *    [N ::= *beta, first(alpha l)]
 *  </pre>
 *  Candidate items are passed to add(), whose return value is the item
 *  that ends up in the Set.  If that is the candidate itself the item is
 *  new and is queued for closure as well.  A propagate link from the
 *  originating item is added when its lookahead is visible.  The cached
 *  hashcode is invalidated up front.  Nullability and first sets must
 *  already have been computed for all productions.
 *  NOTE(review): termination depends on get_one() removing the item it
 *  returns from the work Set -- confirm.
 */
public void compute_closure()
{
    /* invalidate cached hashcode */
    is_cached = false;

    /* everything currently in the Set still has to be looked at */
    lalr_item_set worklist = new lalr_item_set(this);

    while (worklist.size() > 0)
    {
        /* pick the next item to examine */
        lalr_item source = worklist.get_one();

        /* nothing to expand unless the dot precedes a non terminal */
        non_terminal expanding = source.dot_before_nt();
        if (expanding == null)
        {
            continue;
        }

        /* lookahead Set for the items about to be created */
        terminal_set seed_lookahead = source.calc_lookahead(source.lookahead());

        /* does the source item's lookahead have to flow into them? */
        bool link_needed = source.lookahead_visible();

        IEnumerator alternatives = expanding.productions();
        while (alternatives.MoveNext())
        {
            production alternative = (production)alternatives.Current;

            /* item with dot at the start and its own lookahead copy */
            lalr_item candidate =
                new lalr_item(alternative, new terminal_set(seed_lookahead));

            /* add or merge into this Set */
            lalr_item resident = add(candidate);

            if (link_needed)
            {
                source.add_propagate(resident);
            }

            /* the candidate itself was stored: it is new, so close it too */
            if (resident == candidate)
            {
                worklist.add(candidate);
            }
        }
    }
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Construct the LALR viable prefix recognizer for a grammar.
 *  The machine grows from a start state built out of one item: the start
 *  production with the dot at the beginning and EOF as lookahead.  Each
 *  state taken from the work stack is extended with one transition per
 *  symbol found immediately after the dot in any of its items.  With the
 *  items: <pre>
 *    [A ::= a b * X c, {d,e}]
 *    [B ::= a b * X Y, {a,b}]
 *  </pre>
 *  the transition under X leads to the state whose kernel is: <pre>
 *    [A ::= a b X * c, {d,e}]
 *    [B ::= a b X * Y, {a,b}]
 *  </pre>
 *  States are identified by kernel through the _all_kernels table.  An
 *  unknown kernel is closed (lalr_item_set.compute_closure()), wrapped in
 *  a new state and pushed on the work stack; for a known kernel the
 *  propagate links of the shifted items are re-targeted at the items of
 *  the existing state.  Once no work remains,
 *  propagate_all_lookaheads() distributes the lookahead sets. <p>
 *
 * @param start_prod the start production of the grammar.
 * @return the start state of the constructed machine.
 * @throws internal_error if start_prod is null.
 */
public static lalr_state build_machine(production start_prod)
{
    /* refuse to work without a start production */
    if (start_prod == null)
    {
        throw new internal_error(
            "Attempt to build viable prefix recognizer using a null production");
    }

    /* item with dot at front of the start production and EOF lookahead */
    lalr_item_set start_items = new lalr_item_set();
    lalr_item first_item = new lalr_item(start_prod);
    first_item.lookahead().add(terminal.EOF);
    start_items.add(first_item);

    /* the kernel is a copy taken before the closure is computed */
    lalr_item_set first_kernel = new lalr_item_set(start_items);
    start_items.compute_closure();

    /* start state: built from the closed Set, queued, recorded by kernel */
    lalr_state start_state = new lalr_state(start_items);
    Stack todo = new Stack();
    todo.Push(start_state);
    _all_kernels.Add(first_kernel, start_state);

    while (todo.Count > 0)
    {
        /* next state to extend */
        lalr_state from_state = (lalr_state)todo.Pop();

        /* symbols that appear directly after a dot in this state */
        symbol_set outgoing = new symbol_set();
        IEnumerator each_item = from_state.items().all();
        while (each_item.MoveNext())
        {
            lalr_item probe = (lalr_item)each_item.Current;
            symbol next_sym = probe.symbol_after_dot();
            if (next_sym != null)
            {
                outgoing.add(next_sym);
            }
        }

        /* create the transition for each of those symbols */
        IEnumerator each_symbol = outgoing.all();
        while (each_symbol.MoveNext())
        {
            symbol on_sym = (symbol)each_symbol.Current;

            /* source items whose propagate links may need fixing later */
            lalr_item_set linked_items = new lalr_item_set();

            /* their shifted versions, i.e. the kernel of the target */
            lalr_item_set shifted_items = new lalr_item_set();

            IEnumerator each_source = from_state.items().all();
            while (each_source.MoveNext())
            {
                lalr_item source_item = (lalr_item)each_source.Current;
                symbol source_sym = source_item.symbol_after_dot();
                if (on_sym.Equals(source_sym))
                {
                    shifted_items.add(source_item.shift());
                    linked_items.add(source_item);
                }
            }

            /* separate copy so closing shifted_items leaves the key intact */
            lalr_item_set key = new lalr_item_set(shifted_items);

            lalr_state to_state = (lalr_state)_all_kernels[key];
            if (to_state == null)
            {
                /* unknown kernel: close it and make a brand new state */
                shifted_items.compute_closure();
                to_state = new lalr_state(shifted_items);

                /* the new state needs extending too, and must be findable */
                todo.Push(to_state);
                _all_kernels.Add(key, to_state);
            }
            else
            {
                /* known kernel: relink propagation to the existing items */
                IEnumerator each_linked = linked_items.all();
                while (each_linked.MoveNext())
                {
                    lalr_item linked = (lalr_item)each_linked.Current;

                    /* go through the propagate links of that item by index */
                    for (int slot = 0; slot < linked.propagate_items().Count; slot++)
                    {
                        /* item the link currently refers to */
                        lalr_item target =
                            (lalr_item)linked.propagate_items().ToArray()[slot];

                        /* equivalent item inside the existing state */
                        lalr_item counterpart = to_state.items().find(target);

                        /* redirect the link when there is one */
                        if (counterpart != null)
                        {
                            linked.set_propagate_item(counterpart, slot);
                        }
                    }
                }
            }

            /* connect the two states under the symbol */
            from_state.add_transition(on_sym, to_state);
        }
    }

    /* the set of states is complete; now fill in the lookaheads */
    propagate_all_lookaheads();

    return start_state;
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Is this Set an (improper) subset of another?
 *  Our items are checked against the other Set one by one; the walk
 *  stops at the first item the other Set does not contain.
 * @param other the other Set in question.
 * @return true if every item of this Set is contained in other.
 */
public bool is_subset_of(lalr_item_set other)
{
    not_null(other);

    /* stays true for as long as no item has been found missing */
    bool all_present = true;

    IEnumerator walker = all();
    while (all_present && walker.MoveNext())
    {
        all_present = other.contains((lalr_item)walker.Current);
    }

    return all_present;
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/

/** Equality comparison with another item Set.
 *  A null Set or a Set of a different size is never equal; otherwise the
 *  Sets are equal when this one is a subset of the other.
 * @param other the Set to compare with.
 * @return true when both Sets contain the same items.
 */
public bool Equals(lalr_item_set other)
{
    bool same;

    if (other == null || other.size() != size())
    {
        same = false;
    }
    else
    {
        /* sizes agree, so the improper subset test settles it */
        try
        {
            same = is_subset_of(other);
        }
        catch (internal_error e)
        {
            /* errors must not escape an equality test, so crash here */
            e.crash();
            same = false;
        }
    }

    return same;
}