/********************************************************
 * Function: CSet
 * Description: Default constructor. Starts with an empty,
 * non-complemented backing bit set.
 *******************************************************/
public CSet()
{
    this.m_set = new SparseBitSet();
    this.m_complement = false;
}
/**
 * Compares two SparseBitSets for equality.
 * Both sets are walked in parallel over their (offset, word) entries.
 * An entry whose offset appears in only one of the sets must hold an
 * all-zero word; entries sharing an offset must hold identical words.
 * @return true if the two sets hold the same bits; false otherwise.
 */
public static bool equals(SparseBitSet a, SparseBitSet b)
{
    int ai = 0;
    int bi = 0;

    while (ai < a._size || bi < b._size)
    {
        bool aLeft = ai < a._size;
        bool bLeft = bi < b._size;

        if (aLeft && (!bLeft || a.offs[ai] < b.offs[bi]))
        {
            // Offset present only in 'a': its word must be empty.
            if (a.bits[ai] != 0)
            {
                return false;
            }
            ai++;
        }
        else if (bLeft && (!aLeft || a.offs[ai] > b.offs[bi]))
        {
            // Offset present only in 'b': its word must be empty.
            if (b.bits[bi] != 0)
            {
                return false;
            }
            bi++;
        }
        else
        {
            // Same offset on both sides: the words must match exactly.
            if (a.bits[ai] != b.bits[bi])
            {
                return false;
            }
            ai++;
            bi++;
        }
    }

    return true;
}
/********************************************************
 * Function: mimic
 * Description: Turns this NFA state into a copy of the
 * given one. The edge, successor links, accept action
 * and anchor are copied by assignment; the character
 * set and the state bit set are duplicated so the two
 * NFA states do not share them. m_label is not touched.
 *******************************************************/
public void mimic(CNfa nfa)
{
    m_edge = nfa.m_edge;

    if (null == nfa.m_set)
    {
        m_set = null;
    }
    else
    {
        // Reuse our existing CSet when we already have one.
        if (null == m_set)
        {
            m_set = new CSet();
        }
        m_set.mimic(nfa.m_set);
    }

    m_next = nfa.m_next;
    m_next2 = nfa.m_next2;
    m_accept = nfa.m_accept;
    m_anchor = nfa.m_anchor;

    m_states = (null == nfa.m_states)
        ? null
        : (SparseBitSet)nfa.m_states.Clone();
}
/**
 * Applies a word-wise binary operation to two sets and stores the
 * result in 'a'. Both entry lists are merged in ascending offset
 * order; an offset missing from one side is treated as a zero word.
 * Result words that come out as zero are not stored.
 * @param a left operand; receives the result
 * @param b right operand; its fields are not assigned here
 * @param op the operation applied to each pair of words
 */
private static void binop(SparseBitSet a, SparseBitSet b, BinOp op)
{
    int needed = a._size + b._size;
    long[] outBits;
    int[] outOffs;
    int aStart;
    int aEnd;

    if (a.bits.Length >= needed)
    {
        // Enough room already: write the result into a's own arrays.
        // The current entries are first moved to the tail of the arrays
        // so the output, written from index 0, never catches up with
        // entries that have not been read yet.
        outBits = a.bits;
        outOffs = a.offs;
        aStart = a.bits.Length - a._size;
        aEnd = a.bits.Length;
        Array.Copy(a.bits, 0, a.bits, aStart, a._size);
        Array.Copy(a.offs, 0, a.offs, aStart, a._size);
    }
    else
    {
        // Not enough room: allocate fresh working arrays.
        outBits = new long[needed];
        outOffs = new int[needed];
        aStart = 0;
        aEnd = a._size;
    }

    // Merge both entry lists.
    int written = 0;
    int ai = aStart;
    int bi = 0;
    while (ai < aEnd || bi < b._size)
    {
        long word;
        int offset;
        bool aLeft = ai < aEnd;
        bool bLeft = bi < b._size;

        if (aLeft && (!bLeft || a.offs[ai] < b.offs[bi]))
        {
            // Offset only in 'a'.
            word = op.op(a.bits[ai], 0);
            offset = a.offs[ai];
            ai++;
        }
        else if (bLeft && (!aLeft || a.offs[ai] > b.offs[bi]))
        {
            // Offset only in 'b'.
            word = op.op(0, b.bits[bi]);
            offset = b.offs[bi];
            bi++;
        }
        else
        {
            // Offset in both: combine the two words.
            word = op.op(a.bits[ai], b.bits[bi]);
            offset = a.offs[ai];
            ai++;
            bi++;
        }

        if (word != 0)
        {
            outBits[written] = word;
            outOffs[written] = offset;
            written++;
        }
    }

    a.bits = outBits;
    a.offs = outOffs;
    a._size = written;
}
/********************************************************
 * Function: mimic
 * Description: Makes this set a copy of the given one:
 * takes over its complement flag and stores a clone of
 * its bit set, so the two CSets do not share storage.
 *******************************************************/
public void mimic(CSet Set)
{
    this.m_complement = Set.m_complement;

    SparseBitSet duplicate = (SparseBitSet)Set.m_set.Clone();
    this.m_set = duplicate;
}
/**
 * Self-test. Exercises Set/Get/clear/clearAll, Clone, Equals and the
 * and/or/xor operations on randomly chosen bit indices, and prints
 * "Success." once every assertion has been evaluated.
 */
public static void _Main(string[] args)
{
    const int ITER = 500;
    const int RANGE = 65536;

    // Basic behaviour on a fresh set.
    SparseBitSet a = new SparseBitSet();
    CUtility.ASSERT(!a.Get(0) && !a.Get(1));
    CUtility.ASSERT(!a.Get(123329));
    a.Set(0);
    CUtility.ASSERT(a.Get(0) && !a.Get(1));
    a.Set(1);
    CUtility.ASSERT(a.Get(0) && a.Get(1));
    a.clearAll();
    CUtility.ASSERT(!a.Get(0) && !a.Get(1));

    // Insert ITER random even indices; their odd neighbours must stay clear.
    Random rng = new Random();
    Vector inserted = new Vector();
    int round = 0;
    while (round < ITER)
    {
        int even = ((rng.Next() >> 1) % RANGE) << 1;
        a.Set(even);
        inserted.addElement(even);

        // Everything inserted so far must still be present.
        CUtility.ASSERT(a.Get(even) && !a.Get(even + 1) && !a.Get(even - 1));
        for (int idx = 0; idx < inserted.size(); idx++)
        {
            CUtility.ASSERT(a.Get((int)inserted.elementAt(idx)));
        }
        round++;
    }

    // A clone compares equal in both directions.
    SparseBitSet b = (SparseBitSet)a.Clone();
    CUtility.ASSERT(a.Equals(b) && b.Equals(a));

    // Clear half of the recorded indices from the clone.
    for (round = 0; round < ITER / 2; round++)
    {
        int pick = (rng.Next() >> 1) % inserted.size();
        int victim = (int)inserted.elementAt(pick);
        b.clear(victim);
        inserted.removeElementAt(pick);

        CUtility.ASSERT(!b.Get(victim));
    }
    CUtility.ASSERT(!a.Equals(b));

    // Set algebra between a, its pruned clone b, and two working copies.
    SparseBitSet c = (SparseBitSet)a.Clone();
    SparseBitSet d = (SparseBitSet)a.Clone();

    c.and(a);
    CUtility.ASSERT(c.Equals(a) && a.Equals(c));

    c.xor(a);
    CUtility.ASSERT(!c.Equals(a) && c.size() == 0);

    d.or(b);
    CUtility.ASSERT(d.Equals(a) && !b.Equals(d));

    d.and(b);
    CUtility.ASSERT(!d.Equals(a) && b.Equals(d));

    d.xor(a);
    CUtility.ASSERT(!d.Equals(a) && !b.Equals(d));

    c.or(d);
    c.or(b);
    CUtility.ASSERT(c.Equals(a) && a.Equals(c));

    c = (SparseBitSet)d.Clone();
    c.and(b);
    CUtility.ASSERT(c.size() == 0);

    System.Console.WriteLine("Success.");
}
/**
 * Clones the SparseBitSet.
 * The copy gets its own duplicates of the bits and offs arrays, so
 * later changes to one set do not show up in the other.
 * @return the new SparseBitSet, typed as object.
 */
public object Clone()
{
    SparseBitSet copy = (SparseBitSet)MemberwiseClone();

    long[] bitsCopy = (long[])this.bits.Clone();
    copy.bits = bitsCopy;

    int[] offsCopy = (int[])this.offs.Clone();
    copy.offs = offsCopy;

    return copy;
}
public int m_accept_index; /* CNfa index corresponding to accepting actions. */

/***************************************************************
 * Function: CBunch
 * Description: Constructor. Starts with no NFA set, no NFA
 * bit set and no accept action; the anchor is CSpec.NONE
 * and the accept index is -1.
 **************************************************************/
public CBunch()
{
    this.m_accept_index = -1;
    this.m_anchor = CSpec.NONE;
    this.m_accept = null;
    this.m_nfa_bit = null;
    this.m_nfa_set = null;
}
/********************************************************
 * Function: CNfa
 * Description: Constructor. Creates a state with an
 * EMPTY edge, no character set, no successors, no
 * accept action, anchor CSpec.NONE, label NO_LABEL and
 * no state bit set.
 *******************************************************/
public CNfa()
{
    this.m_label = NO_LABEL;
    this.m_edge = EMPTY;
    this.m_anchor = CSpec.NONE;

    this.m_set = null;
    this.m_next = null;
    this.m_next2 = null;
    this.m_accept = null;
    this.m_states = null;
}
/***************************************************************
 * Function: CDfa
 * Description: Constructor. Stores the given label and resets
 * every other field: group 0, unmarked, no accept action,
 * anchor CSpec.NONE, no NFA set and no NFA bit set.
 **************************************************************/
public CDfa(int label)
{
    this.m_label = label;

    this.m_group = 0;
    this.m_mark = false;
    this.m_anchor = CSpec.NONE;

    this.m_accept = null;
    this.m_nfa_set = null;
    this.m_nfa_bit = null;
}
/***************************************************************
 * Function: processStates
 * Description: Appends the given NFA to m_spec.m_state_rules[i]
 * for every index i, below the number of entries in
 * m_spec.m_states, whose bit is set in 'states'.
 **************************************************************/
private void processStates(SparseBitSet states, CNfa current)
{
    // The upper bound is read once, before the loop starts.
    int stateCount = m_spec.m_states.Count;

    for (int state = 0; state < stateCount; ++state)
    {
        if (!states.Get(state))
        {
            continue;
        }
        m_spec.m_state_rules[state].addElement(current);
    }
}
/**
 * Creates an mEnum over the given bit set.
 * The set is stored by reference; no copy is made.
 */
public mEnum(SparseBitSet parent)
{
    this.mSet = parent;
}
/***************************************************************
 * Function: reduce
 * Description: Reduces the transition table held in m_spec by
 * merging equivalent columns and then equivalent rows.
 * Equivalence is decided by col_equiv / row_equiv.
 * Side effects on m_spec, in order:
 *   - m_accept_vector / m_anchor_array receive the accept and
 *     anchor entry of every row, and each row's m_accept is
 *     then set to null;
 *   - m_col_map maps every original column to a reduced column
 *     index, and m_dtrans_ncols becomes the reduced count;
 *   - m_row_map maps every original row to a reduced row
 *     index, and m_dtrans_vector is resized to the reduced count.
 **************************************************************/
private void reduce ( )
{
    int i;
    int j;
    int k;
    int nrows;
    int reduced_ncols;
    int reduced_nrows;
    SparseBitSet Set;
    CDTrans dtrans;
    int size;

    /* Marks the representative (first-seen) column or row of each
       equivalence class; it is emptied again by each reduce pass. */
    Set = new SparseBitSet();

    /* Save accept nodes and anchor entries. */
    size = m_spec.m_dtrans_vector.size();
    m_spec.m_anchor_array = new int[size];
    m_spec.m_accept_vector = new Vector();
    for (i = 0; i < size; ++i)
    {
        dtrans = (CDTrans)m_spec.m_dtrans_vector.elementAt(i);
        m_spec.m_accept_vector.addElement(dtrans.m_accept);
        m_spec.m_anchor_array[i] = dtrans.m_anchor;
        dtrans.m_accept = null;
    }

    /* Allocate column map. -1 means "not yet assigned to a class". */
    m_spec.m_col_map = new int[m_spec.m_dtrans_ncols];
    for (i = 0; i < m_spec.m_dtrans_ncols; ++i)
    {
        m_spec.m_col_map[i] = -1;
    }

    /* Process columns for reduction. Each pass creates one reduced
       column; the loop is left from inside once no unassigned
       column remains. */
    for (reduced_ncols = 0; ; ++reduced_ncols)
    {
#if (DEBUG)
        {
            for (i = 0; i < reduced_ncols; ++i)
            {
                CUtility.ASSERT(-1 != m_spec.m_col_map[i]);
            }
        }
#endif

        /* Find the first column that is still unassigned. */
        for (i = reduced_ncols; i < m_spec.m_dtrans_ncols; ++i)
        {
            if (-1 == m_spec.m_col_map[i])
            {
                break;
            }
        }

        /* Every column has been assigned: done. */
        if (i >= m_spec.m_dtrans_ncols)
        {
            break;
        }

#if (DEBUG)
        {
            CUtility.ASSERT(false == Set.Get(i));
            CUtility.ASSERT(-1 == m_spec.m_col_map[i]);
        }
#endif

        /* Column i becomes the representative of a new class. */
        Set.Set(i);

        m_spec.m_col_map[i] = reduced_ncols;

        /* UNDONE: Optimize by doing all comparisons in one batch. */
        /* Put every later unassigned column that col_equiv reports as
           equivalent to i into the same class. */
        for (j = i + 1; j < m_spec.m_dtrans_ncols; ++j)
        {
            if (-1 == m_spec.m_col_map[j] && col_equiv(i, j))
            {
                m_spec.m_col_map[j] = reduced_ncols;
            }
        }
    }

    /* Reduce columns. k counts the representatives visited. */
    k = 0;
    for (i = 0; i < m_spec.m_dtrans_ncols; ++i)
    {
        if (Set.Get(i))
        {
            ++k;

            /* Unmark, so Set is empty again when the row pass starts. */
            Set.clear(i);

            j = m_spec.m_col_map[i];

#if (DEBUG)
            {
                CUtility.ASSERT(j <= i);
            }
#endif

            /* The representative already sits at its reduced index. */
            if (j == i)
            {
                continue;
            }

            /* NOTE(review): presumably moves column i into slot j;
               confirm against col_copy. */
            col_copy(j, i);
        }
    }
    m_spec.m_dtrans_ncols = reduced_ncols;
    /* truncate m_dtrans at proper length (freeing extra) */
    trunc_col();

#if (DEBUG)
    {
        CUtility.ASSERT(k == reduced_ncols);
    }
#endif

    /* Allocate row map. -1 means "not yet assigned to a class". */
    nrows = m_spec.m_dtrans_vector.Count;
    m_spec.m_row_map = new int[nrows];
    for (i = 0; i < nrows; ++i)
    {
        m_spec.m_row_map[i] = -1;
    }

    /* Process rows to reduce. Same scheme as for the columns above. */
    for (reduced_nrows = 0; ; ++reduced_nrows)
    {
#if (DEBUG)
        {
            for (i = 0; i < reduced_nrows; ++i)
            {
                CUtility.ASSERT(-1 != m_spec.m_row_map[i]);
            }
        }
#endif

        /* Find the first row that is still unassigned. */
        for (i = reduced_nrows; i < nrows; ++i)
        {
            if (-1 == m_spec.m_row_map[i])
            {
                break;
            }
        }

        /* Every row has been assigned: done. */
        if (i >= nrows)
        {
            break;
        }

#if (DEBUG)
        {
            CUtility.ASSERT(false == Set.Get(i));
            CUtility.ASSERT(-1 == m_spec.m_row_map[i]);
        }
#endif

        /* Row i becomes the representative of a new class. */
        Set.Set(i);

        m_spec.m_row_map[i] = reduced_nrows;

        /* UNDONE: Optimize by doing all comparisons in one batch. */
        /* Put every later unassigned row that row_equiv reports as
           equivalent to i into the same class. */
        for (j = i + 1; j < nrows; ++j)
        {
            if (-1 == m_spec.m_row_map[j] && row_equiv(i, j))
            {
                m_spec.m_row_map[j] = reduced_nrows;
            }
        }
    }

    /* Reduce rows. k counts the representatives visited. */
    k = 0;
    for (i = 0; i < nrows; ++i)
    {
        if (Set.Get(i))
        {
            ++k;

            Set.clear(i);

            j = m_spec.m_row_map[i];

#if (DEBUG)
            {
                CUtility.ASSERT(j <= i);
            }
#endif

            /* The representative already sits at its reduced index. */
            if (j == i)
            {
                continue;
            }

            /* NOTE(review): presumably moves row i into slot j;
               confirm against row_copy. */
            row_copy(j, i);
        }
    }

    /* Resize the row vector to the reduced row count. */
    m_spec.m_dtrans_vector.setSize(reduced_nrows);

#if (DEBUG)
    {
        /*System.Console.WriteLine("k = " + k + "\nreduced_nrows = " + reduced_nrows + "");*/
        CUtility.ASSERT(k == reduced_nrows);
    }
#endif
}
/**
 * Creates an empty Set with the same size as the given Set.
 * Only the capacity is taken from the argument: the new arrays are
 * allocated with _set._size slots and no bits are copied.
 */
public SparseBitSet(SparseBitSet _set)
{
    int capacity = _set._size;

    this.bits = new long[capacity];
    this.offs = new int[capacity];
    this._size = 0;
}
/***************************************************************
 * Function: machine
 * Description: Recursive descent regular expression parser.
 * Parses rules until END_OF_INPUT and chains them: each chain
 * node's m_next holds the NFA returned by rule() and its
 * m_next2 holds the next chain node. Every rule is registered,
 * through processStates, under the states reported by
 * m_lexGen.getStates() before that rule was parsed. A final
 * pseudo-rule matching BOL and EOF with a do-nothing accept
 * action is appended and registered for every state.
 * Returns the head of the chain.
 **************************************************************/
private CNfa machine()
{
#if (DESCENT_DEBUG)
    {
        CUtility.enter("machine", m_spec.m_lexeme, m_spec.m_current_token);
    }
#endif

    CNfa head = CAlloc.newCNfa(m_spec);
    CNfa tail = head;

    SparseBitSet ruleStates = m_lexGen.getStates();

    /* Begin: Added for states. */
    m_spec.m_current_token = CLexGen.EOS;
    m_lexGen.advance();
    /* End: Added for states. */

    /* CSA fix: only parse a first rule if there is any input left. */
    if (CLexGen.END_OF_INPUT != m_spec.m_current_token)
    {
        tail.m_next = rule();
        processStates(ruleStates, tail.m_next);
    }

    while (CLexGen.END_OF_INPUT != m_spec.m_current_token)
    {
        /* Make state changes HERE. */
        ruleStates = m_lexGen.getStates();

        /* Begin: Added for states. */
        m_lexGen.advance();
        if (CLexGen.END_OF_INPUT == m_spec.m_current_token)
        {
            break;
        }
        /* End: Added for states. */

        CNfa link = CAlloc.newCNfa(m_spec);
        tail.m_next2 = link;
        tail = link;

        tail.m_next = rule();
        processStates(ruleStates, tail.m_next);
    }

    /* CSA: add pseudo-rules for BOL and EOF. */
    SparseBitSet everyState = new SparseBitSet();
    for (int s = 0; s < m_spec.m_states.Count; ++s)
    {
        everyState.Set(s);
    }

    CNfa lastLink = CAlloc.newCNfa(m_spec);
    tail.m_next2 = lastLink;
    tail = lastLink;

    CNfa pseudoStart = CAlloc.newCNfa(m_spec);
    tail.m_next = pseudoStart;
    pseudoStart.m_edge = CNfa.CCL;

    CNfa pseudoEnd = CAlloc.newCNfa(m_spec);
    pseudoStart.m_next = pseudoEnd;

    pseudoStart.m_set = new CSet();
    pseudoStart.m_set.add(m_spec.BOL);
    pseudoStart.m_set.add(m_spec.EOF);

    /* do-nothing accept rule */
    pseudoEnd.m_accept = new CAccept(new char[0], 0, m_input.m_line_number + 1);

    processStates(everyState, tail.m_next);
    /* CSA: done. */

#if (DESCENT_DEBUG)
    {
        CUtility.leave("machine", m_spec.m_lexeme, m_spec.m_current_token);
    }
#endif

    return head;
}
/**
 * Logically XORs this bit Set with the specified Set of bits.
 * The result replaces the contents of this Set.
 * @param Set the bit Set to be XORed with
 */
public void xor(SparseBitSet Set)
{
    BinOp operation = XOR;
    binop(this, Set, operation);
}
/**
 * Logically ANDs this bit Set with the specified Set of bits.
 * The result replaces the contents of this Set.
 * @param Set the bit Set to be ANDed with
 */
public void and(SparseBitSet Set)
{
    BinOp operation = AND;
    binop(this, Set, operation);
}
/***************************************************************
 * Function: move
 * Description: Collects into 'bunch' the m_next successor of
 * every NFA state in nfa_set whose edge is exactly b, or
 * whose edge is a character class (CCL) containing b. The
 * successors' labels are recorded in bunch.m_nfa_bit and the
 * collected set is passed to sortStates. If no state matches,
 * bunch.m_nfa_set and bunch.m_nfa_bit are left null. The
 * nfa_bit argument is not used.
 **************************************************************/
void move(Vector nfa_set, SparseBitSet nfa_bit, int b, CBunch bunch)
{
    bunch.m_nfa_set = null;
    bunch.m_nfa_bit = null;

    int total = nfa_set.Count;
    for (int pos = 0; pos < total; ++pos)
    {
        CNfa candidate = (CNfa)nfa_set.elementAt(pos);

        bool takesInput = (b == candidate.m_edge)
            || (CNfa.CCL == candidate.m_edge && candidate.m_set.contains(b));
        if (!takesInput)
        {
            continue;
        }

        /* Allocate the result containers on the first match only. */
        if (null == bunch.m_nfa_set)
        {
#if (DEBUG)
            {
                CUtility.ASSERT(null == bunch.m_nfa_bit);
            }
#endif
            bunch.m_nfa_set = new Vector();
            bunch.m_nfa_bit = new SparseBitSet();
        }

        bunch.m_nfa_set.addElement(candidate.m_next);
        bunch.m_nfa_bit.Set(candidate.m_next.m_label);
    }

    if (null != bunch.m_nfa_set)
    {
#if (DEBUG)
        {
            CUtility.ASSERT(null != bunch.m_nfa_bit);
        }
#endif
        sortStates(bunch.m_nfa_set);
    }
}
/** Compute minimum Set of character classes needed to disambiguate
 *  edges. Every character starts out in class 0. For each NFA edge
 *  that carries a character or a character class, any existing class
 *  with members both on and off the edge is split: the members on the
 *  edge move to a newly numbered class.
 *  Writes original_charset_size, ccls and mapped_charset_size.
 *  [CSA, 25-Jul-1999] */
private void computeClasses(CSpec m_spec)
{
    this.original_charset_size = m_spec.m_dtrans_ncols;
    this.ccls = new int[original_charset_size]; // initially all zero.

    int nextcls = 1;
    SparseBitSet onEdge = new SparseBitSet();
    SparseBitSet offEdge = new SparseBitSet();
    Hashtable renamed = new Hashtable(); // old class -> new class, per edge

    if (m_spec.m_verbose)
    {
        System.Console.Write("Working on character classes.");
    }

    IEnumerator states = m_spec.m_nfa_states.elements();
    while (states.MoveNext())
    {
        CNfa nfa = (CNfa)states.Current;
        if (nfa.m_edge == CNfa.EMPTY || nfa.m_edge == CNfa.EPSILON)
        {
            continue; // no discriminatory information.
        }

        // Record which classes occur on each side of this edge.
        onEdge.clearAll();
        offEdge.clearAll();
        for (int ch = 0; ch < ccls.Length; ch++)
        {
            if (nfa.m_edge == ch ||
                nfa.m_edge == CNfa.CCL && nfa.m_set.contains(ch))
            {
                onEdge.Set(ccls[ch]);
            }
            else
            {
                offEdge.Set(ccls[ch]);
            }
        }

        // Classes seen on both sides are the ones that must be split.
        onEdge.and(offEdge);

        bool nothingToSplit = onEdge.size() == 0;
        if (m_spec.m_verbose)
        {
            System.Console.Write(nothingToSplit ? "." : ":");
        }
        if (nothingToSplit)
        {
            continue;
        }

        // Move the on-edge members of every split class to a new class.
        renamed.Clear();
        for (int ch = 0; ch < ccls.Length; ch++)
        {
            if (!onEdge.Get(ccls[ch]))
            {
                continue; // this character's class is not being split.
            }
            if (nfa.m_edge == ch ||
                nfa.m_edge == CNfa.CCL && nfa.m_set.contains(ch))
            {
                int oldClass = ccls[ch];
                object newClass = renamed[oldClass];
                if (newClass == null)
                {
                    // first member of this class seen on the edge.
                    newClass = nextcls++;
                    renamed.Add(oldClass, newClass);
                }
                ccls[ch] = (int)newClass;
            }
        }
    }

    if (m_spec.m_verbose)
    {
        System.Console.WriteLine();
        System.Console.WriteLine("NFA has " + nextcls +
                                 " distinct character classes.");
    }

    this.mapped_charset_size = nextcls;
}