/********************************************************
 * Function: add
 * Description: Marks element i as a member by setting the
 * corresponding bit in m_set.
 *******************************************************/
public void add(int i)
{
    this.m_set.Set(i);
}
/**
 * Self-test. Exercises Set/Get/clear/clearAll, Clone/Equals and the
 * and/or/xor operations of SparseBitSet on randomly chosen even bit
 * indices, and prints "Success." at the end.
 */
public static void _Main(string[] args)
{
    const int ITER = 500;
    const int RANGE = 65536;

    // Basic Set/Get/clearAll behaviour on a fresh set.
    SparseBitSet full = new SparseBitSet();
    CUtility.ASSERT(!full.Get(0) && !full.Get(1));
    CUtility.ASSERT(!full.Get(123329));
    full.Set(0);
    CUtility.ASSERT(full.Get(0) && !full.Get(1));
    full.Set(1);
    CUtility.ASSERT(full.Get(0) && full.Get(1));
    full.clearAll();
    CUtility.ASSERT(!full.Get(0) && !full.Get(1));

    // Insert ITER random even indices, remembering each one in 'members'.
    Random rng = new Random();
    Vector members = new Vector();
    int round = 0;
    while (round < ITER)
    {
        int bit = ((rng.Next() >> 1) % RANGE) << 1;
        full.Set(bit);
        members.addElement(bit);

        // The new bit is present and its odd neighbours are not.
        CUtility.ASSERT(full.Get(bit) && !full.Get(bit + 1) && !full.Get(bit - 1));

        // Every index inserted so far is still present.
        int seen = 0;
        while (seen < members.size())
        {
            CUtility.ASSERT(full.Get((int)members.elementAt(seen)));
            seen++;
        }
        round++;
    }

    // A clone compares equal in both directions.
    SparseBitSet thinned = (SparseBitSet)full.Clone();
    CUtility.ASSERT(full.Equals(thinned) && thinned.Equals(full));

    // Remove half as many entries from the clone, picked at random.
    int removals = 0;
    while (removals < ITER / 2)
    {
        int slot = (rng.Next() >> 1) % members.size();
        int victim = (int)members.elementAt(slot);
        thinned.clear(victim);
        members.removeElementAt(slot);

        // The removed index is gone from the clone.
        CUtility.ASSERT(!thinned.Get(victim));
        removals++;
    }
    CUtility.ASSERT(!full.Equals(thinned));

    // Set algebra between the full set and the thinned clone.
    SparseBitSet scratch = (SparseBitSet)full.Clone();
    SparseBitSet mixed = (SparseBitSet)full.Clone();

    scratch.and(full);
    CUtility.ASSERT(scratch.Equals(full) && full.Equals(scratch));
    scratch.xor(full);
    CUtility.ASSERT(!scratch.Equals(full) && scratch.size() == 0);

    mixed.or(thinned);
    CUtility.ASSERT(mixed.Equals(full) && !thinned.Equals(mixed));
    mixed.and(thinned);
    CUtility.ASSERT(!mixed.Equals(full) && thinned.Equals(mixed));
    mixed.xor(full);
    CUtility.ASSERT(!mixed.Equals(full) && !thinned.Equals(mixed));

    scratch.or(mixed);
    scratch.or(thinned);
    CUtility.ASSERT(scratch.Equals(full) && full.Equals(scratch));

    scratch = (SparseBitSet)mixed.Clone();
    scratch.and(thinned);
    CUtility.ASSERT(scratch.size() == 0);

    System.Console.WriteLine("Success.");
}
/***************************************************************
 * Function: machine
 * Description: Recursive descent regular expression parser.
 * Builds a chain of NFA nodes linked through m_next2, one per
 * rule (each rule hanging off m_next), appends a do-nothing
 * pseudo-rule for BOL and EOF, and returns the first node.
 **************************************************************/
private CNfa machine()
{
    if (CUtility.DESCENT_DEBUG)
    {
        CUtility.enter("machine", m_spec.m_lexeme, m_spec.m_current_token);
    }

    CNfa head = CAlloc.newCNfa(m_spec);
    CNfa tail = head;

    /* Read the state list of the first rule, then prime the tokenizer. */
    SparseBitSet ruleStates = m_lexGen.getStates();
    m_spec.m_current_token = CLexGen.EOS;
    m_lexGen.advance();

    /* CSA fix: only parse a first rule if there is any input at all. */
    if (m_spec.m_current_token != CLexGen.END_OF_INPUT)
    {
        CNfa firstRule = rule();
        tail.m_next = firstRule;
        processStates(ruleStates, firstRule);
    }

    while (m_spec.m_current_token != CLexGen.END_OF_INPUT)
    {
        /* Make state changes HERE. */
        ruleStates = m_lexGen.getStates();
        m_lexGen.advance();
        if (m_spec.m_current_token == CLexGen.END_OF_INPUT)
        {
            break;
        }

        /* Append a fresh link node and hang the next rule off it. */
        tail.m_next2 = CAlloc.newCNfa(m_spec);
        tail = tail.m_next2;
        CNfa nextRule = rule();
        tail.m_next = nextRule;
        processStates(ruleStates, nextRule);
    }

    /* CSA: add pseudo-rules for BOL and EOF, active in every state. */
    SparseBitSet everyState = new SparseBitSet();
    int stateIndex = 0;
    while (stateIndex < m_spec.m_states.Count)
    {
        everyState.Set(stateIndex);
        ++stateIndex;
    }

    tail.m_next2 = CAlloc.newCNfa(m_spec);
    tail = tail.m_next2;

    CNfa anchorEdge = CAlloc.newCNfa(m_spec);
    tail.m_next = anchorEdge;
    anchorEdge.m_edge = CNfa.CCL;

    CNfa anchorAccept = CAlloc.newCNfa(m_spec);
    anchorEdge.m_next = anchorAccept;

    anchorEdge.m_set = new CSet();
    anchorEdge.m_set.add(m_spec.BOL);
    anchorEdge.m_set.add(m_spec.EOF);

    /* Do-nothing accept rule. */
    anchorAccept.m_accept = new CAccept(new char[0], 0, m_input.m_line_number + 1);
    processStates(everyState, anchorEdge);
    /* CSA: done. */

    if (CUtility.DESCENT_DEBUG)
    {
        CUtility.leave("machine", m_spec.m_lexeme, m_spec.m_current_token);
    }

    return head;
}
/**
 * Compute the minimum set of character classes needed to disambiguate
 * edges. Every character starts out in class 0; for each NFA edge that
 * separates characters currently sharing a class, the characters on
 * the edge are moved into a newly numbered class. The resulting
 * per-character class numbers are left in ccls and the class count in
 * mapped_charset_size. [CSA, 25-Jul-1999]
 */
private void computeClasses(CSpec m_spec)
{
    this.original_charset_size = m_spec.m_dtrans_ncols;
    this.ccls = new int[original_charset_size]; // initially all zero.

    int classCount = 1;
    SparseBitSet onEdge = new SparseBitSet();
    SparseBitSet offEdge = new SparseBitSet();
    Hashtable renamed = new Hashtable();

    System.Console.Write("Working on character classes.");
    IEnumerator states = m_spec.m_nfa_states.elements();
    while (states.MoveNext())
    {
        CNfa nfa = (CNfa)states.Current;
        bool hasLabel = nfa.m_edge != CNfa.EMPTY && nfa.m_edge != CNfa.EPSILON;
        if (!hasLabel)
        {
            continue; // no discriminatory information.
        }

        // Record which classes occur on the edge and which occur off it.
        onEdge.clearAll();
        offEdge.clearAll();
        for (int ch = 0; ch < ccls.Length; ch++)
        {
            // Either the edge is labeled with this character, or it is a
            // character-class edge whose set contains it.
            bool matched = nfa.m_edge == ch
                || (nfa.m_edge == CNfa.CCL && nfa.m_set.contains(ch));
            SparseBitSet side = matched ? onEdge : offEdge;
            side.Set(ccls[ch]);
        }

        // Classes that show up on both sides of the edge must be split.
        onEdge.and(offEdge);
        bool nothingToSplit = onEdge.size() == 0;
        System.Console.Write(nothingToSplit ? "." : ":");
        if (nothingToSplit)
        {
            continue; // nothing to do.
        }

        // 'renamed' maps an old class number to the new one created for it.
        renamed.Clear();
        for (int ch = 0; ch < ccls.Length; ch++)
        {
            if (!onEdge.Get(ccls[ch]))
            {
                continue; // this character's class is not being split.
            }
            bool matched = nfa.m_edge == ch
                || (nfa.m_edge == CNfa.CCL && nfa.m_set.contains(ch));
            if (!matched)
            {
                continue; // off-edge characters keep the old class number.
            }

            int oldClass = ccls[ch];
            if (!renamed.ContainsKey(oldClass))
            {
                renamed.Add(oldClass, classCount++); // make new class
            }
            ccls[ch] = (int)renamed[oldClass];
        }
    }

    System.Console.WriteLine();
    System.Console.WriteLine("NFA has " + classCount + " distinct character classes.");
    this.mapped_charset_size = classCount;
}
/**
 * Compute the minimum set of character classes needed to disambiguate
 * edges. All characters begin in a single class (number 0). Each NFA
 * edge is then examined in turn, and any class that has members both
 * on and off the edge is split by giving the on-edge members a new
 * class number. Results are stored in ccls and mapped_charset_size.
 * [CSA, 25-Jul-1999]
 */
private void computeClasses(CSpec m_spec)
{
    this.original_charset_size = m_spec.m_dtrans_ncols;
    this.ccls = new int[original_charset_size]; // initially all zero.

    int nextClass = 1;
    SparseBitSet inside = new SparseBitSet();
    SparseBitSet outside = new SparseBitSet();
    Hashtable splitMap = new Hashtable();

    System.Console.Write("Working on character classes.");
    IEnumerator nfaStates = m_spec.m_nfa_states.elements();
    while (nfaStates.MoveNext())
    {
        CNfa node = (CNfa)nfaStates.Current;
        if (node.m_edge == CNfa.EMPTY)
        {
            continue; // no discriminatory information.
        }
        if (node.m_edge == CNfa.EPSILON)
        {
            continue; // no discriminatory information.
        }

        inside.clearAll();
        outside.clearAll();
        int c = 0;
        while (c < ccls.Length)
        {
            // Edge labeled with this character, or a set of characters
            // that contains it.
            bool onEdge = node.m_edge == c
                || (node.m_edge == CNfa.CCL && node.m_set.contains(c));
            if (onEdge)
            {
                inside.Set(ccls[c]);
            }
            else
            {
                outside.Set(ccls[c]);
            }
            c++;
        }

        // Now figure out which character classes we need to split:
        // exactly those that appear on both sides of the edge.
        inside.and(outside);
        bool splitNeeded = inside.size() != 0;
        System.Console.Write(splitNeeded ? ":" : ".");
        if (!splitNeeded)
        {
            continue; // nothing to do.
        }

        // And split them; splitMap maps old class name to new class name.
        splitMap.Clear();
        c = 0;
        while (c < ccls.Length)
        {
            bool inSplitClass = inside.Get(ccls[c]);
            if (inSplitClass
                && (node.m_edge == c
                    || (node.m_edge == CNfa.CCL && node.m_set.contains(c))))
            {
                // On the edge side: move to the replacement class.
                int previous = ccls[c];
                if (!splitMap.ContainsKey(previous))
                {
                    splitMap.Add(previous, nextClass++); // make new class
                }
                ccls[c] = (int)splitMap[previous];
            }
            c++;
        }
    }

    System.Console.WriteLine();
    System.Console.WriteLine("NFA has " + nextClass + " distinct character classes.");
    this.mapped_charset_size = nextClass;
}
/***************************************************************
 * Function: reduce
 * Description: Saves the accept and anchor information of every
 * transition row into m_spec, then merges equivalent columns and
 * equivalent rows of the transition table, recording the old-to-new
 * index mappings in m_spec.m_col_map and m_spec.m_row_map.
 **************************************************************/
private void reduce()
{
    /* Marks the first column (later: row) of each equivalence group. */
    SparseBitSet leaders = new SparseBitSet();

    /* Save accept nodes and anchor entries. */
    int size = m_spec.m_dtrans_vector.size();
    m_spec.m_anchor_array = new int[size];
    m_spec.m_accept_vector = new Vector();
    for (int s = 0; s < size; ++s)
    {
        CDTrans dtrans = (CDTrans)m_spec.m_dtrans_vector.elementAt(s);
        m_spec.m_accept_vector.addElement(dtrans.m_accept);
        m_spec.m_anchor_array[s] = dtrans.m_anchor;
        dtrans.m_accept = null;
    }

    /* Allocate column map; -1 means "not yet assigned". */
    m_spec.m_col_map = new int[m_spec.m_dtrans_ncols];
    for (int c = 0; c < m_spec.m_dtrans_ncols; ++c)
    {
        m_spec.m_col_map[c] = -1;
    }

    /* Process columns for reduction. */
    int reduced_ncols = 0;
    while (true)
    {
        if (CUtility.DEBUG)
        {
            for (int c = 0; c < reduced_ncols; ++c)
            {
                CUtility.ASSERT(m_spec.m_col_map[c] != -1);
            }
        }

        /* Find the first column that has no group yet. */
        int lead = reduced_ncols;
        while (lead < m_spec.m_dtrans_ncols && m_spec.m_col_map[lead] != -1)
        {
            ++lead;
        }
        if (lead >= m_spec.m_dtrans_ncols)
        {
            break;
        }

        if (CUtility.DEBUG)
        {
            CUtility.ASSERT(!leaders.Get(lead));
            CUtility.ASSERT(m_spec.m_col_map[lead] == -1);
        }

        leaders.Set(lead);
        m_spec.m_col_map[lead] = reduced_ncols;

        /* UNDONE: Optimize by doing all comparisons in one batch. */
        for (int other = lead + 1; other < m_spec.m_dtrans_ncols; ++other)
        {
            if (m_spec.m_col_map[other] == -1 && col_equiv(lead, other))
            {
                m_spec.m_col_map[other] = reduced_ncols;
            }
        }

        ++reduced_ncols;
    }

    /* Reduce columns: move each group leader to its new position. */
    int keptCols = 0;
    for (int col = 0; col < m_spec.m_dtrans_ncols; ++col)
    {
        if (!leaders.Get(col))
        {
            continue;
        }

        ++keptCols;
        leaders.clear(col);

        int target = m_spec.m_col_map[col];
        if (CUtility.DEBUG)
        {
            CUtility.ASSERT(target <= col);
        }
        if (target != col)
        {
            col_copy(target, col);
        }
    }
    m_spec.m_dtrans_ncols = reduced_ncols;

    /* truncate m_dtrans at proper length (freeing extra) */
    trunc_col();
    if (CUtility.DEBUG)
    {
        CUtility.ASSERT(keptCols == reduced_ncols);
    }

    /* Allocate row map; -1 means "not yet assigned". */
    int nrows = m_spec.m_dtrans_vector.Count;
    m_spec.m_row_map = new int[nrows];
    for (int r = 0; r < nrows; ++r)
    {
        m_spec.m_row_map[r] = -1;
    }

    /* Process rows to reduce. */
    int reduced_nrows = 0;
    while (true)
    {
        if (CUtility.DEBUG)
        {
            for (int r = 0; r < reduced_nrows; ++r)
            {
                CUtility.ASSERT(m_spec.m_row_map[r] != -1);
            }
        }

        /* Find the first row that has no group yet. */
        int lead = reduced_nrows;
        while (lead < nrows && m_spec.m_row_map[lead] != -1)
        {
            ++lead;
        }
        if (lead >= nrows)
        {
            break;
        }

        if (CUtility.DEBUG)
        {
            CUtility.ASSERT(!leaders.Get(lead));
            CUtility.ASSERT(m_spec.m_row_map[lead] == -1);
        }

        leaders.Set(lead);
        m_spec.m_row_map[lead] = reduced_nrows;

        /* UNDONE: Optimize by doing all comparisons in one batch. */
        for (int other = lead + 1; other < nrows; ++other)
        {
            if (m_spec.m_row_map[other] == -1 && row_equiv(lead, other))
            {
                m_spec.m_row_map[other] = reduced_nrows;
            }
        }

        ++reduced_nrows;
    }

    /* Reduce rows: move each group leader to its new position. */
    int keptRows = 0;
    for (int row = 0; row < nrows; ++row)
    {
        if (!leaders.Get(row))
        {
            continue;
        }

        ++keptRows;
        leaders.clear(row);

        int target = m_spec.m_row_map[row];
        if (CUtility.DEBUG)
        {
            CUtility.ASSERT(target <= row);
        }
        if (target != row)
        {
            row_copy(target, row);
        }
    }
    m_spec.m_dtrans_vector.setSize(reduced_nrows);

    if (CUtility.DEBUG)
    {
        CUtility.ASSERT(keptRows == reduced_nrows);
    }
}
/***************************************************************
  Function: reduce
  Description: Stores each transition row's accept and anchor data
  in m_spec, then collapses equivalent columns and equivalent rows
  of the transition table. The index remappings are written to
  m_spec.m_col_map and m_spec.m_row_map.
  **************************************************************/
private void reduce()
{
    /* Bit n is set while column/row n is the unprocessed head of a group. */
    SparseBitSet heads = new SparseBitSet();

    /* Save accept nodes and anchor entries. */
    int rowTotal = m_spec.m_dtrans_vector.size();
    m_spec.m_anchor_array = new int[rowTotal];
    m_spec.m_accept_vector = new Vector();
    int saved = 0;
    while (saved < rowTotal)
    {
        CDTrans entry = (CDTrans)m_spec.m_dtrans_vector.elementAt(saved);
        m_spec.m_accept_vector.addElement(entry.m_accept);
        m_spec.m_anchor_array[saved] = entry.m_anchor;
        entry.m_accept = null;
        ++saved;
    }

    /* Allocate column map, with every entry unassigned (-1). */
    m_spec.m_col_map = new int[m_spec.m_dtrans_ncols];
    for (int n = 0; n < m_spec.m_dtrans_ncols; ++n)
    {
        m_spec.m_col_map[n] = -1;
    }

    /* Process columns for reduction. */
    int groupCols = 0;
    while (true)
    {
        if (CUtility.DEBUG)
        {
            for (int n = 0; n < groupCols; ++n)
            {
                CUtility.ASSERT(m_spec.m_col_map[n] != -1);
            }
        }

        /* Locate the next unassigned column, if any. */
        int first = groupCols;
        while (first < m_spec.m_dtrans_ncols && m_spec.m_col_map[first] != -1)
        {
            ++first;
        }
        if (first >= m_spec.m_dtrans_ncols)
        {
            break;
        }

        if (CUtility.DEBUG)
        {
            CUtility.ASSERT(!heads.Get(first));
            CUtility.ASSERT(m_spec.m_col_map[first] == -1);
        }

        heads.Set(first);
        m_spec.m_col_map[first] = groupCols;

        /* UNDONE: Optimize by doing all comparisons in one batch. */
        for (int peer = first + 1; peer < m_spec.m_dtrans_ncols; ++peer)
        {
            if (m_spec.m_col_map[peer] != -1)
            {
                continue;
            }
            if (col_equiv(first, peer))
            {
                m_spec.m_col_map[peer] = groupCols;
            }
        }

        ++groupCols;
    }

    /* Reduce columns. */
    int movedCols = 0;
    for (int src = 0; src < m_spec.m_dtrans_ncols; ++src)
    {
        if (heads.Get(src))
        {
            ++movedCols;
            heads.clear(src);

            int dest = m_spec.m_col_map[src];
            if (CUtility.DEBUG)
            {
                CUtility.ASSERT(dest <= src);
            }
            if (dest != src)
            {
                col_copy(dest, src);
            }
        }
    }
    m_spec.m_dtrans_ncols = groupCols;

    /* truncate m_dtrans at proper length (freeing extra) */
    trunc_col();
    if (CUtility.DEBUG)
    {
        CUtility.ASSERT(movedCols == groupCols);
    }

    /* Allocate row map, with every entry unassigned (-1). */
    int rowCount = m_spec.m_dtrans_vector.Count;
    m_spec.m_row_map = new int[rowCount];
    for (int n = 0; n < rowCount; ++n)
    {
        m_spec.m_row_map[n] = -1;
    }

    /* Process rows to reduce. */
    int groupRows = 0;
    while (true)
    {
        if (CUtility.DEBUG)
        {
            for (int n = 0; n < groupRows; ++n)
            {
                CUtility.ASSERT(m_spec.m_row_map[n] != -1);
            }
        }

        /* Locate the next unassigned row, if any. */
        int first = groupRows;
        while (first < rowCount && m_spec.m_row_map[first] != -1)
        {
            ++first;
        }
        if (first >= rowCount)
        {
            break;
        }

        if (CUtility.DEBUG)
        {
            CUtility.ASSERT(!heads.Get(first));
            CUtility.ASSERT(m_spec.m_row_map[first] == -1);
        }

        heads.Set(first);
        m_spec.m_row_map[first] = groupRows;

        /* UNDONE: Optimize by doing all comparisons in one batch. */
        for (int peer = first + 1; peer < rowCount; ++peer)
        {
            if (m_spec.m_row_map[peer] != -1)
            {
                continue;
            }
            if (row_equiv(first, peer))
            {
                m_spec.m_row_map[peer] = groupRows;
            }
        }

        ++groupRows;
    }

    /* Reduce rows. */
    int movedRows = 0;
    for (int src = 0; src < rowCount; ++src)
    {
        if (heads.Get(src))
        {
            ++movedRows;
            heads.clear(src);

            int dest = m_spec.m_row_map[src];
            if (CUtility.DEBUG)
            {
                CUtility.ASSERT(dest <= src);
            }
            if (dest != src)
            {
                row_copy(dest, src);
            }
        }
    }
    m_spec.m_dtrans_vector.setSize(groupRows);

    if (CUtility.DEBUG)
    {
        CUtility.ASSERT(movedRows == groupRows);
    }
}
/***************************************************************
  Function: machine
  Description: Recursive descent regular expression parser.
  Parses rules until END_OF_INPUT, linking one node per rule
  through m_next2, then adds a do-nothing pseudo-rule on BOL and
  EOF that applies to every state. Returns the first node.
  **************************************************************/
private CNfa machine()
{
    if (CUtility.DESCENT_DEBUG)
    {
        CUtility.enter("machine", m_spec.m_lexeme, m_spec.m_current_token);
    }

    CNfa first = CAlloc.newCNfa(m_spec);
    CNfa current = first;

    /* Begin: Added for states. */
    SparseBitSet active = m_lexGen.getStates();
    m_spec.m_current_token = CLexGen.EOS;
    m_lexGen.advance();
    /* End: Added for states. */

    bool atEnd = CLexGen.END_OF_INPUT == m_spec.m_current_token;
    if (!atEnd) /* CSA fix. */
    {
        current.m_next = rule();
        processStates(active, current.m_next);
    }

    while (!(CLexGen.END_OF_INPUT == m_spec.m_current_token))
    {
        /* Make state changes HERE. */
        active = m_lexGen.getStates();

        /* Begin: Added for states. */
        m_lexGen.advance();
        if (!(CLexGen.END_OF_INPUT != m_spec.m_current_token))
        {
            break;
        }
        /* End: Added for states. */

        CNfa link = CAlloc.newCNfa(m_spec);
        current.m_next2 = link;
        current = link;
        current.m_next = rule();
        processStates(active, current.m_next);
    }

    /* CSA: add pseudo-rules for BOL and EOF */
    SparseBitSet all_states = new SparseBitSet();
    for (int state = 0; state < m_spec.m_states.Count; state++)
    {
        all_states.Set(state);
    }

    CNfa pseudoLink = CAlloc.newCNfa(m_spec);
    current.m_next2 = pseudoLink;
    current = pseudoLink;

    CNfa pseudoRule = CAlloc.newCNfa(m_spec);
    current.m_next = pseudoRule;
    pseudoRule.m_edge = CNfa.CCL;
    pseudoRule.m_next = CAlloc.newCNfa(m_spec);

    CSet anchors = new CSet();
    pseudoRule.m_set = anchors;
    anchors.add(m_spec.BOL);
    anchors.add(m_spec.EOF);

    /* do-nothing accept rule */
    pseudoRule.m_next.m_accept =
        new CAccept(new char[0], 0, m_input.m_line_number + 1);
    processStates(all_states, pseudoRule);
    /* CSA: done. */

    if (CUtility.DESCENT_DEBUG)
    {
        CUtility.leave("machine", m_spec.m_lexeme, m_spec.m_current_token);
    }

    return first;
}
/***************************************************************
  Function: getStates
  Description: Parses an optional "<name, name, ...>" state list
  at the current input position.
  Returns:
    - null if end of input is reached while skipping the leading
      white space;
    - a new SparseBitSet holding the index of every listed state
      when a '<' list is present (also returned, possibly partly
      filled, if end of input is hit inside the list, after
      reporting E_EOF);
    - the cached all_states set (built on first use from
      m_spec.m_states.Count) when no list is present.
  Sets m_advance_stop when input remains on the current line after
  the list, or after the white space when there is no list.
  **************************************************************/
public SparseBitSet getStates()
{
    int start_state;
    int count_state;
    SparseBitSet states;
    string name;
    object index;
    int i;
    int size;

    if (CUtility.DEBUG)
    {
        CUtility.ASSERT(null != this);
        CUtility.ASSERT(null != m_outstream);
        CUtility.ASSERT(null != m_input);
        CUtility.ASSERT(null != m_tokens);
        CUtility.ASSERT(null != m_spec);
    }

    states = null;

    /* Skip white space. */
    while (CUtility.isspace(m_input.m_line[m_input.m_line_index]))
    {
        ++m_input.m_line_index;

        while (m_input.m_line_index >= m_input.m_line_read)
        {
            /* Must just be an empty line. */
            if (m_input.getLine())
            {
                /* EOF found. */
                return null;
            }
        }
    }

    /* Look for states. */
    if ('<' == m_input.m_line[m_input.m_line_index])
    {
        ++m_input.m_line_index;

        states = new SparseBitSet();

        /* Parse states. */
        while (true)
        {
            /* We may have reached the end of the line. */
            while (m_input.m_line_index >= m_input.m_line_read)
            {
                if (m_input.getLine())
                {
                    /* EOF found. */
                    CError.parse_error(CError.E_EOF, m_input.m_line_number);
                    return states;
                }
            }

            /* Skip any run of white space and comma separators. */
            while (true)
            {
                /* Skip white space. */
                while (CUtility.isspace(m_input.m_line[m_input.m_line_index]))
                {
                    ++m_input.m_line_index;
                    while (m_input.m_line_index >= m_input.m_line_read)
                    {
                        if (m_input.getLine())
                        {
                            /* EOF found. */
                            CError.parse_error(CError.E_EOF, m_input.m_line_number);
                            return states;
                        }
                    }
                }

                if (',' != m_input.m_line[m_input.m_line_index])
                {
                    break;
                }

                ++m_input.m_line_index;
            }

            /* A closing '>' ends the list. */
            if ('>' == m_input.m_line[m_input.m_line_index])
            {
                ++m_input.m_line_index;
                if (m_input.m_line_index < m_input.m_line_read)
                {
                    m_advance_stop = true;
                }
                return states;
            }

            /* Read in state name. */
            start_state = m_input.m_line_index;
            while (false == CUtility.isspace(m_input.m_line[m_input.m_line_index])
                   && ',' != m_input.m_line[m_input.m_line_index]
                   && '>' != m_input.m_line[m_input.m_line_index])
            {
                ++m_input.m_line_index;

                if (m_input.m_line_index >= m_input.m_line_read)
                {
                    /* End of line means end of state name. */
                    break;
                }
            }
            count_state = m_input.m_line_index - start_state;

            /* Save name after checking definition. */
            name = new string(m_input.m_line, start_state, count_state);

            /* Keep the lookup result boxed. Casting it to int here would
               throw on an undefined name before the null check below
               could report the error. */
            index = m_spec.m_states[name];
            if (null == index)
            {
                /* Uninitialized state. */
                System.Console.WriteLine("Uninitialized State Name: " + name);
                CError.parse_error(CError.E_STATE, m_input.m_line_number);
                /* NOTE(review): parse_error presumably aborts parsing; if it
                   returns instead, the undefined name is simply skipped. */
            }
            else
            {
                states.Set((int)index);
            }
        }
    }

    /* No explicit list: the rule applies to all states. Build the
       full set once and reuse it on later calls. */
    if (null == all_states)
    {
        all_states = new SparseBitSet();
        size = m_spec.m_states.Count;
        for (i = 0; i < size; ++i)
        {
            all_states.Set(i);
        }
    }

    if (m_input.m_line_index < m_input.m_line_read)
    {
        m_advance_stop = true;
    }
    return all_states;
}
/**
 * Self-test. Fills a SparseBitSet with random even indices, removes
 * some of them from a clone, and checks Get/Set/clear/clearAll,
 * Clone/Equals and and/or/xor against each other. Prints "Success."
 * as its last step.
 */
public static void _Main(string[] args)
{
    const int ITER = 500;
    const int RANGE = 65536;

    SparseBitSet original = new SparseBitSet();

    // Nothing is set initially, including a far-away index.
    CUtility.ASSERT(!original.Get(0) && !original.Get(1));
    CUtility.ASSERT(!original.Get(123329));

    // Setting bits one at a time, then wiping them all.
    original.Set(0);
    CUtility.ASSERT(original.Get(0) && !original.Get(1));
    original.Set(1);
    CUtility.ASSERT(original.Get(0) && original.Get(1));
    original.clearAll();
    CUtility.ASSERT(!original.Get(0) && !original.Get(1));

    Random random = new Random();
    Vector inserted = new Vector();
    for (int pass = 0; pass != ITER; ++pass)
    {
        // Shifting left by one keeps every inserted index even.
        int value = ((random.Next() >> 1) % RANGE) << 1;
        original.Set(value);
        inserted.addElement(value);

        // check that all the numbers are there.
        CUtility.ASSERT(original.Get(value) && !original.Get(value + 1) && !original.Get(value - 1));
        for (int k = 0; k < inserted.size(); ++k)
        {
            int earlier = (int)inserted.elementAt(k);
            CUtility.ASSERT(original.Get(earlier));
        }
    }

    SparseBitSet reduced = (SparseBitSet)original.Clone();
    CUtility.ASSERT(original.Equals(reduced) && reduced.Equals(original));

    for (int pass = 0; pass < ITER / 2; ++pass)
    {
        int position = (random.Next() >> 1) % inserted.size();
        int removed = (int)inserted.elementAt(position);
        reduced.clear(removed);
        inserted.removeElementAt(position);

        // check that numbers are removed properly.
        CUtility.ASSERT(!reduced.Get(removed));
    }
    CUtility.ASSERT(!original.Equals(reduced));

    SparseBitSet first = (SparseBitSet)original.Clone();
    SparseBitSet second = (SparseBitSet)original.Clone();

    // and/xor of a set with an equal set.
    first.and(original);
    CUtility.ASSERT(first.Equals(original) && original.Equals(first));
    first.xor(original);
    CUtility.ASSERT(!first.Equals(original) && first.size() == 0);

    // or/and/xor against the reduced clone.
    second.or(reduced);
    CUtility.ASSERT(second.Equals(original) && !reduced.Equals(second));
    second.and(reduced);
    CUtility.ASSERT(!second.Equals(original) && reduced.Equals(second));
    second.xor(original);
    CUtility.ASSERT(!second.Equals(original) && !reduced.Equals(second));

    // The two parts together rebuild the original.
    first.or(second);
    first.or(reduced);
    CUtility.ASSERT(first.Equals(original) && original.Equals(first));

    // The two parts have nothing in common.
    first = (SparseBitSet)second.Clone();
    first.and(reduced);
    CUtility.ASSERT(first.size() == 0);

    System.Console.WriteLine("Success.");
}