/// <summary>
/// Determine the time step the solver should use next.
///
/// The result starts at m_params.m_max_timestep. When m_params.m_dynamic_ts
/// is set, each net contributes a candidate step derived from the second
/// divided difference of its voltage (current step vs. the stored previous
/// step) and m_params.m_dynamic_lte; the smallest candidate wins and is
/// clamped from below by m_params.m_min_timestep. As a side effect the
/// per-net history (last_V, DD_n_m_1, h_n_m_1) is updated.
/// </summary>
/// <param name="cur_ts">Length of the step that was just taken.</param>
/// <returns>The chosen step, never less than two time quanta.</returns>
netlist_time compute_next_timestep(double cur_ts)
{
    nl_double solver_step = m_params.m_max_timestep;

    if (m_params.m_dynamic_ts)
    {
        int net_count = m_terms.size();

        for (int idx = 0; idx < net_count; ++idx)
        {
            analog_net_t net = m_nets[idx];
            terms_for_net_t hist = m_terms[idx].get();

            // First difference of the net voltage over the step just taken.
            nl_double delta_v = net.Q_Analog() - hist.last_V;
            nl_double step = cur_ts;

            // Second divided difference, built from the current and the previous step.
            nl_double slope_now = delta_v / step;
            nl_double slope_prev = hist.DD_n_m_1 / hist.h_n_m_1;
            nl_double second_diff = (slope_now - slope_prev) / (step + hist.h_n_m_1);

            // Remember this step for the next call.
            hist.h_n_m_1 = step;
            hist.DD_n_m_1 = delta_v;

            // Default to the maximum step; only derive a step from the
            // second difference when it is large enough to divide by.
            nl_double net_step = m_params.m_max_timestep;
            if (Math.Abs(second_diff) > nl_config_global.NL_FCONST(1e-60))
            {
                net_step = Math.Sqrt(m_params.m_dynamic_lte / Math.Abs(nl_config_global.NL_FCONST(0.5) * second_diff));
            }

            if (net_step < solver_step)
            {
                solver_step = net_step;
            }

            hist.last_V = net.Q_Analog();
        }

        if (solver_step < m_params.m_min_timestep)
        {
            // A warning used to be logged here:
            //log().warning("Dynamic timestep below min timestep. Consider decreasing MIN_TIMESTEP: {1} us", new_solver_timestep*1.0e6);
            solver_step = m_params.m_min_timestep;
        }
    }

    // Disabled limiter kept for reference:
    //if (new_solver_timestep > 10.0 * hn)
    //    new_solver_timestep = 10.0 * hn;

    /*
     * FIXME: The factor 2 on the quantum is important. Without it there are
     *        timing issues, which points to a bug somewhere else.
     */
    netlist_time candidate = netlist_time.from_double(solver_step);
    netlist_time floor = netlist_time.quantum() * 2;
    return netlist_time.Max(candidate, floor);
}
//template <typename T>
//void store(const T * RESTRICT V);
//template <typename T>
//T delta(const T * RESTRICT V);

//template <typename T>
//void build_LE_A();
//template <typename T>
//void build_LE_RHS();

/// <summary>
/// Prepare the per-net term lists and sparsity bookkeeping for the solver.
///
/// Steps, in order:
///  1. Record each net's rail start index, append the temporarily stored
///     rail terms to the net's term list and discard the temporary lists.
///  2. Optionally sort the nets by rail start (per m_sort) and refresh the
///     connected-net indices afterwards.
///  3. Build, for every row, the sorted list of non-zero columns (nz).
///  4. Build, for every row, the sorted list of non-zero columns right of
///     the diagonal (nzrd), carrying over entries from the previous row.
///  5. Simulate elimination on a boolean fill-in matrix to build the list
///     of non-zero rows below each diagonal (nzbd) and count operations
///     into m_ops.
///  6. Register the per-net state with state().save.
/// </summary>
void setup_matrix()
{
    UInt32 iN = (UInt32)m_nets.size();

    for (UInt32 k = 0; k < iN; k++)
    {
        m_terms[k].railstart = m_terms[k].count();
        for (UInt32 i = 0; i < m_rails_temp[k].count(); i++)
        {
            this.m_terms[k].add(m_rails_temp[k].terms()[i], m_rails_temp[k].connected_net_idx()[i], false);
        }

        m_terms[k].set_pointers();
    }

    foreach (terms_for_net_t rt in m_rails_temp)
    {
        rt.clear(); // no longer needed
    }

    m_rails_temp.clear();

    /* Sort in descending order by number of connected matrix voltages.
     * The idea is, that for Gauss-Seidel algo the first voltage computed
     * depends on the greatest number of previous voltages thus taking into
     * account the maximum amount of information.
     *
     * This actually improves performance on popeye slightly. Average
     * GS computations reduce from 2.509 to 2.370
     *
     * Smallest to largest : 2.613
     * Unsorted            : 2.509
     * Largest to smallest : 2.370
     *
     * Sorting as a general matrix pre-conditioning is mentioned in
     * literature but I have found no articles about Gauss Seidel.
     *
     * For Gaussian Elimination however increasing order is better suited.
     * NOTE: Even better would be to sort on elements right of the matrix diagonal.
     *
     */
    if (m_sort != eSortType.NOSORT)
    {
        int sort_order = (m_sort == eSortType.DESCENDING ? 1 : -1);

        // "k + 1 < iN" instead of "k < iN - 1": iN is unsigned, so iN - 1
        // would wrap around to a huge value when iN is 0.
        for (UInt32 k = 0; k + 1 < iN; k++)
        {
            for (UInt32 i = k + 1; i < iN; i++)
            {
                if (((int)(m_terms[k].railstart) - (int)(m_terms[i].railstart)) * sort_order < 0)
                {
                    // Swap the term lists and the nets together so that
                    // index k keeps referring to the same net in both.
                    var termsTemp = m_terms[i];
                    m_terms[i] = m_terms[k];
                    m_terms[k] = termsTemp;

                    var netsTemp = m_nets[i];
                    m_nets[i] = m_nets[k];
                    m_nets[k] = netsTemp;
                }
            }
        }

        // Net positions may have changed: recompute every stored net index
        // (entries equal to -1 are left untouched).
        foreach (var term in m_terms)
        {
            var other = term.connected_net_idx();
            for (UInt32 i = 0; i < term.count(); i++)
            {
                if (other[i] != -1)
                {
                    other[i] = get_net_idx(term.terms()[i].otherterm.net());
                }
            }
        }
    }

    /* create a list of non zero elements. */
    for (UInt32 k = 0; k < iN; k++)
    {
        terms_for_net_t t = m_terms[k];
        /* pretty brutal */
        var other = t.connected_net_idx();

        t.nz.clear();

        for (UInt32 i = 0; i < t.railstart; i++)
        {
            if (!t.nz.Contains((UInt32)other[i]))
            {
                t.nz.push_back((UInt32)other[i]);
            }
        }

        t.nz.push_back(k);     // add diagonal

        /* and sort */
        t.nz.Sort();
    }

    /* create a list of non zero elements right of the diagonal
     * These list anticipate the population of array elements by
     * Gaussian elimination.
     */
    for (UInt32 k = 0; k < iN; k++)
    {
        terms_for_net_t t = m_terms[k];
        /* pretty brutal */
        var other = t.connected_net_idx();

        // Start from an empty list and, for k > 0, carry over the previous
        // row's entries that are still right of this row's diagonal.
        // The elements are copied one by one rather than assigning the
        // previous row's list: if nzrd is a reference-type container, plain
        // assignment would make both rows share (and mutate) one list.
        t.nzrd.clear();
        if (k > 0)
        {
            var prev = m_terms[k - 1].nzrd;
            for (var jIdx = 0; jIdx < prev.Count; ++jIdx)
            {
                var j = prev[jIdx];
                if (j >= k + 1)
                {
                    t.nzrd.push_back(j);
                }
            }
        }

        for (UInt32 i = 0; i < t.railstart; i++)
        {
            if (!t.nzrd.Contains((UInt32)other[i]) && other[i] >= (int)(k + 1))
            {
                t.nzrd.push_back((UInt32)other[i]);
            }
        }

        /* and sort */
        t.nzrd.Sort();
    }

    /* create a list of non zero elements below diagonal k
     * This should reduce cache misses ...
     */

    // Newly allocated bool arrays are all false, so only the non-zero
    // positions need to be marked.
    bool[,] touched = new bool[iN, iN];

    for (UInt32 k = 0; k < iN; k++)
    {
        for (UInt32 j = 0; j < m_terms[k].nz.size(); j++)
        {
            touched[k, m_terms[k].nz[j]] = true;
        }
    }

    m_ops = 0;
    for (UInt32 k = 0; k < iN; k++)
    {
        m_ops++; // 1/A(k,k)
        for (UInt32 row = k + 1; row < iN; row++)
        {
            if (touched[row, k])
            {
                m_ops++;
                if (!m_terms[k].nzbd.Contains(row))
                {
                    m_terms[k].nzbd.push_back(row);
                }

                // Eliminating with row k fills in every column that row k
                // has right of the diagonal.
                for (UInt32 col = k + 1; col < iN; col++)
                {
                    if (touched[k, col])
                    {
                        touched[row, col] = true;
                        m_ops += 2;
                    }
                }
            }
        }
    }

    log().verbose.op("Number of mults/adds for {0}: {1}", name(), m_ops);

#if false
    if ((0))
    {
        for (unsigned k = 0; k < iN; k++)
        {
            pstring line = plib::pfmt("{1:3}")(k);
            for (unsigned j = 0; j < m_terms[k]->m_nzrd.size(); j++)
            {
                line += plib::pfmt(" {1:3}")(m_terms[k]->m_nzrd[j]);
            }
            log().verbose("{1}", line);
        }
    }
#endif

    /*
     * save states
     */
    for (UInt32 k = 0; k < iN; k++)
    {
        string num = new plib.pfmt("{0}").op(k);

        state().save(this, m_terms[k].last_V, "lastV." + num);
        state().save(this, m_terms[k].DD_n_m_1, "m_DD_n_m_1." + num);
        state().save(this, m_terms[k].h_n_m_1, "m_h_n_m_1." + num);

        state().save(this, m_terms[k].go(), "GO" + num, m_terms[k].count());
        state().save(this, m_terms[k].gt(), "GT" + num, m_terms[k].count());
        state().save(this, m_terms[k].Idr(), "IDR" + num, m_terms[k].count());
    }
}