/// <summary>
/// Computes the selection bound for an action: its value estimate plus a bias
/// term of <c>biasTermConstant * sqrt(countLog / (m_count + 1))</c>.
/// </summary>
/// <param name="countLog">
/// Numerator of the bias term. Presumably the logarithm of the parent's visit
/// count -- TODO confirm against callers. A negative value makes the square
/// root NaN, so callers are expected to pass a non-negative number.
/// </param>
/// <param name="stats">Statistics of the action being scored.</param>
/// <returns>The value estimate increased by the bias term.</returns>
protected double getBoundLP(double countLog, ref RlStats stats)
{
    var value = getValueEstimate(ref stats);

    // Shortcut intentionally left disabled. It must stay on its own line:
    // a line comment runs to the end of the line and would otherwise swallow
    // the bias term, the return statement and the closing brace.
    //if (m_searchParams.biasTermConstant == 0.0) return value;

    // The "+ 1" keeps the denominator non-zero when m_count is 0.
    value += m_searchParams.biasTermConstant * Math.Sqrt(countLog / (stats.m_count + 1));
    return value;
}
/// <summary>
/// Returns the value estimate for the given statistics: the recorded mean once
/// the count is positive, otherwise the configured first-play urgency.
/// </summary>
/// <param name="stats">Statistics to read the count and mean from.</param>
/// <returns><c>stats.m_mean</c> if <c>stats.m_count</c> is greater than zero; otherwise <c>m_firstPlayUrgency</c>.</returns>
protected double getValueEstimate(ref RlStats stats)
{
    // No samples yet: fall back to the first-play urgency.
    if (!(stats.m_count > 0))
    {
        return m_firstPlayUrgency;
    }

    return stats.m_mean;
}
/// <summary>
/// Creates an <c>RlAction</c> for the given action with a newly constructed
/// <c>RlStats</c> instance.
/// </summary>
/// <param name="action">The action this object stores.</param>
public RlAction(Action_t action)
{
    m_action = action;
    m_stats = new RlStats();
}