/// <summary>
/// Chooses an action for a state: when the random draw is below <c>1 - mEpsilon</c> the action
/// with the largest Q-value is returned, otherwise one of the available actions is picked at random.
/// </summary>
/// <param name="state_id">Identifier of the state for which an action is requested.</param>
/// <param name="Q">Model queried for the available actions and their Q-values.</param>
/// <returns>
/// The selected action id, or -1 when <paramref name="Q"/> reports no actions for the state.
/// </returns>
public override int SelectAction(int state_id, QModel Q)
{
    double r = RandomEngine.NextDouble();
    List<int> action_list = Q.FindAllActionsAtState(state_id);
    int action_count = action_list.Count;

    // Nothing to choose from: return the same -1 sentinel the greedy scan yields for an
    // empty list, instead of dividing by zero in the random branch.
    if (action_count == 0)
    {
        return -1;
    }

    if (r < 1 - mEpsilon)
    {
        // Exploit: scan for the highest Q-value.
        double maxQ = double.MinValue;
        int action_with_maxQ = -1;
        foreach (int action_id in action_list)
        {
            double Qval = Q[state_id, action_id];
            // Must be '>' — with maxQ seeded to double.MinValue a '<' test can never succeed,
            // which left the result stuck at -1.
            if (Qval > maxQ)
            {
                maxQ = Qval;
                action_with_maxQ = action_id;
            }
        }
        return action_with_maxQ;
    }

    // Explore: draw a fresh number. Reusing r would bias the pick toward the last indices,
    // because in this branch r is already known to be >= 1 - mEpsilon.
    // NOTE(review): assumes RandomEngine.NextDouble() is uniform on [0, 1) — confirm; the
    // modulo keeps the index in range even if it can return exactly 1.0.
    double pick = RandomEngine.NextDouble();
    return action_list[(int)(pick * action_count) % action_count];
}
/// <summary>
/// Picks an action with probability proportional to its Q-value: the Q-values of the available
/// actions are accumulated in list order, each running total is normalised by the grand total,
/// and the first action whose normalised running total is at least the random draw is returned.
/// </summary>
/// <param name="state_id">Identifier of the state for which an action is requested.</param>
/// <param name="Q">Model queried for the available actions and their Q-values.</param>
/// <returns>
/// The selected action id, or -1 when no normalised running total reaches the draw
/// (for example when there are no actions, or the Q-values sum to zero so the ratio is NaN).
/// </returns>
public override int SelectAction(int state_id, QModel Q)
{
    double draw = RandomEngine.NextDouble();
    List<int> candidates = Q.FindAllActionsAtState(state_id);

    // First pass: running (cumulative) sum of Q-values, keyed by action id.
    var cumulative = new Dictionary<int, double>();
    double total = 0;
    for (int i = 0; i < candidates.Count; i++)
    {
        int id = candidates[i];
        total += Q[state_id, id];
        cumulative[id] = total;
    }

    // Second pass: normalise each running sum and stop at the first one covering the draw.
    for (int i = 0; i < candidates.Count; i++)
    {
        int id = candidates[i];
        double share = cumulative[id] / total;
        cumulative[id] = share;
        if (share >= draw)
        {
            return id;
        }
    }

    return -1;
}
/// <summary>
/// Greedy selection: returns the action with the largest Q-value among those available
/// for the given state.
/// </summary>
/// <param name="state_id">Identifier of the state for which an action is requested.</param>
/// <param name="Q">Model queried for the available actions and their Q-values.</param>
/// <returns>
/// The id of the action with the highest Q-value (the first one encountered on ties),
/// or -1 when no action has a Q-value above <see cref="double.MinValue"/>, e.g. for an empty list.
/// </returns>
public override int SelectAction(int state_id, QModel Q)
{
    double maxQ = double.MinValue;
    int action_with_maxQ = -1;

    List<int> action_list = Q.FindAllActionsAtState(state_id);
    foreach (int action_id in action_list)
    {
        double Qval = Q[state_id, action_id];
        // Must be '>' — with maxQ seeded to double.MinValue a '<' test can never succeed,
        // which made this method always return -1.
        if (Qval > maxQ)
        {
            maxQ = Qval;
            action_with_maxQ = action_id;
        }
    }

    return action_with_maxQ;
}