// Epsilon-style exploration policy for the inverted-pendulum task.
// NOTE(review): a fresh Random is created on every call; on older .NET the
// time-based seed makes rapid successive calls produce identical sequences —
// consider a shared Random field instead.
public override ReinforcementLearningAction ActionForState(ReinforcementLearningState state, ReinforcementLearningQStore qFunction)
{
    Random r = new Random();
    // NOTE(review): with probability (1 - EPSILON) a random action is returned
    // and only with probability EPSILON the greedy one — inverted relative to
    // textbook epsilon-greedy, but consistent with how the sibling
    // ActionProbabilities implementation weights the greedy action; confirm the
    // intended meaning of EPSILON before changing.
    if (r.NextDouble() > EPSILON)
    {
        // Exploration: action parameter drawn uniformly from [0, 1).
        return new InvertedPendulumAction2((float)r.NextDouble());
    }
    else
    {
        // Exploitation: best known action for this state per the Q-store.
        ReinforcementLearningAction ret;
        float value;
        qFunction.GetBestActionAndUtilityForState(state, out ret, out value);
        return ret;
    }
}
// Samples an action from the distribution produced by ActionProbabilities
// (roulette-wheel selection over the cumulative probabilities, driven by the
// shared Random field r).
public virtual ReinforcementLearningAction ActionForState(ReinforcementLearningState state, ReinforcementLearningQStore qFunction)
{
    ReinforcementLearningAction[] candidates;
    float[] distribution;
    ActionProbabilities(state, qFunction, out candidates, out distribution);

    float threshold = (float)r.NextDouble();
    float cumulative = 0f;
    // Fall back to the last action if floating-point rounding leaves the
    // cumulative sum below the threshold after the final entry.
    int chosen = candidates.Length - 1;
    for (int k = 0; k < candidates.Length; ++k)
    {
        cumulative += distribution[k];
        if (cumulative >= threshold)
        {
            chosen = k;
            break;
        }
    }
    return candidates[chosen];
}
// Epsilon-greedy action distribution: the Q-greedy action receives probability
// Epsilon and every other action shares the remaining (1 - Epsilon) uniformly.
// NOTE(review): this is inverted relative to the textbook convention (where the
// greedy action gets 1 - Epsilon + Epsilon/n), but it matches how the sibling
// epsilon policy in this file interprets EPSILON — confirm before flipping.
public override void ActionProbabilities(ReinforcementLearningState state, ReinforcementLearningQStore qFunction, out ReinforcementLearningAction[] actions, out float[] probabilities)
{
    ReinforcementLearningAction greedy;
    float utility;
    actions = state.GetActions();
    probabilities = new float[actions.Length];
    qFunction.GetBestActionAndUtilityForState(state, out greedy, out utility);

    // Fix: with exactly one available action the old code assigned it
    // probability Epsilon, so the distribution did not sum to 1 (and the
    // non-greedy formula below would divide by zero). A lone action must be
    // chosen with certainty.
    if (actions.Length == 1)
    {
        probabilities[0] = 1f;
        return;
    }

    for (int i = 0; i < actions.Length; ++i)
    {
        if (actions[i].Equals(greedy))
            probabilities[i] = Epsilon;                       // greedy action
        else
            probabilities[i] = (1 - Epsilon) / (actions.Length - 1); // uniform share of the remainder
    }
}
// Purely greedy policy: always return the action the Q-store ranks highest for
// this state; the utility value itself is not needed here.
// NOTE(review): `new` hides (rather than overrides) the base implementation —
// callers holding a base-class reference will not dispatch here; confirm that
// this is intentional.
public new ReinforcementLearningAction ActionForState(ReinforcementLearningState state, ReinforcementLearningQStore qFunction)
{
    ReinforcementLearningAction greedy;
    float ignoredUtility;
    qFunction.GetBestActionAndUtilityForState(state, out greedy, out ignoredUtility);
    return greedy;
}
/// <summary>
/// Produces the policy's action-selection distribution for the given state:
/// <paramref name="actions"/> receives the available actions and
/// <paramref name="probabilities"/> the matching selection probability for each
/// (parallel arrays). Concrete policies supply the implementation.
/// </summary>
public abstract void ActionProbabilities(ReinforcementLearningState state, ReinforcementLearningQStore qFunction, out ReinforcementLearningAction[] actions, out float[] probabilities);
/// <summary>
/// Prepares the learner for the given environment by building its Q-store.
/// </summary>
public void Initialize(ReinforcementLearningEnvironment env)
{
    // CreateQStore is defined elsewhere; presumably it sizes the store from the
    // environment's state/action space — confirm against its implementation.
    qFunction = CreateQStore(env);
}
// Boltzmann (softmax) exploration: P(a) is proportional to exp(Q(s,a)/Temperature).
// The maximum Q-value is subtracted before exponentiating so large utilities
// cannot overflow Math.Exp; the normalized result is unchanged by the shift.
public override void ActionProbabilities(ReinforcementLearningState state, ReinforcementLearningQStore qFunction, out ReinforcementLearningAction[] actions, out float[] probabilities)
{
    actions = state.GetActions();
    int n = actions.Length;
    probabilities = new float[n];

    // Pass 1: stash the raw Q-values and track their maximum.
    float best = float.MinValue;
    for (int a = 0; a < n; ++a)
    {
        float q = qFunction.Evaluate(state, actions[a]);
        probabilities[a] = q;
        if (q > best)
            best = q;
    }

    // Pass 2: exponentiate the shifted values and accumulate the partition sum.
    float total = 0f;
    for (int a = 0; a < n; ++a)
    {
        float weight = (float)Math.Exp((probabilities[a] - best) / Temperature);
        probabilities[a] = weight;
        total += weight;
    }

    // Pass 3: normalize into a proper probability distribution.
    for (int a = 0; a < n; ++a)
    {
        probabilities[a] /= total;
    }
}
/// <summary>
/// Not implemented for this policy — always throws
/// <see cref="NotImplementedException"/>.
/// TODO(review): either implement a real distribution or document why callers
/// must never request probabilities from this policy.
/// </summary>
public override void ActionProbabilities(ReinforcementLearningState state, ReinforcementLearningQStore qFunction, out ReinforcementLearningAction[] actions, out float[] probabilities)
{
    throw new NotImplementedException();
}