/// <summary>
/// Chooses an action for <paramref name="state"/> by random search: a fixed number of
/// candidate actions is drawn from <c>r</c>, each one is scored with <c>Evaluate</c>,
/// and the candidate with the highest score is reported.
/// </summary>
/// <param name="state">State handed to <c>Evaluate</c> together with every candidate action.</param>
/// <param name="action">
/// Receives the highest-scoring candidate. Remains <c>null</c> when no candidate scores
/// strictly above <see cref="float.MinValue"/> (e.g. every score is NaN).
/// </param>
/// <param name="value">
/// Receives the score of the chosen candidate, or <see cref="float.MinValue"/> when none was chosen.
/// </param>
public void GetBestActionAndUtilityForState(ReinforcementLearningState state, out ReinforcementLearningAction action, out float value)
{
    const int candidateCount = 10;

    InvertedPendulumAction2 winner = null;
    float winnerScore = float.MinValue;

    int remaining = candidateCount;
    while (remaining > 0)
    {
        --remaining;

        // Assuming r.NextDouble() yields values in [0, 1), this rescales the sample to about [-1, 1].
        float sampled = (float)r.NextDouble() * 2 - 1;
        InvertedPendulumAction2 candidate = new InvertedPendulumAction2(sampled);

        float score = Evaluate(state, candidate);

        // Strict comparison: on ties the earliest candidate wins, and NaN scores never replace the winner.
        if (score > winnerScore)
        {
            winner = candidate;
            winnerScore = score;
        }
    }

    value = winnerScore;
    action = winner;
}
/// <summary>
/// Feeds one training sample to <c>igmn</c>: a four-component vector made of the state's
/// <c>a</c> and <c>w</c> fields, the action's <c>action</c> field, and <paramref name="value"/>.
/// </summary>
/// <param name="state">State whose <c>a</c> and <c>w</c> fields form the first two components.</param>
/// <param name="action">Action whose <c>action</c> field forms the third component.</param>
/// <param name="value">Value stored as the fourth component of the sample.</param>
public void ReplaceValue(InvertedPendulumState state, InvertedPendulumAction2 action, float value)
{
    // TODO (carried over from the original note "delta??"): open question, presumably whether
    // a delta should be trained here instead of the raw value - confirm with the author.
    double[] sample = { state.a, state.w, action.action, value };
    igmn.Train(new Vector(sample));
}