/// <summary>
/// Returns the key of the entry with the largest value in <paramref name="values"/>.
/// </summary>
/// <remarks>
/// When several entries share the largest value, the one encountered first during
/// enumeration is returned, because a later entry only replaces the current best
/// when its value is strictly greater.
/// </remarks>
/// <param name="values">Map from candidate state to its score. Must be non-null and non-empty.</param>
/// <returns>The key whose value is the greatest.</returns>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="values"/> contains no entries.</exception>
static QState ArgMax(Dictionary<QState, double> values)
{
    if (values == null)
    {
        throw new ArgumentNullException("values");
    }

    if (values.Count == 0)
    {
        throw new InvalidOperationException("Cannot select the arg-max of an empty dictionary.");
    }

    QState best = null;
    double bestValue = 0.0;
    bool seeded = false;

    foreach (var entry in values)
    {
        // Keep the running maximum in a local instead of looking it up in the
        // dictionary again on every iteration.
        if (!seeded || entry.Value > bestValue)
        {
            best = entry.Key;
            bestValue = entry.Value;
            seeded = true;
        }
    }

    return best;
}
/// <summary>
/// Runs <paramref name="iterations"/> training episodes, updating the <c>Q</c> and
/// <c>Nsa</c> tables along a greedy trajectory.
/// </summary>
/// <remarks>
/// Each episode first calls <c>SetReward()</c> on every element of
/// <c>initialStateProvider</c>, starts at <c>initialStateProvider[0]</c>, and takes its
/// first transition from <c>GetRandNextState(null)</c>. Every following transition goes
/// to the successor with the highest <c>Q</c> value.
/// An episode ends once the current state equals <paramref name="goal"/> (after that
/// step's update), or when the upcoming state offers no selectable successor.
/// There is no cap on the number of steps per episode.
/// </remarks>
/// <param name="goal">State that terminates an episode when the current state equals it.</param>
/// <param name="iterations">Number of episodes to run.</param>
/// <param name="learningRate">Numerator of the step size; it is divided by <c>Nsa[state][nextState]</c>.</param>
/// <param name="discountRate">Factor applied to the best <c>Q</c> value available from the next state.</param>
public void Train(QState goal, int iterations, double learningRate, double discountRate)
{
    // Amount added to Nsa[state][nextState] each time that pair is updated.
    // NOTE(review): Nsa looks like a per-transition visit weight; confirm the intended scale.
    const double nsaIncrement = 0.004;

    for (int epoch = 0; epoch < iterations; ++epoch)
    {
        foreach (var item in initialStateProvider)
        {
            item.SetReward();
        }

        var state = initialStateProvider[0];
        QState nextState = state.GetRandNextState(null);

        while (true)
        {
            // Greedy look-ahead: find the successor of nextState with the highest Q value.
            // The selection is local to this step so a pick from an earlier step can
            // never be reused by accident.
            List<QState> candidates = nextState.GetAvailableStates();
            QState bestSuccessor = null;
            bool hasSuccessor = false;
            double maxQ = double.MinValue;

            foreach (var candidate in candidates)
            {
                double q = Q[nextState][candidate];
                if (q > maxQ)
                {
                    maxQ = q;
                    bestSuccessor = candidate;
                    hasSuccessor = true;
                }
            }

            // A state without successors contributes no future value. Without this the
            // double.MinValue sentinel would flow into the update and corrupt the table.
            double futureValue = hasSuccessor ? maxQ : 0.0;

            Nsa[state][nextState] += nsaIncrement;

            double oldQ = Q[state][nextState];
            double stepSize = learningRate / Nsa[state][nextState];
            double reward = state.GetReward().ComputeReward();
            Q[state][nextState] = oldQ + stepSize * (reward + discountRate * futureValue - oldQ);

            if (state.Equals(goal))
            {
                // Pin the goal's self-transition to its own reward and finish the episode.
                Q[state][state] = state.GetReward().ComputeReward();
                break;
            }

            if (!hasSuccessor)
            {
                // Dead end: there is no state to move on to, so the episode stops here.
                break;
            }

            state = nextState;
            nextState = bestSuccessor;
        }
    }
}
/// <summary>
/// Follows the highest-valued transition in <c>Q</c> from <paramref name="start"/>
/// until <paramref name="goal"/> is reached, writing every visited state followed by
/// "->" to the console.
/// </summary>
/// <param name="start">State the walk begins at; always the first element of the result.</param>
/// <param name="goal">State at which the walk stops.</param>
/// <returns>
/// The visited states in order. The walk is cut off once the list holds 300 entries,
/// so the last element is not guaranteed to equal <paramref name="goal"/>.
/// </returns>
public List<QState> Walk(QState start, QState goal)
{
    const int stepLimit = 300;

    var path = new List<QState> { start };
    var here = start;
    Console.Write(here.ToString() + "->");

    // The length check comes first so that, once the limit is hit, the goal
    // comparison is not evaluated again.
    while (path.Count != stepLimit && !here.Equals(goal))
    {
        here = ArgMax(Q[here]);
        Console.Write(here + "->");
        path.Add(here);
    }

    return path;
}
/// <summary>
/// Returns a state to move to from this one. Implementations are supplied by subclasses.
/// </summary>
/// <remarks>
/// NOTE(review): the name suggests the successor is picked at random; the selection
/// policy is defined by the overriding class and is not visible here.
/// </remarks>
/// <param name="prevState">
/// A previous state handed to the implementation. <c>Train</c> passes <c>null</c> when
/// requesting the first transition of an episode, so implementations must accept null.
/// </param>
/// <returns>The chosen next state.</returns>
public abstract QState GetRandNextState(QState prevState);