// Single generator reused by every PickMove call. Constructing a new Random per call
// can yield identical sequences for calls made in quick succession on runtimes that
// seed from the system clock, and allocates needlessly on all runtimes.
// NOTE(review): Random instances are not thread-safe; if PickMove can be called from
// several threads at once, guard this or use a per-thread generator.
private static readonly Random s_pickMoveRandom = new Random();

/// <summary>
/// Chooses an action index for <paramref name="state"/>.
/// With probability <c>ExploreRate</c>, or whenever the state has no entry in
/// <c>QTable</c>, a uniformly random action in the range 0..3 is returned.
/// Otherwise the action with the highest stored Q-value is returned, with ties
/// between equally good actions broken uniformly at random.
/// </summary>
/// <param name="state">Key identifying the current state in <c>QTable</c>.</param>
/// <returns>The index of the chosen action.</returns>
public int PickMove(string state)
{
    var random = s_pickMoveRandom;

    // Sometimes make a random move; always do so for a state that has no row yet.
    // 4 is the hard-coded number of actions and matches the four-entry rows that
    // Learn creates for new states.
    if (random.NextDouble() < ExploreRate || !QTable.TryGetValue(state, out var currentOptionsQVals))
    {
        return random.Next(4);
    }

    var maxQVal = currentOptionsQVals.Max();

    // Collect every action whose Q-value equals the maximum. Exact equality is
    // intentional here: maxQVal is itself one of the stored values.
    var validActionList = new List<int>();
    for (int i = 0; i < currentOptionsQVals.Count; i++)
    {
        if (currentOptionsQVals[i] == maxQVal)
        {
            validActionList.Add(i);
        }
    }

    // Pick one of the optimal actions at random.
    return validActionList[random.Next(validActionList.Count)];
}
/// <summary>
/// Updates the stored Q-value for taking <paramref name="action"/> in
/// <paramref name="state"/> after observing <paramref name="reward"/> and arriving in
/// <paramref name="nextState"/>:
/// <c>Q[state][action] += LearningRate * (reward + DiscountFactor * max(Q[nextState]) - Q[state][action])</c>.
/// A state that has no row in <c>QTable</c> yet is first given a row of four zeros.
/// </summary>
/// <param name="state">Key of the state in which the action was taken.</param>
/// <param name="action">Index of the action taken; used to index the state's row.</param>
/// <param name="reward">Reward observed for the transition.</param>
/// <param name="nextState">Key of the state reached after the action.</param>
public void Learn(string state, int action, double reward, string nextState)
{
    // Fetch each row once (creating it on first sight) instead of re-hashing the key
    // for every ContainsKey / indexer access.
    if (!QTable.TryGetValue(state, out var stateQVals))
    {
        stateQVals = new List<double>() { 0, 0, 0, 0 };
        QTable.Add(state, stateQVals);
    }

    // Looked up after the insertion above, so state == nextState resolves to the same row.
    if (!QTable.TryGetValue(nextState, out var nextStateQVals))
    {
        nextStateQVals = new List<double>() { 0, 0, 0, 0 };
        QTable.Add(nextState, nextStateQVals);
    }

    var currentQ = stateQVals[action];

    // Target value: immediate reward plus the discounted best value available from nextState.
    var newQ = reward + DiscountFactor * nextStateQVals.Max();

    // Move the stored estimate a LearningRate-sized step toward the target.
    stateQVals[action] += LearningRate * (newQ - currentQ);
}