// One generator shared by every PickMove call. Creating a new Random per call
// is wasteful, and on .NET Framework instances created in quick succession can
// share a clock-derived seed and therefore repeat the same "random" choices.
// NOTE(review): Random is not thread-safe; this assumes single-threaded use.
private static readonly Random s_moveRandom = new Random();

/// <summary>
/// Chooses an action index for <paramref name="state"/>.
/// </summary>
/// <remarks>
/// With probability <c>ExploreRate</c>, or whenever <c>QTable</c> has no entry
/// for the state, a uniformly random index in [0, 4) is returned. Otherwise one
/// of the actions holding the largest stored value is returned, with ties
/// broken at random.
/// </remarks>
/// <param name="state">Key of the state to look up in <c>QTable</c>.</param>
/// <returns>The index of the selected action.</returns>
public int PickMove(string state)
{
    // Sometimes make a random move; always do so for a state with no entry yet.
    if ((s_moveRandom.NextDouble() < ExploreRate) || (!QTable.ContainsKey(state)))
    {
        return s_moveRandom.Next(4);
    }

    var currentOptionsQVals = QTable[state];
    var maxQVal = currentOptionsQVals.Max();

    // Collect every action whose value ties for the maximum. Exact equality is
    // intended here: maxQVal is itself one of the elements being compared.
    var validActionList = new List<int>();
    for (int i = 0; i < currentOptionsQVals.Count; i++)
    {
        if (currentOptionsQVals[i] == maxQVal)
        {
            validActionList.Add(i);
        }
    }

    // Pick one of the tied best actions at random.
    return validActionList[s_moveRandom.Next(validActionList.Count)];
}
/// <summary>
/// Renders the action values held by <paramref name="Table"/> as bracketed text,
/// one line per state.
/// </summary>
/// <param name="Table">
/// Table whose <c>tableStructure</c> supplies the state count, the action count
/// and the <c>actionValues</c> grid that is written out.
/// </param>
/// <returns>
/// A string that starts with "[" and a newline, contains one line per state in
/// which every value is followed by ", ", and ends with two backspace
/// characters and "]".
/// </returns>
public static string InitFileStructure(QTable Table)
{
    // Accumulate in a StringBuilder: appending to a string inside the nested
    // loop would copy the whole text on every value (quadratic in table size).
    var text = new System.Text.StringBuilder("[\n ");

    for (int i = 0; i < Table.tableStructure.states; i++)
    {
        for (int j = 0; j < Table.tableStructure.actions; j++)
        {
            text.Append($"{Table.tableStructure.actionValues[i][j]}, ");
        }

        text.Append("\n ");
    }

    // The trailing "\b\b" is reproduced exactly as before so the produced text
    // does not change. NOTE(review): these are literal backspace characters,
    // not a trim of the preceding "\n " -- confirm whatever consumes this
    // string expects them.
    text.Append("\b\b]");
    return text.ToString();
}
/// <summary>
/// Updates the stored value of taking <paramref name="action"/> in
/// <paramref name="state"/> after observing <paramref name="reward"/> and
/// arriving in <paramref name="nextState"/>.
/// </summary>
/// <remarks>
/// Either state that has no entry in <c>QTable</c> is first given a row of four
/// zeros. The stored value then moves toward
/// <c>reward + DiscountFactor * max(QTable[nextState])</c> by a fraction
/// <c>LearningRate</c> of the difference.
/// </remarks>
/// <param name="state">Key of the state the action was taken in.</param>
/// <param name="action">Index of the action taken, used to index the state's row.</param>
/// <param name="reward">Reward observed for the transition.</param>
/// <param name="nextState">Key of the state reached afterwards.</param>
public void Learn(string state, int action, double reward, string nextState)
{
    // Ensure both states have a row before either one is read.
    foreach (var key in new[] { state, nextState })
    {
        if (QTable.ContainsKey(key))
        {
            continue;
        }

        QTable.Add(key, new List<double>() { 0, 0, 0, 0 });
    }

    var row = QTable[state];
    var previousEstimate = row[action];

    // Target value: immediate reward plus the discounted best value reachable
    // from the next state.
    var target = reward + DiscountFactor * QTable[nextState].Max();

    row[action] += LearningRate * (target - previousEstimate);
}
/// <summary>
/// Program entry point: builds a 16-state, 4-action table with all-zero rows,
/// calls <c>ReadExistingStructure</c> on it, prints the table, runs
/// <c>StartProcess</c> once, prints the table again, saves it to file and then
/// waits for a line of console input before exiting.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    int states = 16;
    int actions = 4;

    // One row per state, each produced by QTable.Zeros(actions).
    var zeroRows = new List<double[]>();
    for (int stateIndex = 0; stateIndex < states; stateIndex++)
    {
        zeroRows.Add(QTable.Zeros(actions));
    }

    var stateNames = new string[]
    {
        "0", "1", "2", "3", "4", "5", "6", "7",
        "8", "9", "10", "11", "12", "13", "14", "15"
    };

    // Actions: UP, DOWN, RIGHT, LEFT
    table = new QTable(states, actions, stateNames, zeroRows);
    table.ReadExistingStructure();

    // Show the table before the run.
    Console.WriteLine(QTable.InitStructure(table));

    // A 10-iteration repeat loop around this call is currently disabled.
    StartProcess();

    // Show the table after the run, then save it.
    Console.WriteLine(QTable.InitStructure(table));
    table.SaveQTableToFile();

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}