/// <summary>
/// Performs one greedy step of the trace-based update: takes the pending action
/// (<c>oldAction</c>) from <c>oldState</c>, observes the resulting state and reward,
/// selects the next action via <c>exploit_Move()</c>, and updates the Q-values
/// (<c>states</c>) and eligibility traces (<c>eStates</c>).
/// </summary>
/// <remarks>
/// <c>oldState</c> and <c>oldAction</c> must already be initialised by the caller.
/// On return they have been advanced to the new state/action.
/// </remarks>
/// <returns>The state reached by this step.</returns>
public State exploitMove()
{
    // The original code hard-codes a 20x20 grid; kept as-is because the real
    // dimensions of states/eStates are not visible from this method.
    const int gridRows = 20;
    const int gridCols = 20;

    // Take action A from state S.
    int[] newStateLoc = getMove(this.oldState, oldAction);
    stepsTaken.Add(oldState);

    // Observe S' and the reward for entering it.
    newState = getNextState(newStateLoc[0], newStateLoc[1]);
    reward = getReward(newStateLoc[0], newStateLoc[1]);

    // Choose A' from S' (exploit step).
    newAction = newState.exploit_Move();

    // delta = r + gamma * Q(S', A') - Q(S, A)
    delta = reward
            + gamma * newState.moveWeights[(int)newAction]
            - oldState.moveWeights[(int)oldAction];

    // Bump the eligibility trace of the (S, A) pair that was just taken.
    eStates[oldState.rowIndex, oldState.columnIndex].moveWeights[(int)oldAction] += 1;

    // Update EVERY (state, action) pair. Previously only the oldAction column was
    // touched, so traces of the other actions were never applied or decayed until
    // that action happened to be taken again.
    for (int row = 0; row < gridRows; row++)
    {
        for (int col = 0; col < gridCols; col++)
        {
            // assumes moveWeights is an array with the same length in states and eStates — TODO confirm
            int actionCount = states[row, col].moveWeights.Length;
            for (int action = 0; action < actionCount; action++)
            {
                double trace = eStates[row, col].moveWeights[action];
                states[row, col].moveWeights[action] += learnRate * delta * trace;
                eStates[row, col].moveWeights[action] = lambda * gamma * trace;
            }
        }
    }

    // Advance: S <- S', A <- A'.
    oldAction = newAction;
    oldState = newState;
    return newState;
}
/// <summary>
/// Converts a move direction into the grid coordinates reached by taking that
/// move from <paramref name="currState"/>.
/// </summary>
/// <param name="currState">State the move starts from.</param>
/// <param name="moveDirection">Direction to step in.</param>
/// <returns>
/// A new two-element array <c>{ row, column }</c>. UP/DOWN change the row by -1/+1,
/// LEFT/RIGHT change the column by -1/+1. Any other direction yields <c>{ 0, 0 }</c>.
/// No bounds checking is done here.
/// </returns>
public int[] getMove(State currState, State.move moveDirection)
{
    int targetRow;
    int targetCol;

    if (moveDirection == State.move.UP)
    {
        targetRow = currState.rowIndex - 1;
        targetCol = currState.columnIndex;
    }
    else if (moveDirection == State.move.DOWN)
    {
        targetRow = currState.rowIndex + 1;
        targetCol = currState.columnIndex;
    }
    else if (moveDirection == State.move.LEFT)
    {
        targetRow = currState.rowIndex;
        targetCol = currState.columnIndex - 1;
    }
    else if (moveDirection == State.move.RIGHT)
    {
        targetRow = currState.rowIndex;
        targetCol = currState.columnIndex + 1;
    }
    else
    {
        // Unrecognised direction: fall back to the origin cell.
        targetRow = 0;
        targetCol = 0;
    }

    return new int[] { targetRow, targetCol };
}
/// <summary>
/// Takes one greedy step from <c>current_State</c>: records the state in
/// <c>stepsTaken</c>, asks the state for its <c>exploit_Move()</c> direction,
/// stores that direction, and moves to the neighbouring cell unless
/// <c>checkForTerminal</c> reports true for it.
/// </summary>
/// <returns>
/// <c>current_State</c> after the step. When <c>checkForTerminal</c> returns true
/// this method does not reassign <c>current_State</c> itself.
/// </returns>
public State exploit()
{
    stepsTaken.Add(current_State);

    State.move chosen = current_State.exploit_Move();
    states[current_State.rowIndex, current_State.columnIndex].directionTaken = chosen;
    directionMoved = chosen;
    current_State.directionTaken = chosen;

    // NOTE(review): the axis mapping here is transposed relative to getMove()
    // (here UP/DOWN change the column and LEFT/RIGHT change the row; getMove does
    // the opposite). Left untouched on purpose — confirm which convention
    // explore(rand) and any saved Q-tables rely on before aligning them.
    int rowOffset = 0;
    int colOffset = 0;
    switch (chosen)
    {
        case State.move.UP:
            colOffset = -1;
            break;
        case State.move.DOWN:
            colOffset = 1;
            break;
        case State.move.LEFT:
            rowOffset = -1;
            break;
        default:
            // RIGHT (and anything else) steps one row forward.
            rowOffset = 1;
            break;
    }

    bool hitTerminal = checkForTerminal(current_State.rowIndex + rowOffset, current_State.columnIndex + colOffset);
    if (!hitTerminal)
    {
        current_State = states[current_State.rowIndex + rowOffset, current_State.columnIndex + colOffset];
    }

    return current_State;
}
/// <summary>
/// Start-button handler. Launches a worker thread that runs training episodes,
/// paints the agent's path into <c>dgv_World</c>, saves the Q-table after every
/// 100 episodes, and exits the application once the 7.5-hour deadline has passed.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void b_Start_Click(object sender, EventArgs e)
{
    DateTime end = DateTime.UtcNow.AddHours(7.5);
    Random rand = new Random();

    exploreThread = new Thread(() =>
    {
        // Epsilon is clamped to .10 after each episode below, so once clamped this
        // condition stays true and the deadline check is what ends the run.
        while (explore.epsilon >= .05)
        {
            // Run 100 episodes before saving the Q-table to reduce I/O usage.
            for (int i = 0; i < 100; i++)
            {
                explore.reset_ETable();
                State currState = explore.oldState;
                explore.oldAction = currState.getInitAction(rand);
                State.move currAction = explore.oldAction;

                // All grid access is marshalled onto the UI thread.
                dgv_World.Invoke((MethodInvoker)delegate
                {
                    dgv_World.Rows[currState.rowIndex].Cells[currState.columnIndex].Style.BackColor = Color.Green;
                });

                do
                {
                    // Roll in [0, 100) and compare against epsilon expressed as a percentage.
                    double exploreChance = rand.NextDouble() * (99.999999 - 0.000) + 0.000;
                    if (exploreChance <= explore.epsilon * 100)
                    {
                        explore.exploreMove(rand);
                    }
                    else
                    {
                        explore.exploitMove();
                    }

                    if (explore.checkTerminal(explore.newState.rowIndex, explore.newState.columnIndex))
                    {
                        explore.inTerminalState = true;
                    }

                    currState = explore.newState;
                    currAction = explore.newAction;

                    dgv_World.Invoke((MethodInvoker)delegate
                    {
                        dgv_World.Rows[currState.rowIndex].Cells[currState.columnIndex].Style.BackColor = Color.LightGreen;
                        string move = "";
                        switch (currAction)
                        {
                            case State.move.UP:
                                move = "U";
                                break;
                            case State.move.DOWN:
                                move = "D";
                                break;
                            case State.move.LEFT:
                                move = "L";
                                break;
                            case State.move.RIGHT:
                                move = "R";
                                break;
                        }
                        dgv_World.Rows[currState.rowIndex].Cells[currState.columnIndex].Value = move;
                    });

                    if (explore.inTerminalState)
                    {
                        dgv_World.Invoke((MethodInvoker)delegate
                        {
                            dgv_World.Rows[currState.rowIndex].Cells[currState.columnIndex].Style.BackColor = Color.Red;
                        });
                    }

                    // Slow the animation down so each step is visible.
                    Thread.Sleep(150);
                } while (explore.oldState.isTerminalState == false);

                explore.epsilon = explore.epsilon - explore.decay_Rate;
                if (explore.epsilon <= .10)
                {
                    explore.epsilon = .10;
                }

                // reset_GUI is run on the UI thread; calling it directly from the
                // worker would be a cross-thread control access.
                dgv_World.Invoke((MethodInvoker)delegate
                {
                    reset_GUI();
                });
                explore.stepsTaken.Clear();
                explore.resetStart(rand);
            }

            explore.saveQ_Table();
            if (DateTime.UtcNow > end)
            {
                break;
            }
        }

        Application.Exit();
        // The worker must not Join() itself: that blocks forever and keeps the
        // process alive after Application.Exit(). Returning ends the thread.
    });

    exploreThread.Start();
}
/// <summary>
/// Start-button handler. Launches a worker thread that runs up to 1,000,000
/// batches of 100 episodes, choosing between <c>explore.exploit()</c> and
/// <c>explore.explore(rand)</c> by comparing a random roll with epsilon, applying
/// the per-step Q/eligibility updates, and saving the Q-table after each batch.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void b_Start_Click(object sender, EventArgs e)
{
    Random rand = new Random();

    exploreThread = new Thread(() =>
    {
        for (int runs = 0; runs < 1000000; runs++)
        {
            for (int i = 0; i < 100; i++)
            {
                // Paint the start cell on the UI thread; touching the grid directly
                // from the worker is a cross-thread control access.
                dgv_World.Invoke((MethodInvoker)delegate
                {
                    dgv_World.Rows[explore.startX].Cells[explore.startY].Style.BackColor = Color.Green;
                });

                explore.reset_ETable();
                State oldState = explore.current_State;

                do
                {
                    // Roll in [0, 1): at or above epsilon exploits, below explores.
                    double move_Type = rand.NextDouble() * (1.0 - 0.0) + 0.0;
                    State currState = null;
                    if (move_Type >= explore.epsilon)
                    {
                        currState = explore.exploit();
                    }
                    else
                    {
                        currState = explore.explore(rand);
                    }

                    oldState.directionTaken = currState.directionTaken;
                    explore.setRewardValue(currState);
                    explore.setDelta(currState, oldState);

                    // Per-step update of the (oldState, directionTaken) entry:
                    // bump its trace, update its Q-value, then update its trace.
                    explore.eStates[oldState.rowIndex, oldState.columnIndex].moveWeights[(int)oldState.directionTaken] += 1;
                    explore.states[oldState.rowIndex, oldState.columnIndex].moveWeights[(int)oldState.directionTaken] =
                        explore.updateQTable(oldState.rowIndex, oldState.columnIndex, (int)oldState.directionTaken);
                    explore.eStates[oldState.rowIndex, oldState.columnIndex].moveWeights[(int)oldState.directionTaken] =
                        explore.updateETable(oldState.rowIndex, oldState.columnIndex, (int)oldState.directionTaken);

                    oldState = currState;

                    dgv_World.Invoke((MethodInvoker)delegate
                    {
                        dgv_World.Rows[oldState.rowIndex].Cells[oldState.columnIndex].Style.BackColor = Color.LawnGreen;
                    });

                    // Slow the animation down so each step is visible.
                    Thread.Sleep(200);
                } while (explore.current_State.isTerminalState == false);

                dgv_World.Invoke((MethodInvoker)delegate
                {
                    reset_GUI();
                });

                // Start the next episode with a fresh Explorer that keeps the
                // learned state table and the decayed epsilon of the previous one.
                oldExplore = explore;
                explore = new Explorer(boardState);
                explore.states = oldExplore.states;
                explore.current_State = explore.states[explore.startX, explore.startY];
                explore.epsilon = oldExplore.epsilon - explore.decay_Rate;
                if (explore.epsilon < .10)
                {
                    explore.epsilon = .10;
                }
            }

            explore.saveQ_Table();
            if (explore.epsilon <= .1)
            {
                Console.Out.WriteLine("In here");
            }
        }

        // The worker must not Join() itself: that blocks forever and the thread
        // would never terminate. Returning from the lambda ends it.
    });

    exploreThread.Start();
}