Example #1
0
 /// <summary>
 /// Performs one greedy step of the eligibility-trace update: applies the pending
 /// action, observes the next state and its reward, selects the next action with
 /// <c>exploit_Move</c>, and then updates the weight table (<c>states</c>) and the
 /// trace table (<c>eStates</c>).
 /// </summary>
 /// <returns>The state reached by applying the pending action.</returns>
 public State exploitMove()
 {
     // Current state S and action A are already initialized by this point; take A.
     int[] newStateLoc = getMove(this.oldState, oldAction);
     stepsTaken.Add(oldState);

     // Get S' and observe its reward.
     newState = getNextState(newStateLoc[0], newStateLoc[1]);
     reward   = getReward(newStateLoc[0], newStateLoc[1]);

     // Exploit: get A' from S'.
     newAction = newState.exploit_Move();

     // Temporal-difference error for the transition (S, A) -> (S', A').
     delta = reward + gamma * newState.moveWeights[(int)newAction] - oldState.moveWeights[(int)oldAction];

     // Bump the eligibility of the pair that was just visited.
     eStates[oldState.rowIndex, oldState.columnIndex].moveWeights[(int)oldAction] += 1;

     // Update every (state, action) pair. The previous version only touched the
     // oldAction slot of each cell, so traces of the other actions never decayed
     // and their weights never received the error signal. The grid bounds are
     // read from the table instead of being fixed at 20x20.
     // NOTE(review): assumes eStates has the same dimensions as states and that
     // moveWeights is an array - confirm against the State/Explorer declarations.
     int rowCount = states.GetLength(0);
     int colCount = states.GetLength(1);
     for (int row = 0; row < rowCount; row++)
     {
         for (int col = 0; col < colCount; col++)
         {
             int actionCount = states[row, col].moveWeights.Length;
             for (int action = 0; action < actionCount; action++)
             {
                 states[row, col].moveWeights[action] = states[row, col].moveWeights[action]
                                                        + learnRate * delta * eStates[row, col].moveWeights[action];
                 eStates[row, col].moveWeights[action] = lambda * gamma * eStates[row, col].moveWeights[action];
             }
         }
     }

     // Advance: S <- S', A <- A'.
     oldAction = newAction;
     oldState  = newState;
     return(newState);
 }
Example #2
0
        /// <summary>
        /// Translates a move direction into the grid coordinates of the neighbouring cell.
        /// </summary>
        /// <param name="currState">State whose rowIndex/columnIndex are the starting cell.</param>
        /// <param name="moveDirection">Direction to step in.</param>
        /// <returns>
        /// A two-element array { row, column }: UP/DOWN change the row by -1/+1,
        /// LEFT/RIGHT change the column by -1/+1. Any other direction yields { 0, 0 }.
        /// </returns>
        public int[] getMove(State currState, State.move moveDirection)
        {
            if (moveDirection == State.move.UP)
            {
                return new int[] { currState.rowIndex - 1, currState.columnIndex };
            }

            if (moveDirection == State.move.DOWN)
            {
                return new int[] { currState.rowIndex + 1, currState.columnIndex };
            }

            if (moveDirection == State.move.LEFT)
            {
                return new int[] { currState.rowIndex, currState.columnIndex - 1 };
            }

            if (moveDirection == State.move.RIGHT)
            {
                return new int[] { currState.rowIndex, currState.columnIndex + 1 };
            }

            // Unrecognised direction: fall back to the origin without reading currState.
            return new int[] { 0, 0 };
        }
Example #3
0
 /// <summary>
 /// Takes one greedy step from <c>current_State</c>: records the state in
 /// <c>stepsTaken</c>, asks it for its <c>exploit_Move</c> direction, stores that
 /// direction, and moves to the neighbouring cell unless
 /// <c>checkForTerminal</c> reports true for it.
 /// </summary>
 /// <returns>
 /// The new <c>current_State</c>, or the unchanged one when the target cell was
 /// reported as terminal.
 /// </returns>
 public State exploit()
 {
     stepsTaken.Add(current_State);

     // Record the chosen direction in the table, on the explorer and on the state itself.
     State.move chosen = current_State.exploit_Move();
     states[current_State.rowIndex, current_State.columnIndex].directionTaken = chosen;
     directionMoved = chosen;
     current_State.directionTaken = chosen;

     // Work out the target cell.
     // NOTE(review): UP/DOWN shift the column and LEFT/RIGHT shift the row here -
     // confirm this matches the grid orientation used by the rest of the project.
     int targetRow = current_State.rowIndex;
     int targetCol = current_State.columnIndex;
     switch (chosen)
     {
     case State.move.UP:
         targetCol -= 1;
         break;

     case State.move.DOWN:
         targetCol += 1;
         break;

     case State.move.LEFT:
         targetRow -= 1;
         break;

     default:
         // Every remaining direction is treated as a +1 row step.
         targetRow += 1;
         break;
     }

     // Only step into the target cell when it is not reported as terminal.
     if (!checkForTerminal(targetRow, targetCol))
     {
         current_State = states[targetRow, targetCol];
     }
     return(current_State);
 }
Example #4
0
        /// <summary>
        /// Start-button handler: launches a worker thread that runs training episodes,
        /// paints each step onto <c>dgv_World</c>, saves the Q table after every batch of
        /// 100 episodes, and exits the application once the loop ends.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void b_Start_Click(object sender, EventArgs e)
        {
            // Wall-clock limit for the whole run.
            DateTime end  = DateTime.UtcNow.AddHours(7.5);
            Random   rand = new Random();

            exploreThread = new Thread(() =>
            {
                // epsilon is clamped to .10 below, so this condition stays true and the
                // time limit checked after each batch is what ends the loop.
                while (explore.epsilon >= .05)
                {
                    //run 100 times before saving QSA to file
                    //to reduce I/O usage.
                    for (int i = 0; i < 100; i++)
                    {
                        explore.reset_ETable();
                        State currState       = explore.oldState;
                        explore.oldAction     = currState.getInitAction(rand);
                        State.move currAction = explore.oldAction;

                        // UI updates are marshalled to the control's thread.
                        dgv_World.Invoke((MethodInvoker) delegate
                        {
                            dgv_World.Rows[currState.rowIndex].Cells[currState.columnIndex].Style.BackColor = Color.Green;
                        });
                        do
                        {
                            // Roll in [0, 99.999999) and compare against epsilon as a percentage.
                            double exploreChance = rand.NextDouble() * 99.999999;
                            if (exploreChance <= explore.epsilon * 100)
                            {
                                explore.exploreMove(rand);
                            }
                            else
                            {
                                explore.exploitMove();
                            }
                            if (explore.checkTerminal(explore.newState.rowIndex, explore.newState.columnIndex))
                            {
                                explore.inTerminalState = true;
                            }
                            currState  = explore.newState;
                            currAction = explore.newAction;
                            dgv_World.Invoke((MethodInvoker) delegate
                            {
                                dgv_World.Rows[currState.rowIndex].Cells[currState.columnIndex].Style.BackColor = Color.LightGreen;
                                // Show the next action as a single letter; unknown values leave the cell empty.
                                string move = "";
                                switch (currAction)
                                {
                                case State.move.UP:
                                    move = "U";
                                    break;

                                case State.move.DOWN:
                                    move = "D";
                                    break;

                                case State.move.LEFT:
                                    move = "L";
                                    break;

                                case State.move.RIGHT:
                                    move = "R";
                                    break;
                                }
                                dgv_World.Rows[currState.rowIndex].Cells[currState.columnIndex].Value = move;
                            });

                            if (explore.inTerminalState)
                            {
                                dgv_World.Invoke((MethodInvoker) delegate
                                {
                                    dgv_World.Rows[currState.rowIndex].Cells[currState.columnIndex].Style.BackColor = Color.Red;
                                });
                            }
                            // Pause so each step is visible on screen.
                            Thread.Sleep(150);
                        } while (explore.oldState.isTerminalState == false);

                        // Decay epsilon after each episode, with a floor of .10.
                        explore.epsilon = explore.epsilon - explore.decay_Rate;
                        if (explore.epsilon <= .10)
                        {
                            explore.epsilon = .10;
                        }
                        // NOTE(review): called from the worker thread without Invoke -
                        // confirm reset_GUI marshals to the UI thread itself.
                        reset_GUI();
                        explore.stepsTaken.Clear();
                        explore.resetStart(rand);
                    }
                    explore.saveQ_Table();
                    if (DateTime.UtcNow > end)
                    {
                        break;
                    }
                }

                // The worker must not Join() itself: that call never returns and would
                // leave this thread blocked forever, so the lambda simply returns here.
                Application.Exit();
            });
            exploreThread.Start();
        }
Example #5
0
        /// <summary>
        /// Start-button handler: launches a worker thread that runs batches of 100
        /// training episodes, paints visited cells onto <c>dgv_World</c>, and saves the
        /// Q table after every batch.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void b_Start_Click(object sender, EventArgs e)
        {
            int    epochs = 0;
            Random rand   = new Random();

            exploreThread = new Thread(() =>
            {
                for (int runs = 0; runs < 1000000; runs++)
                {
                    for (int i = 0; i < 100; i++)
                    {
                        // Mark the start cell. This runs on the worker thread, so the grid
                        // is updated through Invoke like every other UI access here.
                        dgv_World.Invoke((MethodInvoker) delegate
                        {
                            dgv_World.Rows[explore.startX].Cells[explore.startY].Style.BackColor = Color.Green;
                        });
                        explore.reset_ETable();
                        State oldState = explore.current_State;

                        do
                        {
                            // Roll for exploit/explore: exploit when the roll is at least epsilon.
                            double move_Type = rand.NextDouble();
                            State currState  = (move_Type >= explore.epsilon)
                                               ? explore.exploit()
                                               : explore.explore(rand);

                            // NOTE(review): this copies the *new* state's directionTaken onto the
                            // previous state - confirm that is the intended action for the update.
                            oldState.directionTaken = currState.directionTaken;

                            explore.setRewardValue(currState);
                            explore.setDelta(currState, oldState);

                            // Bump the trace of the visited pair, then refresh its weight and trace.
                            explore.eStates[oldState.rowIndex, oldState.columnIndex].moveWeights[(int)oldState.directionTaken] += 1;

                            explore.states[oldState.rowIndex, oldState.columnIndex].moveWeights[(int)oldState.directionTaken]  = explore.updateQTable(oldState.rowIndex, oldState.columnIndex, (int)oldState.directionTaken);
                            explore.eStates[oldState.rowIndex, oldState.columnIndex].moveWeights[(int)oldState.directionTaken] = explore.updateETable(oldState.rowIndex, oldState.columnIndex, (int)oldState.directionTaken);
                            oldState = currState;

                            dgv_World.Invoke((MethodInvoker) delegate
                            {
                                dgv_World.Rows[oldState.rowIndex].Cells[oldState.columnIndex].Style.BackColor = Color.LawnGreen;
                            });
                            // Pause so each step is visible on screen.
                            Thread.Sleep(200);
                        } while (explore.current_State.isTerminalState == false);

                        dgv_World.Invoke((MethodInvoker) delegate
                        {
                            reset_GUI();
                        });
                        epochs++;

                        // Start the next episode with a fresh Explorer that keeps the learned
                        // table and the decayed epsilon (floored at .10).
                        oldExplore            = explore;
                        explore               = new Explorer(boardState);
                        explore.states        = oldExplore.states;
                        explore.current_State = explore.states[explore.startX, explore.startY];
                        explore.epsilon       = oldExplore.epsilon - explore.decay_Rate;
                        if (explore.epsilon < .10)
                        {
                            explore.epsilon = .10;
                        }
                    }
                    explore.saveQ_Table();
                    if (explore.epsilon <= .1)
                    {
                        Console.Out.WriteLine("In here");
                    }
                }

                // The worker must not Join() itself: that call never returns, so the
                // lambda simply returns when the runs are finished.
            });
            exploreThread.Start();
        }