Example #1
0
        //In the algorithm manager, "generate step" is ambiguous: it can mean actually stepping through the algorithm,
        //or starting the algorithm and making the first history entry at step 0.
        //Here, a step only happens when the manager has asked us to *actually* take a step.
        /// <summary>
        /// Performs one step of the current episode: requests a move from the live Q-matrix,
        /// applies it to the board, records the reward and resulting state, feeds the
        /// transition back to the Q-matrix, and finally checks whether the run is over.
        /// </summary>
        public void TakeStep()
        {
            // 1. Choose a move based on the perception recorded before this step.
            //    (Per the original author's note, the move is randomly generated for now.)
            move_this_step = live_qmatrix.GenerateStep(bender_perception_starting);

            // 2. Try the move on the board and look up the reward tied to its outcome.
            result_this_step = board_data.ApplyMove(move_this_step);
            obtained_reward = ReinforcementFactors.list[result_this_step];

            // 3. Bookkeeping for the running episode: reward total and can counter.
            episode_rewards += obtained_reward;
            if (result_this_step == MoveResult.can_collected())
            {
                cans_collected++;
            }

            // 4. Snapshot where bender is after the move and what it perceives there.
            location_result = new int[]
            {
                board_data.bender.x_coordinate,
                board_data.bender.y_coordinate
            };
            bender_perception_ending = board_data.bender.get_perception_state();

            // 5. Hand the full transition (start state, end state, move, reward) to the Q-matrix.
            // NOTE(review): bender_perception_starting is not refreshed in this method;
            // presumably the caller updates it before the next step - confirm.
            live_qmatrix.UpdateState(
                bender_perception_starting,
                bender_perception_ending,
                move_this_step,
                obtained_reward);

            // 6. Flag the end of the algorithm once the step limit is hit in an episode
            //    whose number is past the episode limit.
            // NOTE(review): the episode test is a strict '>' - confirm this is intended
            // and not meant to be '>=' given how episodes are numbered.
            bool run_is_finished = GetStepNumber() == Qmatrix.step_limit
                                   && GetEpisodeNumber() > Qmatrix.episode_limit;
            if (run_is_finished)
            {
                algorithm_ended = true;
            }
        }