// Copy constructor: builds the state for a newly generated step from the
// state of the previous step (set_from).
// Counters and reward totals are carried over; the board and the Q-matrix are
// rebuilt through their own copy constructors; the previous step's resulting
// location becomes this step's initial location.
public AlgorithmState(AlgorithmState set_from)
{
    // Running totals are transferred as-is from the previous step.
    cans_collected = set_from.cans_collected;
    episode_rewards = set_from.episode_rewards;
    total_rewards = set_from.total_rewards;

    // The board and the Q-matrix each go through their copy constructor.
    board_data = new GameBoard(set_from.board_data);
    live_qmatrix = new Qmatrix(set_from.live_qmatrix);

    // This step begins where the previous one ended. The coordinates are
    // copied into a fresh two-element array rather than sharing the old one.
    var previous_result = set_from.location_result;
    location_initial = new int[] { previous_result[0], previous_result[1] };

    // NOTE(review): this is a plain assignment; if the perception type is a
    // reference type both states share the same instance - confirm intended.
    bender_perception_starting = set_from.bender_perception_ending;

    // Normal case: the episode's step limit has not been hit, so just advance
    // the step counter on the copied Q-matrix.
    if (live_qmatrix.step_number != Qmatrix.step_limit)
    {
        live_qmatrix.step_number++;
        return;
    }

    // The step counter equals the limit: begin a new episode instead.
    StartNewEpisode();
}
// Resets the values needed to start over without constructing a new state
// (a brand-new state would lose bender's position).
// Called from create_empty_board (after reset), and from the constructor.
private void InitializeValues()
{
    // Removes the cans for the initial launch; bender stays on the board.
    board_data.ClearCans();

    // Start again from a freshly constructed Q-matrix.
    live_qmatrix = new Qmatrix();

    // Both locations go back to (0, 0); a new int[2] is zero-initialized.
    location_initial = new int[2];
    location_result = new int[2];
}
// Copy constructor: builds a new Q-matrix from copy_from.
// Every entry of copy_from.matrix_data is added to a new dictionary with a
// ValueSet created through the ValueSet copy constructor, so the value sets
// are not shared with the source. The key objects themselves are reused.
// The scalar bookkeeping fields are then copied across unchanged.
public Qmatrix(Qmatrix copy_from)
{
    // The final entry count is known up front, so presize the dictionary
    // to avoid repeated resizing while it is filled.
    matrix_data = new Dictionary<PerceptionState, ValueSet>(copy_from.matrix_data.Count);

    // Enumerate key/value pairs directly instead of walking Keys and
    // indexing the source again, which cost a second lookup per entry.
    foreach (var entry in copy_from.matrix_data)
    {
        matrix_data.Add(entry.Key, new ValueSet(entry.Value));
    }

    did_we_update = copy_from.did_we_update;
    step_number = copy_from.step_number;
    episode_number = copy_from.episode_number;
    n_current = copy_from.n_current;
    y_current = copy_from.y_current;
    e_current = copy_from.e_current;
}