// The main agent loop.
// Runs one simulation step per frame: both agents pick an action from the
// current grid state, the actions are applied in a random order (so neither
// agent has a systematic first-mover advantage), rewards are distributed,
// and — in training mode — each agent learns from the resulting transition.
// The loop ends when agentsRunning is cleared or the training-step budget
// (max_training_steps) is exhausted.
IEnumerator performAgentLoop()
{
    // Reward magnitude used for capture events (eating / being eaten);
    // it replaces the normal per-step reward when a capture occurs.
    const float CaptureReward = 100f;

    float mr_step_reward;
    float ms_step_reward;

    // Map the UI toggle flags onto a strategy id:
    // 0 => random; 1 => q-learning; 2 => minimax-q; -1 => nothing selected.
    int opponent_agent_strategy_type =
        o_rdm_select ? 0 :
        o_q_select   ? 1 :
        o_mm_select  ? 2 : -1;

    int ms_pac_man_agent_strategy_type =
        rdm_select ? 0 :
        q_select   ? 1 :
        mm_select  ? 2 : -1;

    // Both agents share the grid's learning hyper-parameters; only the
    // strategy id differs.
    mrPacMan = new MrPacManAgent(gridController.grid.Explor, gridController.grid.Decay, gridController.grid.Learning_rate, gridController.grid.Discount_factor, opponent_agent_strategy_type, isTraining);
    msPacMan = new MsPacManAgent(gridController.grid.Explor, gridController.grid.Decay, gridController.grid.Learning_rate, gridController.grid.Discount_factor, ms_pac_man_agent_strategy_type, isTraining);

    while (agentsRunning)
    {
        // One simulation step per rendered frame.
        yield return null;

        // Current state of each agent: the grid cell at its position.
        mr_curr_state = gridController.grid.GetCell(gridController.MrPy, gridController.MrPx);
        ms_curr_state = gridController.grid.GetCell(gridController.MsPy, gridController.MsPx);

        // Each agent chooses an action from its current state.
        int mr_pac_man_action = mrPacMan.getAction(gridController.grid.GetCell(gridController.MrPy, gridController.MrPx));
        int ms_pac_man_action = msPacMan.getAction(gridController.grid.GetCell(gridController.MsPy, gridController.MsPx));

        // Apply the two actions in a random order. The mover's reward is
        // computed from the pre-move state; action return codes 2 and 3
        // signal capture events (presumably "opponent eaten" / "was eaten"
        // — TODO confirm against applyMrPacManAction/applyMsPacManAction),
        // in which case the second agent does not move this step.
        if (UnityEngine.Random.Range(0, 2) == 0)
        {
            // First player (Mr Pac-Man) always gets a chance to move.
            // Calculate reward based on chosen step and current state,
            // then update Mr Pac-Man's position/score.
            mr_step_reward = calculateStepReward(mr_curr_state, mr_pac_man_action, 1);
            action_return_mr = applyMrPacManAction(mr_pac_man_action);

            if (action_return_mr == 2)
            {
                // Second player might have been eaten.
                mr_step_reward = CaptureReward;
                ms_step_reward = -CaptureReward;
            }
            else if (action_return_mr == 3)
            {
                // ...or get to eat.
                mr_step_reward = -CaptureReward;
                ms_step_reward = CaptureReward;
            }
            else
            {
                // No capture: Ms Pac-Man's reward is calculated after the
                // other agent has already completed its action, but before
                // taking her own.
                ms_step_reward = calculateStepReward(ms_curr_state, ms_pac_man_action, 0);
                action_return_ms = applyMsPacManAction(ms_pac_man_action);
            }
        }
        else
        {
            // Otherwise Ms Pac-Man goes first (mirror of the branch above).
            ms_step_reward = calculateStepReward(ms_curr_state, ms_pac_man_action, 0);
            action_return_ms = applyMsPacManAction(ms_pac_man_action);

            if (action_return_ms == 2)
            {
                mr_step_reward = -CaptureReward;
                ms_step_reward = CaptureReward;
            }
            else if (action_return_ms == 3)
            {
                // ...or get to eat.
                mr_step_reward = CaptureReward;
                ms_step_reward = -CaptureReward;
            }
            else
            {
                // Most of the time Mr will just move.
                mr_step_reward = calculateStepReward(mr_curr_state, mr_pac_man_action, 1);
                action_return_mr = applyMrPacManAction(mr_pac_man_action);
            }
        }

        // Update global scores (1 => Mr, 0 => Ms) and the step display.
        gridController.AddPoints(1, mr_step_reward);
        gridController.AddPoints(0, ms_step_reward);

        training_curr_step += 1;
        gridController.txtStep_MS.text = training_curr_step.ToString();
        gridController.txtStep_MR.text = training_curr_step.ToString();

        // New state of both players after the moves.
        mr_new_state = gridController.grid.GetCell(gridController.MrPy, gridController.MrPx);
        ms_new_state = gridController.grid.GetCell(gridController.MsPy, gridController.MsPx);

        // LEARN — each agent should receive some kind of reward.
        if (isTraining) // only learn in training mode, not testing mode
        {
            // Both learn() calls receive the opponent's action; q-learning
            // ignores it, minimax-q uses it.
            mrPacMan.learn(mr_curr_state, mr_new_state, mr_step_reward, mr_pac_man_action, ms_pac_man_action);
            msPacMan.learn(ms_curr_state, ms_new_state, ms_step_reward, ms_pac_man_action, mr_pac_man_action);
        }

        // Per-match step budget exhausted: reset the board and match counter
        // (training_curr_step keeps accumulating across matches).
        if (curr_step >= max_steps)
        {
            gridController.ResetTable(); // reset the game table
            curr_step = 0;
        }
        else
        {
            curr_step++;
        }

        // All candies eaten: award the match to the higher scorer
        // (no winner on a tie) and reset the board.
        if (gridController.grid.isNoCandies())
        {
            if (gridController.curr_match_score_mr > gridController.curr_match_score_ms)
            {
                gridController.AddWin(1); // Mr gets a win
            }
            else if (gridController.curr_match_score_ms > gridController.curr_match_score_mr)
            {
                gridController.AddWin(0); // Ms gets a win
            }

            gridController.ResetTable();
        }

        // Total training budget exhausted: stop the agents from running.
        if (training_curr_step >= max_training_steps)
        {
            agentsRunning = false;
            gridController.ResetTable();
        }
    }
}