// Retrieves the response for a given initial action, following the basic
// rules of the game: Rock is answered with Paper and Scissors with Rock.
// Paper, and any value other than the three named actions, is answered
// with Scissors.
public RPSAction GetActionForStrategy(RPSAction strategy)
{
    if (strategy == RPSAction.Rock)
    {
        return RPSAction.Paper;
    }

    if (strategy == RPSAction.Scissors)
    {
        return RPSAction.Rock;
    }

    // Paper and the default case share the same answer.
    return RPSAction.Scissors;
}
// Informs the algorithm of the opponent's action (in this case, the player's
// action) and updates the bookkeeping for the action stored in lastAction:
// the total number of rounds, the accumulated score of that action and the
// number of times it has been played.
public void TellOpponentAction(RPSAction action)
{
    totalActions++;

    // Utility of our last action against what the opponent just played.
    float payoff = GetUtility(lastAction, action);

    int played = (int)lastAction;
    score[played] += payoff;
    count[played]++;
}
// Informs the regret-matching algorithm of the opponent's action.
// The action is stored in lastOpponentAction and, for every strategy, the
// regret is increased by the utility its cached response (lastActionRM)
// would have obtained and decreased by the utility that the action in
// lastAction obtained.
public void TellOpponentActionRM(RPSAction action)
{
    lastOpponentAction = action;

    int strategy = 0;
    while (strategy < numActions)
    {
        float hypothetical = GetUtility((RPSAction)lastActionRM[strategy], action);
        float obtained = GetUtility((RPSAction)lastAction, action);

        regret[strategy] += hypothetical;
        regret[strategy] -= obtained;

        strategy++;
    }
}
// Computes the next action to be taken using regret matching.
//
// The response of every strategy is cached in lastActionRM. When no strategy
// has positive regret a random action is stored in lastAction and returned.
// Otherwise a strategy is drawn with probability proportional to its positive
// regret; its index is stored in lastStrategy and its response in lastAction.
//
// Returns: the action to play; the same value is left in lastAction.
public RPSAction GetNextActionRM()
{
    InitRegretMatching();

    // explore all available options and hold the response to be taken
    for (int i = 0; i < numActions; i++)
    {
        lastActionRM[i] = GetActionForStrategy((RPSAction)i);
    }

    // sum the overall positive regret
    float sum = 0f;
    for (int i = 0; i < numActions; i++)
    {
        if (regret[i] > 0f)
        {
            sum += regret[i];
        }
    }

    // return a random action if the sum is less than or equal to 0
    if (sum <= 0f)
    {
        lastAction = (RPSAction)Random.Range(0, numActions);
        return lastAction;
    }

    // accumulate the positive regret so that chance[i] is the upper bound
    // of the interval that belongs to strategy i
    for (int i = 0; i < numActions; i++)
    {
        chance[i] = 0f;
        if (regret[i] > 0f)
        {
            chance[i] = regret[i];
        }
        if (i > 0)
        {
            chance[i] += chance[i - 1];
        }
    }

    // chance[] is not normalised: its last entry equals sum, not 1. The draw
    // is therefore scaled by sum; comparing an unscaled value in [0, 1]
    // against it would nearly always select the first strategy with
    // positive regret.
    float prob = Random.value * sum;
    for (int i = 0; i < numActions; i++)
    {
        if (prob < chance[i])
        {
            lastStrategy = i;
            lastAction = lastActionRM[i];
            return lastAction;
        }
    }

    // Reached only when the draw is not below the last cumulative bound.
    // The returned action is recorded so that the next regret update is
    // computed against the action that was actually played.
    lastAction = (RPSAction)(numActions - 1);
    return lastAction;
}
// Registers the opponent's action: increments the round counter, adds the
// utility of lastAction against that action to its score, increments its
// play count, and logs the first five entries of score and count.
public void TellOpponentAction(RPSAction action)
{
    totalActions++;

    float payoff = GetUtility(lastAction, action);
    int played = (int)lastAction;
    score[played] += payoff;
    count[played]++;

    // The labels are runtime text and are kept exactly as they were.
    string scoreLine = "Score: Piedra " + score[0]
        + " Papel " + score[1]
        + " Tijera " + score[2]
        + " Lagarto " + score[3]
        + " Spock " + score[4];
    string countLine = "Count: Piedra " + count[0]
        + " Papel " + count[1]
        + " Tijera " + count[2]
        + " Lagarto " + count[3]
        + " Spock " + count[4];

    Debug.Log(scoreLine);
    Debug.Log(countLine);
}
// Chooses the next action with regret matching.
//
// Steps: cache the response of each strategy in lastActionRM, add up the
// positive regret, and draw a strategy with probability proportional to
// its positive regret. If no regret is positive, a random action is used.
// The action returned is always stored in lastAction; lastStrategy is
// updated when a strategy is drawn from the regret distribution.
public RPSAction GetNextActionRM()
{
    InitRegretMatching();

    int i;
    for (i = 0; i < numActions; i++)
    {
        lastActionRM[i] = GetActionForStrategy((RPSAction)i);
    }

    // Total positive regret; negative or zero entries do not contribute.
    float sum = 0f;
    for (i = 0; i < numActions; i++)
    {
        if (regret[i] > 0f)
        {
            sum += regret[i];
        }
    }

    // Nothing to regret yet: pick a random action.
    if (sum <= 0f)
    {
        lastAction = (RPSAction)Random.Range(0, numActions);
        return lastAction;
    }

    // Running total of positive regret; chance[numActions - 1] equals sum.
    for (i = 0; i < numActions; i++)
    {
        chance[i] = 0f;
        if (regret[i] > 0f)
        {
            chance[i] = regret[i];
        }
        if (i > 0)
        {
            chance[i] += chance[i - 1];
        }
    }

    // The thresholds in chance[] range up to sum rather than 1, so the
    // random draw has to be scaled by sum to make each strategy's
    // probability proportional to its positive regret.
    float prob = Random.value * sum;
    for (i = 0; i < numActions; i++)
    {
        if (prob < chance[i])
        {
            lastStrategy = i;
            lastAction = lastActionRM[i];
            return lastAction;
        }
    }

    // Fallback for a draw that is not below the final threshold. The value
    // is stored in lastAction because TellOpponentActionRM reads it to
    // compute the regret of the action that was played.
    lastAction = (RPSAction)(numActions - 1);
    return lastAction;
}
// Chooses the next action with the UCB1 rule.
//
// Any strategy whose count is still zero is tried first. Otherwise every
// strategy i is rated with
//     score[i] / count[i] + sqrt(2 * ln(totalActions) / count[i])
// and the highest rated one is selected (the first one wins ties).
// The selected index is stored in lastStrategy and the action obtained from
// GetActionForStrategy is stored in lastAction and returned.
public RPSAction GetNextActionUCB1()
{
    InitUCB1();

    // Try every strategy once; this also keeps count[i] away from zero in
    // the divisions below.
    for (int i = 0; i < numActions; i++)
    {
        if (count[i] == 0)
        {
            lastStrategy = i;
            Debug.Log(lastStrategy);
            lastAction = GetActionForStrategy((RPSAction)i);
            return lastAction;
        }
    }

    // ln(totalActions) is the same for every strategy, so compute it once.
    float logTotal = Mathf.Log((float)totalActions);

    int best = 0;
    float bestScore = float.NegativeInfinity;
    for (int i = 0; i < numActions; i++)
    {
        float plays = (float)count[i];

        // Average utility of this strategy plus its own exploration bonus.
        // The bonus must be added to the average (not replace it) and must
        // use this strategy's count rather than the current best one's.
        float tempScore = score[i] / plays;
        tempScore += Mathf.Sqrt(2f * logTotal / plays);

        if (tempScore > bestScore)
        {
            best = i;
            bestScore = tempScore;
        }
    }

    lastStrategy = best;
    lastAction = GetActionForStrategy((RPSAction)best);
    return lastAction;
}
// Computes the next action to be taken by the agent using UCB1.
//
// A strategy that has not been explored yet (count of zero) is returned
// immediately. Otherwise the strategy maximising
//     score[i] / count[i] + sqrt(2 * ln(totalActions) / count[i])
// is chosen; on ties the lowest index is kept. lastStrategy receives the
// chosen index and lastAction the action returned.
public RPSAction GetNextActionUCB1()
{
    InitUCB1();

    // check the actions available; if one hasn't been explored, return it
    for (int i = 0; i < numActions; i++)
    {
        if (count[i] == 0)
        {
            lastStrategy = i;
            lastAction = GetActionForStrategy((RPSAction)i);
            return lastAction;
        }
    }

    // shared part of the exploration bonus
    float logTotal = Mathf.Log((float)totalActions);

    // start from the first strategy and keep the best one seen so far
    int best = 0;
    float bestScore = score[0] / (float)count[0]
        + Mathf.Sqrt(2f * logTotal / (float)count[0]);

    for (int i = 1; i < numActions; i++)
    {
        // Explicit float conversion avoids integer division should the
        // counters be integral. The bonus uses this strategy's own count
        // and is added to its average instead of overwriting it.
        float plays = (float)count[i];
        float tempScore = score[i] / plays + Mathf.Sqrt(2f * logTotal / plays);

        if (tempScore > bestScore)
        {
            best = i;
            bestScore = tempScore;
        }
    }

    // return best strategy
    lastStrategy = best;
    lastAction = GetActionForStrategy((RPSAction)best);
    return lastAction;
}
// Computes the utility of an action based on the opponent's one.
// Returns 1f when myAction beats opponentsAction, -1f when it loses to it
// and 0f (the initial value) otherwise, i.e. on a draw.
public float GetUtility(RPSAction myAction, RPSAction opponentsAction)
{
    float utility = 0f;

    if (opponentsAction == RPSAction.Paper)
    {
        // the opponent played paper: rock loses, scissors win
        if (myAction == RPSAction.Rock)
        {
            utility = -1f;
        }
        else if (myAction == RPSAction.Scissors)
        {
            utility = 1f;
        }
    }
    else if (opponentsAction == RPSAction.Rock)
    {
        // the opponent played rock: paper wins, scissors lose
        if (myAction == RPSAction.Paper)
        {
            utility = 1f;
        }
        else if (myAction == RPSAction.Scissors)
        {
            utility = -1f;
        }
    }
    else
    {
        // the opponent played scissors: rock wins, paper loses
        if (myAction == RPSAction.Rock)
        {
            utility = 1f;
        }
        else if (myAction == RPSAction.Paper)
        {
            utility = -1f;
        }
    }

    return utility;
}
// Maps a strategy to the action played in response: Rock gives Paper,
// Scissors gives Rock, and Paper (or any other value) gives Scissors.
public RPSAction GetActionForStrategy(RPSAction strategy)
{
    switch (strategy)
    {
        case RPSAction.Rock:
            return RPSAction.Paper;

        case RPSAction.Scissors:
            return RPSAction.Rock;

        case RPSAction.Paper:
        default:
            return RPSAction.Scissors;
    }
}
// Maps a strategy to the action played in response, shows the sprite of that
// action on the Image component of ImagenIA and passes the action to
// TellOpponentAction before returning it.
//
// Mapping: Rock -> Paper, Scissors -> Rock, Lizzard -> Scissors,
// Spok -> Lizzard, Paper and any other value -> Spok.
public RPSAction GetActionForStrategy(RPSAction strategy)
{
    Image aiImage = ImagenIA.GetComponent <Image>();
    RPSAction action;

    switch (strategy)
    {
        case RPSAction.Rock:
            action = RPSAction.Paper;
            aiImage.sprite = PAPER;
            break;

        case RPSAction.Scissors:
            action = RPSAction.Rock;
            aiImage.sprite = ROCK;
            break;

        case RPSAction.Lizzard:
            action = RPSAction.Scissors;
            aiImage.sprite = SCISSORS;
            break;

        case RPSAction.Spok:
            action = RPSAction.Lizzard;
            aiImage.sprite = LIZARD;
            break;

        // Paper shares this branch with every value not listed above.
        // NOTE(review): under the usual Rock-Paper-Scissors-Lizard-Spock
        // rules paper beats Spock - confirm this response is intended.
        default:
            action = RPSAction.Spok;
            aiImage.sprite = SPOCK;
            break;
    }

    // Every branch reports the chosen action after updating the sprite.
    TellOpponentAction(action);
    return action;
}
// Computes the utility of myAction against the opponent's action:
// 1f for a win, -1f for a loss and 0f when neither rule applies (a draw).
// Only the Rock / Paper / Scissors pairings are scored explicitly.
public float GetUtility(RPSAction myAction, RPSAction opponents)
{
    float utility = 0f;

    if (opponents == RPSAction.Paper)
    {
        // Paper covers rock and is cut by scissors.
        if (myAction == RPSAction.Rock)
        {
            utility = -1f;
        }
        else if (myAction == RPSAction.Scissors)
        {
            utility = 1f;
        }
    }
    else if (opponents == RPSAction.Rock)
    {
        // Rock is covered by paper and crushes scissors.
        if (myAction == RPSAction.Paper)
        {
            utility = 1f;
        }
        else if (myAction == RPSAction.Scissors)
        {
            utility = -1f;
        }
    }
    else if (opponents == RPSAction.Scissors)
    {
        // Scissors are crushed by rock and cut paper, so rock wins and
        // paper loses here.
        if (myAction == RPSAction.Rock)
        {
            utility = 1f;
        }
        else if (myAction == RPSAction.Paper)
        {
            utility = -1f;
        }
    }
    else
    {
        // Any other opponent value keeps the scoring this fallback branch
        // has always applied.
        // NOTE(review): pairings beyond Rock / Paper / Scissors are not
        // modelled by this method - confirm the intended rules.
        if (myAction == RPSAction.Rock)
        {
            utility = -1f;
        }
        else if (myAction == RPSAction.Paper)
        {
            utility = 1f;
        }
    }

    return utility;
}