/// <summary>
/// Picks the next action for the board held by <paramref name="percept"/> and, when learning is
/// enabled and a previous state/action pair is recorded, updates that pair's Q-value first.
/// </summary>
/// <remarks>
/// The update applied to the previous pair (s, a), with s' being the current board, is
/// <c>Q(s,a) += LearningRate(N(s,a)) * (prevReward + discount * GetMaxQ(s') - Q(s,a))</c>.
/// If the percept already reports a winner, the current state receives an entry under action
/// key 0 holding +1 (this player won) or -1 (a different, non-zero winner) before GetMaxQ runs.
/// </remarks>
/// <param name="percept">Current game; its <c>Winner</c> and a copy of its board are read.</param>
/// <returns>
/// The action produced by <c>GetMaxExplorationFunctionA</c> for the current board; it is also
/// remembered as the previous action for the next call.
/// </returns>
public override Move Move(Virus percept) {
    // Check whether we are at a terminal state.
    byte winner = percept.Winner;
    VirusBoard newState = percept.GetBoardCopy();

    // Hash each state/action once instead of on every table access.
    UInt32 newStateKey = newState.CustomHash();

    Dictionary<UInt32, double> newStateQ;
    if (!Q.TryGetValue(newStateKey, out newStateQ)) {
        newStateQ = new Dictionary<UInt32, double>();
        Q.Add(newStateKey, newStateQ);
    }

    // prevState == default(VirusBoard) means no move has been made yet in this game.
    if (learn && !prevState.Equals(default(VirusBoard))) {
        UInt32 prevStateKey = prevState.CustomHash();
        UInt32 prevActionKey = prevAction.CustomHash();

        Dictionary<UInt32, int> prevCounts;
        if (!N.TryGetValue(prevStateKey, out prevCounts)) {
            prevCounts = new Dictionary<UInt32, int>();
            N.Add(prevStateKey, prevCounts);
        }

        Dictionary<UInt32, double> prevQ;
        if (!Q.TryGetValue(prevStateKey, out prevQ)) {
            prevQ = new Dictionary<UInt32, double>();
            Q.Add(prevStateKey, prevQ);
        }

        // Unseen state/action pairs start at initvalue.
        double oldValue;
        if (!prevQ.TryGetValue(prevActionKey, out oldValue)) {
            oldValue = initvalue;
            prevQ.Add(prevActionKey, oldValue);
        }

        // Seed the terminal state's value (stored under action key 0) so GetMaxQ can see it.
        if (winner == playerNumber) {
            if (!newStateQ.ContainsKey(0)) {
                newStateQ.Add(0, 1);
            }
        } else if (winner != 0) {
            if (!newStateQ.ContainsKey(0)) {
                newStateQ.Add(0, -1);
            }
        }

        // TryGetValue leaves visits at 0 when the pair has never been counted.
        int visits;
        prevCounts.TryGetValue(prevActionKey, out visits);
        visits++;
        prevCounts[prevActionKey] = visits;

        prevQ[prevActionKey] = oldValue + LearningRate(visits) * (prevReward + discount * GetMaxQ(newState) - oldValue);
    }

    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;
    return prevAction;
}
/// <summary>
/// Picks the next action for the board held by <paramref name="percept"/> and, when learning is
/// enabled and a previous state/action pair is recorded, updates that pair's Q-value first.
/// </summary>
/// <remarks>
/// The update applied to the previous pair (s, a), with s' being the current board, is
/// <c>Q(s,a) += LearningRate(N(s,a)) * (prevReward + discount * GetMaxQ(s') - Q(s,a))</c>.
/// After an action is chosen, and only while learning, its Q entry for the current state is
/// seeded with <c>initvalue</c> if it does not exist yet.
/// </remarks>
/// <param name="percept">Current game; a copy of its board is read.</param>
/// <returns>
/// The action produced by <c>GetMaxExplorationFunctionA</c> for the current board; it is also
/// remembered as the previous action for the next call.
/// </returns>
public override Move Move(Virus percept) {
    VirusBoard newState = percept.GetBoardCopy();

    // Hash each state/action once instead of on every table access.
    UInt32 newStateKey = newState.CustomHash();

    Dictionary<UInt32, double> newStateQ;
    if (!Q.TryGetValue(newStateKey, out newStateQ)) {
        newStateQ = new Dictionary<UInt32, double>();
        Q.Add(newStateKey, newStateQ);
    }

    // prevState == default(VirusBoard) means no move has been made yet in this game.
    if (learn && !prevState.Equals(default(VirusBoard))) {
        UInt32 prevStateKey = prevState.CustomHash();
        UInt32 prevActionKey = prevAction.CustomHash();

        Dictionary<UInt32, int> prevCounts;
        if (!N.TryGetValue(prevStateKey, out prevCounts)) {
            prevCounts = new Dictionary<UInt32, int>();
            N.Add(prevStateKey, prevCounts);
        }

        // TryGetValue leaves visits at 0 when the pair has never been counted.
        int visits;
        prevCounts.TryGetValue(prevActionKey, out visits);
        visits++;
        prevCounts[prevActionKey] = visits;

        Dictionary<UInt32, double> prevQ;
        if (!Q.TryGetValue(prevStateKey, out prevQ)) {
            prevQ = new Dictionary<UInt32, double>();
            Q.Add(prevStateKey, prevQ);
        }

        // The entry is normally seeded when the action was chosen, but only if learning was
        // enabled at that time; fall back to initvalue instead of throwing KeyNotFoundException.
        double oldValue;
        if (!prevQ.TryGetValue(prevActionKey, out oldValue)) {
            oldValue = initvalue;
        }

        prevQ[prevActionKey] = oldValue + LearningRate(visits) * (prevReward + discount * GetMaxQ(newState) - oldValue);
    }

    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;

    // Seed the chosen action so the next update (or EndGame) finds an existing Q entry.
    if (learn) {
        UInt32 chosenActionKey = prevAction.CustomHash();
        if (!newStateQ.ContainsKey(chosenActionKey)) {
            newStateQ.Add(chosenActionKey, initvalue);
        }
    }

    return prevAction;
}
/// <summary>
/// Applies the final Q-value update for the last state/action pair of a finished game and
/// then clears the remembered state, action and reward.
/// </summary>
/// <remarks>
/// The reward is +1 when <c>percept.Winner</c> equals <c>playerNumber</c>, -1 when it is a
/// different non-zero value, and 0 otherwise. The update has no successor-state term:
/// <c>Q(s,a) += LearningRate(N(s,a)) * (reward - Q(s,a))</c>.
/// Nothing is learned when learning is disabled or when no previous state is recorded
/// (prevState equals <c>default(VirusBoard)</c>), the same condition <c>Move</c> checks.
/// </remarks>
/// <param name="percept">Finished game; only its <c>Winner</c> is read.</param>
public override void EndGame(Virus percept) {
    // Without a recorded previous move there is no state/action pair to update.
    if (learn && !prevState.Equals(default(VirusBoard))) {
        byte winner = percept.Winner;

        double reward;
        if (winner == playerNumber) {
            reward = 1;
        } else if (winner != 0) {
            reward = -1;
        } else {
            reward = 0;
        }

        // Hash once instead of on every table access.
        UInt32 prevStateKey = prevState.CustomHash();
        UInt32 prevActionKey = prevAction.CustomHash();

        Dictionary<UInt32, int> prevCounts;
        if (!N.TryGetValue(prevStateKey, out prevCounts)) {
            prevCounts = new Dictionary<UInt32, int>();
            N.Add(prevStateKey, prevCounts);
        }

        // TryGetValue leaves visits at 0 when the pair has never been counted.
        int visits;
        prevCounts.TryGetValue(prevActionKey, out visits);
        visits++;
        prevCounts[prevActionKey] = visits;

        Dictionary<UInt32, double> prevQ = Q[prevStateKey];
        double oldValue = prevQ[prevActionKey];
        prevQ[prevActionKey] = oldValue + LearningRate(visits) * (reward - oldValue);
    }

    prevState = default(VirusBoard);
    prevAction = default(Move);
    prevReward = 0;
}