public override Move Move(Virus percept) {
	// One step of the Q-learning loop: finish the update for the previous
	// (state, action) pair using the newly observed board, then pick and
	// remember the next action via the exploration policy.
	//
	// Observe the outcome of the previous move: the winner marker (0 while
	// the game is still running) and a snapshot of the current board.
	byte winner = percept.Winner;
	VirusBoard newState = percept.GetBoardCopy();
	UInt32 newStateHash = newState.CustomHash();

	// Ensure a Q-value table exists for the newly observed state.
	// (TryGetValue avoids the repeated ContainsKey + indexer double lookups
	// and the redundant CustomHash() recomputation of the original.)
	Dictionary<UInt32, double> newStateQ;
	if (!Q.TryGetValue(newStateHash, out newStateQ)) {
		newStateQ = new Dictionary<UInt32, double>();
		Q.Add(newStateHash, newStateQ);
	}

	// Only update once we actually have a previous state, i.e. not on the
	// very first move of a game (prevState is still default-initialized).
	if (learn && !prevState.Equals(default(VirusBoard))) {
		UInt32 prevStateHash = prevState.CustomHash();
		UInt32 prevActionHash = prevAction.CustomHash();

		// Visit counter N(s, a); LearningRate() decays with it below.
		Dictionary<UInt32, int> visits;
		if (!N.TryGetValue(prevStateHash, out visits)) {
			visits = new Dictionary<UInt32, int>();
			N.Add(prevStateHash, visits);
		}
		if (!visits.ContainsKey(prevActionHash)) {
			visits.Add(prevActionHash, 0);
		}

		// Q(s, a) entry for the previous pair, seeded with initvalue.
		Dictionary<UInt32, double> prevStateQ;
		if (!Q.TryGetValue(prevStateHash, out prevStateQ)) {
			prevStateQ = new Dictionary<UInt32, double>();
			Q.Add(prevStateHash, prevStateQ);
		}
		if (!prevStateQ.ContainsKey(prevActionHash)) {
			prevStateQ.Add(prevActionHash, initvalue);
		}

		// Terminal states carry a fixed utility under pseudo-action 0:
		// +1 when this player has won, -1 when someone else has.
		if (winner == playerNumber) {
			if (!newStateQ.ContainsKey(0)) {
				newStateQ.Add(0, 1);
			}
		} else if (winner != 0) {
			// Non-zero winner that is not us: the opponent won.
			if (!newStateQ.ContainsKey(0)) {
				newStateQ.Add(0, -1);
			}
		}

		// Standard Q-learning update:
		// Q(s,a) += alpha(N(s,a)) * (r + gamma * max_a' Q(s',a') - Q(s,a))
		visits[prevActionHash]++;
		double oldQ = prevStateQ[prevActionHash];
		prevStateQ[prevActionHash] =
			oldQ
			+ LearningRate(visits[prevActionHash])
			* (prevReward + discount * GetMaxQ(newState) - oldQ);
	}

	// Remember the state/action pair so the next call can complete the
	// update, then act.
	prevState = newState;
	prevAction = GetMaxExplorationFunctionA(newState);
	prevReward = 0;
	return (prevAction);
}
public override Move Move(Virus percept) {
	// One step of the Q-learning loop: finish the update for the previous
	// (state, action) pair using the newly observed board, then pick the
	// next action and pre-seed its Q entry for the following update.
	VirusBoard newState = percept.GetBoardCopy();
	UInt32 newStateHash = newState.CustomHash();

	// Ensure a Q-value table exists for the newly observed state.
	// (TryGetValue avoids the repeated ContainsKey + indexer double lookups
	// and the redundant CustomHash() recomputation of the original.)
	Dictionary<UInt32, double> newStateQ;
	if (!Q.TryGetValue(newStateHash, out newStateQ)) {
		newStateQ = new Dictionary<UInt32, double>();
		Q.Add(newStateHash, newStateQ);
	}

	// Only update once we actually have a previous state, i.e. not on the
	// very first move of a game (prevState is still default-initialized).
	if (learn && !prevState.Equals(default(VirusBoard))) {
		UInt32 prevStateHash = prevState.CustomHash();
		UInt32 prevActionHash = prevAction.CustomHash();

		// Visit counter N(s, a); LearningRate() decays with it below.
		Dictionary<UInt32, int> visits;
		if (!N.TryGetValue(prevStateHash, out visits)) {
			visits = new Dictionary<UInt32, int>();
			N.Add(prevStateHash, visits);
		}
		if (!visits.ContainsKey(prevActionHash)) {
			visits.Add(prevActionHash, 0);
		}

		// FIX: seed Q(s, a) with initvalue when the entry is missing. The
		// original indexed Q[prev][prevAction] unconditionally, which throws
		// KeyNotFoundException if the pair was never seeded — e.g. when
		// `learn` is enabled mid-game, because the end-of-move seeding below
		// is itself guarded by `learn`.
		Dictionary<UInt32, double> prevStateQ;
		if (!Q.TryGetValue(prevStateHash, out prevStateQ)) {
			prevStateQ = new Dictionary<UInt32, double>();
			Q.Add(prevStateHash, prevStateQ);
		}
		if (!prevStateQ.ContainsKey(prevActionHash)) {
			prevStateQ.Add(prevActionHash, initvalue);
		}

		// Standard Q-learning update:
		// Q(s,a) += alpha(N(s,a)) * (r + gamma * max_a' Q(s',a') - Q(s,a))
		visits[prevActionHash]++;
		double oldQ = prevStateQ[prevActionHash];
		prevStateQ[prevActionHash] =
			oldQ
			+ LearningRate(visits[prevActionHash])
			* (prevReward + discount * GetMaxQ(newState) - oldQ);
	}

	// Remember the state/action pair so the next call can complete the
	// update, then act.
	prevState = newState;
	prevAction = GetMaxExplorationFunctionA(newState);
	prevReward = 0;

	// Pre-seed the Q entry for the chosen pair so the next update finds it.
	// (prevState == newState here, so newStateQ is Q[prevState.CustomHash()].)
	if (learn) {
		UInt32 chosenActionHash = prevAction.CustomHash();
		if (!newStateQ.ContainsKey(chosenActionHash)) {
			newStateQ.Add(chosenActionHash, initvalue);
		}
	}
	return (prevAction);
}