/// <summary>
/// Returns the largest Q-value stored for <paramref name="state"/> across the moves
/// currently available to this player, also taking the entry under action key 0
/// into account when one exists.
/// </summary>
/// <param name="state">
/// Board whose row in <c>Q</c> is inspected. The row must already exist; indexing
/// <c>Q</c> with an unknown board hash throws <c>KeyNotFoundException</c>.
/// </param>
/// <returns>
/// The maximum value found. A move without a stored value counts as 0. If there are
/// no moves and no entry under key 0, the starting sentinel -10 is returned as is.
/// </returns>
private double GetMaxQ(VirusBoard state)
{
    // NOTE(review): -10 is only a "lower than anything seen" sentinel, but it is
    // returned unchanged when nothing is found - confirm callers expect that.
    double max = -10;
    Move[] actions = state.GetPossibleMoves(playerNumber);

    // Hash the board and fetch its row once instead of on every single lookup.
    var row = Q[state.CustomHash()];

    foreach (Move a in actions)
    {
        double value;
        if (!row.TryGetValue(a.CustomHash(), out value))
        {
            // Unseen state/action pairs are valued at 0.
            value = 0;
        }

        if (value > max)
        {
            max = value;
        }
    }

    // Key 0 is checked in addition to the real moves; presumably it is the marker
    // written for finished games - verify against the code that fills Q.
    double marker;
    if (row.TryGetValue(0, out marker) && marker > max)
    {
        max = marker;
    }

    return max;
}
/// <summary>
/// Picks the move whose stored Q-value (0 when none is stored) plus the result of
/// <c>MinValue</c> on the resulting board is largest.
/// </summary>
/// <param name="percept">Game object the current board is copied from.</param>
/// <returns>
/// The best-scoring move; the first legal move if no score exceeds negative infinity;
/// <c>default(Move)</c> when the player has no legal move at all.
/// </returns>
public override Move Move(Virus percept)
{
    VirusBoard currentState = percept.GetBoardCopy();
    Move[] actions = currentState.GetPossibleMoves(playerNumber);

    // Without this guard actions[0] below throws IndexOutOfRangeException.
    // default(Move) mirrors what GetMaxExplorationFunctionA yields for an empty
    // move list - presumably the "no move" value; confirm with the game engine.
    if (actions.Length == 0)
    {
        return default(Move);
    }

    // The board does not change inside the loop, so hash it only once.
    var stateKey = currentState.CustomHash();

    Move action = actions[0];
    double max = double.NegativeInfinity;

    foreach (Move a in actions)
    {
        VirusBoard newState = currentState.GetUpdated(a);

        double q = 0;
        if (Q.ContainsKey(stateKey))
        {
            double stored;
            if (Q[stateKey].TryGetValue(a.CustomHash(), out stored))
            {
                q = stored;
            }
        }

        q += MinValue(newState, 0);

        if (q > max)
        {
            max = q;
            action = a;
        }

        // Early exit kept exactly as written (exact comparison with 1).
        // NOTE(review): MaxValue uses double.PositiveInfinity for its cut-off;
        // confirm 1 is the intended threshold here.
        if (max == 1)
        {
            break;
        }
    }

    return action;
}
/// <summary>
/// Selects the move to play from <paramref name="state"/> by scoring every legal move
/// and keeping the highest score.
/// </summary>
/// <remarks>
/// Scoring rules, per move:
/// no stored Q-value: score 1;
/// stored value of at least 1: the move is returned immediately;
/// stored value of at most -1: score -1;
/// otherwise a random score when this call was drawn as random (one draw against
/// <c>RandomRate</c> before the loop), else the stored value plus
/// <c>ExplorationRate</c> of the visit count when <c>explore</c> is set and
/// <c>RandomRate</c> is not positive.
/// </remarks>
/// <param name="state">Board to choose a move for; its row must exist in <c>Q</c> when it has moves.</param>
/// <returns>The selected move, or <c>default(Move)</c> when there are no legal moves.</returns>
private Move GetMaxExplorationFunctionA(VirusBoard state)
{
    double best = double.NegativeInfinity;
    Move chosen = default(Move);
    Move[] actions = state.GetPossibleMoves(playerNumber);

    // Decided once per call, before any per-move random draws.
    bool berandom = random.NextDouble() < RandomRate;

    var stateKey = state.CustomHash();

    foreach (Move candidate in actions)
    {
        var moveKey = candidate.CustomHash();
        var row = Q[stateKey];

        double score;
        double known;

        if (!row.TryGetValue(moveKey, out known))
        {
            // Never tried before: score it as high as a known winning move's cap.
            score = 1;
        }
        else if (known >= 1)
        {
            // Stored value already at the upper bound: take it without looking further.
            return candidate;
        }
        else if (known <= -1)
        {
            score = -1;
        }
        else if (berandom)
        {
            score = random.NextDouble();
        }
        else
        {
            double bonus = 0;
            if (explore && !(RandomRate > 0))
            {
                bonus = ExplorationRate(N[stateKey][moveKey]);
            }

            score = known + bonus;
        }

        if (score > best)
        {
            best = score;
            chosen = candidate;
        }
    }

    return chosen;
}
/// <summary>
/// Maximizing step of the depth-limited search: values <paramref name="state"/> by trying
/// each of this player's moves and passing the resulting board to <c>MinValue</c>.
/// </summary>
/// <param name="state">Board to evaluate.</param>
/// <param name="iteration">Depth counter; incremented on entry and compared with <c>searchLength</c>.</param>
/// <returns>
/// Positive infinity when this player has won; negative infinity when another player has
/// won; 0 once the incremented depth is no longer below <c>searchLength</c>; otherwise the
/// best value over all moves of (stored Q-value, or <c>Utility</c> when none is stored)
/// plus <c>MinValue</c> of the resulting board. With no moves the result is negative infinity.
/// </returns>
private double MaxValue(VirusBoard state, int iteration)
{
    iteration++;

    if (state.winner == playerNumber)
    {
        return double.PositiveInfinity;
    }

    // Reaching this point means the winner is not this player, so any non-zero
    // winner is somebody else.
    if (state.winner != 0)
    {
        return double.NegativeInfinity;
    }

    if (iteration >= searchLength)
    {
        return 0;
    }

    var stateKey = state.CustomHash();
    Move[] moves = state.GetPossibleMoves(playerNumber);
    double best = double.NegativeInfinity;

    for (int i = 0; i < moves.Length; i++)
    {
        Move move = moves[i];
        VirusBoard next = state.GetUpdated(move);

        // Utility is the fallback score; a stored Q-value replaces it when present.
        double score = Utility(state, next);
        double stored;
        if (Q.ContainsKey(stateKey) && Q[stateKey].TryGetValue(move.CustomHash(), out stored))
        {
            score = stored;
        }

        score += MinValue(next, iteration);

        if (score > best)
        {
            best = score;
        }

        // Nothing can beat positive infinity, so stop searching.
        if (best == double.PositiveInfinity)
        {
            return best;
        }
    }

    return best;
}
/// <summary>
/// Chooses this agent's next move and, when learning is enabled and a previous move
/// exists, first updates the Q-value of that previous state/action pair.
/// </summary>
/// <remarks>
/// The update applied is
/// <c>Q = Q + LearningRate(n) * (prevReward + discount * GetMaxQ(newState) - Q)</c>,
/// where <c>n</c> is the visit count of the pair after being incremented. When the
/// percept reports a winner, the new board's row receives an entry under key 0
/// (1 if this player won, -1 if another player won) unless one is already there.
/// </remarks>
/// <param name="percept">Game object providing the current board and winner.</param>
/// <returns>The move selected by <c>GetMaxExplorationFunctionA</c> for the current board.</returns>
public override Move Move(Virus percept)
{
    // The winner tells us whether the board we are looking at is a terminal state.
    byte winner = percept.Winner;
    VirusBoard newState = percept.GetBoardCopy();

    var newKey = newState.CustomHash();
    if (!Q.ContainsKey(newKey))
    {
        Q.Add(newKey, new Dictionary <UInt32, double>());
    }

    if (learn && !prevState.Equals(default(VirusBoard)))
    {
        var prevKey = prevState.CustomHash();
        var actionKey = prevAction.CustomHash();

        // Make sure both tables hold an entry for the previous state/action pair.
        if (!N.ContainsKey(prevKey))
        {
            N.Add(prevKey, new Dictionary <UInt32, int>());
        }
        var visits = N[prevKey];
        if (!visits.ContainsKey(actionKey))
        {
            visits.Add(actionKey, 0);
        }

        if (!Q.ContainsKey(prevKey))
        {
            Q.Add(prevKey, new Dictionary <UInt32, double>());
        }
        var values = Q[prevKey];
        if (!values.ContainsKey(actionKey))
        {
            values.Add(actionKey, initvalue);
        }

        // Record the outcome of a finished game under key 0 of the new board's row.
        var newRow = Q[newKey];
        if (winner == playerNumber)
        {
            if (!newRow.ContainsKey(0))
            {
                newRow.Add(0, 1);
            }
        }
        else if (winner != 0)
        {
            if (!newRow.ContainsKey(0))
            {
                newRow.Add(0, -1);
            }
        }

        int count = visits[actionKey] + 1;
        visits[actionKey] = count;

        double old = values[actionKey];
        values[actionKey] = old + LearningRate(count) * (prevReward + discount * GetMaxQ(newState) - old);
    }

    // Remember what we are about to do so the next call can learn from it.
    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;
    return prevAction;
}
/// <summary>
/// Chooses this agent's next move and, when learning is enabled and a previous move
/// exists, first updates the Q-value of that previous state/action pair.
/// </summary>
/// <remarks>
/// The update applied is
/// <c>Q = Q + LearningRate(n) * (prevReward + discount * GetMaxQ(newState) - Q)</c>,
/// where <c>n</c> is the visit count of the pair after being incremented. While
/// learning, the newly chosen move also gets a Q entry of <c>initvalue</c> if it has none.
/// </remarks>
/// <param name="percept">Game object the current board is copied from.</param>
/// <returns>The move selected by <c>GetMaxExplorationFunctionA</c> for the current board.</returns>
public override Move Move(Virus percept)
{
    VirusBoard newState = percept.GetBoardCopy();

    var newKey = newState.CustomHash();
    if (!Q.ContainsKey(newKey))
    {
        Q.Add(newKey, new Dictionary <UInt32, double>());
    }

    if (learn && !prevState.Equals(default(VirusBoard)))
    {
        var prevKey = prevState.CustomHash();
        var actionKey = prevAction.CustomHash();

        if (!N.ContainsKey(prevKey))
        {
            N.Add(prevKey, new Dictionary <UInt32, int>());
        }
        var visits = N[prevKey];
        if (!visits.ContainsKey(actionKey))
        {
            visits.Add(actionKey, 0);
        }

        // The Q entry for the previous pair is normally created at the end of the
        // previous call, but only if learning was enabled then. Create it here too so
        // that turning learning on between two moves cannot cause a KeyNotFoundException.
        if (!Q.ContainsKey(prevKey))
        {
            Q.Add(prevKey, new Dictionary <UInt32, double>());
        }
        var values = Q[prevKey];
        if (!values.ContainsKey(actionKey))
        {
            values.Add(actionKey, initvalue);
        }

        int count = visits[actionKey] + 1;
        visits[actionKey] = count;

        double old = values[actionKey];
        values[actionKey] = old + LearningRate(count) * (prevReward + discount * GetMaxQ(newState) - old);
    }

    // Remember what we are about to do so the next call (or EndGame) can learn from it.
    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;

    if (learn)
    {
        // Seed the chosen move's value so later updates find an entry to adjust.
        var chosenKey = prevAction.CustomHash();
        var row = Q[newKey];
        if (!row.ContainsKey(chosenKey))
        {
            row.Add(chosenKey, initvalue);
        }
    }

    return prevAction;
}
/// <summary>
/// Applies the final learning update for the last state/action pair of a finished game
/// and then clears the per-game memory (<c>prevState</c>, <c>prevAction</c>, <c>prevReward</c>).
/// </summary>
/// <remarks>
/// The reward is 1 when this player won, -1 when another player won and 0 otherwise.
/// The update applied is <c>Q = Q + LearningRate(n) * (reward - Q)</c>, where <c>n</c> is
/// the visit count of the pair after being incremented. The Q entry for the pair must
/// already exist; a missing entry throws <c>KeyNotFoundException</c>.
/// </remarks>
/// <param name="percept">Game object providing the winner of the finished game.</param>
public override void EndGame(Virus percept)
{
    // Skip learning when no move was recorded this game (prevState still holds its
    // default value) - the same check the Move methods use - so a default
    // state/action pair is never counted or looked up in Q.
    if (learn && !prevState.Equals(default(VirusBoard)))
    {
        byte winner = percept.Winner;

        double reward = 0;
        if (winner == playerNumber)
        {
            reward = 1;
        }
        else if (winner != 0)
        {
            reward = -1;
        }

        var prevKey = prevState.CustomHash();
        var actionKey = prevAction.CustomHash();

        if (!N.ContainsKey(prevKey))
        {
            N.Add(prevKey, new Dictionary <UInt32, int>());
        }
        var visits = N[prevKey];
        if (!visits.ContainsKey(actionKey))
        {
            visits.Add(actionKey, 0);
        }

        int count = visits[actionKey] + 1;
        visits[actionKey] = count;

        // Terminal update: there is no successor state, so the target is the reward alone.
        var values = Q[prevKey];
        double old = values[actionKey];
        values[actionKey] = old + LearningRate(count) * (reward - old);
    }

    // Always reset, whether or not learning is enabled.
    prevState = default(VirusBoard);
    prevAction = default(Move);
    prevReward = 0;
}