/// <summary>
/// Returns the largest Q-value known for <paramref name="state"/>, taken over the moves
/// currently available to <c>playerNumber</c> and the entry stored under <c>default(Move)</c>.
/// </summary>
/// <param name="state">Board whose learned values are inspected.</param>
/// <returns>
/// The maximum value found. A move without a stored value counts as 0. When the board offers
/// no moves and has no <c>default(Move)</c> entry, the starting sentinel -10 is returned.
/// </returns>
private double GetMaxQ(VirusBoard state)
{
    // Sentinel kept from the original implementation; callers may rely on it.
    double max = -10;
    Move[] actions = state.GetPossibleMoves(playerNumber);

    // Fetch the row for this state once. A state that has no row yet is treated as having
    // no learned values instead of throwing KeyNotFoundException.
    Dictionary<Move, double> row;
    bool hasRow = Q.TryGetValue(state, out row);

    foreach (Move a in actions)
    {
        double value = 0;
        if (hasRow)
        {
            double stored;
            if (row.TryGetValue(a, out stored))
            {
                value = stored;
            }
        }

        if (value > max)
        {
            max = value;
        }
    }

    // The entry under default(Move) is considered in addition to the regular moves.
    // NOTE(review): presumably this is the value of a finished game - confirm against the writer of that entry.
    if (hasRow)
    {
        double defaultEntry;
        if (row.TryGetValue(default(Move), out defaultEntry) && defaultEntry > max)
        {
            max = defaultEntry;
        }
    }

    return max;
}
/// <summary>
/// Picks the move with the highest exploration score for <paramref name="state"/>.
/// A move with a stored Q-value scores <c>Q + 1 / N</c>; a move without one scores 1.
/// </summary>
/// <param name="state">Board to choose a move for. A row for it is created in Q on demand.</param>
/// <returns>
/// The best-scoring move, or <c>default(Move)</c> when there are no moves or no score exceeds -10.
/// </returns>
private Move GetMaxExplorationFunctionA(VirusBoard state)
{
    Move best = default(Move);
    Move[] candidates = state.GetPossibleMoves(playerNumber);
    if (candidates.Length == 0)
    {
        // Without candidates Q is left untouched.
        return best;
    }

    // Make sure the state has a row before any candidate is scored.
    if (!Q.ContainsKey(state))
    {
        Q.Add(state, new Dictionary<Move, double>());
    }

    Dictionary<Move, double> row = Q[state];
    double bestScore = -10;

    for (int i = 0; i < candidates.Length; i++)
    {
        Move candidate = candidates[i];

        double score;
        double learned;
        if (row.TryGetValue(candidate, out learned))
        {
            // Learned value plus a bonus that shrinks with the visit count.
            score = learned + 1 / (double)N[state][candidate];
        }
        else
        {
            score = 1;
        }

        if (score > bestScore)
        {
            bestScore = score;
            best = candidate;
        }
    }

    return best;
}
/// <summary>
/// Called when a game is over: runs <c>Move</c> once more on the final percept and then
/// clears the remembered previous state, action and reward.
/// </summary>
/// <param name="percept">The game in its final position.</param>
public override void EndGame(Virus percept)
{
    // The move returned here is not used.
    // NOTE(review): presumably the call is made so Move can process the final position - confirm against Move.
    Move(percept);

    // Forget everything remembered from the finished game.
    prevReward = 0;
    prevAction = default(Move);
    prevState = default(VirusBoard);
}
/// <summary>
/// Creates a memory entry from a transition and an explicitly supplied reward.
/// </summary>
/// <param name="start">Board before the move.</param>
/// <param name="action">Move that was made.</param>
/// <param name="end">Board after the move.</param>
/// <param name="reward">Reward stored with the transition.</param>
public VirusMemory(VirusBoard start, Move action, VirusBoard end, double reward)
{
    this.StartState = start;
    this.Action = action;
    this.EndState = end;
    this.Reward = reward;
}
/// <summary>
/// Returns the largest Q-value known for <paramref name="state"/>, taken over the moves
/// currently available to <c>playerNumber</c> and the entry stored under key 0.
/// </summary>
/// <param name="state">Board whose learned values are inspected (looked up by its <c>CustomHash()</c>).</param>
/// <returns>
/// The maximum value found. A move without a stored value counts as 0. When the board offers
/// no moves and has no entry under key 0, the starting sentinel -10 is returned.
/// </returns>
private double GetMaxQ(VirusBoard state)
{
    // Sentinel kept from the original implementation; callers may rely on it.
    double max = -10;
    Move[] actions = state.GetPossibleMoves(playerNumber);

    // Hash the state and fetch its row once instead of once per action. A state that has no
    // row yet is treated as having no learned values instead of throwing KeyNotFoundException.
    Dictionary<UInt32, double> row;
    bool hasRow = Q.TryGetValue(state.CustomHash(), out row);

    foreach (Move a in actions)
    {
        double value = 0;
        if (hasRow)
        {
            double stored;
            if (row.TryGetValue(a.CustomHash(), out stored))
            {
                value = stored;
            }
        }

        if (value > max)
        {
            max = value;
        }
    }

    // The entry under key 0 is considered in addition to the regular moves.
    // NOTE(review): presumably this is the value recorded for a finished game - confirm against the writer of that entry.
    if (hasRow)
    {
        double zeroEntry;
        if (row.TryGetValue(0, out zeroEntry) && zeroEntry > max)
        {
            max = zeroEntry;
        }
    }

    return max;
}
/// <summary>
/// Chooses a move for the current position: each available move is scored with its stored
/// Q-value (0 when none is stored) plus the result of <c>MinValue</c> on the resulting board.
/// </summary>
/// <param name="percept">The game to move in; a copy of its board is evaluated.</param>
/// <returns>
/// The highest-scoring move; the first available move when no score exceeds negative infinity;
/// <c>default(Move)</c> when there are no moves.
/// </returns>
public override Move Move(Virus percept)
{
    VirusBoard currentState = percept.GetBoardCopy();
    Move[] actions = currentState.GetPossibleMoves(playerNumber);

    // Nothing to choose from: return default(Move) instead of indexing an empty array.
    if (actions.Length < 1)
    {
        return default(Move);
    }

    // The row for the current state does not depend on the candidate move, so hash the
    // state and look the row up once.
    // NOTE(review): assumes MinValue does not modify Q while the candidates are scored - confirm.
    Dictionary<UInt32, double> learned;
    bool hasLearned = Q.TryGetValue(currentState.CustomHash(), out learned);

    Move action = actions[0];
    double max = double.NegativeInfinity;

    foreach (Move a in actions)
    {
        VirusBoard newState = currentState.GetUpdated(a);

        double q = 0;
        if (hasLearned)
        {
            double stored;
            if (learned.TryGetValue(a.CustomHash(), out stored))
            {
                q = stored;
            }
        }

        q += MinValue(newState, 0);

        if (q > max)
        {
            max = q;
            action = a;
        }

        // Early exit kept exactly as in the original.
        // NOTE(review): exact equality on a double, and the constant 1 is not explained here - confirm intent.
        if (max == 1)
        {
            break;
        }
    }

    return action;
}
/// <summary>
/// Creates a memory entry from a transition; the reward is computed by
/// <c>MemoryQAgent.Reward</c> from the start and end boards.
/// </summary>
/// <param name="start">Board before the move.</param>
/// <param name="action">Move that was made.</param>
/// <param name="end">Board after the move.</param>
public VirusMemory(VirusBoard start, Move action, VirusBoard end)
{
    this.StartState = start;
    this.Action = action;
    this.EndState = end;

    // The reward is derived from the transition rather than supplied by the caller.
    this.Reward = VirusNameSpace.Agents.MemoryQAgent.Reward(start, end);
}
/// <summary>
/// Chooses a move for the current position: each available move is scored with
/// <c>Utility</c> of the resulting board plus the result of <c>MinValue</c> on it.
/// </summary>
/// <param name="percept">The game to move in; a copy of its board is evaluated.</param>
/// <returns>
/// The highest-scoring move; the first available move when no score exceeds negative infinity;
/// <c>default(Move)</c> when there are no moves.
/// </returns>
public override Move Move(Virus percept)
{
    VirusBoard currentState = percept.GetBoardCopy();
    Move[] actions = currentState.GetPossibleMoves(playerNumber);

    // Nothing to choose from: return default(Move) instead of indexing an empty array.
    if (actions.Length < 1)
    {
        return default(Move);
    }

    Move action = actions[0];
    double max = double.NegativeInfinity;

    foreach (Move a in actions)
    {
        VirusBoard newState = currentState.GetUpdated(a);
        double q = Utility(currentState, newState);
        q += MinValue(newState, 0);

        if (q > max)
        {
            max = q;
            action = a;
        }

        // A score of positive infinity cannot be beaten, so stop searching.
        if (max == double.PositiveInfinity)
        {
            break;
        }
    }

    return action;
}
/// <summary>
/// Chooses a move either greedily or at random. One draw from <c>random</c> decides the mode:
/// above <c>randomRatio</c> the move with the best capture score is played, otherwise an
/// arbitrary move is played.
/// </summary>
/// <param name="percept">The game to move in; a copy of its board is evaluated.</param>
/// <returns>
/// The chosen move, or <c>default(Move)</c> when there are no moves. With <c>deterministic</c>
/// set, the first qualifying move is always returned instead of a random one.
/// </returns>
public override Move Move(Virus percept)
{
    VirusBoard board = percept.GetBoardCopy();
    Move[] candidates = board.GetPossibleMoves(playerNumber);
    if (candidates.Length < 1)
    {
        return default(Move);
    }

    if (random.NextDouble() > randomRatio)
    {
        // Greedy mode: collect every move that ties for the best capture score.
        List<Move> best = new List<Move>();
        int bestScore = -1;

        for (int i = 0; i < candidates.Length; i++)
        {
            Move candidate = candidates[i];

            // A long move scores one less than the pieces it takes.
            int score = board.TakeablePieces(candidate);
            if (candidate.IsLongMove)
            {
                score--;
            }

            if (score < bestScore)
            {
                continue;
            }

            if (score > bestScore)
            {
                bestScore = score;
                best.Clear();
            }

            best.Add(candidate);
        }

        return deterministic ? best[0] : best[random.Next(best.Count)];
    }

    // Random mode.
    return deterministic ? candidates[0] : candidates[random.Next(candidates.Length)];
}
/// <summary>
/// Observes the current position, updates the Q-value of the previously chosen move when
/// learning is enabled, and chooses the next move with <c>GetMaxExplorationFunctionA</c>.
/// </summary>
/// <param name="percept">The game to move in; its winner and a copy of its board are read.</param>
/// <returns>The chosen move, which is also remembered as the previous action.</returns>
public override Move Move(Virus percept)
{
    // The winner is read up front so a decided game can be recorded below.
    byte winner = percept.Winner;
    VirusBoard newState = percept.GetBoardCopy();
    var newKey = newState.CustomHash();

    // Every observed state gets a row in Q.
    if (!Q.ContainsKey(newKey))
    {
        Q.Add(newKey, new Dictionary<UInt32, double>());
    }

    if (learn && !prevState.Equals(default(VirusBoard)))
    {
        var prevKey = prevState.CustomHash();
        var prevMove = prevAction.CustomHash();

        // Make sure the visit counter for (previous state, previous action) exists.
        Dictionary<UInt32, int> visits;
        if (!N.TryGetValue(prevKey, out visits))
        {
            visits = new Dictionary<UInt32, int>();
            N.Add(prevKey, visits);
        }
        if (!visits.ContainsKey(prevMove))
        {
            visits.Add(prevMove, 0);
        }

        // Make sure the Q-value for (previous state, previous action) exists.
        Dictionary<UInt32, double> prevValues;
        if (!Q.TryGetValue(prevKey, out prevValues))
        {
            prevValues = new Dictionary<UInt32, double>();
            Q.Add(prevKey, prevValues);
        }
        if (!prevValues.ContainsKey(prevMove))
        {
            prevValues.Add(prevMove, initvalue);
        }

        // A decided game is recorded once under key 0 of the new state:
        // 1 when this player won, -1 when another player won.
        bool decided = false;
        double outcome = 0;
        if (winner == playerNumber)
        {
            decided = true;
            outcome = 1;
        }
        else if (winner != 0)
        {
            decided = true;
            outcome = -1;
        }
        if (decided)
        {
            Dictionary<UInt32, double> newValues = Q[newKey];
            if (!newValues.ContainsKey(0))
            {
                newValues.Add(0, outcome);
            }
        }

        // Count the visit first; the learning rate is computed from the updated count.
        visits[prevMove]++;

        double oldValue = prevValues[prevMove];
        double rate = LearningRate(visits[prevMove]);
        double target = prevReward + discount * GetMaxQ(newState);
        prevValues[prevMove] = oldValue + rate * (target - oldValue);
    }

    // Remember this step for the next update.
    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;

    return prevAction;
}
/// <summary>
/// Picks a move for <paramref name="state"/> by scoring every available move:
/// a stored value of 1 or more is chosen immediately; a stored value of -1 or less scores -1;
/// any other stored value scores either a random number (when this call was drawn as random)
/// or the stored value plus an optional exploration bonus; a move without a stored value scores 1.
/// </summary>
/// <param name="state">Board to choose a move for. Its row must already exist in Q when it has moves.</param>
/// <returns>The best-scoring move, or <c>default(Move)</c> when there are no moves.</returns>
private Move GetMaxExplorationFunctionA(VirusBoard state)
{
    Move action = default(Move);
    Move[] actions = state.GetPossibleMoves(playerNumber);

    // Drawn exactly once per call, before any move is inspected (also when there are no moves).
    bool berandom = random.NextDouble() < RandomRate;

    if (actions.Length == 0)
    {
        return action;
    }

    // Hash the state and fetch its row once instead of several times per move.
    var stateKey = state.CustomHash();
    Dictionary<UInt32, double> row = Q[stateKey];

    double max = double.NegativeInfinity;

    foreach (Move a in actions)
    {
        var moveKey = a.CustomHash();

        double value;
        double stored;
        if (!row.TryGetValue(moveKey, out stored))
        {
            // Moves that have never been tried get the score 1.
            value = 1;
        }
        else if (stored >= 1)
        {
            // A stored value of at least 1 ends the search at once.
            return a;
        }
        else if (stored <= -1)
        {
            value = -1;
        }
        else if (berandom)
        {
            value = random.NextDouble();
        }
        else
        {
            // The exploration bonus is only added when exploring and RandomRate is not positive.
            value = stored + ((explore && !(RandomRate > 0)) ? ExplorationRate(N[stateKey][moveKey]) : 0);
        }

        if (value > max)
        {
            max = value;
            action = a;
        }
    }

    return action;
}
/// <summary>
/// Maximising step of the depth-limited search: returns the best score this player can reach
/// from <paramref name="state"/>. A move is scored with its stored Q-value when one exists,
/// otherwise with <c>Utility</c>, plus the result of <c>MinValue</c> on the resulting board.
/// </summary>
/// <param name="state">Board to evaluate.</param>
/// <param name="iteration">Depth of the caller; this call works at <c>iteration + 1</c>.</param>
/// <returns>
/// Positive infinity when this player has won, negative infinity when another player has won,
/// 0 once the depth reaches <c>searchLength</c>, otherwise the maximum score over all moves
/// (negative infinity when there are none).
/// </returns>
private double MaxValue(VirusBoard state, int iteration)
{
    iteration++;

    if (state.winner == playerNumber)
    {
        return double.PositiveInfinity;
    }
    if (state.winner != playerNumber && state.winner != 0)
    {
        return double.NegativeInfinity;
    }
    if (iteration >= searchLength)
    {
        return 0;
    }

    Move[] actions = state.GetPossibleMoves(playerNumber);

    // The row for this state does not depend on the move, so hash the state and look the
    // row up once instead of once per move.
    // NOTE(review): assumes Q is not modified while the search recurses - confirm.
    Dictionary<UInt32, double> learned;
    bool hasLearned = Q.TryGetValue(state.CustomHash(), out learned);

    double max = double.NegativeInfinity;
    foreach (Move a in actions)
    {
        VirusBoard newState = state.GetUpdated(a);

        double q = Utility(state, newState);
        if (hasLearned)
        {
            // A stored Q-value replaces the utility estimate.
            double stored;
            if (learned.TryGetValue(a.CustomHash(), out stored))
            {
                q = stored;
            }
        }

        q += MinValue(newState, iteration);

        if (q > max)
        {
            max = q;
        }

        // Nothing can beat positive infinity, so stop early.
        if (max == double.PositiveInfinity)
        {
            return max;
        }
    }

    return max;
}
/// <summary>
/// Minimising step of the depth-limited search: returns the lowest score the opponent can
/// force from <paramref name="state"/>. A move is scored with <c>Utility</c> plus the result
/// of <c>MaxValue</c> on the resulting board.
/// </summary>
/// <param name="state">Board to evaluate.</param>
/// <param name="iteration">Depth of the caller; this call works at <c>iteration + 1</c>.</param>
/// <returns>
/// Positive infinity when this player has won, negative infinity when another player has won,
/// 0 once the depth reaches <c>searchLength</c>, otherwise the minimum score over all opponent
/// moves (positive infinity when there are none).
/// </returns>
private double MinValue(VirusBoard state, int iteration)
{
    int depth = iteration + 1;

    if (state.winner == playerNumber)
    {
        return double.PositiveInfinity;
    }
    if (state.winner != 0)
    {
        // Some other player has won.
        return double.NegativeInfinity;
    }
    if (depth >= searchLength)
    {
        return 0;
    }

    // The opponent is player 2 when this agent is player 1, and player 1 otherwise.
    byte opponent = (playerNumber == 1) ? (byte)2 : (byte)1;
    Move[] replies = state.GetPossibleMoves(opponent);

    double lowest = double.PositiveInfinity;
    for (int i = 0; i < replies.Length; i++)
    {
        VirusBoard next = state.GetUpdated(replies[i]);
        double score = Utility(state, next) + MaxValue(next, depth);

        if (score < lowest)
        {
            lowest = score;
        }

        // Nothing can be lower than negative infinity, so stop early.
        if (lowest == double.NegativeInfinity)
        {
            return lowest;
        }
    }

    return lowest;
}
/// <summary>
/// Called when a game is over. When learning is enabled and a previous move is remembered,
/// the Q-value of that move is updated towards the final reward (1 for a win, -1 for a loss,
/// 0 otherwise). The remembered state, action and reward are then cleared.
/// </summary>
/// <param name="percept">The finished game; only its winner is read.</param>
/// <exception cref="KeyNotFoundException">
/// The remembered state/action pair has no entry in Q (unchanged from the original behaviour).
/// </exception>
public override void EndGame(Virus percept)
{
    // Learn only when there is a remembered move to credit. The same check guards the
    // update in Move; without it a game ending before the first move would be "learned"
    // from a default board.
    if (learn && !prevState.Equals(default(VirusBoard)))
    {
        byte winner = percept.Winner;

        double reward;
        if (winner == playerNumber)
        {
            reward = 1;
        }
        else if (winner != 0)
        {
            reward = -1;
        }
        else
        {
            reward = 0;
        }

        // Hash the remembered state and action once.
        var stateKey = prevState.CustomHash();
        var moveKey = prevAction.CustomHash();

        // Count the visit, creating the counter on first use.
        Dictionary<UInt32, int> visits;
        if (!N.TryGetValue(stateKey, out visits))
        {
            visits = new Dictionary<UInt32, int>();
            N.Add(stateKey, visits);
        }

        int count;
        visits.TryGetValue(moveKey, out count); // leaves 0 when the move has no counter yet
        count++;
        visits[moveKey] = count;

        // There is no successor state at the end of a game, so the target is the reward alone.
        Dictionary<UInt32, double> values = Q[stateKey];
        double oldValue = values[moveKey];
        values[moveKey] = oldValue + LearningRate(count) * (reward - oldValue);
    }

    // Forget everything remembered from the finished game.
    prevState = default(VirusBoard);
    prevAction = default(Move);
    prevReward = 0;
}
/// <summary>
/// Scores a transition by how much the piece balance changes. The balance of a board is the
/// number of cells equal to <c>playerNumber</c> minus the number of cells holding any other
/// non-zero value.
/// </summary>
/// <param name="currentState">Board before the move.</param>
/// <param name="nextState">Board after the move.</param>
/// <returns>The balance of <paramref name="nextState"/> minus that of <paramref name="currentState"/>, times 0.1.</returns>
private double Utility(VirusBoard currentState, VirusBoard nextState)
{
    int balanceBefore = 0;
    foreach (byte cell in currentState.board)
    {
        // +1 for an own cell, -1 for any other non-zero cell, 0 for a zero cell.
        balanceBefore += (cell == playerNumber) ? 1 : (cell != 0 ? -1 : 0);
    }

    int balanceAfter = 0;
    foreach (byte cell in nextState.board)
    {
        balanceAfter += (cell == playerNumber) ? 1 : (cell != 0 ? -1 : 0);
    }

    return (balanceAfter - balanceBefore) * 0.1;
}
/// <summary>
/// Observes the current position, updates the Q-value of the previously chosen move when
/// learning is enabled, and chooses the next move with <c>GetMaxExplorationFunctionA</c>.
/// While learning, the chosen move is given an initial Q-value if it has none.
/// </summary>
/// <param name="percept">The game to move in; a copy of its board is used.</param>
/// <returns>The chosen move, which is also remembered as the previous action.</returns>
public override Move Move(Virus percept)
{
    VirusBoard newState = percept.GetBoardCopy();
    var newKey = newState.CustomHash();

    // Every observed state gets a row in Q.
    if (!Q.ContainsKey(newKey))
    {
        Q.Add(newKey, new Dictionary<UInt32, double>());
    }

    if (learn && !prevState.Equals(default(VirusBoard)))
    {
        var prevKey = prevState.CustomHash();
        var prevMove = prevAction.CustomHash();

        // Count the visit, creating the counter on first use.
        Dictionary<UInt32, int> visits;
        if (!N.TryGetValue(prevKey, out visits))
        {
            visits = new Dictionary<UInt32, int>();
            N.Add(prevKey, visits);
        }
        if (!visits.ContainsKey(prevMove))
        {
            visits.Add(prevMove, 0);
        }
        visits[prevMove]++;

        // Move the stored value towards reward + discounted best value of the new state;
        // the learning rate is computed from the updated visit count.
        Dictionary<UInt32, double> prevValues = Q[prevKey];
        double oldValue = prevValues[prevMove];
        double rate = LearningRate(visits[prevMove]);
        double target = prevReward + discount * GetMaxQ(newState);
        prevValues[prevMove] = oldValue + rate * (target - oldValue);
    }

    // Remember this step for the next update.
    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;

    if (learn)
    {
        // Give the chosen move a starting value so the next update finds an entry.
        Dictionary<UInt32, double> currentValues = Q[newKey];
        var chosenMove = prevAction.CustomHash();
        if (!currentValues.ContainsKey(chosenMove))
        {
            currentValues.Add(chosenMove, initvalue);
        }
    }

    return prevAction;
}