/// <summary>
/// Picks the legal move with the highest score. A move's score is the Q-value stored for
/// (current board, move) - 0 when no entry exists - plus <c>MinValue(newState, 0)</c>.
/// </summary>
/// <param name="percept">Game to move in; only a copy of its board is read.</param>
/// <returns>
/// The best-scoring move (the earliest one on ties), or <c>default(Move)</c> when the player
/// has no legal move.
/// </returns>
public override Move Move(Virus percept)
{
    VirusBoard currentState = percept.GetBoardCopy();
    Move[] actions = currentState.GetPossibleMoves(playerNumber);

    // Without this guard the actions[0] access below throws when no move is available.
    if (actions.Length < 1)
    {
        return default(Move);
    }

    // The board is the same for every candidate, so its hash is computed once.
    var stateHash = currentState.CustomHash();

    Move action = actions[0];
    double max = double.NegativeInfinity;
    foreach (Move a in actions)
    {
        VirusBoard newState = currentState.GetUpdated(a);

        // Stored Q-value for this (board, move) pair; stays 0 when nothing is stored.
        double q = 0;
        if (Q.ContainsKey(stateHash))
        {
            var actionHash = a.CustomHash();
            var knownActions = Q[stateHash];
            if (knownActions.ContainsKey(actionHash))
            {
                q = knownActions[actionHash];
            }
        }

        q += MinValue(newState, 0);

        // Strict comparison: the first move reaching a given score is kept.
        if (q > max)
        {
            max = q;
            action = a;
        }

        // NOTE(review): 1 is treated as the best reachable score, so the search stops early;
        // confirm this bound against MinValue and the stored Q-values.
        if (max == 1)
        {
            break;
        }
    }

    return action;
}
/// <summary>
/// Scores every legal move as (stored Q-value for the current board and that move, or 0 if
/// none is stored) + <c>MinValue(newState, 0)</c> and returns the highest-scoring one.
/// </summary>
/// <param name="percept">Game to move in; only a copy of its board is read.</param>
/// <returns>
/// The move with the highest score; on equal scores the one listed first wins. Returns
/// <c>default(Move)</c> when <c>GetPossibleMoves</c> yields no move.
/// </returns>
public override Move Move(Virus percept)
{
    VirusBoard currentState = percept.GetBoardCopy();
    Move[] actions = currentState.GetPossibleMoves(playerNumber);

    // Nothing to choose from: bail out before indexing into the empty array.
    if (actions.Length < 1)
    {
        return default(Move);
    }

    // Hash of the current board, shared by all lookups in the loop.
    var boardKey = currentState.CustomHash();

    Move action = actions[0];
    double max = double.NegativeInfinity;
    foreach (Move a in actions)
    {
        VirusBoard newState = currentState.GetUpdated(a);

        double q = 0;
        if (Q.ContainsKey(boardKey))
        {
            var moveKey = a.CustomHash();
            if (Q[boardKey].ContainsKey(moveKey))
            {
                // Start from the stored value for this (board, move) pair.
                q = Q[boardKey][moveKey];
            }
        }

        q += MinValue(newState, 0);

        if (q > max)
        {
            max = q;
            action = a;
        }

        // NOTE(review): a score of exactly 1 ends the search; presumably the maximum
        // attainable value - verify against MinValue.
        if (max == 1)
        {
            break;
        }
    }

    return action;
}
/// <summary>
/// Picks the legal move with the highest score, where a move's score is
/// <c>Utility(currentState, newState)</c> plus <c>MinValue(newState, 0)</c>.
/// </summary>
/// <param name="percept">Game to move in; only a copy of its board is read.</param>
/// <returns>
/// The best-scoring move (the earliest one on ties), or <c>default(Move)</c> when the player
/// has no legal move.
/// </returns>
public override Move Move(Virus percept)
{
    VirusBoard currentState = percept.GetBoardCopy();
    Move[] actions = currentState.GetPossibleMoves(playerNumber);

    // Without this guard the actions[0] access below throws when no move is available.
    if (actions.Length < 1)
    {
        return default(Move);
    }

    Move action = actions[0];
    double max = double.NegativeInfinity;
    foreach (Move a in actions)
    {
        VirusBoard newState = currentState.GetUpdated(a);

        double q = Utility(currentState, newState);
        q += MinValue(newState, 0);

        // Strict comparison: the first move reaching a given score is kept.
        if (q > max)
        {
            max = q;
            action = a;
        }

        // Nothing can beat positive infinity, so the remaining moves are skipped.
        if (double.IsPositiveInfinity(max))
        {
            break;
        }
    }

    return action;
}
/// <summary>
/// Evaluates each legal move as <c>Utility(currentState, newState)</c> +
/// <c>MinValue(newState, 0)</c> and returns the one with the highest value.
/// </summary>
/// <param name="percept">Game to move in; only a copy of its board is read.</param>
/// <returns>
/// The move with the highest value; on equal values the one listed first wins. Returns
/// <c>default(Move)</c> when <c>GetPossibleMoves</c> yields no move.
/// </returns>
public override Move Move(Virus percept)
{
    VirusBoard currentState = percept.GetBoardCopy();
    Move[] actions = currentState.GetPossibleMoves(playerNumber);

    // Nothing to choose from: bail out before indexing into the empty array.
    if (actions.Length < 1)
    {
        return default(Move);
    }

    Move action = actions[0];
    double max = double.NegativeInfinity;
    foreach (Move a in actions)
    {
        VirusBoard newState = currentState.GetUpdated(a);

        // Immediate utility of the move plus the value returned for the resulting board.
        double q = Utility(currentState, newState);
        q += MinValue(newState, 0);

        if (q > max)
        {
            max = q;
            action = a;
        }

        // An infinite score cannot be improved on; stop searching.
        if (max == double.PositiveInfinity)
        {
            break;
        }
    }

    return action;
}
/// <summary>
/// Chooses a move either greedily (most takeable pieces) or arbitrarily, depending on a
/// random draw compared against <c>randomRatio</c>.
/// </summary>
/// <param name="percept">Game to move in; only a copy of its board is read.</param>
/// <returns>
/// <c>default(Move)</c> when there is no legal move; otherwise the selected move. When
/// <c>deterministic</c> is set, the first candidate is always taken instead of a random one.
/// </returns>
public override Move Move(Virus percept)
{
    VirusBoard board = percept.GetBoardCopy();
    Move[] candidates = board.GetPossibleMoves(playerNumber);
    if (candidates.Length == 0)
    {
        return default(Move);
    }

    // One draw decides the mode: above randomRatio plays greedily, otherwise arbitrarily.
    bool playGreedy = random.NextDouble() > randomRatio;
    if (!playGreedy)
    {
        return deterministic
            ? candidates[0]
            : candidates[random.Next(candidates.Length)];
    }

    // Greedy mode: collect every move that ties for the best score.
    List<Move> best = new List<Move>();
    int bestScore = -1;
    foreach (Move candidate in candidates)
    {
        int score = board.TakeablePieces(candidate);
        if (candidate.IsLongMove)
        {
            // Long moves are scored one lower than their takeable-piece count.
            score -= 1;
        }

        if (score > bestScore)
        {
            bestScore = score;
            best.Clear();
            best.Add(candidate);
        }
        else if (score == bestScore)
        {
            best.Add(candidate);
        }
    }

    return deterministic
        ? best[0]
        : best[random.Next(best.Count)];
}
/// <summary>
/// One Q-learning step. When learning is enabled and a previous (state, action) pair is
/// remembered, its Q-value is updated from the board observed now; afterwards the next
/// move is chosen, remembered and returned.
/// </summary>
/// <param name="percept">Game to move in; its winner and a copy of its board are read.</param>
/// <returns>The move chosen by <c>GetMaxExplorationFunctionA</c> for the observed board.</returns>
public override Move Move(Virus percept)
{
    // The winner is read up front; it is used below to seed a value for finished games.
    byte winner = percept.Winner;
    VirusBoard newState = percept.GetBoardCopy();
    var observedKey = newState.CustomHash();

    // The observed board always gets a Q row, even when not learning.
    if (!Q.ContainsKey(observedKey))
    {
        Q.Add(observedKey, new Dictionary<UInt32, double>());
    }

    if (learn && !prevState.Equals(default(VirusBoard)))
    {
        var previousKey = prevState.CustomHash();
        var actionKey = prevAction.CustomHash();

        // Make sure the visit counter for (previous state, previous action) exists.
        if (!N.ContainsKey(previousKey))
        {
            N.Add(previousKey, new Dictionary<UInt32, int>());
        }
        var visits = N[previousKey];
        if (!visits.ContainsKey(actionKey))
        {
            visits.Add(actionKey, 0);
        }

        // Make sure the Q entry for (previous state, previous action) exists.
        if (!Q.ContainsKey(previousKey))
        {
            Q.Add(previousKey, new Dictionary<UInt32, double>());
        }
        var previousRow = Q[previousKey];
        if (!previousRow.ContainsKey(actionKey))
        {
            previousRow.Add(actionKey, initvalue);
        }

        // Seed key 0 of the observed board with +1 if this player won, -1 if another
        // player (non-zero winner) did. An existing entry is never overwritten.
        bool thisPlayerWon = winner == playerNumber;
        if (thisPlayerWon || winner != 0)
        {
            var observedRow = Q[observedKey];
            if (!observedRow.ContainsKey(0))
            {
                observedRow.Add(0, thisPlayerWon ? 1 : -1);
            }
        }

        visits[actionKey]++;

        // Q <- Q + LearningRate(n) * (reward + discount * maxQ(observed) - Q)
        previousRow[actionKey] = previousRow[actionKey]
            + LearningRate(visits[actionKey])
            * (prevReward + discount * GetMaxQ(newState) - previousRow[actionKey]);
    }

    // Remember what is played now so the next call can update it.
    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;
    return prevAction;
}
/// <summary>
/// Performs the pending Q-value update for the previously played (state, action) pair, if
/// learning is on and such a pair exists, then selects and records the next move.
/// </summary>
/// <param name="percept">Game to move in; its winner and a copy of its board are read.</param>
/// <returns>The move <c>GetMaxExplorationFunctionA</c> selects for the current board.</returns>
public override Move Move(Virus percept)
{
    byte winner = percept.Winner;
    VirusBoard newState = percept.GetBoardCopy();
    var newKey = newState.CustomHash();

    // Create the Q row for the current board on first sight.
    if (!Q.ContainsKey(newKey))
    {
        Q[newKey] = new Dictionary<UInt32, double>();
    }

    if (learn && !prevState.Equals(default(VirusBoard)))
    {
        var prevKey = prevState.CustomHash();
        var prevActionKey = prevAction.CustomHash();

        // Lazily create the counter and the Q entry for the pair being updated.
        if (!N.ContainsKey(prevKey))
        {
            N[prevKey] = new Dictionary<UInt32, int>();
        }
        if (!N[prevKey].ContainsKey(prevActionKey))
        {
            N[prevKey][prevActionKey] = 0;
        }
        if (!Q.ContainsKey(prevKey))
        {
            Q[prevKey] = new Dictionary<UInt32, double>();
        }
        if (!Q[prevKey].ContainsKey(prevActionKey))
        {
            Q[prevKey][prevActionKey] = initvalue;
        }

        // Finished game: store +1 (this player won) or -1 (someone else won) under key 0
        // of the current board, unless a value is already there.
        if (winner == playerNumber)
        {
            if (!Q[newKey].ContainsKey(0))
            {
                Q[newKey].Add(0, 1);
            }
        }
        else if (winner != 0)
        {
            if (!Q[newKey].ContainsKey(0))
            {
                Q[newKey].Add(0, -1);
            }
        }

        var counters = N[prevKey];
        var values = Q[prevKey];

        counters[prevActionKey]++;

        // Move the old estimate towards reward + discounted best value of the current
        // board, scaled by the visit-count dependent learning rate.
        values[prevActionKey] = values[prevActionKey]
            + LearningRate(counters[prevActionKey])
            * (prevReward + discount * GetMaxQ(newState) - values[prevActionKey]);
    }

    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;
    return prevAction;
}
/// <summary>
/// Returns a move for the current board: either one of the moves with the most takeable
/// pieces, or any legal move, depending on whether a random draw exceeds
/// <c>randomRatio</c>.
/// </summary>
/// <param name="percept">Game to move in; only a copy of its board is read.</param>
/// <returns>
/// The selected move, or <c>default(Move)</c> if no legal move exists. With
/// <c>deterministic</c> set, the first eligible move is returned instead of a random one.
/// </returns>
public override Move Move(Virus percept)
{
    VirusBoard snapshot = percept.GetBoardCopy();
    Move[] legalMoves = snapshot.GetPossibleMoves(playerNumber);
    if (legalMoves.Length < 1)
    {
        return default(Move);
    }

    Move chosen;
    if (random.NextDouble() > randomRatio)
    {
        // Brute force: keep all moves sharing the highest score seen so far.
        List<Move> topMoves = new List<Move>();
        int topScore = -1;
        for (int i = 0; i < legalMoves.Length; i++)
        {
            Move move = legalMoves[i];

            // A long move counts one less than the pieces it can take.
            int score = snapshot.TakeablePieces(move);
            if (move.IsLongMove)
            {
                score--;
            }

            if (score == topScore)
            {
                topMoves.Add(move);
            }
            else if (score > topScore)
            {
                topScore = score;
                topMoves.Clear();
                topMoves.Add(move);
            }
        }

        chosen = deterministic ? topMoves[0] : topMoves[random.Next(topMoves.Count)];
    }
    else
    {
        // Random play: any legal move is acceptable.
        chosen = deterministic ? legalMoves[0] : legalMoves[random.Next(legalMoves.Length)];
    }

    return chosen;
}
/// <summary>
/// One Q-learning step. When learning is enabled and a previous (state, action) pair is
/// remembered, its Q-value is updated from the board observed now; afterwards the next
/// move is chosen, remembered and returned.
/// </summary>
/// <param name="percept">Game to move in; only a copy of its board is read.</param>
/// <returns>The move chosen by <c>GetMaxExplorationFunctionA</c> for the observed board.</returns>
public override Move Move(Virus percept)
{
    VirusBoard newState = percept.GetBoardCopy();
    var observedKey = newState.CustomHash();

    // The observed board always gets a Q row, even when not learning.
    if (!Q.ContainsKey(observedKey))
    {
        Q.Add(observedKey, new Dictionary<UInt32, double>());
    }

    if (learn && !prevState.Equals(default(VirusBoard)))
    {
        var previousKey = prevState.CustomHash();
        var actionKey = prevAction.CustomHash();

        // Make sure the visit counter for (previous state, previous action) exists.
        if (!N.ContainsKey(previousKey))
        {
            N.Add(previousKey, new Dictionary<UInt32, int>());
        }
        if (!N[previousKey].ContainsKey(actionKey))
        {
            N[previousKey].Add(actionKey, 0);
        }

        // The Q entry for the previous pair is normally seeded at the end of the call that
        // chose it, but only while learning. If learning was switched on afterwards the
        // entry is missing and the update below would throw KeyNotFoundException, so it is
        // seeded here as well.
        if (!Q.ContainsKey(previousKey))
        {
            Q.Add(previousKey, new Dictionary<UInt32, double>());
        }
        if (!Q[previousKey].ContainsKey(actionKey))
        {
            Q[previousKey].Add(actionKey, initvalue);
        }

        N[previousKey][actionKey]++;

        // Q <- Q + LearningRate(n) * (reward + discount * maxQ(observed) - Q)
        Q[previousKey][actionKey] = Q[previousKey][actionKey]
            + LearningRate(N[previousKey][actionKey])
            * (prevReward + discount * GetMaxQ(newState) - Q[previousKey][actionKey]);
    }

    // Remember what is played now so the next call can update it.
    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;

    // While learning, give the freshly chosen pair its initial Q-value.
    if (learn)
    {
        var chosenKey = prevAction.CustomHash();
        if (!Q[observedKey].ContainsKey(chosenKey))
        {
            Q[observedKey].Add(chosenKey, initvalue);
        }
    }

    return prevAction;
}
/// <summary>
/// Performs the pending Q-value update for the previously played (state, action) pair, if
/// learning is on and such a pair exists, then selects and records the next move.
/// </summary>
/// <param name="percept">Game to move in; only a copy of its board is read.</param>
/// <returns>The move <c>GetMaxExplorationFunctionA</c> selects for the current board.</returns>
public override Move Move(Virus percept)
{
    VirusBoard newState = percept.GetBoardCopy();
    var newKey = newState.CustomHash();

    // Create the Q row for the current board on first sight.
    if (!Q.ContainsKey(newKey))
    {
        Q.Add(newKey, new Dictionary<UInt32, double>());
    }

    if (learn && !prevState.Equals(default(VirusBoard)))
    {
        var prevKey = prevState.CustomHash();
        var prevActionKey = prevAction.CustomHash();

        // Lazily create the visit counter for the pair being updated.
        if (!N.ContainsKey(prevKey))
        {
            N.Add(prevKey, new Dictionary<UInt32, int>());
        }
        if (!N[prevKey].ContainsKey(prevActionKey))
        {
            N[prevKey].Add(prevActionKey, 0);
        }

        // Guard against a missing Q entry: the entry is only created at the end of a call
        // made while learning, so enabling learning between two calls would otherwise make
        // the read below throw KeyNotFoundException.
        if (!Q.ContainsKey(prevKey))
        {
            Q.Add(prevKey, new Dictionary<UInt32, double>());
        }
        if (!Q[prevKey].ContainsKey(prevActionKey))
        {
            Q[prevKey].Add(prevActionKey, initvalue);
        }

        N[prevKey][prevActionKey]++;

        // Move the old estimate towards reward + discounted best value of the current
        // board, scaled by the visit-count dependent learning rate.
        Q[prevKey][prevActionKey] = Q[prevKey][prevActionKey]
            + LearningRate(N[prevKey][prevActionKey])
            * (prevReward + discount * GetMaxQ(newState) - Q[prevKey][prevActionKey]);
    }

    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;

    // While learning, register the chosen move under the current board with the initial value.
    if (learn)
    {
        var nextActionKey = prevAction.CustomHash();
        if (!Q[newKey].ContainsKey(nextActionKey))
        {
            Q[newKey].Add(nextActionKey, initvalue);
        }
    }

    return prevAction;
}