/// <summary>
/// Returns the tasks offered to the current player of the node's game, keeping only
/// those whose <c>PlayerTaskType</c> is contained in <paramref name="availableOptionTypes"/>.
/// </summary>
/// <param name="currentNode">Node whose <c>currentPOGame</c> is queried for options.</param>
/// <param name="availableOptionTypes">Task types that are allowed through the filter.</param>
/// <param name="playCards">Forwarded unchanged to <c>CurrentPlayer.Options</c>.</param>
/// <returns>A new list holding the permitted options, in the order the game reported them.</returns>
/// <exception cref="ArgumentNullException"><paramref name="currentNode"/> is null.</exception>
/// <exception cref="InvalidOperationException">The node carries no game state.</exception>
public List<PlayerTask> AvailableOptions(UCTNode currentNode, List<PlayerTaskType> availableOptionTypes, bool playCards = true)
{
    if (currentNode == null)
    {
        // The previous code tried to log the node here, which itself dereferenced null.
        throw new ArgumentNullException(nameof(currentNode));
    }

    if (currentNode.currentPOGame == null)
    {
        // Keep the diagnostic dump, then fail with a descriptive error instead of
        // falling through to a NullReferenceException on the next statement.
        Log.Instance().Append(currentNode.FullPathPrint());
        Log.Instance().Append(currentNode.FullPrint());
        throw new InvalidOperationException("UCT node has no game state to read options from.");
    }

    List<PlayerTask> options = currentNode.currentPOGame.CurrentPlayer.Options(playCards);
    return options.FindAll(option => availableOptionTypes.Contains(option.PlayerTaskType));
}
/// <summary>
/// Builds a simulator rooted at <paramref name="poGame"/> and stores the search parameters.
/// </summary>
/// <param name="poGame">Game state handed to <c>Root</c> to create the root node.</param>
/// <param name="randomSeed">Seed stored in <c>rndSeed</c>; used only when <paramref name="rnd"/> is null.</param>
/// <param name="rnd">Random source to reuse, or null to create one from the seed.</param>
/// <param name="ucb1Coef">Coefficient stored for the UCB computation.</param>
/// <param name="mcSearchDepth">Depth limit stored for the Monte-Carlo search.</param>
/// <param name="mcOpSearchDepth">Depth limit stored for the opponent search.</param>
/// <param name="maxOpOnePlayOutTimes">Upper bound on opponent play-outs.</param>
/// <param name="maxOnePlayOutTimes">Upper bound on own play-outs.</param>
/// <param name="BoardCoef">Stored as-is in the <c>BoardCoef</c> member.</param>
/// <param name="MinionsCoef">Stored as-is in the <c>MinionsCoef</c> member.</param>
/// <param name="OpMinionsCoef">Stored as-is in the <c>OpMinionsCoef</c> member.</param>
public UCTSimulator(SabberStoneCoreAi.POGame.POGame poGame, int randomSeed, Random rnd, double ucb1Coef, int mcSearchDepth, int mcOpSearchDepth, int maxOpOnePlayOutTimes, int maxOnePlayOutTimes, int[,] BoardCoef, int[] MinionsCoef, int[] OpMinionsCoef)
{
    this.root = Root(poGame);

    // Random source: prefer the caller's instance, otherwise seed a fresh one.
    this.rndSeed = randomSeed;
    this.rnd = rnd ?? new Random(this.rndSeed);

    // Search parameters.
    this.ucb1Coef = ucb1Coef;
    this.mcSearchDepth = mcSearchDepth;
    this.mcOpSearchDepth = mcOpSearchDepth;
    this.maxOpOnePlayOutTimes = maxOpOnePlayOutTimes;
    this.maxOnePlayOutTimes = maxOnePlayOutTimes;

    // Evaluation coefficients.
    this.BoardCoef = BoardCoef;
    this.MinionsCoef = MinionsCoef;
    this.OpMinionsCoef = OpMinionsCoef;

    // Task types explored for the current player.
    availableOptionTypes.AddRange(new[]
    {
        PlayerTaskType.CHOOSE,
        PlayerTaskType.HERO_ATTACK,
        PlayerTaskType.HERO_POWER,
        PlayerTaskType.MINION_ATTACK,
        PlayerTaskType.PLAY_CARD,
    });

    // Task types explored for the opponent (no CHOOSE, no PLAY_CARD).
    availableOpOptionTypes.AddRange(new[]
    {
        PlayerTaskType.HERO_ATTACK,
        PlayerTaskType.HERO_POWER,
        PlayerTaskType.MINION_ATTACK,
    });
}
/// <summary>
/// Looks up the child stored under key <paramref name="j"/> in <c>childDict</c>.
/// </summary>
/// <param name="j">Dictionary key of the child.</param>
/// <returns>The stored child, or null when no entry exists for <paramref name="j"/>.</returns>
public UCTNode GetChild(int j)
{
    UCTNode found;
    if (!this.childDict.TryGetValue(j, out found))
    {
        return null;
    }

    return found;
}
/// <summary>
/// Applies the end-turn task to the game held by <paramref name="currentNode"/> and wraps
/// the resulting game in a fresh, parentless node.
/// </summary>
/// <param name="currentNode">Node whose game is advanced past the end of the turn.</param>
/// <returns>
/// A new node with zeroed statistics and an empty child dictionary, or null when
/// <c>GetEndTurnOption</c> finds no end-turn task for the node.
/// </returns>
public UCTNode SimulateEndTurn(UCTNode currentNode)
{
    PlayerTask endTurnOption = GetEndTurnOption(currentNode);
    if (endTurnOption == null)
    {
        return null;
    }

    POGame.POGame gameAfterEndTurn = SimulateOption(currentNode.currentPOGame, endTurnOption);
    return new UCTNode(0, 0, 0, 0, null, endTurnOption, gameAfterEndTurn, new Dictionary<int, UCTNode>());
}
/// <summary>
/// Scores a leaf of the current player's tree by ending the turn and letting
/// <c>SimulateOppenont</c> search the opponent's reply.
/// </summary>
/// <param name="currentPlayerLeafNode">Leaf node to score.</param>
/// <param name="accumulativeMillisecond">Passed through unchanged to <c>SimulateOppenont</c>.</param>
/// <returns>
/// <c>Double.MinValue</c> when the turn cannot be ended from the leaf; the leaf's own
/// evaluation when the opponent search yields no node; otherwise the UCB value of the
/// node the opponent search returned.
/// </returns>
public double EvaluateUCTNode(UCTNode currentPlayerLeafNode, long accumulativeMillisecond)
{
    UCTNode opRootNode = SimulateEndTurn(currentPlayerLeafNode);
    if (opRootNode == null)
    {
        // No end-turn task is available at this leaf.
        return Double.MinValue;
    }

    UCTNode opponentReply = SimulateOppenont(opRootNode, accumulativeMillisecond);
    if (opponentReply == null)
    {
        // The opponent search produced no child: score the leaf position directly.
        return EvaluateGameCurrentPlayer(currentPlayerLeafNode.currentPOGame);
    }

    return UCBValue(opponentReply);
}
/// <summary>
/// Performs one play-out: walks down through nodes that have a child for every available
/// option, following the best child each time, then continues with <c>MonteCarloSearch</c>.
/// </summary>
/// <param name="currentNode">Node the play-out starts from.</param>
/// <param name="availableOptionTypes">Task types considered during the descent and the search.</param>
/// <returns>The node returned by <c>MonteCarloSearch</c>.</returns>
public UCTNode OnePlayOut(UCTNode currentNode, List<PlayerTaskType> availableOptionTypes)
{
    // The descent does not count steps (the increment is disabled), so this stays 0.
    int step = 0;
    Debug.Assert(currentNode.currentPOGame.CurrentPlayer.Options().Count() > 0);

    // Descend while the game is running and the node has one child per available option.
    while (!currentNode.IsGameOver()
           && AvailableOptions(currentNode, availableOptionTypes).Count() == currentNode.childDict.Count())
    {
        UCTNode next = GetBestChildUCTNode(currentNode);
        if (next == null)
        {
            break;
        }

        currentNode = next;
    }

    UCTNode leafNode = MonteCarloSearch(currentNode, availableOptionTypes, step, mcSearchDepth);

    if (step > maxRealMCSearchTimes)
    {
        maxRealMCSearchTimes = step;
    }

    return leafNode;
}
/// <summary>
/// Runs play-outs from <paramref name="currentNode"/> until a play-out count or time
/// limit is reached, then picks the best task for the root.
/// </summary>
/// <param name="currentNode">Node every play-out starts from.</param>
/// <param name="accumulativeMillisecond">
/// Unix-millisecond timestamp that the per-turn time budget (<c>maxTurnCalculateTime</c>)
/// is measured from.
/// </param>
/// <returns>
/// The root's first option when the root reports the turn has ended or the game is over;
/// otherwise the task chosen by <c>GetBestOption</c>.
/// </returns>
public PlayerTask Simulate(UCTNode currentNode, long accumulativeMillisecond)
{
    if (root.IsCurrentPlayerTurnEnd() || root.IsGameOver())
    {
        return root.currentPOGame.CurrentPlayer.Options()[0];
    }

    double reward = 0;
    int onePlayoutTimes = 0;
    long startMilliseconds = DateTimeOffset.Now.ToUnixTimeMilliseconds();

    if (_debug_)
    {
        Log.Instance().Append(currentNode.FullPrint());
    }

    // Stop on the play-out cap, the per-option budget, or the per-turn budget.
    while (onePlayoutTimes < maxOnePlayOutTimes
           && DateTimeOffset.Now.ToUnixTimeMilliseconds() - startMilliseconds < this.maxOptionCalculateTime
           && DateTimeOffset.Now.ToUnixTimeMilliseconds() - accumulativeMillisecond < this.maxTurnCalculateTime)
    {
        UCTNode currentPlayerLeafNode = OnePlayOut(currentNode, availableOptionTypes);
        if (_debug_)
        {
            Log.Instance().Append(currentPlayerLeafNode.FullPathPrint());
        }
        if (_debug_)
        {
            Log.Instance().Append(currentPlayerLeafNode.FullPrint());
        }

        // Pass the turn-start timestamp, not the budget length: the opponent search
        // subtracts this value from "now" and compares the result with
        // maxTurnCalculateTime. Passing the budget itself made that check fail at once.
        reward = EvaluateUCTNode(currentPlayerLeafNode, accumulativeMillisecond);
        if (_debug_)
        {
            Log.Instance().Append("reward : " + reward.ToString());
        }

        UpdateReward(currentPlayerLeafNode, reward);
        onePlayoutTimes++;
    }

    // Value of ending the turn right away, used as the baseline in GetBestOption.
    reward = EvaluateUCTNode(root, accumulativeMillisecond);
    PlayerTask bestOption = GetBestOption(root, reward);

    if (onePlayoutTimes > maxRealOnePlayOutTimes)
    {
        maxRealOnePlayOutTimes = onePlayoutTimes;
    }

    Debug.Assert(bestOption != null);
    return bestOption;
}
/// <summary>
/// Creates a node for <paramref name="option"/> and registers it in the child dictionary
/// of <paramref name="parent"/> under key <paramref name="j"/>.
/// </summary>
/// <param name="parent">Node that receives the new child.</param>
/// <param name="option">Task that leads from the parent to the child; asserted non-null.</param>
/// <param name="j">Key of the child in the parent's dictionary, also stored on the child.</param>
/// <param name="simulatedPOGame">Game state of the child; asserted non-null.</param>
/// <returns>The newly created child node.</returns>
public UCTNode AddChild(UCTNode parent, PlayerTask option, int j, POGame.POGame simulatedPOGame)
{
    Debug.Assert(simulatedPOGame != null);
    Debug.Assert(option != null);

    var noChildrenYet = new Dictionary<int, UCTNode>();
    var child = new UCTNode(0, 0, j, 0, parent, option, simulatedPOGame, noChildrenYet);

    // Dictionary.Add throws if the key j is already present.
    parent.childDict.Add(j, child);
    return child;
}
/// <summary>
/// Creates a parentless node for <paramref name="poGame"/> with zeroed statistics, no
/// associated option and an empty child dictionary.
/// </summary>
/// <param name="poGame">Game state held by the node.</param>
public UCTNode(SabberStoneCoreAi.POGame.POGame poGame)
    : this(0, 0, 0, 0, null, null, poGame, new Dictionary<int, UCTNode>())
{
}
/// <summary>
/// Propagates <paramref name="reward"/> from <paramref name="leafNode"/> towards the
/// root, calling <c>UpdateReward</c> on every node that has a parent.
/// </summary>
/// <param name="leafNode">Node the propagation starts from.</param>
/// <param name="reward">Reward passed to each visited node.</param>
public void UpdateReward(UCTNode leafNode, double reward)
{
    // The walk stops at the parentless node, which is not itself updated here.
    for (UCTNode visited = leafNode; visited.parent != null; visited = visited.parent)
    {
        visited.UpdateReward(reward);
    }
}
/// <summary>
/// Renders the options on the path from this node up to the parentless ancestor,
/// one line per node, starting with this node.
/// </summary>
/// <returns>The concatenated <c>PlayerTaskPrint</c> lines.</returns>
public string FullPathPrint()
{
    var path = new StringBuilder();
    for (UCTNode visited = this; visited != null; visited = visited.parent)
    {
        path.AppendLine(PlayerTaskPrint(visited.option));
    }

    return path.ToString();
}
/// <summary>
/// Finds the first <c>END_TURN</c> task among the current player's options.
/// </summary>
/// <param name="currentNode">Node whose game is queried.</param>
/// <param name="playCards">Forwarded unchanged to <c>CurrentPlayer.Options</c>.</param>
/// <returns>The end-turn task, or null when the options contain none.</returns>
public PlayerTask GetEndTurnOption(UCTNode currentNode, bool playCards = false)
{
    List<PlayerTask> candidates = currentNode.currentPOGame.CurrentPlayer.Options(playCards);
    return candidates.Find(task => task.PlayerTaskType == PlayerTaskType.END_TURN);
}
/// <summary>
/// Random descent from <paramref name="startNode"/>: repeatedly picks a random available
/// option, reusing the matching child when it exists and creating it otherwise.
/// </summary>
/// <param name="startNode">Node the descent starts from.</param>
/// <param name="availableOptionTypes">Task types that may be picked.</param>
/// <param name="step">Number of steps already taken before this call.</param>
/// <param name="depth">Step limit; exceeded only while the node offers no end-turn task.</param>
/// <returns>The last node reached that holds a valid game state.</returns>
public UCTNode MonteCarloSearch(UCTNode startNode, List<PlayerTaskType> availableOptionTypes, int step, int depth)
{
    UCTNode currentNode = startNode;
    List<PlayerTask> optionsList = AvailableOptions(currentNode, availableOptionTypes);

    // The depth limit may be overrun only while no end-turn task exists. The other
    // stop conditions always apply. The unparenthesised "a && b && c || d" form let the
    // missing end-turn task override them, e.g. indexing into an empty option list.
    while (optionsList.Count > 0
           && !currentNode.IsGameOver()
           && (step < depth || GetEndTurnOption(currentNode) == null))
    {
        // Pick a random option; its index doubles as the child key.
        int rndOptionNo = rnd.Next(optionsList.Count);
        PlayerTask rndOption = optionsList[rndOptionNo];
        UCTNode nextNode = currentNode.GetChild(rndOptionNo);

        if (nextNode == null)
        {
            POGame.POGame simulatedPOGame = SimulateOption(currentNode.currentPOGame, rndOption);

            // SimulateOption sometimes returns null. Stop at the last valid node rather
            // than adding and entering a child that has no game state.
            if (simulatedPOGame == null)
            {
                Console.WriteLine(currentNode.FullPathPrint());
                Console.WriteLine(currentNode.FullPrint());
                break;
            }

            nextNode = currentNode.AddChild(currentNode, rndOption, rndOptionNo, simulatedPOGame);
        }

        currentNode = nextNode;
        step++;
        optionsList = AvailableOptions(currentNode, availableOptionTypes);
    }

    return currentNode;
}
/// <summary>
/// Creates a node with every member supplied explicitly.
/// </summary>
/// <param name="reward">Initial value of <c>reward</c>.</param>
/// <param name="nj">Initial value of <c>nj</c>.</param>
/// <param name="j">Value of <c>j</c>.</param>
/// <param name="N">Initial value of <c>N</c>.</param>
/// <param name="parent">Parent node, or null for a parentless node.</param>
/// <param name="option">Task associated with this node.</param>
/// <param name="poGame">Game state held by the node.</param>
/// <param name="childDict">Dictionary that will hold this node's children.</param>
public UCTNode(double reward, int nj, int j, int N, UCTNode parent, PlayerTask option, SabberStoneCoreAi.POGame.POGame poGame, Dictionary<int, UCTNode> childDict)
{
    // Position in the tree.
    this.parent = parent;
    this.childDict = childDict;
    this.j = j;

    // State carried by the node.
    this.option = option;
    this.currentPOGame = poGame;

    // Search statistics.
    this.reward = reward;
    this.nj = nj;
    this.N = N;
}
/// <summary>
/// Returns the child of <paramref name="node"/> with the smallest UCB value.
/// </summary>
/// <param name="node">Node whose children are compared.</param>
/// <returns>
/// The first child whose UCB value is strictly below every value seen before it, or null
/// when there are no children or none scores below <c>Double.MaxValue</c>.
/// </returns>
public UCTNode GetWorstChildUCTNode(UCTNode node)
{
    UCTNode lowest = null;
    double lowestValue = Double.MaxValue;

    foreach (UCTNode child in node.childDict.Values)
    {
        double value = UCBValue(child);
        if (value < lowestValue)
        {
            lowestValue = value;
            lowest = child;
        }
    }

    return lowest;
}
/// <summary>
/// Returns the child of <paramref name="node"/> with the largest UCB value.
/// </summary>
/// <param name="node">Node whose children are compared.</param>
/// <returns>
/// The first child whose UCB value is strictly above every value seen before it, or null
/// when there are no children or none scores above <c>Double.MinValue</c>.
/// </returns>
public UCTNode GetBestChildUCTNode(UCTNode node)
{
    UCTNode highest = null;
    double highestValue = Double.MinValue;

    foreach (UCTNode child in node.childDict.Values)
    {
        double value = UCBValue(child);
        if (value > highestValue)
        {
            highestValue = value;
            highest = child;
        }
    }

    return highest;
}
/// <summary>
/// Runs opponent play-outs from <paramref name="currentNode"/> until the play-out cap or
/// the per-turn time budget is reached, then returns its lowest-valued child.
/// </summary>
/// <param name="currentNode">Root of the opponent's search.</param>
/// <param name="accumulativeMillisecond">
/// Value subtracted from the current Unix-millisecond time and compared against
/// <c>maxTurnCalculateTime</c>.
/// </param>
/// <returns>The result of <c>GetWorstChildUCTNode</c> for <paramref name="currentNode"/>.</returns>
public UCTNode SimulateOppenont(UCTNode currentNode, long accumulativeMillisecond)
{
    if (_debug_)
    {
        Log.Instance().Append("simulateOppenont start");
    }

    for (int opOnePlayoutTimes = 0;
         opOnePlayoutTimes < maxOpOnePlayOutTimes
         && DateTimeOffset.Now.ToUnixTimeMilliseconds() - accumulativeMillisecond < maxTurnCalculateTime;
         opOnePlayoutTimes++)
    {
        UCTNode opLeafNode = OnePlayOut(currentNode, availableOpOptionTypes);
        if (_debug_)
        {
            Log.Instance().Append(opLeafNode.FullPathPrint());
        }
        if (_debug_)
        {
            Log.Instance().Append(opLeafNode.FullPrint());
        }

        double opponentReward = EvaluateGameCurrentOpponent(opLeafNode.currentPOGame);
        if (_debug_)
        {
            Log.Instance().Append("reward" + opponentReward.ToString());
        }

        UpdateReward(opLeafNode, opponentReward);
    }

    if (_debug_)
    {
        Log.Instance().Append("simulateOppenont end");
    }

    return GetWorstChildUCTNode(currentNode);
}
/// <summary>
/// Chooses the task to play from <paramref name="node"/>: the end-turn task valued at
/// <paramref name="rootUCTValue"/>, unless a child has a strictly larger UCB value.
/// </summary>
/// <param name="node">Node whose children and end-turn task are compared.</param>
/// <param name="rootUCTValue">Value credited to ending the turn immediately.</param>
/// <returns>
/// The option of the best-scoring child, the end-turn task when no child beats it, or
/// null when neither exists.
/// </returns>
public PlayerTask GetBestOption(UCTNode node, double rootUCTValue)
{
    PlayerTask chosen = GetEndTurnOption(node);

    // Ending the turn is the baseline only when an end-turn task is actually on offer.
    double chosenValue = chosen != null ? rootUCTValue : Double.MinValue;

    foreach (UCTNode child in node.childDict.Values)
    {
        double value = UCBValue(child);
        if (value > chosenValue)
        {
            chosenValue = value;
            chosen = child.option;
        }
    }

    Debug.Assert(chosenValue != Double.MinValue);
    return chosen;
}
/// <summary>
/// Computes the UCB value of <paramref name="node"/> from its reward, its <c>nj</c>, its
/// parent's <c>N</c> and the simulator's <c>ucb1Coef</c>.
/// </summary>
/// <param name="node">Node to score; it and its parent are asserted non-null.</param>
/// <returns>The result of the four-argument <c>UCBValue</c> overload.</returns>
public double UCBValue(UCTNode node)
{
    // Check the node before touching node.parent; the reverse order dereferenced a
    // null node inside the first assertion.
    Debug.Assert(node != null);
    Debug.Assert(node.parent != null);

    return UCBValue(node.reward, node.nj, node.parent.N, ucb1Coef);
}