/// <summary>
/// Performs one playout: walks down the tree from <paramref name="currentNode"/> and then
/// hands the reached node to <see cref="MonteCarloSearch"/>.
/// </summary>
/// <remarks>
/// The descent keeps following the child picked by <c>GetBestChildUCTNode</c> for as long as
/// the node is not game-over and its number of children equals the number of options returned
/// by <c>AvailableOptions</c> (presumably meaning the node is fully expanded). It ends early
/// when no best child is returned.
/// The step counter is not advanced during the descent, so the rollout always starts at step 0.
/// </remarks>
/// <param name="currentNode">Node the playout starts from; its current player must have at least one option.</param>
/// <param name="availableOptionTypes">Option types forwarded to <c>AvailableOptions</c> and <see cref="MonteCarloSearch"/>.</param>
/// <returns>The node returned by <see cref="MonteCarloSearch"/>.</returns>
public UCTNode OnePlayOut(UCTNode currentNode, List<PlayerTaskType> availableOptionTypes) {
    Debug.Assert(currentNode.currentPOGame.CurrentPlayer.Options().Count() > 0);

    // Stays at zero: the descent below intentionally does not count its moves.
    int descentSteps = 0;
    UCTNode cursor = currentNode;

    while (true) {
        if (cursor.IsGameOver()) {
            break;
        }

        // Stop at the first node whose child count differs from its option count.
        int optionCount = AvailableOptions(cursor, availableOptionTypes).Count();
        if (optionCount != cursor.childDict.Count()) {
            break;
        }

        UCTNode preferredChild = GetBestChildUCTNode(cursor);
        if (preferredChild == null) {
            break;
        }

        cursor = preferredChild;
    }

    UCTNode leafNode = MonteCarloSearch(cursor, availableOptionTypes, descentSteps, mcSearchDepth);

    // Track the largest step count seen so far.
    if (descentSteps > maxRealMCSearchTimes) {
        maxRealMCSearchTimes = descentSteps;
    }

    return leafNode;
}
/// <summary>
/// Runs playouts from <paramref name="currentNode"/> until one of the budgets is exhausted,
/// then returns the option that <c>GetBestOption</c> selects for <c>root</c>.
/// </summary>
/// <remarks>
/// If <c>root</c> reports that the current player's turn has ended, or that the game is over,
/// the first entry of the root player's <c>Options()</c> is returned without any playout.
/// The loop stops when the playout count reaches <c>maxOnePlayOutTimes</c>, when the time since
/// this call started reaches <c>maxOptionCalculateTime</c>, or when the time since
/// <paramref name="accumulativeMillisecond"/> reaches <c>maxTurnCalculateTime</c>.
/// </remarks>
/// <param name="currentNode">Node every playout starts from.</param>
/// <param name="accumulativeMillisecond">
/// Unix-millisecond timestamp compared against the current clock; presumably the moment the
/// turn's computation began (confirm with callers).
/// </param>
/// <returns>The selected option; asserted to be non-null in debug builds.</returns>
public PlayerTask Simulate(UCTNode currentNode, long accumulativeMillisecond) {
    // Nothing to search: fall back to the first option the root player has.
    if (root.IsCurrentPlayerTurnEnd() || root.IsGameOver()) {
        return root.currentPOGame.CurrentPlayer.Options()[0];
    }

    int playoutsDone = 0;
    long startedAt = DateTimeOffset.Now.ToUnixTimeMilliseconds();

    if (_debug_) {
        Log.Instance().Append(currentNode.FullPrint());
    }

    while (playoutsDone < maxOnePlayOutTimes) {
        bool withinOptionBudget =
            DateTimeOffset.Now.ToUnixTimeMilliseconds() - startedAt < this.maxOptionCalculateTime;
        if (!withinOptionBudget) {
            break;
        }

        bool withinTurnBudget =
            DateTimeOffset.Now.ToUnixTimeMilliseconds() - accumulativeMillisecond < this.maxTurnCalculateTime;
        if (!withinTurnBudget) {
            break;
        }

        UCTNode playoutLeaf = OnePlayOut(currentNode, availableOptionTypes);
        if (_debug_) {
            Log.Instance().Append(playoutLeaf.FullPathPrint());
            Log.Instance().Append(playoutLeaf.FullPrint());
        }

        double playoutReward = EvaluateUCTNode(playoutLeaf, maxTurnCalculateTime);
        if (_debug_) {
            Log.Instance().Append("reward : " + playoutReward.ToString());
        }

        UpdateReward(playoutLeaf, playoutReward);
        playoutsDone++;
    }

    double rootReward = EvaluateUCTNode(root, maxTurnCalculateTime);
    PlayerTask bestOption = GetBestOption(root, rootReward);

    // Track the largest number of playouts completed in a single call.
    if (playoutsDone > maxRealOnePlayOutTimes) {
        maxRealOnePlayOutTimes = playoutsDone;
    }

    Debug.Assert(bestOption != null);
    return bestOption;
}
/// <summary>
/// Random rollout: starting at <paramref name="startNode"/>, repeatedly picks a uniformly random
/// available option, moves to (or creates) the matching child node, and returns the node reached.
/// </summary>
/// <remarks>
/// The rollout always stops when a simulation failed, when the current node has no available
/// options, or when the current node is game-over. Otherwise it continues while
/// <paramref name="step"/> is below <paramref name="depth"/>, and beyond that depth for as long
/// as <c>GetEndTurnOption</c> returns null for the current node.
/// </remarks>
/// <param name="startNode">Node the rollout starts from.</param>
/// <param name="availableOptionTypes">Option types forwarded to <c>AvailableOptions</c>.</param>
/// <param name="step">Number of steps already taken; incremented once per rollout move.</param>
/// <param name="depth">Step limit, applied only while an end-turn option exists (see remarks).</param>
/// <returns>The last node reached by the rollout.</returns>
public UCTNode MonteCarloSearch(UCTNode startNode, List<PlayerTaskType> availableOptionTypes, int step, int depth) {
    UCTNode currentNode = startNode;
    List<PlayerTask> optionsList = AvailableOptions(currentNode, availableOptionTypes);
    bool isStop = false;

    // The depth/end-turn alternative is parenthesised on purpose. Without the grouping,
    // "a && b && c && d || e" lets the end-turn test alone keep the loop running, even after
    // a failed simulation, on a finished game, or with an empty option list (where indexing
    // optionsList[0] would throw).
    while (!isStop
           && optionsList.Count > 0
           && !currentNode.IsGameOver()
           && (step < depth || GetEndTurnOption(currentNode) == null)) {
        int rndOptionNo = rnd.Next(optionsList.Count);
        PlayerTask rndOption = optionsList[rndOptionNo];
        UCTNode tmpNode = currentNode.GetChild(rndOptionNo);

        if (tmpNode == null) {
            // No child for this option yet: simulate it and attach the result.
            POGame.POGame simulatedPOGame = SimulateOption(currentNode.currentPOGame, rndOption);

            // The simulation sometimes returns null; dump the node and end the rollout.
            if (simulatedPOGame == null) {
                isStop = true;
                Console.WriteLine(currentNode.FullPathPrint());
                Console.WriteLine(currentNode.FullPrint());
            }

            // NOTE(review): the child is still added when the simulated game is null, as in
            // the original code. Confirm that AddChild and AvailableOptions tolerate that.
            currentNode = currentNode.AddChild(currentNode, rndOption, rndOptionNo, simulatedPOGame);
        } else {
            // Child already exists: reuse it.
            currentNode = tmpNode;
        }

        step++;

        // Refresh the options for the node we just moved to.
        optionsList = AvailableOptions(currentNode, availableOptionTypes);
    }

    return currentNode;
}