/// <summary>
/// Returns the options of the node's current player whose task type is listed in
/// <paramref name="availableOptionTypes"/>.
/// </summary>
/// <param name="currentNode">Node whose <c>currentPOGame</c> supplies the current player's options.</param>
/// <param name="availableOptionTypes">Task types that are allowed through the filter.</param>
/// <param name="playCards">Forwarded unchanged to <c>CurrentPlayer.Options</c>.</param>
/// <returns>
/// A new list with the matching options, in the order the player reported them.
/// The list is empty when the node carries no game state.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="currentNode"/> is null.</exception>
public List<PlayerTask> AvailableOptions(UCTNode currentNode, List<PlayerTaskType> availableOptionTypes, bool playCards = true)
{
    // The previous null branch tried to print the null node itself, which could
    // only ever raise a NullReferenceException. Fail with a named argument instead.
    if (currentNode == null)
    {
        throw new ArgumentNullException(nameof(currentNode));
    }

    // A node without a game state has nothing to offer: record where it sits in
    // the tree and report "no options" rather than dereferencing the null game.
    if (currentNode.currentPOGame == null)
    {
        Log.Instance().Append(currentNode.FullPathPrint());
        Log.Instance().Append(currentNode.FullPrint());
        return new List<PlayerTask>();
    }

    List<PlayerTask> options = currentNode.currentPOGame.CurrentPlayer.Options(playCards);
    return options.FindAll(option => availableOptionTypes.Contains(option.PlayerTaskType));
}
/// <summary>
/// Runs repeated playouts from <paramref name="currentNode"/> within the configured
/// limits, then asks <c>GetBestOption</c> for the option to play from <c>root</c>.
/// </summary>
/// <param name="currentNode">Node every playout is started from.</param>
/// <param name="accumulativeMillisecond">
/// Unix-millisecond timestamp that the per-turn budget (<c>maxTurnCalculateTime</c>) is measured against.
/// </param>
/// <returns>
/// The first option of root's current player when root's turn has ended or its game is over;
/// otherwise the result of <c>GetBestOption</c> on <c>root</c>.
/// </returns>
public PlayerTask Simulate(UCTNode currentNode, long accumulativeMillisecond)
{
    // Nothing to search in either case: hand back the first listed option.
    if (root.IsCurrentPlayerTurnEnd() || root.IsGameOver())
    {
        return root.currentPOGame.CurrentPlayer.Options()[0];
    }

    int playouts = 0;
    long searchStart = DateTimeOffset.Now.ToUnixTimeMilliseconds();

    if (_debug_)
    {
        Log.Instance().Append(currentNode.FullPrint());
    }

    // Three independent limits: playout count, per-option time, per-turn time.
    while (playouts < maxOnePlayOutTimes
           && DateTimeOffset.Now.ToUnixTimeMilliseconds() - searchStart < this.maxOptionCalculateTime
           && DateTimeOffset.Now.ToUnixTimeMilliseconds() - accumulativeMillisecond < this.maxTurnCalculateTime)
    {
        UCTNode leaf = OnePlayOut(currentNode, availableOptionTypes);
        if (_debug_)
        {
            Log.Instance().Append(leaf.FullPathPrint());
            Log.Instance().Append(leaf.FullPrint());
        }

        double leafReward = EvaluateUCTNode(leaf, maxTurnCalculateTime);
        if (_debug_)
        {
            Log.Instance().Append("reward : " + leafReward.ToString());
        }

        UpdateReward(leaf, leafReward);
        playouts++;
    }

    double rootReward = EvaluateUCTNode(root, maxTurnCalculateTime);
    PlayerTask chosen = GetBestOption(root, rootReward);

    // Keep the highest playout count reached by any call so far.
    if (playouts > maxRealOnePlayOutTimes)
    {
        maxRealOnePlayOutTimes = playouts;
    }

    Debug.Assert(chosen != null);
    return chosen;
}
/// <summary>
/// Walks down the tree from <paramref name="startNode"/> by picking a random available
/// option at each step, creating child nodes for options that have not been tried yet.
/// </summary>
/// <param name="startNode">Node the random walk starts from.</param>
/// <param name="availableOptionTypes">Task types the walk may choose from (see <c>AvailableOptions</c>).</param>
/// <param name="step">Number of steps already taken; incremented once per expanded level.</param>
/// <param name="depth">Step count at which the walk normally stops.</param>
/// <returns>
/// The last node reached. If simulating an option yields no game state, the walk stops
/// and the node it was expanding from is returned.
/// </returns>
public UCTNode MonteCarloSearch(UCTNode startNode, List<PlayerTaskType> availableOptionTypes, int step, int depth)
{
    UCTNode currentNode = startNode;
    List<PlayerTask> optionsList = AvailableOptions(currentNode, availableOptionTypes);

    // The original condition was `a && b && c && d || e`. Because && binds tighter
    // than ||, a missing end-turn option forced another iteration even with an empty
    // option list (then optionsList[0] throws) or after a failed simulation.
    // Here the missing end-turn option only lifts the depth limit; an empty option
    // list and a finished game always stop the walk.
    // NOTE(review): this grouping is the presumed intent — confirm against the design.
    while (optionsList.Count > 0
           && !currentNode.IsGameOver()
           && (step < depth || GetEndTurnOption(currentNode) == null))
    {
        int rndOptionNo = rnd.Next(optionsList.Count);
        PlayerTask rndOption = optionsList[rndOptionNo];
        UCTNode nextNode = currentNode.GetChild(rndOptionNo);

        // Child for this option index does not exist yet: simulate and attach it.
        if (nextNode == null)
        {
            POGame.POGame simulatedPOGame = SimulateOption(currentNode.currentPOGame, rndOption);

            // SimulateOption sometimes returns null. Do not attach a child without a
            // game state (later lookups of its options would dereference null);
            // report the position and stop at the last valid node.
            if (simulatedPOGame == null)
            {
                Console.WriteLine(currentNode.FullPathPrint());
                Console.WriteLine(currentNode.FullPrint());
                break;
            }

            nextNode = currentNode.AddChild(currentNode, rndOption, rndOptionNo, simulatedPOGame);
        }

        currentNode = nextNode;
        step++;

        // Refresh the candidate options for the node we just moved to.
        optionsList = AvailableOptions(currentNode, availableOptionTypes);
    }

    return currentNode;
}
/// <summary>
/// Runs opponent playouts from <paramref name="currentNode"/> until the playout cap or
/// the per-turn time budget is reached, then returns the result of
/// <c>GetWorstChildUCTNode</c> for that node.
/// </summary>
/// <param name="currentNode">Node the opponent playouts start from.</param>
/// <param name="accumulativeMillisecond">
/// Unix-millisecond timestamp that the per-turn budget (<c>maxTurnCalculateTime</c>) is measured against.
/// </param>
/// <returns>Whatever <c>GetWorstChildUCTNode</c> selects for <paramref name="currentNode"/>.</returns>
public UCTNode SimulateOppenont(UCTNode currentNode, long accumulativeMillisecond)
{
    if (_debug_)
    {
        Log.Instance().Append("simulateOppenont start");
    }

    for (int playouts = 0;
         playouts < maxOpOnePlayOutTimes
         && DateTimeOffset.Now.ToUnixTimeMilliseconds() - accumulativeMillisecond < maxTurnCalculateTime;
         playouts++)
    {
        UCTNode leaf = OnePlayOut(currentNode, availableOpOptionTypes);
        if (_debug_)
        {
            Log.Instance().Append(leaf.FullPathPrint());
            Log.Instance().Append(leaf.FullPrint());
        }

        // Leaves are scored with the opponent evaluation and the score is passed to UpdateReward.
        double leafReward = EvaluateGameCurrentOpponent(leaf.currentPOGame);
        if (_debug_)
        {
            Log.Instance().Append("reward" + leafReward.ToString());
        }

        UpdateReward(leaf, leafReward);
    }

    if (_debug_)
    {
        Log.Instance().Append("simulateOppenont end");
    }

    return GetWorstChildUCTNode(currentNode);
}