// MCTS backpropagation phase: walks from the explored node back up to the
// root, incrementing visit counts and adjusting scores according to which
// player won the simulation.
private void Backpropogation(MonteCarloNode _nodeToExplore, int _winningPlayerNo)
{
    // Ascend the tree one parent at a time until we step past the root.
    for (MonteCarloNode _current = _nodeToExplore; _current != null; _current = _current.GetParent())
    {
        MonteCarloState _state = _current.GetState();
        // Every node along the path to the root records a visit.
        _state.incrementVisit();

        // Reward enemy wins, penalise player wins; any other outcome leaves
        // the score untouched.
        if (_winningPlayerNo == MonteCarloBoard._enemyVal)
        {
            _state.addScore(10);
        }
        else if (_winningPlayerNo == MonteCarloBoard._playerVal)
        {
            _state.addScore(-10);
        }
    }
}
// Returns the child of the referenced node with the highest UCT value.
public static MonteCarloNode findBestUCTNode(MonteCarloNode _node)
{
    // Parent visit count feeds the exploration term of each child's UCT value.
    int _parentVisit = _node.GetState().GetVisits();
    int _childIndex = 0;
    double _bestUctValue = double.NegativeInfinity;

    // Evaluate every child and remember the one with the maximal UCT value.
    for (int i = 0; i < _node.GetChildren().Count; i++)
    {
        double _uctValue = UCTValue(_parentVisit,
            _node.GetChildren()[i].GetState().GetScore(),
            _node.GetChildren()[i].GetState().GetVisits());
        // BUG FIX: compare against the best value seen so far, not the value
        // of the immediately preceding child — the old code tracked only the
        // previous child's UCT value, so it could return a child that merely
        // beat its neighbour rather than the true maximum.
        if (_uctValue > _bestUctValue)
        {
            _childIndex = i;
            _bestUctValue = _uctValue;
        }
    }

    // Return the child that produced the best UCT value.
    return(_node.GetChildren()[_childIndex]);
}
// Runs one full Monte Carlo Tree Search iteration from the given root:
// selection -> expansion -> simulation -> backpropagation.
public void MCTSCycle(MonteCarloNode _rootNode)
{
    // Phase 1 - Selection: descend via UCT to the most promising leaf.
    MonteCarloNode _selectionNode = Selection(_rootNode);

    // Phase 2 - Expansion: only positions still in progress get expanded.
    bool _gameStillOpen =
        _selectionNode.GetState().GetBoard().CheckStatus() == MonteCarloBoard._inProgress;
    if (_gameStillOpen)
    {
        Expansion(_selectionNode);
    }

    // Phase 3 - Simulation: play out from a random child when one exists,
    // otherwise from the selected node itself.
    MonteCarloNode _nodeToExplore = _selectionNode.GetChildren().Count > 0
        ? _selectionNode.GetRandomChild()
        : _selectionNode;
    int _simulationWinner = Simulation(_nodeToExplore);

    // Phase 4 - Update: propagate the playout result back up to the root.
    Backpropogation(_nodeToExplore, _simulationWinner);
}
// Monte Carlo simulation: plays random moves on the live board until no
// legal moves remain, then scores the final position
// (+1 white ahead, -1 black ahead, 0 draw).
public int Simulate(MonteCarloNode node)
{
    Board.isSimulating = true;
    Board.Instance.CheckAllowedMoves();
    while (Board.Instance.availableMoves.Count != 0)
    {
        // Randomly choose a move to play.
        List <Vector2> potentialMoves = Board.Instance.availableMoves;
        // BUG FIX: the int overload of Random.Range excludes the max bound,
        // so passing Count - 1 meant the last available move could never be
        // selected; pass Count to make every move reachable.
        int i = UnityEngine.Random.Range(0, potentialMoves.Count);
        Board.Instance.StartPlay(new int[2] { (int)potentialMoves[i].x, (int)potentialMoves[i].y });
        Board.Instance.CheckAllowedMoves();
    }
    // Reset the board every time a simulation finishes.
    // NOTE(review): the piece counts are read *after* ResetBoard — confirm
    // the reset preserves WhiteCount/BlackCount, otherwise the result below
    // reflects the reset board rather than the finished game.
    Board.Instance.ResetBoard();
    if (Board.Instance.WhiteCount == Board.Instance.BlackCount)
    {
        return(0);
    }
    return(Board.Instance.WhiteCount > Board.Instance.BlackCount ? 1 : -1);
}
// Copy constructor: duplicates another node's bookkeeping fields.
// NOTE(review): this is a shallow copy — parent and children still reference
// the source node's objects; confirm callers expect that sharing.
public MonteCarloNode(MonteCarloNode node)
{
    pos = node.pos;
    score = node.score;
    visitTimes = node.visitTimes;
    parent = node.parent;
    children = node.children;
}
// Creates a fresh child node for the given move, attached to the supplied
// parent node.
public MonteCarloNode(MonteCarloNode Parent, Vector2 vec)
{
    parent = Parent;
    pos = vec;
    score = 0;
    visitTimes = 0;
    children = new List <MonteCarloNode>();
    // Refreshes the board's legal-move cache as a side effect of creating
    // the node. NOTE(review): confirm this global call is intentional here.
    Board.Instance.CheckAllowedMoves();
}
// Tree policy: while untried moves remain, expand the node; otherwise
// descend to its best child.
public MonteCarloNode TreePolicy(MonteCarloNode root)
{
    MonteCarloNode v = root;
    // The original loop body returned unconditionally on its first pass, so
    // the "while" was effectively a single "if": expand when moves remain.
    if (moves.Count != 0)
    {
        return(v.Expand(ref moves));
    }
    return(BestChild(v));
}
// Copy constructor: clones scores, counters and containers from another
// node. The child/move lists and board are copied into fresh objects, while
// the parent reference and AI manager are shared with the source node.
public MonteCarloNode(MonteCarloNode n)
{
    ai = n.ai;
    parent = n.parent;
    point = n.point;
    score = n.score;
    timesVisited = n.timesVisited;
    board = new BoardState(n.board);
    children = new List <MonteCarloNode>(n.children);
    availableMoves = new List <MonteCarloNode>(n.availableMoves);
}
// Root-node constructor: wraps a copy of the given board state and seeds the
// untried-move list from the board's currently available moves.
public MonteCarloNode(BoardState b, AIManager AI)
{
    ai = AI;
    parent = null;
    score = 0;
    timesVisited = 0;
    children = new List <MonteCarloNode>();
    availableMoves = new List <MonteCarloNode>();
    board = new BoardState(b);
    AddAvailableMoves(board.availableMoves.Keys.ToList());
}
// Pops the first untried move, promotes it to a child of this node and
// returns it. Returns null (after logging) when no untried moves remain.
public MonteCarloNode Expand()
{
    // Guard clause: nothing left to expand is an unexpected state here.
    if (availableMoves.Count == 0)
    {
        Debug.Log("really really big problems");
        return(null);
    }
    MonteCarloNode ret = availableMoves[0];
    AddChild(ret);
    availableMoves.Remove(ret);
    return(ret);
}
// Expands this node by turning one pending move into a child node and
// removing it from the pending list. Always takes the first pending move
// (a random pick was previously tried and disabled).
public MonteCarloNode Expand(ref List <Vector2> nextMoves)
{
    if (nextMoves.Count == 0)
    {
        return(null);
    }
    Vector2 chosenMove = nextMoves[0];
    MonteCarloNode child = new MonteCarloNode(this, chosenMove);
    nextMoves.RemoveAt(0);
    children.Add(child);
    return(child);
}
// Generates the AI's move: runs a fixed budget of MCTS iterations from a
// fresh root, then returns the position of the child with the best average
// score. Returns a default (zero) Vector2 when no child was ever visited.
public Vector2 AImove()
{
    Vector2 move = new Vector2();
    MonteCarloNode rootNode = new MonteCarloNode();
    Board.Instance.CheckAllowedMoves();
    foreach (var m in Board.Instance.availableMoves)
    {
        moves.Add(m);
    }

    // Run the fixed budget of MCTS iterations: select/expand, simulate,
    // then back the result up through the tree.
    for (int i = 0; i < expansion; i++)
    {
        MonteCarloNode next = TreePolicy(rootNode);
        int a = Simulate(next);
        next.BackUp(a);
    }

    // Pick the child with the highest average score.
    MonteCarloNode max = null;
    double maxValue = double.NegativeInfinity;
    foreach (MonteCarloNode node in rootNode.children)
    {
        // Unvisited children have no meaningful average; skip them.
        if (node.visitTimes == 0)
        {
            continue;
        }
        // Hoisted so the ratio is computed once per node instead of twice.
        double average = (double)node.score / (double)node.visitTimes;
        if (average > maxValue)
        {
            max = new MonteCarloNode(node);
            maxValue = average;
        }
    }

    // BUG FIX: when every child was unvisited, max stayed null and max.pos
    // threw a NullReferenceException; fall back to the default move instead.
    if (max != null)
    {
        move = max.pos;
    }
    return(move);
}
// Child-node constructor: copies the parent's board, applies the given move,
// flips the turn to the other side and regenerates the board's legal moves.
public MonteCarloNode(MonteCarloNode Parent, Point point)
{
    score = 0;
    timesVisited = 0;
    ai = Parent.ai;
    parent = Parent;
    children = new List <MonteCarloNode> ();
    // NOTE(review): the untried-move list is inherited from the parent rather
    // than rebuilt from the post-move board (see the commented-out call at
    // the bottom) — confirm this is intentional.
    availableMoves = new List <MonteCarloNode> (parent.availableMoves);
    this.point = point;
    // Work on a copy of the parent's board so the parent stays untouched.
    board = new BoardState(parent.board);
    board.ApplyMove(board.PlacePiece(point));
    // Hand the turn to whichever side did not just move.
    board.turn = board.turn == BoardState.GameTurn.Computer ? BoardState.GameTurn.Player : BoardState.GameTurn.Computer;
    board.GenerateAvailableMoves();
    //AddAvailableMoves(board.availableMoves.Keys.ToList());
}
// Tree policy: walks down the tree while the current board still has legal
// moves, expanding the first node that has untried moves, otherwise
// descending to the best child.
private MonteCarloNode TreePolicy(MonteCarloNode n)
{
    MonteCarloNode v = n;
    while (v.board.availableMoves.Count != 0)
    {
        // Refresh this node's untried-move list from its board.
        // NOTE(review): this runs on every pass through v — confirm
        // AddAvailableMoves deduplicates, otherwise moves may be re-added.
        v.AddAvailableMoves(v.board.availableMoves.Keys.ToList());
        if (v.availableMoves.Count != 0)
        {
            // Untried moves remain: expand one and search from there.
            return(v.Expand());
        }
        // Fully expanded: descend to the most promising child.
        // NOTE(review): BestChild() can return null when there are no
        // children — confirm that case is unreachable here.
        v = v.BestChild();
    }
    // Terminal position: no legal moves left on this board.
    return(v);
}
// MCTS selection phase: starting at the root, repeatedly follows the best
// UCT child until a node with no children is reached, and returns that leaf.
private MonteCarloNode Selection(MonteCarloNode _rootNode)
{
    MonteCarloNode _current = _rootNode;
    // Descend until we reach a leaf (a node with no children).
    while (_current.GetChildren().Count != 0)
    {
        _current = MonteCarloUCT.findBestUCTNode(_current);
    }
    return(_current);
}
// Default constructor: creates an empty, unvisited root node with no parent
// and a zero move position.
public MonteCarloNode()
{
    parent = null;
    score = 0;
    visitTimes = 0;
    pos = new Vector2();
    children = new List <MonteCarloNode>();
}
// Returns this node's child with the highest UCB1 value, or null when the
// node has no children.
// NOTE(review): a child with timesVisited == 0 divides by zero here; in
// double arithmetic that yields Infinity/NaN rather than throwing — confirm
// unvisited children are never present when this runs.
public MonteCarloNode BestChild()
{
    MonteCarloNode bestChild = null;
    double bestVal = double.MinValue;
    foreach (MonteCarloNode candidate in children)
    {
        // Exploitation (average score) plus the manager's exploration term.
        double exploitation = (double)candidate.score / (double)candidate.timesVisited;
        double utc = exploitation + ai.UCB1RHS(timesVisited, candidate.timesVisited);
        if (utc > bestVal)
        {
            bestVal = utc;
            bestChild = candidate;
        }
    }
    return(bestChild);
}
// Runs MCTS on the supplied board and returns the board of the best child
// found. The first call performs a fixed 1000-iteration learning phase;
// subsequent calls iterate until a real-time deadline expires.
public MonteCarloBoard findNextMove(MonteCarloBoard _board)
{
    // Deadline for the timed search (units are scaled realtime ticks).
    float _start = Time.realtimeSinceStartup * 100;
    float _end = _start + 5;

    // Point the tree's root at the current board position.
    MonteCarloNode _rootNode = _tree.getRoot();
    _rootNode.GetState().SetBoard(_board);

    if (!_tree.learningPhase)
    {
        // One-off learning phase: a fixed budget of MCTS iterations.
        for (int l = 0; l < 1000; l++)
        {
            MCTSCycle(_rootNode);
        }
        // Mark the learning phase done so it never repeats.
        _tree.learningPhase = true;
    }
    else
    {
        // Timed phase: keep iterating until the deadline passes.
        while (Time.realtimeSinceStartup * 100 < _end)
        {
            MCTSCycle(_rootNode);
        }
    }

    // Promote the strongest child to be the new root and return its board.
    MonteCarloNode _optimalNode = _rootNode.GetMaxChild();
    _tree.setRoot(_optimalNode);
    return(_optimalNode.GetState().GetBoard());
}
// MCTS expansion phase: creates one child node per legal follow-up state of
// the given node and links each child back to its parent.
private void Expansion(MonteCarloNode _nodeToExpand)
{
    foreach (MonteCarloState _legalState in _nodeToExpand.GetState().GetLegalStates())
    {
        MonteCarloNode _childNode = new MonteCarloNode(_legalState);
        _childNode.SetParent(_nodeToExpand);
        _nodeToExpand.GetChildren().Add(_childNode);
    }
}
// UCT (Upper Confidence bound applied to Trees): returns the child of the
// given parent with the highest UCT value, or null if it has no children.
// NOTE(review): a child with visitTimes == 0 divides by zero here; double
// arithmetic yields Infinity/NaN rather than throwing — confirm unvisited
// children cannot reach this point.
public MonteCarloNode BestChild(MonteCarloNode parent)
{
    MonteCarloNode bestChild = null;
    double bestVal = double.MinValue;
    foreach (MonteCarloNode candidate in parent.children)
    {
        // Exploitation (average score) plus the exploration term.
        double exploitation = (double)candidate.score / (double)candidate.visitTimes;
        double uct = exploitation + getRHS(parent.visitTimes, candidate.visitTimes);
        if (uct > bestVal)
        {
            bestVal = uct;
            bestChild = candidate;
        }
    }
    return(bestChild);
}
// Deep-copy constructor: clones the state and recursively clones every child
// subtree, while the parent pointer (when present) is shared with the source
// node rather than copied.
public MonteCarloNode(MonteCarloNode _newNode)
{
    this._state = new MonteCarloState(_newNode.GetState());
    this._children = new List <MonteCarloNode>();
    if (_newNode.GetParent() != null)
    {
        this._parent = _newNode.GetParent();
    }
    // Recursively deep-copy the entire subtree below the source node.
    foreach (MonteCarloNode _child in _newNode.GetChildren())
    {
        this._children.Add(new MonteCarloNode(_child));
    }
}
// Computes the computer's move via MCTS and returns the chosen point
// followed by the points that move affects, as looked up in the board's
// available-move table.
public List <Point> ComputerMoveMTCS()
{
    List <Point> computerMove = new List <Point> ();
    Point bestMove = new Point();
    MonteCarloNode rootNode = new MonteCarloNode(manager.board.state, this);

    // Fixed budget of MCTS iterations: select/expand, simulate, back up.
    for (int i = 0; i < numExpansions; i++)
    {
        MonteCarloNode n = TreePolicy(rootNode);
        n.Backup(Simulate(n));
    }

    // Choose the visited child with the highest average score.
    MonteCarloNode maxNode = null;
    double maxVal = double.NegativeInfinity;
    foreach (MonteCarloNode node in rootNode.children)
    {
        // Unvisited children have no meaningful average; skip them.
        if (node.timesVisited == 0)
        {
            continue;
        }
        double average = (double)node.score / (double)node.timesVisited;
        if (average > maxVal)
        {
            maxNode = new MonteCarloNode(node);
            maxVal = average;
        }
    }

    // BUG FIX: when no child was ever visited, maxNode stayed null and
    // maxNode.point threw; fall back to the first child if one exists.
    if (maxNode == null && rootNode.children.Count > 0)
    {
        maxNode = new MonteCarloNode(rootNode.children[0]);
    }
    bestMove = maxNode.point;

    // BUG FIX: TryGetValue overwrites the out parameter with null on a miss,
    // which made the Insert below throw; re-create the list in that case.
    if (!board.state.availableMoves.TryGetValue(bestMove, out computerMove) || computerMove == null)
    {
        computerMove = new List <Point> ();
    }
    // Have to add the move itself to the list of Points
    computerMove.Insert(0, bestMove);
    return(computerMove);
}
// MCTS simulation phase: plays random moves from a copy of the given node's
// position until the game ends, returning the final board-status value.
private int Simulation(MonteCarloNode _nodeToSimulate)
{
    // The simulation begins on the enemy's turn.
    int _turn = MonteCarloBoard._enemyVal;

    // Work on a copy so random play never mutates the real tree node.
    MonteCarloNode _tempNode = new MonteCarloNode(_nodeToSimulate);
    MonteCarloState _tempState = _tempNode.GetState();
    int _boardStatus = _tempState.GetBoard().CheckStatus();

    // An immediate player win makes this branch worthless to the enemy, so
    // the parent's score is poisoned to steer the search away from it.
    if (_boardStatus == MonteCarloBoard._playerVal)
    {
        // BUG FIX: the simulated node may be the root and have no parent;
        // guard against a null parent before poisoning its score.
        if (_tempNode.GetParent() != null)
        {
            _tempNode.GetParent().GetState().SetScore(int.MinValue);
        }
        return(_boardStatus);
    }

    // Alternate random moves until the game is decided.
    while (_boardStatus == MonteCarloBoard._inProgress)
    {
        // 3 - turn toggles between the two turn values each iteration.
        _turn = 3 - _turn;
        _tempState.randomPlay(_turn);
        _boardStatus = _tempState.GetBoard().CheckStatus();
    }
    return(_boardStatus);
}
// Plays a random game to completion from a copy of the given node's board
// and scores the result: +1 white win, -1 black win, 0 draw.
public int Simulate(MonteCarloNode node)
{
    // Simulate on a copy so the tree node's board stays untouched.
    BoardState board = new BoardState(node.board);
    UnityEngine.Random.seed = (int)Time.timeSinceLevelLoad;
    board.GenerateAvailableMoves();
    while (board.availableMoves.Count != 0)
    {
        // Pick a random legal move (the int overload's max is exclusive,
        // so passing Count covers every index).
        List <Point> moves = board.availableMoves.Keys.ToList();
        int i = UnityEngine.Random.Range(0, moves.Count);
        board.ApplyMove(board.PlacePiece(moves[i]));
        board.GenerateAvailableMoves();
    }
    if (board.WhiteCount > board.BlackCount)
    {
        return(1);
    }
    // BUG FIX: the original repeated the white-win test here, making this
    // branch unreachable — a black win fell through and scored as a draw (0)
    // instead of -1.
    else if (board.WhiteCount < board.BlackCount)
    {
        return(-1);
    }
    else
    {
        return(0);
    }
}
// Default constructor: starts the tree with a fresh root node and marks the
// learning phase as not yet performed.
public MonteCarloTree()
{
    learningPhase = false;
    _root = new MonteCarloNode();
}
// Re-parents this node to the supplied node.
public void SetParent(MonteCarloNode _newParent) => _parent = _newParent;
// Replaces the tree's root with the supplied node.
public void setRoot(MonteCarloNode _node) => _root = _node;
// Appends the given child to the given parent's child list.
public void addChild(MonteCarloNode _parent, MonteCarloNode _child) => _parent.GetChildren().Add(_child);
// Registers the given node as a child of this node.
public void AddChild(MonteCarloNode Child) => children.Add(Child);
// Constructs a tree rooted at the supplied node.
public MonteCarloTree(MonteCarloNode _node) => _root = _node;