/// <summary>
/// Descends from <paramref name="root"/> until it reaches a node that has no children,
/// at every level moving to the child with the highest selection score.
/// </summary>
/// <param name="root">Node at which the descent starts.</param>
/// <returns>The first node on the chosen path whose <c>Children</c> collection is empty.</returns>
/// <remarks>
/// A child's score is its <c>AverageValue</c> plus the result of <c>CalculateUCT</c>, fed with the
/// parent's network move rating at the child's <c>NetworkChildIndex</c>.
/// </remarks>
private MCTSAZNode Select(MCTSAZNode root)
{
    var node = root;

    // A node without children has not been expanded yet, so it is the leaf we are looking for.
    while (node.Children.Count != 0)
    {
        // Random scaling is applied only while standing on a node that has no parent.
        // NOTE(review): the original comment described this as a "small random number to break
        // ties randomly in unexpanded nodes", but the code multiplies the whole score by a value
        // in [0, 1) and does so only for the parent-less node - confirm this is intended.
        bool scaleRandomly = node.Parent == null;

        MCTSAZNode selected = null;
        double selectedScore = double.MinValue;

        for (var childIndex = 0; childIndex < node.Children.Count; childIndex++)
        {
            var candidate = node.Children[childIndex];
            var prior = node.NetworkResult.MoveRating[candidate.NetworkChildIndex];

            double score = candidate.AverageValue + candidate.CalculateUCT(_random, node, prior);
            if (scaleRandomly)
            {
                score = score * _random.NextDouble();
            }

            // Strict comparison: on equal scores the earlier child is kept.
            if (score > selectedScore)
            {
                selectedScore = score;
                selected = candidate;
            }
        }

        node = selected;
    }

    return node;
}
/// <summary>
/// Computes the exploration term used when ranking this node against its siblings:
/// <c>ExplorationParameter * (priorProbability + Epsilon) * sqrt(parent.VisitCount + 1) / (1 + VisitCount)</c>.
/// </summary>
/// <param name="random">Not read by this method.</param>
/// <param name="parent">Parent node; only its <c>VisitCount</c> is read.</param>
/// <param name="priorProbability">Prior for the move leading to this node.</param>
/// <returns>The exploration term as a double.</returns>
public double CalculateUCT(Random random, MCTSAZNode parent, float priorProbability)
{
    // References:
    //   https://web.stanford.edu/~surag/posts/alphazero.html
    //   Page 8 https://www.nature.com/articles/nature24270.epdf?author_access_token=VJXbVjaSHxFoctQQ4p2k4tRgN0jAjWel9jnR3ZoTv0PVW4gB86EEpGqTRDtpIz-2rmo8-KG06gqVobU5NSCFeHILHcVFUeMsbvwS-lxjqQGg98faovwjxeTUgZAUMnRQ

    // Epsilon is added to the prior before it is weighted by the exploration constant.
    double weightedPrior = ExplorationParameter * (priorProbability + Epsilon);

    // Both visit counts are offset by one before use.
    double parentVisitTerm = Math.Sqrt(parent.VisitCount + 1);
    int ownVisitTerm = 1 + VisitCount;

    return weightedPrior * parentVisitTerm / ownVisitTerm;
}
/// <summary>
/// Clears this node's children, statistics and references, and sets
/// <c>NetworkChildIndex</c> to -1.
/// </summary>
/// <remarks>
/// Presumably used so that a node instance can be reused - verify against callers.
/// </remarks>
public void Reset()
{
    // Drop the subtree hanging off this node.
    Children.Clear();

    // Zero the search statistics.
    Value = 0;
    VisitCount = 0;
    AverageValue = 0;

    // Release references to other objects.
    Parent = null;
    NetworkResult = null;
    PieceToPurchase = null;

    // -1 presumably means "no network output slot assigned" - TODO confirm.
    NetworkChildIndex = -1;
}
/// <summary>
/// Returns the child of <paramref name="root"/> that has the largest <c>VisitCount</c>.
/// </summary>
/// <param name="root">Node whose children are compared; element 0 is read unconditionally, so it must have at least one child.</param>
/// <returns>The most visited child. When several children share the highest count, the one with the lowest index is returned.</returns>
internal MCTSAZNode FindBestChild(MCTSAZNode root)
{
    var children = root.Children;

    // Start with the first child as the provisional winner.
    var mostVisited = children[0];
    int highestVisits = mostVisited.VisitCount;

    for (var i = 1; i < children.Count; i++)
    {
        var candidate = children[i];
        int visits = candidate.VisitCount;

        // Only a strictly larger count replaces the current winner.
        // TODO: Handle draws
        if (visits <= highestVisits)
        {
            continue;
        }

        mostVisited = candidate;
        highestVisits = visits;
    }

    return mostVisited;
}
/// <summary>
/// Expands <paramref name="root"/> by calling the node's own <c>Expand</c> with this instance's <c>_client</c>.
/// </summary>
/// <param name="root">Node to expand.</param>
private void Expand(MCTSAZNode root) => root.Expand(_client);