/*
 * Picks which child of a decision node to explore next, following the
 * UCB for Trees rule.
 *
 * While at least one child has a visit count of zero, the choice is
 * delegated to RandomProvider.Select over those unvisited children
 * (intended to be a uniformly random pick). Once every child has been
 * visited, the child with the largest UCB score is returned; on ties the
 * first one encountered is kept.
 *
 * Returns null when no child qualifies, e.g. when the node has no
 * children at all.
 */
private ChanceNode SelectChildToVisit(DecisionNode node)
{
    var unexplored = new List<ChanceNode>();
    ChanceNode leader = null;
    double leaderScore = double.MinValue;

    foreach (var candidate in node.Children.Values)
    {
        if (candidate.VisitCount == 0)
        {
            unexplored.Add(candidate);
            continue;
        }

        // Only visited children compete on UCB score.
        double score = CalculateUCB(node, candidate);
        if (score > leaderScore)
        {
            leaderScore = score;
            leader = candidate;
        }
    }

    // Unvisited children always take priority over the UCB leader.
    if (unexplored.Count > 0)
    {
        return RandomProvider.Select(unexplored);
    }

    return leader;
}
/*
 * Computes the UCB score of a child relative to its parent:
 *
 *   child.AverageValue
 *     + EXPLORATION_RATE * sqrt(ln(parent.VisitCount) / child.VisitCount)
 *
 * The first term rewards children that have scored well so far; the
 * second grows for children visited rarely compared to the parent.
 */
private double CalculateUCB(DecisionNode parent, ChanceNode child)
{
    double exploitation = child.AverageValue;
    double visitRatio = Math.Log(parent.VisitCount) / child.VisitCount;
    double exploration = EXPLORATION_RATE * Math.Sqrt(visitRatio);

    return exploitation + exploration;
}
/*
 * Estimates the value of a chance node by generating one child from it
 * and returning whatever SampleSearchTree yields for that child.
 */
private double GetChanceNodeValue(ChanceNode node)
{
    return SampleSearchTree(node.GenerateChild());
}