예제 #1
0
        /*
         * Uses the UCB for Trees algorithm to select the child of a decision
         * node that should be explored next.
         *
         * If any children are unexplored (VisitCount == 0), one of them is
         * picked through RandomProvider.Select (intended to be uniformly at
         * random). Otherwise, the visited child with the maximal CalculateUCB
         * score is chosen.
         *
         * Returns null only when the node has no children at all.
         */
        private ChanceNode SelectChildToVisit(DecisionNode node)
        {
            double     maxUCB            = double.NegativeInfinity;
            ChanceNode bestChild         = null;
            var        unvisitedChildren = new List<ChanceNode>();

            foreach (var child in node.Children.Values)
            {
                if (child.VisitCount == 0)
                {
                    unvisitedChildren.Add(child);
                    continue;
                }

                double ucb = CalculateUCB(node, child);

                // Every comparison against NaN is false, so a plain "ucb > maxUCB"
                // test could leave bestChild null even though visited children
                // exist. Always accept the first visited child, and let a real
                // score replace a NaN incumbent.
                bool isBetter = bestChild == null
                                || ucb > maxUCB
                                || (double.IsNaN(maxUCB) && !double.IsNaN(ucb));

                if (isBetter)
                {
                    maxUCB    = ucb;
                    bestChild = child;
                }
            }

            // Unexplored children take priority over any scored child.
            if (unvisitedChildren.Count > 0)
            {
                return RandomProvider.Select(unvisitedChildren);
            }

            return bestChild;
        }
예제 #2
0
        /*
         * Computes the upper confidence bound of a child:
         *
         *     child.AverageValue
         *         + EXPLORATION_RATE * sqrt(ln(parent.VisitCount) / child.VisitCount)
         *
         * The ratio under the square root is clamped at zero, so a parent visit
         * count below one (whose logarithm is negative or -Infinity) yields a
         * zero exploration bonus instead of NaN.
         *
         * NOTE(review): child.VisitCount is assumed to be positive here; the
         * visible caller only scores children whose VisitCount is non-zero.
         */
        private double CalculateUCB(DecisionNode parent, ChanceNode child)
        {
            double exploitation     = child.AverageValue;
            double explorationRatio = Math.Log(parent.VisitCount) / child.VisitCount;

            // Math.Sqrt returns NaN for negative input; never let that leak
            // into the score that child selection compares.
            explorationRatio = Math.Max(0.0, explorationRatio);

            return exploitation + EXPLORATION_RATE * Math.Sqrt(explorationRatio);
        }
예제 #3
0
        /*
         * Evaluates a chance node: asks the node to generate a child and
         * returns the result of running SampleSearchTree on that child.
         *
         * NOTE(review): GenerateChild presumably samples one outcome of the
         * chance event - confirm against ChanceNode.
         */
        private double GetChanceNodeValue(ChanceNode node)
        {
            return SampleSearchTree(node.GenerateChild());
        }