Example #1
0
        /*
         * Computes the upper-confidence-bound score of a child relative to its
         * parent: the child's average value plus EXPLORATION_RATE times
         * sqrt(ln(parent visits) / child visits).
         *
         * No guard is applied for a zero visit count on either node; the caller
         * is expected to pass only children that have already been visited.
         */
        private double CalculateUCB(DecisionNode parent, ChanceNode child)
        {
            // Exploitation term: how well this child has performed so far.
            double exploitation = child.AverageValue;

            // Exploration term: larger for children visited rarely compared to the parent.
            double visitRatio  = Math.Log(parent.VisitCount) / child.VisitCount;
            double exploration = EXPLORATION_RATE * Math.Sqrt(visitRatio);

            return exploitation + exploration;
        }
Example #2
0
        /*
         * Chooses which child of a decision node to explore next (UCB for Trees).
         *
         * Children that have never been visited take priority: if there are any,
         * one of them is picked through RandomProvider.Select. Only when every
         * child has been visited is the child with the highest UCB score returned;
         * on equal scores the child enumerated first wins.
         *
         * Returns null when no visited child produced a score above
         * double.MinValue and there are no unvisited children (for example, when
         * the node has no children at all).
         */
        private ChanceNode SelectChildToVisit(DecisionNode node)
        {
            var        neverVisited = new List <ChanceNode>();
            ChanceNode topScorer    = null;
            double     topScore     = double.MinValue;

            foreach (var candidate in node.Children.Values)
            {
                if (candidate.VisitCount == 0)
                {
                    neverVisited.Add(candidate);
                    continue;
                }

                double score = CalculateUCB(node, candidate);
                if (score > topScore)
                {
                    // Strict comparison keeps the earliest child on ties.
                    topScorer = candidate;
                    topScore  = score;
                }
            }

            if (neverVisited.Count > 0)
            {
                return RandomProvider.Select(neverVisited);
            }

            return topScorer;
        }
Example #3
0
        /// <summary>
        /// Samples one outcome of this node: copies <c>State</c>, adds a random
        /// tile to the copy, and returns the child decision node registered under
        /// the cell of the added tile.
        /// </summary>
        /// <remarks>
        /// Children are keyed by the tile's cell only. If a child already exists
        /// for that cell it is returned as-is and the newly generated state is
        /// discarded; otherwise a new child wrapping the new state is created
        /// and stored.
        /// </remarks>
        /// <returns>the existing or newly created child node</returns>
        public DecisionNode GenerateChild()
        {
            var successor   = new GameState(State);
            var spawnedTile = successor.AddRandomTile();
            var cell        = spawnedTile.Cell;

            DecisionNode known;
            if (Children.TryGetValue(cell, out known))
            {
                return known;
            }

            var created = new DecisionNode(successor);
            Children.Add(cell, created);
            return created;
        }
Example #4
0
        /*
         * Simulates a game from the given node to its end using random moves.
         *
         * Works on a copy of the node's state, so the node itself is untouched.
         * Each step applies one action picked by RandomProvider.Select from the
         * currently legal actions, then adds a random tile. The simulation stops
         * as soon as no legal action remains, and the value of that final state
         * (per GetValueFor) is returned.
         */
        private double DoRollout(DecisionNode node)
        {
            var simulation = new GameState(node.State);

            while (true)
            {
                var moves = new List <Action>(simulation.GetLegalActions());
                if (moves.Count == 0)
                {
                    // No legal action left: the simulated game is over.
                    break;
                }

                simulation.ApplyAction(RandomProvider.Select(moves));
                simulation.AddRandomTile();
            }

            return GetValueFor(simulation);
        }
Example #5
0
        /// <summary>
        /// Runs a fresh tree search from the given state and reports, for each
        /// action found under the root, the value estimated for it.
        /// </summary>
        /// <remarks>
        /// <c>TreeRoot</c> is replaced by a new node for <paramref name="state"/>,
        /// then the tree is sampled repeatedly until
        /// <paramref name="searchLimit"/> reports that it is done. An action whose
        /// node was never visited is reported with a value of 0. The result is
        /// shuffled so that ties between equally valued actions are broken
        /// randomly rather than by enumeration order.
        /// </remarks>
        /// <param name="state">the game state to search from</param>
        /// <param name="searchLimit">decides when the search stops</param>
        /// <returns>one <c>ActionValue</c> per child of the root, in random order</returns>
        public IEnumerable <ActionValue> GetPolicies(GameState state, ISearchLimit searchLimit)
        {
            TreeRoot = new DecisionNode(state);

            for (; !searchLimit.Done();)
            {
                SampleSearchTree(TreeRoot);
            }

            var estimates = new List <ActionValue>();
            foreach (var entry in TreeRoot.Children)
            {
                var outcome = entry.Value;
                estimates.Add(new ActionValue()
                {
                    Action = entry.Key,
                    Value  = outcome.VisitCount > 0 ? outcome.AverageValue : 0
                });
            }

            // Randomize the order so ties are broken randomly.
            RandomProvider.Shuffle(estimates);

            return estimates;
        }
Example #6
0
 /*
  * Produces a value sample for a decision node.
  *
  * - A node other than TreeRoot that has not been visited yet is evaluated
  *   with a random rollout.
  * - Otherwise its children are expanded. If it has none, the value of the
  *   node's own state is returned; if it has some, one child is selected
  *   and the search continues through that child.
  */
 private double GetDecisionNodeValue(DecisionNode node)
 {
     bool isFreshNonRoot = node != TreeRoot && node.VisitCount == 0;
     if (isFreshNonRoot)
     {
         return DoRollout(node);
     }

     node.ExpandChildren();

     if (node.Children.Count == 0)
     {
         // No children after expansion: evaluate the state directly.
         return GetValueFor(node.State);
     }

     return SampleSearchTree(SelectChildToVisit(node));
 }