Exemplo n.º 1
0
        /// <summary>
        /// Grows the tree by a single child below the argument node, provided the node has been visited at least T times (the root is exempt from this threshold).
        /// The node's PositionGenerator is created through the context's expansion strategy when it is still undefined, and is advanced by one position on every call that gets past the threshold.
        /// </summary>
        /// <param name="context">The context of the search.</param>
        /// <param name="node">The node that is to be expanded.</param>
        /// <param name="state">The state to expand from; only used when the node does not have a PositionGenerator yet.</param>
        /// <returns>The newly created child node, or the argument node itself when it is a non-root node with fewer than T visits or when its PositionGenerator has no further positions.</returns>
        public TreeSearchNode <P, A> Expand(SearchContext <D, P, A, S, Sol> context, TreeSearchNode <P, A> node, P state)
        {
            // Non-root nodes have to reach the visit threshold before they may be expanded.
            var belowThreshold = node.Visits < T && !node.IsRoot();
            if (belowThreshold)
            {
                return node;
            }

            // Lazily attach a position generator to the node the first time it is expanded.
            var generator = node.PositionGenerator;
            if (generator == null)
            {
                generator              = context.Expansion.Expand(context, state);
                node.PositionGenerator = generator;
            }

            // The generator starts before its first item, so it must be advanced before Current is read.
            if (generator.MoveNext())
            {
                var expanded = new TreeSearchNode <P, A>(generator.Current);

                node.AddChild(expanded);
                expanded.Parent = node;
                return expanded;
            }

            // Nothing left to generate: the node itself is handed back.
            return node;
        }
 /// <inheritdoc />
 public double Score(TreeSearchNode <P, A> node)
 {
     // A node without a positive visit count has no average yet and scores 0;
     // otherwise the accumulated score is averaged over the number of visits.
     return node.Visits > 0 ? node.Score / node.Visits : 0;
 }
Exemplo n.º 3
0
        /// <summary>
        /// Perform the search. Note: should set the Solution in the SearchContext and update its Status.
        /// Every iteration clones the source state, tries to expand the root (falling back to selecting a node below the root when the expansion yields nothing new),
        /// applies the payload of the chosen node, runs a playout and back-propagates the resulting state.
        /// </summary>
        /// <param name="context">The context within which the search happens.</param>
        public override void Search(SearchContext <D, P, A, S, Sol> context)
        {
            var clone     = context.Cloner;
            var rootState = context.Source;
            var apply     = context.Application;

            // The deadline is tracked in UTC so that a daylight-saving or time-zone change during the search cannot stretch or cut the time budget.
            var endTime = DateTime.UtcNow.AddMilliseconds(Time);
            var it      = 0;

            // Setup for when we might be continuing a search from a specific node.
            var root = (TreeSearchNode <P, A>)context.StartNode;

            if (root == null)
            {
                root = new TreeSearchNode <P, A>(clone.Clone(rootState));
                context.StartNode = root;
            }

            // Keep iterating until either the time budget or the iteration budget (whichever is limited) runs out.
            while ((Time == Constants.NO_LIMIT_ON_THINKING_TIME || DateTime.UtcNow < endTime) &&
                   (Iterations == Constants.NO_LIMIT_ON_ITERATIONS || it < Iterations))
            {
                it++;

                // Every iteration starts from a fresh copy of the source state.
                var worldState = clone.Clone(rootState);
                var target     = root;

                // Check if we have to expand
                if (!target.IsFullyExpanded())
                {
                    target = ExpansionStrategy.Expand(context, root, worldState);
                }

                // Select a node when the expansion did not produce a new one
                if (target == null || target == root)
                {
                    target = SelectionStrategy.SelectNextNode(context, root);
                }

                // Apply action in selected node
                worldState = apply.Apply(context, worldState, target.Payload);
                // Simulate
                var endState = PlayoutStrategy.Playout(context, worldState);

                // Backpropagation
                BackPropagationStrategy.BackPropagate(context, EvaluationStrategy, target, endState);
            }

            var finalNode = FinalNodeSelectionStrategy.SelectFinalNode(context, root);

            // Publish the result; the spent budget is reported as the number of iterations performed.
            context.Solution    = SolutionStrategy.Solution(context, finalNode);
            context.BudgetSpent = it;
            context.Status      = SearchContext <D, P, A, S, Sol> .SearchStatus.Success;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Samples an action for the argument state: either explores by sampling a fresh action (which is played out, evaluated and recorded in the global Multi-Armed-Bandit),
        /// or exploits the arms that the global Multi-Armed-Bandit already holds for this state.
        /// </summary>
        /// <param name="context">The current search context.</param>
        /// <param name="state">The game state for the node.</param>
        /// <param name="gMAB">The global Multi-Armed-Bandit, keyed by state hash and then by action hash.</param>
        /// <returns>The <typeparamref name="A"/> that was sampled (when exploring) or selected from the global Multi-Armed-Bandit (when exploiting).</returns>
        private A NaïveSampling(SearchContext <D, P, A, S, Sol> context, P state, IDictionary <long, Dictionary <int, LocalArm> > gMAB)
        {
            var apply      = context.Application;
            var stateClone = context.Cloner.Clone(state);
            var stateHash  = stateClone.HashMethod();

            // Look up the arms known for this state, registering an empty collection the first time the state is seen.
            Dictionary <int, LocalArm> arms;
            if (!gMAB.TryGetValue(stateHash, out arms))
            {
                arms = new Dictionary <int, LocalArm>();
                gMAB.Add(stateHash, arms);
            }

            // A policy p_0 determines whether to explore or exploit.
            // Explore: x_1...x_n is sampled by using a policy p_l to select a value for each X_i in X independently;
            //          as a side effect, the resulting value combination is added to the global MAB.
            // Exploit: x_1...x_n is sampled by using a policy p_g to select a value combination using MAB_g.

            // Exploiting requires at least one known arm and the exploration policy declining to explore.
            if (!arms.IsNullOrEmpty() && !ExplorationStrategy.Policy(context, 0))
            {
                // Epsilon-greedy: the arm with the highest expected reward with probability 1-e, otherwise a random arm.
                if (_rng.NextDouble() <= 1 - PolicyGlobal)
                {
                    return arms.Values.OrderByDescending(arm => arm.ExpectedReward).First().Action;
                }

                return arms.RandomElementOrDefault().Value.Action;
            }

            // Explore: create an action according to policy p_l.
            var action     = SamplingStrategy.Sample(stateClone);
            var actionHash = action.GetHashCode();

            // Evaluate the sampled action by applying it, playing out, and scoring the end state against a temporary node carrying the action.
            var afterAction = apply.Apply(context, stateClone, action);
            var endState    = PlayoutStrategy.Playout(context, afterAction);
            var tempNode    = new TreeSearchNode <P, A> {
                Payload = action
            };
            var reward = EvaluationStrategy.Evaluate(context, tempNode, endState);

            // Record the reward in the global MAB, creating the arm when this action has not been seen for this state.
            LocalArm knownArm;
            if (arms.TryGetValue(actionHash, out knownArm))
            {
                knownArm.Visit(reward);
            }
            else
            {
                var freshArm = new LocalArm(action);
                freshArm.Visit(reward);
                arms.Add(actionHash, freshArm);
            }

            return action;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Perform the search. Note: should set the Solution in the SearchContext and update its Status.
        /// Each iteration picks a node through the naïve select-and-expand step, plays out from a copy of that node's state and back-propagates the result.
        /// </summary>
        /// <param name="context">The context within which the search happens.</param>
        public override void Search(SearchContext <D, P, A, S, Sol> context)
        {
            var cloner      = context.Cloner;
            var sourceState = context.Source;

            var endTime = DateTime.Now.AddMilliseconds(Time);
            var it      = 0;

            // Continue from the node stored in the context when there is one; otherwise start a new tree from a clone of the source state.
            var root = (TreeSearchNode <P, A>)context.StartNode;
            if (root == null)
            {
                root = new TreeSearchNode <P, A>(cloner.Clone(sourceState));
                context.StartNode = root;
            }

            // The global MAB holds the value combinations created during the naïve-sampling process.
            var gMAB = new Dictionary <long, Dictionary <int, LocalArm> >();

            while (true)
            {
                // Stop once a limited time budget has been used up ...
                if (Time != Constants.NO_LIMIT_ON_THINKING_TIME && DateTime.Now >= endTime)
                {
                    break;
                }
                // ... or once a limited iteration budget has been used up.
                if (Iterations != Constants.NO_LIMIT_ON_ITERATIONS && it >= Iterations)
                {
                    break;
                }

                it++;

                // The iteration counter is passed by reference because the select-and-expand step may increase it while descending.
                var selectedNode = NaïveSelectAndExpand(context, root, gMAB, endTime, ref it);

                // Remember the deepest level reached so far.
                var depth = selectedNode.CalculateDepth();
                if (depth > MaxDepth)
                {
                    MaxDepth = depth;
                }

                // Simulate from a copy of the selected node's state, then back-propagate the outcome.
                var playoutStart = (P)selectedNode.State.Copy();
                var endState     = PlayoutStrategy.Playout(context, playoutStart);
                BackPropagationStrategy.BackPropagate(context, EvaluationStrategy, selectedNode, endState);
            }

            var finalNode = FinalNodeSelectionStrategy.SelectFinalNode(context, root);

            // Publish the result; the spent budget is reported as the iteration count.
            context.Solution    = SolutionStrategy.Solution(context, finalNode);
            context.BudgetSpent = it;
            context.Status      = SearchContext <D, P, A, S, Sol> .SearchStatus.Success;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Descends the tree by repeatedly sampling an action with the NaïveSampling method, and expands the tree with the sampled action as soon as it is not already present among the children of the current node.
        /// </summary>
        /// <param name="context">The current search context.</param>
        /// <param name="node">The node from which to start descending and expanding the tree.</param>
        /// <param name="gMAB">The global Multi-Armed-Bandit collection.</param>
        /// <param name="endTime">When running on a time budget, this indicates when the search should stop.</param>
        /// <param name="it">The iteration count of the main search; increased by one for every level that is descended.</param>
        /// <returns>The <see cref="TreeSearchNode{P,A}"/> that represents the selected node for the Simulation phase.</returns>
        private TreeSearchNode <P, A> NaïveSelectAndExpand(SearchContext <D, P, A, S, Sol> context, TreeSearchNode <P, A> node, IDictionary <long, Dictionary <int, LocalArm> > gMAB, DateTime endTime, ref int it)
        {
            // Outline of one pass of the loop:
            //   a = NaïveSampling(current.state)
            //   if `a' leads to a child of `current'
            //        continue from that child (unless it is terminal or time has run out, then return it)
            //   else
            //        newNode = Apply(current.state, a)
            //        current.AddChild(newNode)
            //        return newNode
            var current = node;

            while (true)
            {
                // Find an action through the NaïveSampling process
                var action     = NaïveSampling(context, current.State, gMAB);
                var actionHash = action.GetHashCode();

                // Children are matched to the sampled action through the hash of their payload.
                var matchingChild = current.Children.FirstOrDefault(child => child.PayloadHash == actionHash);

                if (matchingChild == null)
                {
                    // The action is new for this node: apply it to a clone of the node's state and attach the result as a new child.
                    var childState  = context.Application.Apply(context, context.Cloner.Clone(current.State), action);
                    var createdNode = new TreeSearchNode <P, A>(current, childState, action);

                    current.AddChild(createdNode);
                    return createdNode;
                }

                // Do not descend any further past a terminal node, or when a limited time budget has been used up.
                if (matchingChild.State.IsTerminal() || (Time != Constants.NO_LIMIT_ON_THINKING_TIME && DateTime.Now >= endTime))
                {
                    return matchingChild;
                }

                // Increase the iteration count, since we'll be doing more sampling and simulating
                it++;
                current = matchingChild;
            }
        }
 /// <summary>
 /// Computes the UCB-score of the argument node from its own score and visits, the visits of its parent, and the constant C.
 /// </summary>
 /// <param name="node">The node to score; its Parent is dereferenced, so it needs to have one.</param>
 /// <returns>Double representing the node's UCB score.</returns>
 public double Score(TreeSearchNode <P, A> node)
 {
     var nodeScore    = node.Score;
     var nodeVisits   = node.Visits;
     var parentVisits = node.Parent.Visits;

     return Util.UCB(nodeScore, nodeVisits, parentVisits, C);
 }
Exemplo n.º 8
0
        /// <summary>
        /// Perform the search. Note: should set the Solution in the SearchContext and update its Status.
        /// Every iteration runs the four phases on a fresh clone of the source state: selection down the fully expanded part of the tree,
        /// expansion of the reached node, a playout (skipped when the goal is already reached), and back-propagation of the end state.
        /// </summary>
        /// <param name="context">The context within which the search happens.</param>
        public override void Search(SearchContext <D, P, A, S, Sol> context)
        {
            var clone     = context.Cloner;
            var rootState = context.Source;
            var apply     = context.Application;
            var goal      = context.Goal;

            // The deadline is tracked in UTC so that a daylight-saving or time-zone change during the search cannot stretch or cut the time budget.
            var endTime = DateTime.UtcNow.AddMilliseconds(Time);
            var it      = 0;

            // Setup for when we might be continuing a search from a specific node.
            var root = (TreeSearchNode <P, A>)context.StartNode;

            if (root == null)
            {
                root = new TreeSearchNode <P, A>(clone.Clone(rootState));
                context.StartNode = root;
            }

            // Keep iterating until either the time budget or the iteration budget (whichever is limited) runs out.
            while ((Time == Constants.NO_LIMIT_ON_THINKING_TIME || DateTime.UtcNow < endTime) && (Iterations == Constants.NO_LIMIT_ON_ITERATIONS || it < Iterations))
            {
                it++;

                var worldState = clone.Clone(rootState);

                // Selection: walk down while the goal is not reached and the current node is fully expanded,
                // applying the payload of every selected node to the working state. `done' keeps the last goal check for the phases below.
                bool done;
                var  target = root;
                while (!(done = goal.Done(context, worldState)) && target.IsFullyExpanded())
                {
                    target     = SelectionStrategy.SelectNextNode(context, target);
                    worldState = apply.Apply(context, worldState, target.Payload);
                }

                // Expansion
                var endState = worldState;
                if (!done)
                {
                    // The expansion strategy hands back the same node when it did not create a child; only a new node has a payload that still needs applying.
                    var result = ExpansionStrategy.Expand(context, target, endState);
                    if (result != target)
                    {
                        endState = apply.Apply(context, endState, result.Payload);
                        target   = result;
                    }

                    // Simulation
                    endState = PlayoutStrategy.Playout(context, endState);
                }

                // Keep track of the maximum depth we reach
                var nodeDepth = target.CalculateDepth();
                if (nodeDepth > MaxDepth)
                {
                    MaxDepth = nodeDepth;
                }

                // Backpropagation
                BackPropagationStrategy.BackPropagate(context, EvaluationStrategy, target, endState);
            }

            var finalNode = FinalNodeSelectionStrategy.SelectFinalNode(context, root);

            // Publish the result; the spent budget is reported as the number of iterations performed.
            context.Solution    = SolutionStrategy.Solution(context, finalNode);
            context.BudgetSpent = it;
            context.Status      = SearchContext <D, P, A, S, Sol> .SearchStatus.Success;
        }
        /// <summary>
        /// Returns the value of the argument state with respect to the argument node.
        /// When <c>UseHeuristicBotEvaluation</c> is set, the HeuristicAgent's transition score (from the context's Source state to the argument state) is used, normalised to [-1, 1].
        /// Otherwise the state is scored from the perspective of the player that is active in the context's Source state:
        /// 1 / -1 for a decided game or for lethal damage on the board, a fixed value from a health-versus-board-size table while neither player is close to dying,
        /// and otherwise 0.5, 0 or -0.5 depending on which player is estimated to die first.
        /// </summary>
        /// <param name="context">The context of the search.</param>
        /// <param name="node">The node that provides the context to evaluate the state. It is not read by this implementation.</param>
        /// <param name="state">The state that should be evaluated.</param>
        /// <returns>Double representing the value of the state with respect to the node.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown as a safeguard when the estimated turns-to-death values fall outside every handled range.</exception>
        public double Evaluate(SearchContext <List <SabberStoneAction>, SabberStoneState, SabberStoneAction, object, SabberStoneAction> context, TreeSearchNode <SabberStoneState, SabberStoneAction> node, SabberStoneState state)
        {
            // Check if we can and want to use the HeuristicBot's evaluation
            if (UseHeuristicBotEvaluation)
            {
                // This scoring function is actually used to score the effect of tasks, but we are using it here to score the effect of the transition from our Source state to the state from which we are currently evaluating.
                // TODO using the HeuristicBot's evaluation function could be improved
                var heuristicEvaluation = HeuristicAgent.EvaluateStateTransition(context.Source, state);
                // Colour the evaluation depending on who the active player is in the state
                var isRootPlayer = state.CurrentPlayer() == context.Source.CurrentPlayer();
                heuristicEvaluation = isRootPlayer ? heuristicEvaluation : heuristicEvaluation * -1;
                // Normalise the value between -1 and 1. The min and max values have been empirically set and equal the min and max possible evaluations that are returned by the HeuristicBot's function.
                var norm = 2 * Util.Normalise(heuristicEvaluation, -50, 50) - 1; // Note: this is a transformation from [0,1] to [-1,1]
                return(norm);
            }

            // The root player is the player that is active in the Source state of the search.
            var rootPlayerId = context.Source.CurrentPlayer();
            var rootPlayer   = state.Player1.Id == rootPlayerId ? state.Player1 : state.Player2;
            var opponent     = rootPlayer.Opponent;

            // Check for a win/loss
            if (state.PlayerWon != State.DRAW)
            {
                return(state.PlayerWon == rootPlayerId ? 1 : -1);
            }

            // Gather stats that we need
            // TODO gather stats from cards in hand
            // Note: the hand-related stats below are placeholders fixed at 0 until that TODO is addressed.

            // Opponent HP
            var oH = opponent.Hero.Health;
            // Opponent's Taunt Minions HP
            var opponentMinions = opponent.BoardZone.GetAll();
            var oMtH            = opponentMinions.Where(i => i.HasTaunt).Sum(j => j.Health);
            // Opponent's Unknown HP in Hand
            var oUHh = 0;
            // Opponent's Unknown Direct Damage in Hand
            var oDdh = 0;
            // Opponent's Minion Power
            var oMP = opponentMinions.Where(i => i.CanAttack).Sum(j => j.AttackDamage);
            // Opponent's Unknown Minion Power from Hand
            var oUMPh = 0;
            // Opponent's Weapon Damage
            var oWD = opponent.Hero.Weapon?.AttackDamage ?? 0;
            // Opponent's Fatigue Damage
            var oFD = opponent.DeckZone.IsEmpty ? opponent.Hero.Fatigue + 1 : 0;

            // Root Player HP
            var rH = rootPlayer.Hero.Health;
            // Root Player's Taunt Minions HP
            var rootPlayerMinions = rootPlayer.BoardZone.GetAll();
            var rMtH = rootPlayerMinions.Where(i => i.HasTaunt).Sum(j => j.Health);
            // Root Player's HP in Hand
            var rHh = 0;
            // Root Player's Direct Damage in Hand
            var rDdh = 0;
            // Root Player's Minion Power
            var rMP = rootPlayerMinions.Where(i => i.CanAttack).Sum(j => j.AttackDamage);
            // Root Player's Minion Power from Hand
            var rMPh = 0;
            // Root Player's Weapon Damage
            var rWD = rootPlayer.Hero.Weapon?.AttackDamage ?? 0;
            // Root Player's Fatigue Damage
            var rFD = rootPlayer.DeckZone.IsEmpty ? rootPlayer.Hero.Fatigue + 1 : 0;

            // Calculate the approximate turns before the opponent dies (int.MaxValue stands in for "never" when no damage is available)
            var opponentHealth   = oH + oMtH + oUHh - oFD - rDdh;
            var rootPlayerDamage = rMP + rMPh + rWD;
            var oTD = rootPlayerDamage > 0 ? opponentHealth / (rootPlayerDamage * 1.0) : int.MaxValue;
            // Calculate the approximate turns before the root player dies
            var rootPlayerHealth = rH + rMtH + rHh - rFD - oDdh;
            var opponentDamage   = oMP + oUMPh + oWD;
            var rTD = opponentDamage > 0 ? rootPlayerHealth / (opponentDamage * 1.0) : int.MaxValue;

            // Check some situations
            var canKillOpponentThisTurn       = (int)Math.Ceiling(oTD) == 1;
            var canBeKilledByOpponentThisTurn = (int)Math.Ceiling(rTD) == 1;
            var notARaceSituation             = oTD >= 4 && rTD >= 4;

            // If the root player can kill the opponent, evaluation is 1
            if (canKillOpponentThisTurn)
            {
                return(1);
            }
            // If opponent can't be killed, but they can kill root player next turn, evaluation is -1
            if (canBeKilledByOpponentThisTurn)
            {
                return(-1);
            }
            // If this is not a racing situation (yet), return a cautious number
            if (notARaceSituation)
            {
                // Two aspects here (to keep it simple)
                // -> root player's HP vs opponent's HP
                // -> root player's #creatures vs opponent's #creatures
                // The nine checks below cover every combination of those two comparisons.

                // Having more HP and more creatures is quite good
                if (rH > oH && rootPlayerMinions.Length > opponentMinions.Length)
                {
                    return(0.75);
                }
                if (rH > oH && rootPlayerMinions.Length == opponentMinions.Length)
                {
                    return(0.25);
                }
                if (rH > oH && rootPlayerMinions.Length < opponentMinions.Length)
                {
                    return(0.1);
                }

                if (rH == oH && rootPlayerMinions.Length > opponentMinions.Length)
                {
                    return(0.33);
                }
                if (rH == oH && rootPlayerMinions.Length == opponentMinions.Length)
                {
                    return(0);
                }
                if (rH == oH && rootPlayerMinions.Length < opponentMinions.Length)
                {
                    return(-0.33);
                }

                if (rH < oH && rootPlayerMinions.Length > opponentMinions.Length)
                {
                    return(-0.1);
                }
                if (rH < oH && rootPlayerMinions.Length == opponentMinions.Length)
                {
                    return(-0.25);
                }
                // Having less HP and less creatures is quite bad
                if (rH < oH && rootPlayerMinions.Length < opponentMinions.Length)
                {
                    return(-0.75);
                }
            }

            // If none of the above applies, look at the difference between when the opponent dies and when the root player dies
            var difference = oTD - rTD;

            // If the difference is between -1 and 1, it is too close to tell
            if (difference >= -1 && difference <= 1)
            {
                return(0);
            }
            // If the difference is negative, it means the root player would die later than the opponent, so the root player would be slightly ahead
            if (difference < -1)
            {
                return(0.5);
            }
            // If the difference is positive, it means the opponent would die later than the root player, so the root player would be losing slightly
            if (difference > 1)
            {
                return(-0.5);
            }

            // Safeguard. The two-argument constructor is used on purpose: the single-string overload would treat the text as the parameter name instead of the message.
            throw new ArgumentOutOfRangeException(nameof(state), $"Evaluation values do not fall into the expected range: oTD={oTD:F3} | rTD={rTD:F3}");
        }
        /// <inheritdoc />
        public SabberStoneAction Solution(SearchContext <List <SabberStoneAction>, SabberStoneState, SabberStoneAction, object, SabberStoneAction> context, TreeSearchNode <SabberStoneState, SabberStoneAction> node)
        {
            // Without Hierarchical Expansion (HE) the node's payload is returned as the solution.
            // The task values are combined for the action, so the action's value is simply recorded for each task in the list.
            if (!HierarchicalExpansion)
            {
                foreach (var task in node.Payload.Tasks)
                {
                    var taskValue = new Tuple <SabberStonePlayerTask, double>(task, node.CalculateScore(NodeEvaluation));
                    TaskValues.Add(taskValue);
                }

                return node.Payload;
            }

            // With HE, the final-node selection strategy has chosen a child of the root as final-node.
            // The complete action has to be pieced together from the tree until the turn goes to the opponent.
            var solution     = new SabberStoneAction();
            var rootPlayerId = context.Source.CurrentPlayer();
            var mcts         = (MCTS <List <SabberStoneAction>, SabberStoneState, SabberStoneAction, object, SabberStoneAction>)context.Search;
            var selection    = mcts.SelectionStrategy;

            // Follow the search's selection strategy down the tree for as long as the visited node's payload belongs to the root player.
            for (var current = node; current.Payload.Player() == rootPlayerId; current = selection.SelectNextNode(context, current))
            {
                // Each visited node contributes the first task of its payload, together with the node's score.
                var task = current.Payload.Tasks.First();

                solution.AddTask(task);
                TaskValues.Add(new Tuple <SabberStonePlayerTask, double>(task, current.CalculateScore(NodeEvaluation)));

                // A leaf has nothing below it to move on to.
                if (current.IsLeaf())
                {
                    break;
                }
            }

            return solution;
        }