Example #1
0
            /// <summary>
            /// Creates an <see cref="OddmentTable{T}"/> over the empty board positions by sampling random moves,
            /// playing out each sampled move and accumulating the evaluated value per position.
            /// </summary>
            /// <param name="context">The current search context.</param>
            /// <param name="samplesForGeneration">The number of random samples to draw when generating the table.</param>
            /// <returns>An <see cref="OddmentTable{T}"/> keyed on board position, weighted by normalised sample value.</returns>
            public OddmentTable <int> Create(SearchContext <object, TicTacToeState, TicTacToeMove, object, TicTacToeMove> context, int samplesForGeneration)
            {
                var table = new Dictionary <int, double>();

                for (var i = 0; i < samplesForGeneration; i++)
                {
                    // Sample a random legal move, apply it to a copy of the source state and evaluate a playout from there.
                    var action   = new TicTacToeMove(TicTacToeMoveGenerator.AllEmptyPositions(context.Source).RandomElementOrDefault(), context.Source.ActivePlayerID);
                    var newState = GameLogic.Apply(context, (TicTacToeState)context.Source.Copy(), action);
                    var endState = PlayoutStrategy.Playout(context, newState);
                    var value    = EvaluationStrategy.Evaluate(context, new TreeSearchNode <TicTacToeState, TicTacToeMove>(action), endState);

                    // Accumulate value per position; TryGetValue avoids the triple lookup of ContainsKey + Add + indexer.
                    table.TryGetValue(action.PositionToPlace, out var accumulated);
                    table[action.PositionToPlace] = accumulated + value;
                }

                // NOTE(review): Max()/Min() throw on an empty table, i.e. when samplesForGeneration <= 0 — confirm callers guarantee a positive sample count.
                var maxValue = table.Values.Max();
                var minValue = table.Values.Min();

                var oddmentTable = new OddmentTable <int>();

                // Normalise the accumulated values into odds; defer recalculation until all entries are added.
                foreach (var kvPair in table)
                {
                    var normalisedValue = Util.Normalise(kvPair.Value, minValue, maxValue);
                    oddmentTable.Add(kvPair.Key, normalisedValue, recalculate: false);
                }
                oddmentTable.Recalculate();

                return oddmentTable;
            }
Example #2
0
        /// <summary>
        /// Uses the local Multi-Armed-Bandits to explore the action space and uses the global Multi-Armed-Bandit to exploit the best performing actions.
        /// </summary>
        /// <param name="context">The current search context.</param>
        /// <param name="state">The game state for the node.</param>
        /// <param name="gMAB">The global Multi-Armed-Bandit.</param>
        /// <returns>An <see cref="A"/> that was selected from the global Multi-Armed-Bandit.</returns>
        private A NaïveSampling(SearchContext <D, P, A, S, Sol> context, P state, IDictionary <long, Dictionary <int, LocalArm> > gMAB)
        {
            var apply      = context.Application;
            var stateClone = context.Cloner.Clone(state);
            var stateHash  = stateClone.HashMethod();

            // Fetch (or lazily create) the arm collection for this state once; TryGetValue avoids
            // the repeated gMAB[stateHash] lookups that the rest of the method would otherwise perform.
            if (!gMAB.TryGetValue(stateHash, out var localMAB))
            {
                localMAB = new Dictionary <int, LocalArm>();
                gMAB.Add(stateHash, localMAB);
            }

            // Use a policy p_0 to determine whether to explore or exploit
            // If explore was selected
            //      x_1...x_n is sampled by using a policy p_l to select a value for each X_i in X independently.
            //      As a side effect, the resulting value combination is added to the global MAB.
            // If exploit was selected
            //      x_1...x_n is sampled by using a policy p_g to select a value combination using MAB_g.

            // Can only exploit if there is anything to exploit in the first place
            if (localMAB.IsNullOrEmpty() || ExplorationStrategy.Policy(context, 0))
            {
                // Explore

                // Create an action according to policy p_1
                var action     = SamplingStrategy.Sample(stateClone);
                var actionHash = action.GetHashCode();
                // Evaluate the sampled action through a playout from the resulting state
                var endState = PlayoutStrategy.Playout(context, apply.Apply(context, stateClone, action));
                var tempNode = new TreeSearchNode <P, A> {
                    Payload = action
                };
                var reward = EvaluationStrategy.Evaluate(context, tempNode, endState);

                // Add the action to the global MAB; TryGetValue replaces the ContainsKey + indexer double lookup
                if (localMAB.TryGetValue(actionHash, out var existingArm))
                {
                    existingArm.Visit(reward);
                }
                else
                {
                    var newArm = new LocalArm(action);
                    newArm.Visit(reward);
                    localMAB.Add(actionHash, newArm);
                }

                return action;
            }

            // Exploit; epsilon-greedy by returning the action with the highest expected reward with probability 1-e, otherwise returning random.
            return _rng.NextDouble() <= 1 - PolicyGlobal ? localMAB.Values.OrderByDescending(i => i.ExpectedReward).First().Action : localMAB.RandomElementOrDefault().Value.Action;
        }
Example #3
0
        /// <summary>
        /// Perform the search. Note: should set the Solution in the SearchContext and update its Status.
        /// </summary>
        /// <param name="context">The context within which the search happens.</param>
        public override void Search(SearchContext <D, P, A, S, Sol> context)
        {
            var clone     = context.Cloner;
            var rootState = context.Source;

            // UtcNow instead of Now: the thinking-time deadline must not jump with DST or local clock
            // adjustments, and UtcNow avoids the time-zone conversion cost on every loop iteration.
            var endTime = DateTime.UtcNow.AddMilliseconds(Time);
            var it      = 0;

            // Setup for when we might be continuing a search from a specific node.
            var root = (TreeSearchNode <P, A>)context.StartNode;

            if (root == null)
            {
                root = new TreeSearchNode <P, A>(clone.Clone(rootState));
                context.StartNode = root;
            }

            // Set up a global MAB, to hold the value combinations created during the naïve-sampling process.
            var gMAB = new Dictionary <long, Dictionary <int, LocalArm> >();

            // Iterate until either the time budget or the iteration budget is exhausted (a budget of
            // NO_LIMIT_* disables that particular check).
            while ((Time == Constants.NO_LIMIT_ON_THINKING_TIME || DateTime.UtcNow < endTime) && (Iterations == Constants.NO_LIMIT_ON_ITERATIONS || it < Iterations))
            {
                it++;

                // SelectAndExpand, reference the iteration counter because it might be updated in the recursive call
                var selectedNode = NaïveSelectAndExpand(context, root, gMAB, endTime, ref it);

                // Keep track of the maximum depth we reach
                var nodeDepth = selectedNode.CalculateDepth();
                if (nodeDepth > MaxDepth)
                {
                    MaxDepth = nodeDepth;
                }

                // Simulate
                var endState = PlayoutStrategy.Playout(context, (P)selectedNode.State.Copy());

                // Backpropagation
                BackPropagationStrategy.BackPropagate(context, EvaluationStrategy, selectedNode, endState);
            }

            var finalNode = FinalNodeSelectionStrategy.SelectFinalNode(context, root);

            context.Solution    = SolutionStrategy.Solution(context, finalNode);
            context.BudgetSpent = it;
            context.Status      = SearchContext <D, P, A, S, Sol> .SearchStatus.Success;
        }