/// <summary>
/// Builds an NMCTS search strategy for TicTacToe with a fixed configuration:
/// 10000 iterations, agent-driven playouts, chance exploration at 0.5 and a global policy of 0.2.
/// </summary>
/// <param name="samplingStrategy">The sampling strategy to plug into the NMCTS builder.</param>
/// <returns>The search strategy produced by the configured builder.</returns>
public override ISearchStrategy <object, TicTacToeState, TicTacToeMove, object, TicTacToeMove> SetupNMCTS(ISamplingStrategy <TicTacToeState, TicTacToeMove> samplingStrategy)
        {
            // Fixed settings for this setup, named here so the assignments below read as configuration.
            const int iterationBudget       = 10000;
            const double explorationChance  = 0.5;
            const double globalPolicyChance = 0.2;

            var nmctsBuilder = NMCTS <object, TicTacToeState, TicTacToeMove, object, TicTacToeMove> .Builder();

            nmctsBuilder.Iterations = iterationBudget;

            // Playouts are driven by the Agent member; evaluation uses the EvaluationStrategy member.
            nmctsBuilder.PlayoutStrategy    = new AgentPlayout <object, TicTacToeState, TicTacToeMove, object, TicTacToeMove>(Agent);
            nmctsBuilder.EvaluationStrategy = EvaluationStrategy;

            nmctsBuilder.SolutionStrategy    = new ActionSolution <object, TicTacToeState, TicTacToeMove, object, TicTacToeMove, TreeSearchNode <TicTacToeState, TicTacToeMove> >();
            nmctsBuilder.ExplorationStrategy = new ChanceExploration <object, TicTacToeState, TicTacToeMove, object, TicTacToeMove>(explorationChance);
            nmctsBuilder.PolicyGlobal        = globalPolicyChance;

            // The caller decides how moves are sampled.
            nmctsBuilder.SamplingStrategy = samplingStrategy;

            return nmctsBuilder.Build();
        }
        // Example #2
        // 0
        /// <summary>
        /// Creates a new NMCTSBot and wires up its default strategies: the ensemble search,
        /// the playout bots, the turn-cutoff goal, the game logic and the NMCTS builder.
        /// </summary>
        /// <param name="allowPerfectInformation">[Optional] Whether this bot may use perfect information about the game state (i.e. no obfuscation and therefore no determinisation). Defaults to false.</param>
        /// <param name="ensembleSize">[Optional] The size of the ensemble. When greater than zero, the computation budget is divided by it. Defaults to 1.</param>
        /// <param name="playoutBotType">[Optional] The kind of bot used during playouts. Defaults to <see cref="PlayoutBotType.MAST"/>.</param>
        /// <param name="mastSelectionType">[Optional] The selection strategy handed to the MAST playout bots. Defaults to <see cref="MASTPlayoutBot.SelectionType.EGreedy"/>.</param>
        /// <param name="budgetType">[Optional] The kind of budget this bot uses. Defaults to <see cref="BudgetType.Iterations"/>.</param>
        /// <param name="iterations">[Optional] The number of iterations NMCTS may use. Defaults to <see cref="Constants.DEFAULT_COMPUTATION_ITERATION_BUDGET"/>.</param>
        /// <param name="time">[Optional] The number of milliseconds NMCTS may spend searching. Defaults to <see cref="Constants.DEFAULT_COMPUTATION_TIME_BUDGET"/>.</param>
        /// <param name="playoutTurnCutoff">[Optional] The number of turns after which a simulation is stopped. Defaults to <see cref="Constants.DEFAULT_PLAYOUT_TURN_CUTOFF"/>.</param>
        /// <param name="globalPolicy">[Optional] The exploration threshold for the e-greedy global policy. Defaults to <see cref="Constants.DEFAULT_NMCTS_GLOBAL_POLICY"/>.</param>
        /// <param name="localPolicy">[Optional] The exploration threshold for the e-greedy local policy. Defaults to <see cref="Constants.DEFAULT_NMCTS_LOCAL_POLICY"/>.</param>
        /// <param name="useHeuristicEvaluation">[Optional] Whether to use the HeuristicBot's evaluation function. Defaults to false.</param>
        /// <param name="debugInfoToConsole">[Optional] Whether to write debug information to the console. Defaults to false.</param>
        /// <exception cref="InvalidEnumArgumentException">Thrown when the playout bot type or the budget type is not one of the values handled here.</exception>
        public NMCTSBot(bool allowPerfectInformation = false,
                        int ensembleSize = 1,
                        PlayoutBotType playoutBotType = PlayoutBotType.MAST,
                        MASTPlayoutBot.SelectionType mastSelectionType = MASTPlayoutBot.SelectionType.EGreedy,
                        BudgetType budgetType = BudgetType.Iterations,
                        int iterations = Constants.DEFAULT_COMPUTATION_ITERATION_BUDGET,
                        long time = Constants.DEFAULT_COMPUTATION_TIME_BUDGET,
                        int playoutTurnCutoff = Constants.DEFAULT_PLAYOUT_TURN_CUTOFF,
                        double globalPolicy = Constants.DEFAULT_NMCTS_GLOBAL_POLICY,
                        double localPolicy = Constants.DEFAULT_NMCTS_LOCAL_POLICY,
                        bool useHeuristicEvaluation = false,
                        bool debugInfoToConsole = false)
        {
            // Remember the requested configuration.
            PerfectInformation = allowPerfectInformation;
            EnsembleSize = ensembleSize;
            PlayoutBotType = playoutBotType;
            MASTSelectionType = mastSelectionType;
            BudgetType = budgetType;
            Iterations = iterations;
            Time = time;
            PlayoutTurnCutoff = playoutTurnCutoff;
            GlobalPolicy = globalPolicy;
            LocalPolicy = localPolicy;
            _debug = debugInfoToConsole;

            // Ensemble search: obfuscation is always enabled, perfect information is opt-in.
            Ensemble = new EnsembleStrategySabberStone(enableStateObfuscation: true, enablePerfectInformation: PerfectInformation);

            // One evaluation instance is shared by the MAST playout bots and the NMCTS builder.
            var stateEvaluation = new EvaluationStrategyHearthStone(useHeuristicEvaluation);

            Playout = new PlayoutStrategySabberStone();

            // Pick the pair of bots (own side / opponent side) that act during playouts.
            if (PlayoutBotType == PlayoutBotType.Random)
            {
                MyPlayoutBot = new RandomBot(filterDuplicatePositionTasks: true);
                OpponentPlayoutBot = new RandomBot(filterDuplicatePositionTasks: true);
            }
            else if (PlayoutBotType == PlayoutBotType.Heuristic)
            {
                MyPlayoutBot = new HeuristicBot();
                OpponentPlayoutBot = new HeuristicBot();
            }
            else if (PlayoutBotType == PlayoutBotType.MAST)
            {
                MyPlayoutBot = new MASTPlayoutBot(MASTSelectionType, stateEvaluation);
                OpponentPlayoutBot = new MASTPlayoutBot(MASTSelectionType, stateEvaluation);
            }
            else
            {
                throw new InvalidEnumArgumentException($"PlayoutBotType `{PlayoutBotType}' is not supported.");
            }

            // Bot backing the NMCTS sampling strategy configured further down.
            RandomSamplingBot = new RandomBot(filterDuplicatePositionTasks: true);

            // Simulations are cut off after the configured number of turns via the goal strategy.
            Goal = new GoalStrategyTurnCutoff(PlayoutTurnCutoff);

            // The game logic receives the goal strategy.
            GameLogic = new SabberStoneGameLogic(Goal, false);

            // Assemble the NMCTS builder from the pieces created above.
            Builder = NMCTS <List <SabberStoneAction>, SabberStoneState, SabberStoneAction, object, SabberStoneAction> .Builder();

            Builder.ExplorationStrategy = new ChanceExploration <List <SabberStoneAction>, SabberStoneState, SabberStoneAction, object, SabberStoneAction>(LocalPolicy);
            Builder.PlayoutStrategy = Playout;
            Builder.PolicyGlobal = GlobalPolicy;
            Builder.SamplingStrategy = new SabberStoneNMCSamplingStrategy(RandomSamplingBot);
            Builder.SolutionStrategy = new SolutionStrategySabberStone(false, new AverageScore <SabberStoneState, SabberStoneAction>());
            Builder.EvaluationStrategy = stateEvaluation;

            // Divide the budget by the ensemble size; a non-positive ensemble size leaves the
            // budget undivided. The divisions below are integer divisions by design.
            if (BudgetType == BudgetType.Iterations)
            {
                if (EnsembleSize > 0)
                {
                    Builder.Iterations = Iterations / EnsembleSize;
                }
                else
                {
                    Builder.Iterations = Iterations;
                }
            }
            else if (BudgetType == BudgetType.Time)
            {
                if (EnsembleSize > 0)
                {
                    Builder.Time = Time / EnsembleSize;
                }
                else
                {
                    Builder.Time = Time;
                }
            }
            else
            {
                throw new InvalidEnumArgumentException($"BudgetType `{BudgetType}' is not supported.");
            }
        }