Exemplo n.º 1
0
        /// <summary>
        /// Runs the wrapped explorer on the first entry of <paramref name="ranking"/> and
        /// returns the ranking array as the decision value.
        /// </summary>
        /// <param name="prg">Random generator forwarded to the wrapped explorer.</param>
        /// <param name="ranking">
        /// Actions proposed by the default policy. Element 0 is handed to the wrapped explorer,
        /// and this same array instance is passed to <c>MultiActionHelper.PutActionToList</c>
        /// and then returned as the decision value.
        /// </param>
        /// <param name="numActions">
        /// Not read by this method; <c>ranking.Length</c> is forwarded as the action count instead.
        /// </param>
        /// <returns>
        /// A decision wrapping <paramref name="ranking"/>, carrying the explorer state and the
        /// should-record flag produced by the wrapped explorer.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="ranking"/> is null or has no elements.
        /// </exception>
        public override ExplorerDecision <int[]> MapContext(PRG prg, int[] ranking, int numActions)
        {
            bool hasActions = ranking != null && ranking.Length > 0;
            if (!hasActions)
            {
                throw new ArgumentException("Actions chosen by default policy must not be empty.");
            }

            int topAction   = ranking[0];
            int actionCount = ranking.Length;
            var inner       = this.explorer.MapContext(prg, topAction, actionCount);

            // NOTE(review): presumably repositions the selected action inside `ranking`
            // in place - confirm against MultiActionHelper.
            MultiActionHelper.PutActionToList(inner.Value, ranking);

            var state  = inner.ExplorerState;
            var record = inner.ShouldRecord;

            return ExplorerDecision.Create(ranking, state, record);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a multi-action decision by sampling actions without replacement from
        /// <paramref name="weights"/>.
        /// </summary>
        /// <param name="random">
        /// Random generator; passed to the wrapped explorer and then to
        /// <c>MultiActionHelper.SampleWithoutReplacement</c>.
        /// </param>
        /// <param name="weights">Per-action weights from the default policy. Must not be null or empty.</param>
        /// <param name="numActions">
        /// Forwarded to the wrapped explorer only; the sampling step uses <c>weights.Length</c>.
        /// </param>
        /// <returns>
        /// A decision whose value is the sampled action array, whose state is a new
        /// <c>GenericExplorerState</c> holding the probability reported by the sampler,
        /// and whose should-record flag is always <c>true</c>.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="weights"/> is null or has no elements.
        /// </exception>
        public override ExplorerDecision <int[]> MapContext(PRG random, float[] weights, int numActions)
        {
            // Same guard style as the sibling score-based overload, so a null array fails
            // with a descriptive ArgumentException before weights.Length is dereferenced.
            if (weights == null || weights.Length < 1)
            {
                throw new ArgumentException("Weights returned by default policy must not be empty.");
            }

            // The returned decision is not used below. The call is retained so that whatever
            // the wrapped explorer does with `random` and `weights` still happens.
            // NOTE(review): confirm whether this call is actually required.
            this.explorer.MapContext(random, weights, numActions);

            // Filled in by the sampler through the ref argument.
            float actionProbability = 0f;

            int[] chosenActions = MultiActionHelper.SampleWithoutReplacement(weights, weights.Length, random, ref actionProbability);

            // action id is one-based
            return ExplorerDecision.Create(
                chosenActions,
                new GenericExplorerState { Probability = actionProbability },
                true);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Epsilon-greedy choice over a whole slate: with probability epsilon returns a random
        /// permutation of the action ids <c>1..policyAction.Length</c>, otherwise returns
        /// <paramref name="policyAction"/> itself.
        /// </summary>
        /// <param name="random">
        /// Random generator; one unit-interval draw decides whether to explore, and further
        /// integer draws drive the shuffle when exploring.
        /// </param>
        /// <param name="policyAction">
        /// Slate proposed by the default policy. Checked by
        /// <c>MultiActionHelper.ValidateActionList</c> first; returned as-is (same array instance)
        /// when not exploring, and always stored as <c>Ranking</c> in the explorer state.
        /// </param>
        /// <param name="numActions">Not read by this method.</param>
        /// <returns>
        /// A decision with the chosen slate, an <c>EpsilonGreedySlateState</c> describing the draw,
        /// and the should-record flag set to <c>true</c>.
        /// </returns>
        public ExplorerDecision <int[]> MapContext(PRG random, int[] policyAction, int numActions)
        {
            MultiActionHelper.ValidateActionList(policyAction);

            // Exploration disabled => threshold 0, so the comparison below can never succeed.
            float threshold;
            if (this.explore)
            {
                threshold = this.defaultEpsilon;
            }
            else
            {
                threshold = 0f;
            }

            // The draw is taken unconditionally, even when exploration is disabled.
            bool exploring = random.UniformUnitInterval() < threshold;

            int[] slate = policyAction;

            if (exploring)
            {
                int count = policyAction.Length;

                // Identity slate of one-based action ids: 1 ... n
                slate = new int[count];
                for (int k = 0; k < count; k++)
                {
                    slate[k] = k + 1;
                }

                // Shuffle positions 0 ... n - 2, each swapped with a position drawn from [pos, n - 1].
                // NOTE(review): assumes UniformInt's upper bound is inclusive - confirm.
                int last = count - 1;
                for (int pos = 0; pos < last; pos++)
                {
                    int pick = random.UniformInt(pos, last);

                    int held = slate[pick];
                    slate[pick] = slate[pos];
                    slate[pos]  = held;
                }
            }

            // Epsilon records the configured value, not the effective threshold used above.
            var state = new EpsilonGreedySlateState
            {
                Epsilon   = this.defaultEpsilon,
                IsExplore = exploring,
                Ranking   = policyAction
            };

            return ExplorerDecision.Create(slate, state, true);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a multi-action decision from per-action <paramref name="scores"/>. When
        /// exploration is enabled the slate is sampled without replacement; otherwise the slate
        /// is the action ids <c>1..scores.Length</c> with the wrapped explorer's chosen action
        /// swapped into the first position.
        /// </summary>
        /// <param name="random">
        /// Random generator; passed to the wrapped explorer and, when exploring, to
        /// <c>MultiActionHelper.SampleWithoutReplacement</c>.
        /// </param>
        /// <param name="scores">Per-action scores from the default policy. Must not be null or empty.</param>
        /// <param name="numActions">
        /// Forwarded to the wrapped explorer only; the slate length is <c>scores.Length</c>.
        /// </param>
        /// <returns>
        /// A decision with the chosen slate and the explorer state and should-record flag
        /// produced by the wrapped explorer.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="scores"/> is null or has no elements.
        /// </exception>
        public override ExplorerDecision <int[]> MapContext(PRG random, float[] scores, int numActions)
        {
            if (scores == null || scores.Length < 1)
            {
                throw new ArgumentException("Scores returned by default policy must not be empty.");
            }

            var decision = this.explorer.MapContext(random, scores, numActions);

            int numActionsVariable = scores.Length;

            int[] chosenActions;
            // Note: there might be a way using out generic parameters and explicit interface implementation to avoid the cast
            float actionProbability = ((GenericExplorerState)decision.ExplorerState).Probability;

            if (this.explore)
            {
                // NOTE(review): the sampler may update actionProbability through the ref argument,
                // but the state returned below is the wrapped explorer's original state, so any
                // updated value is not propagated - confirm this is intended.
                chosenActions = MultiActionHelper.SampleWithoutReplacement(scores, numActionsVariable, random, ref actionProbability);
            }
            else
            {
                // avoid linq to optimize perf
                // Slot i holds the one-based action id i + 1. The loop must stay within
                // 0 .. n - 1; writing to index n would throw IndexOutOfRangeException.
                chosenActions = new int[numActionsVariable];
                for (int i = 0; i < numActionsVariable; i++)
                {
                    chosenActions[i] = i + 1;
                }

                // swap max-score action with the first one
                // NOTE(review): decision.Value is treated as a one-based action id, matching the
                // ids written above, so it is converted to a zero-based slot - confirm against
                // the wrapped explorer.
                var topIndex = decision.Value - 1;

                int firstAction = chosenActions[0];
                chosenActions[0] = chosenActions[topIndex];
                chosenActions[topIndex] = firstAction;
            }

            return ExplorerDecision.Create(chosenActions,
                                           decision.ExplorerState,
                                           decision.ShouldRecord);
        }