Example #1
0
        /// <summary>
        /// Invokes the wrapped explorer, then samples an ordering of actions from
        /// <paramref name="weights"/> via <c>MultiActionHelper.SampleWithoutReplacement</c>.
        /// </summary>
        /// <param name="random">Random generator handed to both the wrapped explorer and the sampler.</param>
        /// <param name="weights">Per-action weights; the array length is the number of actions passed to the sampler.</param>
        /// <param name="numActions">Forwarded to the wrapped explorer only; the sampler uses <c>weights.Length</c>.</param>
        /// <returns>
        /// A decision holding the sampled actions and a <c>GenericExplorerState</c> carrying the
        /// probability value left in the by-ref argument after sampling.
        /// </returns>
        public override ExplorerDecision<int[]> MapContext(PRG random, float[] weights, int numActions)
        {
            // The wrapped explorer's result is not consumed here; the call itself is retained
            // because it receives the shared random generator.
            this.explorer.MapContext(random, weights, numActions);

            // Starts at zero and is passed by reference to the sampler.
            float sampledProbability = 0f;
            int[] sampledActions = MultiActionHelper.SampleWithoutReplacement(
                weights,
                weights.Length,
                random,
                ref sampledProbability);

            var state = new GenericExplorerState { Probability = sampledProbability };

            // action id is one-based
            return ExplorerDecision.Create(sampledActions, state, true);
        }
Example #2
0
        /// <summary>
        /// Delegates to the wrapped explorer and turns its decision into an ordering over all actions.
        /// When <c>this.explore</c> is set, the ordering is sampled without replacement from
        /// <paramref name="scores"/>; otherwise the actions are listed as 1..N with the wrapped
        /// explorer's chosen action swapped into the first slot.
        /// </summary>
        /// <param name="random">Random generator handed to the wrapped explorer and to the sampler.</param>
        /// <param name="scores">Per-action scores; must be non-null and non-empty. Its length is the number of actions ranked.</param>
        /// <param name="numActions">Forwarded to the wrapped explorer only; the ranking uses <c>scores.Length</c>.</param>
        /// <returns>The ordered action ids together with the wrapped decision's explorer state and record flag.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="scores"/> is null or empty.</exception>
        public override ExplorerDecision<int[]> MapContext(PRG random, float[] scores, int numActions)
        {
            if (scores == null || scores.Length < 1)
            {
                throw new ArgumentException("Scores returned by default policy must not be empty.");
            }

            var decision = this.explorer.MapContext(random, scores, numActions);

            int numActionsVariable = scores.Length;

            int[] chosenActions;
            // Note: there might be a way using out generic parameters and explicit interface implementation to avoid the cast
            float actionProbability = ((GenericExplorerState)decision.ExplorerState).Probability;

            if (this.explore)
            {
                // NOTE(review): the sampler receives actionProbability by ref, but the value is not
                // written back to the returned explorer state - confirm this is intended.
                chosenActions = MultiActionHelper.SampleWithoutReplacement(scores, numActionsVariable, random, ref actionProbability);
            }
            else
            {
                // avoid linq to optimize perf
                // Fill slots 0..N-1 with the action ids 1..N. The previous loop indexed the array
                // with the id itself, which skipped slot 0 and wrote one past the end.
                chosenActions = new int[numActionsVariable];
                for (int i = 0; i < numActionsVariable; i++)
                {
                    chosenActions[i] = i + 1;
                }

                // swap max-score action with the first one
                // NOTE(review): assumes decision.Value is a one-based action id (matching the ids
                // stored above), so its slot is Value - 1 - confirm against the wrapped explorer.
                var chosenIndex = decision.Value - 1;
                int firstAction = chosenActions[0];
                chosenActions[0] = chosenActions[chosenIndex];
                chosenActions[chosenIndex] = firstAction;
            }

            return ExplorerDecision.Create(chosenActions,
                                           decision.ExplorerState,
                                           decision.ShouldRecord);
        }