Esempio n. 1
        /// <summary>
        /// Picks an action index at random, using <paramref name="weights"/> as unnormalized
        /// selection weights.
        /// </summary>
        /// <remarks>
        /// Entries of <paramref name="weights"/> are divided by their total in place, so the
        /// caller's array is modified.
        /// NOTE(review): this excerpt appears to have lost its brace-only lines and the end of the
        /// method (the return statement) is cut off; the comments below follow the indentation.
        /// Confirm against the complete source.
        /// </remarks>
        /// <param name="random">Generator that supplies the draw via <c>UniformUnitInterval()</c>.</param>
        /// <param name="weights">Per-action weights; each must be non-negative and their sum must be non-zero.</param>
        /// <param name="numActions">Expected number of actions; the length check is skipped when this is <c>int.MaxValue</c>.</param>
        /// <exception cref="ArgumentException">
        /// The weight count differs from <paramref name="numActions"/>, a weight is negative, or
        /// the weights sum to zero.
        /// </exception>
        public override ExplorerDecision <int> MapContext(PRG random, float[] weights, int numActions)
            int numWeights = weights.Length;

            // int.MaxValue acts as a sentinel that disables the length check.
            if (numActions != int.MaxValue && numWeights != numActions)
                throw new ArgumentException("The number of weights returned by the scorer must equal number of actions");

            // Sum the weights while rejecting negative entries. The total is used below to
            // normalize each weight, so the input does not have to sum to 1.
            float total = 0f;

            for (int i = 0; i < numWeights; i++)
                if (weights[i] < 0)
                    throw new ArgumentException("Scores must be non-negative.");

                total += weights[i];

            // A zero total would make the division below meaningless.
            if (total == 0)
                throw new ArgumentException("At least one score must be positive.");

            float draw = random.UniformUnitInterval();

            float sum = 0f;
            float actionProbability = 0f;
            // Starts at the last index; only replaced when the cumulative sum reaches the draw.
            int   actionIndex       = numWeights - 1;

            for (int i = 0; i < numWeights; i++)
                // Normalize in place and accumulate the running (cumulative) probability.
                weights[i] = weights[i] / total;
                sum       += weights[i];
                // This needs to be >=, not >, in case the random draw = 1.0, since sum would never
                // be > 1.0 and the loop would exit without assigning the right action probability.
                if (sum >= draw)
                    actionIndex       = i;
                    actionProbability = weights[i];
                    // NOTE(review): no loop exit is visible after this assignment in the excerpt;
                    // presumably a break was lost in extraction - confirm against the full source.


            // action id is one-based
            // NOTE(review): the statement that builds the result is truncated here; only the
            // explorer-state initializer fragment below survives.
                       new GenericExplorerState {
                Probability = actionProbability
Esempio n. 2
        /// <summary>
        /// Repeatedly samples 1-based action ids from <paramref name="probabilities"/> to fill an
        /// array of <paramref name="size"/> actions, tracking which ids were already taken.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this excerpt has lost its brace-only lines, the body of the duplicate
        /// check, any loop-exit statements and the return statement. The comments describe only
        /// what is visible; confirm the control flow against the complete source.
        /// </remarks>
        /// <param name="probabilities">Selection probability per action; the first <paramref name="size"/> entries are read.</param>
        /// <param name="size">Number of actions to sample.</param>
        /// <param name="randomGenerator">Generator that supplies each draw via <c>UniformUnitInterval()</c>.</param>
        /// <param name="topActionProbability">Set to the probability of the action stored in the first slot.</param>
        /// <exception cref="ArgumentException">Any of the inspected probabilities is exactly 1.</exception>
        internal static int[] SampleWithoutReplacement(float[] probabilities, int size, PRG randomGenerator, ref float topActionProbability)
            // A probability of exactly 1 means the same action would be drawn every time, so a
            // list of distinct actions could never be completed.
            for (int i = 0; i < size; i++)
                if (probabilities[i] == 1f)
                    throw new ArgumentException("The resulting probability distribution is deterministic and thus cannot generate a list of unique actions.");

            // Result array of 'size' zeros; 'exists' marks which action ids are already in it.
            int[]  actions = Enumerable.Repeat(0, size).ToArray();
            bool[] exists  = new bool[actions.Length + 1]; // plus 1 since action index is 1-based

            // sample without replacement
            int   runningIndex = 0;
            int   runningAction = 0;
            float draw, sum;

            // Each pass makes a fresh draw and walks the cumulative distribution.
            while (runningIndex < size)
                draw = randomGenerator.UniformUnitInterval();
                sum  = 0;

                for (int i = 0; i < size; i++)
                    sum += probabilities[i];
                    if (sum > draw)
                        // Convert the 0-based array index into a 1-based action id.
                        runningAction = i + 1;

                        // check for duplicate
                        // NOTE(review): the statement guarded by this check is missing from the
                        // excerpt; presumably it abandons this draw so a new one is made - confirm.
                        if (exists[runningAction])

                        // store newly sampled action
                        // The first stored action's probability is reported back to the caller.
                        if (runningIndex == 0)
                            topActionProbability = probabilities[i];
                        actions[runningIndex++] = runningAction;
                        exists[runningAction]   = true;
Esempio n. 3
        /// <summary>
        /// Epsilon-greedy selection: either keeps the action chosen by the default policy or
        /// replaces it with a uniformly drawn action, and records the probability of the result.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this excerpt has lost its brace-only lines and, judging by the
        /// indentation and comments, the <c>else</c> keywords; confirm the branch structure
        /// against the complete source.
        /// </remarks>
        /// <param name="random">Generator used for the explore/exploit draw and for the uniform action draw.</param>
        /// <param name="policyAction">1-based action chosen by the default policy.</param>
        /// <param name="numActionsVariable">Number of available actions.</param>
        /// <returns>The result of <c>ExplorerDecision.Create</c> for the chosen action and its explorer state.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="policyAction"/> is 0 or greater than <paramref name="numActionsVariable"/>.
        /// </exception>
        public override ExplorerDecision <int> MapContext(PRG random, int policyAction, int numActionsVariable)
            if (policyAction == 0 || policyAction > numActionsVariable)
                throw new ArgumentException("Action chosen by default policy is not within valid range.");

            float actionProbability;
            bool  isExplore;

            // Exploration is switched off entirely (epsilon = 0) when the 'explore' flag is false.
            float epsilon         = explore ? this.defaultEpsilon : 0f;
            float baseProbability = epsilon / numActionsVariable; // uniform probability

            // Exploit branch: keep the policy's action. Its probability is (1 - epsilon) plus the
            // chance that the uniform draw in the other branch would also have landed on it.
            if (random.UniformUnitInterval() < 1f - epsilon)
                actionProbability = 1f - epsilon + baseProbability;
                isExplore         = false;
                // NOTE(review): an 'else' presumably belongs here - the lines below read as the
                // explore branch.
                // Get uniform random 1-based action ID
                int actionId = random.UniformInt(1, numActionsVariable);

                if (actionId == policyAction)
                    // If it matches the one chosen by the default policy
                    // then increase the probability
                    actionProbability = 1f - epsilon + baseProbability;
                    // Otherwise it's just the uniform probability
                    actionProbability = baseProbability;
                // The uniformly drawn action replaces the policy's choice.
                policyAction = actionId;
                isExplore    = true;

            // Record the effective epsilon, whether exploration happened, and the probability.
            EpsilonGreedyState explorerState = new EpsilonGreedyState
                Epsilon     = epsilon,
                IsExplore   = isExplore,
                Probability = actionProbability

            return(ExplorerDecision.Create(policyAction, explorerState, true));
Esempio n. 4
        /// <summary>
        /// Epsilon-greedy selection over a ranking: with probability epsilon returns a randomly
        /// shuffled ranking of 1..n, otherwise returns the policy's ranking unchanged.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this excerpt has lost its brace-only lines and, judging by the
        /// indentation, the <c>else</c> keyword before the exploit branch; confirm against the
        /// complete source. <paramref name="numActions"/> is not referenced in the visible lines.
        /// </remarks>
        /// <param name="random">Generator used for the explore draw and for the shuffle.</param>
        /// <param name="policyAction">Ranking chosen by the default policy; its length sets n.</param>
        /// <param name="numActions">Number of actions (unused in the visible code).</param>
        /// <returns>The result of <c>ExplorerDecision.Create</c> for the chosen ranking and its explorer state.</returns>
        public ExplorerDecision <int[]> MapContext(PRG random, int[] policyAction, int numActions)

            // Exploration is switched off entirely (epsilon = 0) when the 'explore' flag is false.
            float epsilon = this.explore ? this.defaultEpsilon : 0f;

            int[] chosenAction;
            bool  isExplore;

            if (random.UniformUnitInterval() < epsilon)
                // 1 ... n
                chosenAction = Enumerable.Range(1, policyAction.Length).ToArray();

                // 0 ... n - 2
                // Swap each position with a randomly chosen position at or after it. This
                // resembles a Fisher-Yates shuffle, assuming UniformInt's upper bound is
                // inclusive - TODO confirm.
                for (int i = 0; i < policyAction.Length - 1; i++)
                    int swapIndex = random.UniformInt(i, policyAction.Length - 1);

                    int temp = chosenAction[swapIndex];
                    chosenAction[swapIndex] = chosenAction[i];
                    chosenAction[i]         = temp;

                isExplore = true;
                // NOTE(review): an 'else' presumably belongs here - the two lines below read as
                // the exploit branch, which reuses the caller's array without copying it.
                chosenAction = policyAction;
                isExplore    = false;

            // NOTE(review): the state stores this.defaultEpsilon rather than the local 'epsilon',
            // so it reports the configured value even when exploration is disabled - confirm
            // whether that is intended.
            EpsilonGreedySlateState explorerState = new EpsilonGreedySlateState
                Epsilon   = this.defaultEpsilon,
                IsExplore = isExplore,
                Ranking   = policyAction

            return(ExplorerDecision.Create(chosenAction, explorerState, true));
Esempio n. 5
        /// <summary>
        /// Samples one action from <paramref name="actionProbs"/> and builds action and
        /// probability lists in which the sampled action is moved to the first slot.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this excerpt has lost its brace-only lines and the end of the method
        /// (the return statement) is cut off; <paramref name="numActions"/> is not referenced in
        /// the visible lines. Confirm against the complete source.
        /// </remarks>
        /// <param name="prg">Generator that supplies the draw via <c>UniformUnitInterval()</c>.</param>
        /// <param name="actionProbs">Action/probability pairs; probabilities must be non-negative and sum to 1 within 1e-4.</param>
        /// <param name="numActions">Number of actions (unused in the visible code).</param>
        /// <exception cref="ArgumentException">
        /// A probability is negative, the probabilities sum to zero, or their sum differs from 1
        /// by more than 1e-4.
        /// </exception>
        public override ExplorerDecision <int[]> MapContext(PRG prg, ActionProbability[] actionProbs, int numActions)
            // Validate the supplied distribution. No normalization is applied in the visible
            // code: a sum that deviates from 1 by more than 1e-4 is rejected below.
            float total = 0f;

            foreach (var ap in actionProbs)
                if (ap.Probability < 0)
                    throw new ArgumentException("Probabilities must be non-negative.");

                total += ap.Probability;

            if (total == 0)
                throw new ArgumentException("At least one probability must be positive.");

            // NOTE(review): the message interpolates the absolute deviation from 1, not the sum
            // itself, although it reads as if the sum were reported.
            if (Math.Abs(total - 1f) > 1e-4)
                throw new ArgumentException($"Probabilities must sum to one, but {Math.Abs(total - 1f)} was received.");

            float draw = prg.UniformUnitInterval();

            float sum          = 0f;
            // Starts at the last entry; only replaced when the cumulative sum exceeds the draw.
            var   actionChosen = actionProbs.Last();

            foreach (var ap in actionProbs)
                sum += ap.Probability;
                if (sum > draw)
                    actionChosen = ap;
                    // NOTE(review): no loop exit is visible after this assignment in the excerpt;
                    // presumably a break was lost in extraction - confirm against the full source.

            // top slot explorer
            var action          = actionChosen.Action;
            var probability     = actionChosen.Probability;
            var actionList      = new int[actionProbs.Length];
            var probabilityList = new float[actionProbs.Length];

            // Copy actions and probabilities into parallel arrays, swapping the sampled action
            // (and its probability) into index 0 when it is encountered.
            for (int i = 0; i < actionList.Length; i++)
                actionList[i]      = actionProbs[i].Action;
                probabilityList[i] = actionProbs[i].Probability;

                if (action == actionList[i])
                    // swap both
                    actionList[i] = actionList[0];
                    actionList[0] = action;

                    probabilityList[i] = probabilityList[0];
                    probabilityList[0] = probability;

            // action id is 1-based
            // NOTE(review): the statement that builds the result is truncated here; only the
            // explorer-state initializer fragment below survives.
                       new GenericTopSlotExplorerState {
                Probabilities = probabilityList
Esempio n. 6
        /// <summary>
        /// Chooses an action index from <paramref name="scores"/>: when exploring, samples from
        /// a normalized exponential of the scores; otherwise scans for the highest score.
        /// </summary>
        /// <remarks>
        /// When exploring, <paramref name="scores"/> is overwritten in place, first with the
        /// exponentiated values and then with the normalized values.
        /// NOTE(review): this excerpt has lost its brace-only lines and, judging by the
        /// indentation, the <c>else</c> keyword before the non-exploring branch; the end of the
        /// method (the return statement) is cut off. Confirm against the complete source.
        /// </remarks>
        /// <param name="random">Generator that supplies the draw via <c>UniformUnitInterval()</c>.</param>
        /// <param name="scores">Per-action scores.</param>
        /// <param name="numActions">Expected number of actions; the length check is skipped when this is <c>int.MaxValue</c>.</param>
        /// <exception cref="ArgumentException">The score count differs from <paramref name="numActions"/>.</exception>
        public override ExplorerDecision <int> MapContext(PRG random, float[] scores, int numActions)
            int numScores = scores.Length;

            // int.MaxValue acts as a sentinel that disables the length check.
            if (numActions != int.MaxValue && numScores != numActions)
                throw new ArgumentException("The number of scores returned by the scorer must equal number of actions");

            int   i        = 0;
            float maxScore = scores.Max();

            float actionProbability = 0f;
            int   actionIndex       = 0;

            if (this.explore)
                // Create a normalized exponential distribution based on the returned scores
                // Each score is shifted by the maximum before exponentiating, presumably to keep
                // Math.Exp from overflowing - the shift cancels out in the normalization below.
                for (i = 0; i < numScores; i++)
                    scores[i] = (float)Math.Exp(this.lambda * (scores[i] - maxScore));

                // Normalize the exponentiated scores by their sum so they form a probability
                // distribution, then sample from it with a single uniform draw.
                float total = scores.Sum();

                float draw = random.UniformUnitInterval();

                float sum = 0f;
                actionProbability = 0f;
                // Starts at the last index; only replaced when the cumulative sum reaches the draw.
                actionIndex       = numScores - 1;
                for (i = 0; i < numScores; i++)
                    scores[i] = scores[i] / total;
                    sum      += scores[i];
                    if (sum >= draw)
                        actionIndex       = i;
                        actionProbability = scores[i];
                        // NOTE(review): no loop exit is visible after this assignment in the
                        // excerpt; presumably a break was lost in extraction - confirm.
                // NOTE(review): an 'else' presumably belongs here - the lines below read as the
                // non-exploring branch. maxScore is reset to 0f before the scan, so if no score
                // is greater than 0 actionIndex stays at its initial 0 - confirm this is intended.
                maxScore = 0f;
                for (i = 0; i < numScores; i++)
                    if (maxScore < scores[i])
                        maxScore    = scores[i];
                        actionIndex = i;
                actionProbability = 1f; // Set to 1 since we always pick the highest one.


            // action id is one-based
            // NOTE(review): the statement that builds the result is truncated here; only the
            // explorer-state initializer fragment below survives.
                                           new GenericExplorerState {
                Probability = actionProbability