Esempio n. 1
0
        /// <summary>
        /// Samples a one-based action id with probability proportional to the supplied weights.
        /// </summary>
        /// <param name="random">Generator that supplies the uniform draw used for sampling.</param>
        /// <param name="weights">
        /// One non-negative weight per action. The weights need not sum to one; they are divided
        /// by their total before sampling. The array is read but never modified.
        /// </param>
        /// <param name="numActions">
        /// Expected number of actions. The length check against <paramref name="weights"/> is
        /// skipped when this equals <see cref="int.MaxValue"/>.
        /// </param>
        /// <returns>
        /// A decision holding the one-based id of the sampled action and a state whose
        /// <c>Probability</c> is the normalized weight of that action.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// The number of weights does not match <paramref name="numActions"/>, a weight is negative
        /// or NaN, or all weights are zero.
        /// </exception>
        public override ExplorerDecision<int> MapContext(PRG random, float[] weights, int numActions)
        {
            int numWeights = weights.Length;

            if (numActions != int.MaxValue && numWeights != numActions)
            {
                throw new ArgumentException("The number of weights returned by the scorer must equal number of actions");
            }

            float total = 0f;

            for (int i = 0; i < numWeights; i++)
            {
                // Written as a negated ">=" so that NaN, which fails every comparison, is rejected too.
                if (!(weights[i] >= 0))
                {
                    throw new ArgumentException("Scores must be non-negative.");
                }

                total += weights[i];
            }

            if (total == 0)
            {
                throw new ArgumentException("At least one score must be positive.");
            }

            float draw = random.UniformUnitInterval();

            float sum               = 0f;
            float actionProbability = 0f;
            int   actionIndex       = numWeights - 1;
            int   lastPositive      = -1;
            bool  selected          = false;

            for (int i = 0; i < numWeights; i++)
            {
                // Normalize into a local so the caller's array is left untouched.
                float probability = weights[i] / total;

                // An action with no probability mass must never be returned; skipping it does not
                // change the cumulative sum, so the next positive action absorbs the draw instead.
                if (!(probability > 0f))
                {
                    continue;
                }

                lastPositive = i;
                sum         += probability;

                // This needs to be >=, not >, in case the random draw = 1.0, since sum would never
                // be > 1.0 and the loop would exit without selecting an action.
                if (sum >= draw)
                {
                    actionIndex       = i;
                    actionProbability = probability;
                    selected          = true;
                    break;
                }
            }

            // Floating-point rounding can leave the cumulative sum slightly below the draw. In that
            // case fall back to the last action that carries probability mass and report its real
            // probability rather than zero.
            if (!selected && lastPositive >= 0)
            {
                actionIndex       = lastPositive;
                actionProbability = weights[lastPositive] / total;
            }

            // action id is one-based
            actionIndex++;

            return ExplorerDecision.Create(
                actionIndex,
                new GenericExplorerState { Probability = actionProbability },
                true);
        }
Esempio n. 2
0
        /// <summary>
        /// Produces an ordering of the one-based action ids <c>1..size</c> in which every id appears
        /// exactly once, by repeatedly drawing from <paramref name="probabilities"/> and discarding
        /// ids that were already taken.
        /// </summary>
        /// <param name="probabilities">
        /// Probability per action; entry <c>i</c> belongs to action id <c>i + 1</c>. Only the first
        /// <paramref name="size"/> entries are read.
        /// </param>
        /// <param name="size">Number of actions to order.</param>
        /// <param name="randomGenerator">Generator that supplies one uniform draw per sampling attempt.</param>
        /// <param name="topActionProbability">
        /// Set to the probability of the action placed first in the result. Left unchanged when
        /// <paramref name="size"/> is zero.
        /// </param>
        /// <returns>An array of length <paramref name="size"/> containing each id in <c>1..size</c> once.</returns>
        /// <exception cref="ArgumentException">
        /// One of the probabilities equals one, or none of the cumulative sums is positive.
        /// </exception>
        /// <remarks>
        /// When a draw lands on an id that is already taken, the scan moves on to the next id that
        /// is still free instead of redrawing, so later slots are not sampled strictly in proportion
        /// to the remaining probabilities.
        /// </remarks>
        internal static int[] SampleWithoutReplacement(float[] probabilities, int size, PRG randomGenerator, ref float topActionProbability)
        {
            for (int i = 0; i < size; i++)
            {
                if (probabilities[i] == 1f)
                {
                    throw new ArgumentException("The resulting probability distribution is deterministic and thus cannot generate a list of unique actions.");
                }
            }

            int[]  actions = Enumerable.Repeat(0, size).ToArray();
            bool[] exists  = new bool[actions.Length + 1]; // plus 1 since action index is 1-based

            // Locate the first index whose cumulative probability is positive. The sampling scan
            // below can only stop at an index where the cumulative sum exceeds the draw, so indices
            // before this one can never be produced by sampling.
            // NOTE(review): this assumes UniformUnitInterval never returns a negative value - confirm.
            int   firstReachable = size;
            float prefix         = 0f;

            for (int i = 0; i < size; i++)
            {
                prefix += probabilities[i];
                if (prefix > 0f)
                {
                    firstReachable = i;
                    break;
                }
            }

            if (size > 0 && firstReachable == size)
            {
                throw new ArgumentException("At least one probability must be positive.");
            }

            // Only the actions from firstReachable onwards can be sampled. Stopping once they are
            // all placed prevents an endless loop waiting for an action that can never be drawn.
            int reachableCount = size - firstReachable;

            // sample without replacement
            int   runningIndex  = 0;
            int   runningAction = 0;
            float draw, sum;

            while (runningIndex < reachableCount)
            {
                draw = randomGenerator.UniformUnitInterval();
                sum  = 0;

                for (int i = 0; i < size; i++)
                {
                    sum += probabilities[i];
                    if (sum > draw)
                    {
                        runningAction = i + 1;

                        // check for duplicate; if taken, keep scanning for the next free action
                        if (exists[runningAction])
                        {
                            continue;
                        }

                        // store newly sampled action
                        if (runningIndex == 0)
                        {
                            topActionProbability = probabilities[i];
                        }
                        actions[runningIndex++] = runningAction;
                        exists[runningAction]   = true;
                        break;
                    }
                }
            }

            // The leading actions without probability mass go last, in index order.
            for (int i = 0; i < firstReachable; i++)
            {
                actions[runningIndex++] = i + 1;
            }

            return actions;
        }
Esempio n. 3
0
        /// <summary>
        /// Epsilon-greedy selection: keeps the action proposed by the default policy with
        /// probability <c>1 - epsilon</c>, otherwise replaces it with a uniformly drawn action id.
        /// </summary>
        /// <param name="random">Generator that supplies the explore/exploit draw and the uniform action id.</param>
        /// <param name="policyAction">One-based action id chosen by the default policy.</param>
        /// <param name="numActionsVariable">Number of available actions.</param>
        /// <returns>
        /// A decision holding the selected one-based action id and a state recording the epsilon in
        /// effect, whether the explore branch was taken, and the probability of the selected action.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="policyAction"/> is less than one or greater than
        /// <paramref name="numActionsVariable"/>.
        /// </exception>
        public override ExplorerDecision<int> MapContext(PRG random, int policyAction, int numActionsVariable)
        {
            // Action ids are one-based, so zero and negative ids are both out of range.
            if (policyAction <= 0 || policyAction > numActionsVariable)
            {
                throw new ArgumentException("Action chosen by default policy is not within valid range.");
            }

            float actionProbability;
            bool  isExplore;

            // With exploration switched off epsilon is zero and the policy action is always kept.
            float epsilon         = explore ? this.defaultEpsilon : 0f;
            float baseProbability = epsilon / numActionsVariable; // uniform probability

            if (random.UniformUnitInterval() < 1f - epsilon)
            {
                // The policy action can also be reached through the uniform draw, hence the extra
                // baseProbability on top of 1 - epsilon.
                actionProbability = 1f - epsilon + baseProbability;
                isExplore         = false;
            }
            else
            {
                // Get uniform random 1-based action ID
                int actionId = random.UniformInt(1, numActionsVariable);

                if (actionId == policyAction)
                {
                    // If it matches the one chosen by the default policy
                    // then increase the probability
                    actionProbability = 1f - epsilon + baseProbability;
                }
                else
                {
                    // Otherwise it's just the uniform probability
                    actionProbability = baseProbability;
                }
                policyAction = actionId;
                isExplore    = true;
            }

            EpsilonGreedyState explorerState = new EpsilonGreedyState
            {
                Epsilon     = epsilon,
                IsExplore   = isExplore,
                Probability = actionProbability
            };

            return ExplorerDecision.Create(policyAction, explorerState, true);
        }
Esempio n. 4
0
        /// <summary>
        /// Epsilon-greedy selection over whole rankings: with probability epsilon returns a
        /// uniformly shuffled ranking, otherwise returns the ranking proposed by the default policy.
        /// </summary>
        /// <param name="random">Generator that supplies the explore/exploit draw and the shuffle indices.</param>
        /// <param name="policyAction">
        /// Ranking chosen by the default policy. It is checked by
        /// <c>MultiActionHelper.ValidateActionList</c> and is returned as-is (same array instance)
        /// when the explore branch is not taken.
        /// </param>
        /// <param name="numActions">Not used by this method.</param>
        /// <returns>
        /// A decision holding the chosen ranking and a state recording the epsilon in effect,
        /// whether the explore branch was taken, and the default policy's ranking.
        /// </returns>
        public ExplorerDecision<int[]> MapContext(PRG random, int[] policyAction, int numActions)
        {
            MultiActionHelper.ValidateActionList(policyAction);

            // With exploration switched off epsilon is zero and the policy ranking is always kept.
            float epsilon = this.explore ? this.defaultEpsilon : 0f;

            int[] chosenAction;
            bool  isExplore;

            if (random.UniformUnitInterval() < epsilon)
            {
                // 1 ... n
                // NOTE(review): the shuffled ranking is built from the ids 1..n rather than from the
                // entries of policyAction; this presumably relies on policyAction being a
                // permutation of 1..n - confirm against ValidateActionList.
                chosenAction = Enumerable.Range(1, policyAction.Length).ToArray();

                // 0 ... n - 2
                // Fisher-Yates shuffle: swap each position with one drawn from itself or any later
                // position. NOTE(review): assumes UniformInt's upper bound is inclusive - confirm.
                for (int i = 0; i < policyAction.Length - 1; i++)
                {
                    int swapIndex = random.UniformInt(i, policyAction.Length - 1);

                    int temp = chosenAction[swapIndex];
                    chosenAction[swapIndex] = chosenAction[i];
                    chosenAction[i]         = temp;
                }

                isExplore = true;
            }
            else
            {
                chosenAction = policyAction;
                isExplore    = false;
            }

            EpsilonGreedySlateState explorerState = new EpsilonGreedySlateState
            {
                // Record the epsilon that actually governed this decision (zero when exploration is
                // off), not the configured default.
                Epsilon   = epsilon,
                IsExplore = isExplore,
                Ranking   = policyAction
            };

            return ExplorerDecision.Create(chosenAction, explorerState, true);
        }
Esempio n. 5
0
        /// <summary>
        /// Samples one action from the supplied distribution and returns the full action list with
        /// the sampled action moved into the first slot.
        /// </summary>
        /// <param name="prg">Generator that supplies the uniform draw used for sampling.</param>
        /// <param name="actionProbs">
        /// Action/probability pairs. Probabilities must be non-negative and sum to one within a
        /// tolerance of 1e-4; they are not re-normalized. The array is not modified.
        /// </param>
        /// <param name="numActions">Not used by this method.</param>
        /// <returns>
        /// A decision holding the action list in input order, except that the sampled action has
        /// swapped places with the first entry, together with a state whose <c>Probabilities</c>
        /// array is arranged the same way.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// A probability is negative or NaN, all probabilities are zero (which includes an empty
        /// array), or the probabilities do not sum to one.
        /// </exception>
        public override ExplorerDecision<int[]> MapContext(PRG prg, ActionProbability[] actionProbs, int numActions)
        {
            float total = 0f;

            foreach (var ap in actionProbs)
            {
                // Written as a negated ">=" so that NaN, which fails every comparison, is rejected too.
                if (!(ap.Probability >= 0))
                {
                    throw new ArgumentException("Probabilities must be non-negative.");
                }

                total += ap.Probability;
            }

            if (total == 0)
            {
                throw new ArgumentException("At least one probability must be positive.");
            }

            if (Math.Abs(total - 1f) > 1e-4)
            {
                // Report the sum that was actually received, not its distance from one.
                throw new ArgumentException($"Probabilities must sum to one, but {total} was received.");
            }

            float draw = prg.UniformUnitInterval();

            // Default to the last entry so that a cumulative sum left just below the draw by
            // rounding still selects an action.
            float sum          = 0f;
            var   actionChosen = actionProbs.Last();

            foreach (var ap in actionProbs)
            {
                sum += ap.Probability;
                if (sum > draw)
                {
                    actionChosen = ap;
                    break;
                }
            }

            // top slot explorer
            var action          = actionChosen.Action;
            var probability     = actionChosen.Probability;
            var actionList      = new int[actionProbs.Length];
            var probabilityList = new float[actionProbs.Length];

            // Copy the pairs into parallel arrays; when the sampled action is reached, exchange it
            // with whatever currently sits in slot 0 so it ends up first.
            for (int i = 0; i < actionList.Length; i++)
            {
                actionList[i]      = actionProbs[i].Action;
                probabilityList[i] = actionProbs[i].Probability;

                if (action == actionList[i])
                {
                    // swap both
                    actionList[i] = actionList[0];
                    actionList[0] = action;

                    probabilityList[i] = probabilityList[0];
                    probabilityList[0] = probability;
                }
            }

            return ExplorerDecision.Create(
                actionList,
                new GenericTopSlotExplorerState { Probabilities = probabilityList },
                true);
        }
Esempio n. 6
0
        /// <summary>
        /// Softmax selection: when exploring, samples a one-based action id with probability
        /// proportional to <c>exp(lambda * score)</c>; otherwise picks the highest-scoring action.
        /// </summary>
        /// <param name="random">Generator that supplies the uniform draw; only used when exploring.</param>
        /// <param name="scores">One score per action. The array is read but never modified.</param>
        /// <param name="numActions">
        /// Expected number of actions. The length check against <paramref name="scores"/> is
        /// skipped when this equals <see cref="int.MaxValue"/>.
        /// </param>
        /// <returns>
        /// A decision holding the one-based id of the selected action and a state whose
        /// <c>Probability</c> is the softmax probability of that action when exploring, or one when
        /// the highest-scoring action is taken directly.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// The number of scores does not match <paramref name="numActions"/>.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="scores"/> is empty (raised by the maximum computation).
        /// </exception>
        public override ExplorerDecision<int> MapContext(PRG random, float[] scores, int numActions)
        {
            int numScores = scores.Length;

            if (numActions != int.MaxValue && numScores != numActions)
            {
                throw new ArgumentException("The number of scores returned by the scorer must equal number of actions");
            }

            float maxScore = scores.Max();

            float actionProbability = 0f;
            int   actionIndex       = 0;

            if (this.explore)
            {
                // Exponentiate into a scratch array so the caller's scores stay intact. Subtracting
                // the maximum first keeps every exponent at or below zero for a non-negative lambda,
                // which avoids overflow without changing the normalized distribution.
                float[] expWeights = new float[numScores];
                for (int i = 0; i < numScores; i++)
                {
                    expWeights[i] = (float)Math.Exp(this.lambda * (scores[i] - maxScore));
                }

                float total = expWeights.Sum();

                float draw = random.UniformUnitInterval();

                float sum          = 0f;
                int   lastPositive = -1;
                bool  selected     = false;

                actionIndex = numScores - 1;

                for (int i = 0; i < numScores; i++)
                {
                    float probability = expWeights[i] / total;

                    // An action whose probability underflowed to zero must never be returned;
                    // skipping it leaves the cumulative sum unchanged.
                    if (!(probability > 0f))
                    {
                        continue;
                    }

                    lastPositive = i;
                    sum         += probability;

                    // >= rather than > so that a draw of exactly 1.0 still selects an action.
                    if (sum >= draw)
                    {
                        actionIndex       = i;
                        actionProbability = probability;
                        selected          = true;
                        break;
                    }
                }

                // Rounding can leave the cumulative sum slightly below the draw. Fall back to the
                // last action that carries probability mass and report its real probability
                // rather than zero.
                if (!selected && lastPositive >= 0)
                {
                    actionIndex       = lastPositive;
                    actionProbability = expWeights[lastPositive] / total;
                }
            }
            else
            {
                // Arg-max seeded with the first score (not with zero) so that inputs whose scores
                // are all negative still yield the true maximum. Ties keep the earliest index.
                for (int i = 1; i < numScores; i++)
                {
                    if (scores[i] > scores[actionIndex])
                    {
                        actionIndex = i;
                    }
                }
                actionProbability = 1f; // Set to 1 since we always pick the highest one.
            }

            // action id is one-based
            actionIndex++;

            return ExplorerDecision.Create(
                actionIndex,
                new GenericExplorerState { Probability = actionProbability },
                true);
        }