示例#1
0
 /// <summary>
 /// Draws an action id from the supplied random source and wraps it in a recorded decision.
 /// </summary>
 /// <param name="random">Random source; queried exactly once via <c>UniformInt(1, numActions)</c>.</param>
 /// <param name="numActions">Upper bound passed to <c>UniformInt</c> (presumably inclusive; confirm against PRG).</param>
 /// <returns>
 /// A decision holding the drawn action, a <c>GenericExplorerState</c> whose
 /// <c>Probability</c> is 1, and <c>shouldRecord</c> set to true.
 /// </returns>
 public ExplorerDecision <int> Explore(PRG random, int numActions)
 {
     int drawnAction = random.UniformInt(1, numActions);

     // NOTE(review): the probability is reported as 1 rather than 1 / numActions; confirm this is intended.
     var state = new GenericExplorerState { Probability = 1f };

     return ExplorerDecision.Create(drawnAction, state, shouldRecord: true);
 }
示例#2
0
        /// <summary>
        /// Chooses one of the supplied policy actions ("bags") and reports the probability
        /// associated with the top action of the chosen one.
        /// </summary>
        /// <remarks>
        /// When <c>this.explore</c> is set, a bag index is drawn from <paramref name="random"/>
        /// and the reported probability is the fraction of bags whose top action equals the
        /// chosen bag's top action. Otherwise the first policy action is returned with
        /// probability 1 (an empty collection makes <c>First()</c> throw).
        /// </remarks>
        /// <param name="random">Random source used to pick the bag index.</param>
        /// <param name="policyActions">One candidate per bag; enumerated once when exploring.</param>
        /// <param name="numActions">Number of valid one-based action ids; also the size of the vote tally.</param>
        /// <returns>A recorded decision with the chosen policy action and its probability.</returns>
        /// <exception cref="ArgumentException">
        /// A bag's top action is not in the range 1..<paramref name="numActions"/>.
        /// </exception>
        public ExplorerDecision <TAction> MapContext(PRG random, IReadOnlyCollection <TAction> policyActions, int numActions)
        {
            TAction chosenDecision    = default(TAction);
            float   actionProbability = 0f;

            if (this.explore)
            {
                // Select bag
                int chosenBag = random.UniformInt(0, policyActions.Count - 1);

                // actionsSelected[k] counts the bags whose top action is k + 1.
                int[] actionsSelected = Enumerable.Repeat <int>(0, numActions).ToArray();

                int currentBag = 0;
                foreach (var policyAction in policyActions)
                {
                    var actionFromBag = this.GetTopAction(policyAction);

                    // Action ids are one-based: reject zero AND negative ids here, otherwise a
                    // negative id would slip through and fail below as an out-of-range index.
                    if (actionFromBag <= 0 || actionFromBag > numActions)
                    {
                        throw new ArgumentException("Action chosen by default policy is not within valid range.");
                    }

                    // The tally relies on action ids mapping onto 1..numActions.
                    actionsSelected[actionFromBag - 1]++; // action id is one-based

                    if (currentBag == chosenBag)
                    {
                        chosenDecision = policyAction;
                    }

                    currentBag++;
                }

                // Share of bags that voted for the same top action as the chosen bag.
                actionProbability = (float)actionsSelected[this.GetTopAction(chosenDecision) - 1] / policyActions.Count; // action id is one-based
            }
            else
            {
                chosenDecision    = policyActions.First();
                actionProbability = 1f;
            }

            GenericExplorerState explorerState = new GenericExplorerState
            {
                Probability = actionProbability
            };

            return(ExplorerDecision.Create(chosenDecision, explorerState, true));
        }
示例#3
0
        /// <summary>
        /// Epsilon-greedy choice between the policy's action and a randomly drawn action.
        /// </summary>
        /// <remarks>
        /// The effective epsilon is <c>this.defaultEpsilon</c> when <c>explore</c> is set and 0
        /// otherwise. If a draw from <c>UniformUnitInterval()</c> is below <c>1 - epsilon</c>
        /// the policy action is kept; otherwise an action id is drawn with
        /// <c>UniformInt(1, numActionsVariable)</c>. The reported probability is
        /// <c>1 - epsilon + epsilon / numActionsVariable</c> when the returned action equals the
        /// policy action and <c>epsilon / numActionsVariable</c> otherwise.
        /// </remarks>
        /// <param name="random">Random source for the explore/exploit draw and the action draw.</param>
        /// <param name="policyAction">One-based action id chosen by the default policy.</param>
        /// <param name="numActionsVariable">Number of available actions.</param>
        /// <returns>A recorded decision with the chosen action and an <c>EpsilonGreedyState</c>.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="policyAction"/> is not in the range 1..<paramref name="numActionsVariable"/>.
        /// </exception>
        public override ExplorerDecision <int> MapContext(PRG random, int policyAction, int numActionsVariable)
        {
            // Action ids are one-based: reject zero, negative ids, and ids past the action count.
            if (policyAction <= 0 || policyAction > numActionsVariable)
            {
                throw new ArgumentException("Action chosen by default policy is not within valid range.");
            }

            float epsilon           = explore ? this.defaultEpsilon : 0f;
            float baseProbability   = epsilon / numActionsVariable; // uniform probability
            float policyProbability = 1f - epsilon + baseProbability;

            int   chosenAction;
            float actionProbability;
            bool  isExplore;

            if (random.UniformUnitInterval() < 1f - epsilon)
            {
                // Exploit: keep the policy's action.
                chosenAction      = policyAction;
                actionProbability = policyProbability;
                isExplore         = false;
            }
            else
            {
                // Explore: draw a one-based action id.
                chosenAction = random.UniformInt(1, numActionsVariable);

                // A draw that lands on the policy's action gets the policy's probability;
                // any other action gets only the uniform share.
                actionProbability = chosenAction == policyAction ? policyProbability : baseProbability;
                isExplore         = true;
            }

            EpsilonGreedyState explorerState = new EpsilonGreedyState
            {
                Epsilon     = epsilon,
                IsExplore   = isExplore,
                Probability = actionProbability
            };

            return(ExplorerDecision.Create(chosenAction, explorerState, true));
        }
        /// <summary>
        /// Builds the sequence 1..<paramref name="numActionsVariable"/> and randomly swaps its
        /// leading positions, returning the result as a recorded decision.
        /// </summary>
        /// <remarks>
        /// Position <c>i</c> is swapped with an index drawn via <c>UniformInt(i, lastIndex)</c>.
        /// The pass stops before the final position, or once <c>maxPermutations</c> positions
        /// have been processed, whichever comes first.
        /// </remarks>
        /// <param name="random">Random source used to draw swap indices.</param>
        /// <param name="numActionsVariable">Length of the produced ranking.</param>
        /// <returns>
        /// A decision holding the ranking and a <c>GenericExplorerState</c> whose
        /// <c>Probability</c> is 1, with recording enabled.
        /// </returns>
        public ExplorerDecision <int[]> Explore(PRG random, int numActionsVariable)
        {
            int[] order     = Enumerable.Range(1, numActionsVariable).ToArray();
            int   lastIndex = order.Length - 1;

            int position = 0;
            while (position < lastIndex && position < maxPermutations)
            {
                int pick = random.UniformInt(position, lastIndex);

                // Exchange the current slot with the drawn slot.
                int displaced = order[position];
                order[position] = order[pick];
                order[pick]     = displaced;

                position++;
            }

            var state = new GenericExplorerState { Probability = 1f };

            return ExplorerDecision.Create(order, state, true);
        }
        /// <summary>
        /// Tau-first choice: while the remaining budget <c>this.tau</c> is positive and
        /// <c>this.explore</c> is set, returns a randomly drawn action and decrements the budget;
        /// afterwards returns the policy's action.
        /// </summary>
        /// <remarks>
        /// The budget is read and decremented under <c>this.lockObject</c>. The <c>Tau</c> value
        /// stored in the returned state is the budget observed before this call's decrement.
        /// </remarks>
        /// <param name="random">Random source used to draw the exploratory action.</param>
        /// <param name="policyAction">One-based action id chosen by the default policy.</param>
        /// <param name="numActionsVariable">Number of available actions.</param>
        /// <returns>
        /// A recorded decision with the chosen action and a <c>TauFirstState</c>; the probability
        /// is <c>1 / numActionsVariable</c> when exploring and 1 otherwise.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="policyAction"/> is not in the range 1..<paramref name="numActionsVariable"/>.
        /// </exception>
        public override ExplorerDecision <int> MapContext(PRG random, int policyAction, int numActionsVariable)
        {
            // Action ids are one-based: reject zero, negative ids, and ids past the action count.
            if (policyAction <= 0 || policyAction > numActionsVariable)
            {
                throw new ArgumentException("Action chosen by default policy is not within valid range.");
            }

            int   chosenAction;
            float actionProbability;
            bool  isExplore;
            int   tau;

            lock (this.lockObject)
            {
                // Snapshot the budget under the same lock that guards its decrement, so the
                // value logged in the state always matches the branch taken below.
                tau = this.tau;

                if (tau > 0 && this.explore)
                {
                    this.tau--;

                    chosenAction      = random.UniformInt(1, numActionsVariable);
                    actionProbability = 1f / numActionsVariable;
                    isExplore         = true;
                }
                else
                {
                    chosenAction      = policyAction;
                    actionProbability = 1f;
                    isExplore         = false;
                }
            }

            TauFirstState explorerState = new TauFirstState
            {
                IsExplore   = isExplore,
                Probability = actionProbability,
                Tau         = tau
            };

            return(ExplorerDecision.Create(chosenAction, explorerState, true));
        }
示例#6
0
        /// <summary>
        /// Epsilon-greedy choice over a whole ranking: with probability epsilon returns a
        /// randomly shuffled ranking, otherwise returns the policy's ranking.
        /// </summary>
        /// <remarks>
        /// The effective epsilon is <c>this.defaultEpsilon</c> when <c>this.explore</c> is set
        /// and 0 otherwise. The exploratory ranking is the sequence 1..n (n =
        /// <c>policyAction.Length</c>) in which each position <c>i</c> below the last is swapped
        /// with an index drawn via <c>UniformInt(i, n - 1)</c>. When not exploring, the returned
        /// array is the same instance as <paramref name="policyAction"/>.
        /// </remarks>
        /// <param name="random">Random source for the explore/exploit draw and the shuffle.</param>
        /// <param name="policyAction">Ranking chosen by the default policy; validated by <c>MultiActionHelper.ValidateActionList</c>.</param>
        /// <param name="numActions">Not read by this method.</param>
        /// <returns>A recorded decision with the chosen ranking and an <c>EpsilonGreedySlateState</c>.</returns>
        public ExplorerDecision <int[]> MapContext(PRG random, int[] policyAction, int numActions)
        {
            MultiActionHelper.ValidateActionList(policyAction);

            float epsilon = this.explore ? this.defaultEpsilon : 0f;

            int[] chosenAction;
            bool  isExplore;

            if (random.UniformUnitInterval() < epsilon)
            {
                // Start from 1 ... n
                chosenAction = Enumerable.Range(1, policyAction.Length).ToArray();

                // Visit positions 0 ... n - 2; the last position needs no swap.
                for (int i = 0; i < policyAction.Length - 1; i++)
                {
                    int swapIndex = random.UniformInt(i, policyAction.Length - 1);

                    int temp = chosenAction[swapIndex];
                    chosenAction[swapIndex] = chosenAction[i];
                    chosenAction[i]         = temp;
                }

                isExplore = true;
            }
            else
            {
                chosenAction = policyAction;
                isExplore    = false;
            }

            EpsilonGreedySlateState explorerState = new EpsilonGreedySlateState
            {
                // Record the epsilon that actually governed this decision (0 when exploration
                // is disabled), not the configured default.
                Epsilon   = epsilon,
                IsExplore = isExplore,
                Ranking   = policyAction
            };

            return(ExplorerDecision.Create(chosenAction, explorerState, true));
        }