/// <summary>
/// Runs the wrapped single-action explorer on the first entry of the ranking
/// produced by the default policy and returns the (same) ranking array after
/// the explorer's chosen action has been placed into it.
/// </summary>
/// <param name="prg">Random generator forwarded to the wrapped explorer.</param>
/// <param name="ranking">
/// Actions chosen by the default policy; must contain at least one entry.
/// The array is modified in place and is also the value of the returned decision.
/// </param>
/// <param name="numActions">
/// Not read by this method; <c>ranking.Length</c> is passed to the wrapped explorer instead.
/// </param>
/// <returns>
/// A decision carrying <paramref name="ranking"/> together with the explorer state
/// and record flag reported by the wrapped explorer.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="ranking"/> is null or empty.
/// </exception>
public override ExplorerDecision<int[]> MapContext(PRG prg, int[] ranking, int numActions)
{
    bool hasActions = ranking != null && ranking.Length >= 1;
    if (!hasActions)
    {
        throw new ArgumentException("Actions chosen by default policy must not be empty.");
    }

    // Only the first entry of the ranking is handed to the wrapped explorer.
    int topAction = ranking[0];
    var topSlotDecision = this.explorer.MapContext(prg, topAction, ranking.Length);

    // Presumably moves the explored action to the front of the list - confirm
    // against MultiActionHelper.PutActionToList.
    MultiActionHelper.PutActionToList(topSlotDecision.Value, ranking);

    return ExplorerDecision.Create(
        ranking,
        topSlotDecision.ExplorerState,
        topSlotDecision.ShouldRecord);
}
/// <summary>
/// Builds a multi-action decision by sampling actions without replacement
/// according to <paramref name="weights"/>.
/// </summary>
/// <param name="random">Random generator used by the wrapped explorer and by the sampler.</param>
/// <param name="weights">Per-action weights; <c>weights.Length</c> actions are sampled.</param>
/// <param name="numActions">Forwarded to the wrapped explorer only.</param>
/// <returns>
/// A decision holding the sampled actions, a <c>GenericExplorerState</c> whose
/// <c>Probability</c> is the value reported by the sampler, and a record flag
/// that is always <c>true</c>.
/// </returns>
public override ExplorerDecision<int[]> MapContext(PRG random, float[] weights, int numActions)
{
    // The wrapped explorer's result is not used here, but the call is retained:
    // it receives the same random generator and may advance it or validate the
    // arguments, so dropping it could alter the sampled sequence.
    this.explorer.MapContext(random, weights, numActions);

    float sampledProbability = 0f;
    int[] sampledActions = MultiActionHelper.SampleWithoutReplacement(
        weights,
        weights.Length,
        random,
        ref sampledProbability);

    // action id is one-based
    var samplerState = new GenericExplorerState { Probability = sampledProbability };
    return ExplorerDecision.Create(sampledActions, samplerState, true);
}
/// <summary>
/// Epsilon-greedy choice over a whole slate: with probability epsilon a shuffled
/// permutation of the ids 1..n is returned, otherwise the policy's own ranking
/// is returned unchanged.
/// </summary>
/// <param name="random">Random generator used for the explore/exploit draw and the shuffle.</param>
/// <param name="policyAction">
/// Ranking proposed by the policy; checked by <c>MultiActionHelper.ValidateActionList</c>.
/// </param>
/// <param name="numActions">Not read by this method.</param>
/// <returns>
/// A decision holding the chosen ranking, an <c>EpsilonGreedySlateState</c>
/// describing the draw, and a record flag that is always <c>true</c>.
/// </returns>
public ExplorerDecision<int[]> MapContext(PRG random, int[] policyAction, int numActions)
{
    MultiActionHelper.ValidateActionList(policyAction);

    // Exploration disabled => epsilon of zero, so the draw below can never explore.
    float epsilon = this.explore ? this.defaultEpsilon : 0f;
    bool isExplore = random.UniformUnitInterval() < epsilon;

    // Exploit by default: hand back the policy's array itself (no copy).
    int[] chosenAction = policyAction;

    if (isExplore)
    {
        int slotCount = policyAction.Length;
        int lastIndex = slotCount - 1;

        // 1 ... n
        chosenAction = Enumerable.Range(1, slotCount).ToArray();

        // Shuffle positions 0 ... n - 2 by swapping each with a drawn index.
        // Assumes UniformInt(low, high) can return any index in [slot, lastIndex] - confirm.
        for (int slot = 0; slot < lastIndex; slot++)
        {
            int pick = random.UniformInt(slot, lastIndex);
            int displaced = chosenAction[slot];
            chosenAction[slot] = chosenAction[pick];
            chosenAction[pick] = displaced;
        }
    }

    // NOTE(review): the recorded Epsilon is always defaultEpsilon, even when
    // exploration is disabled and the effective epsilon was 0 - confirm intended.
    var slateState = new EpsilonGreedySlateState
    {
        Epsilon = this.defaultEpsilon,
        IsExplore = isExplore,
        Ranking = policyAction
    };

    return ExplorerDecision.Create(chosenAction, slateState, true);
}
/// <summary>
/// Produces a multi-action decision from per-action scores. When exploration is
/// enabled the actions are sampled without replacement from the scores; otherwise
/// the identity ranking 1..n is returned with the wrapped explorer's chosen action
/// swapped into the first slot.
/// </summary>
/// <param name="random">Random generator used by the wrapped explorer and by the sampler.</param>
/// <param name="scores">
/// Scores returned by the default policy; must contain at least one entry.
/// One action is produced per score.
/// </param>
/// <param name="numActions">Forwarded to the wrapped explorer only.</param>
/// <returns>
/// A decision holding the chosen actions together with the explorer state and
/// record flag reported by the wrapped explorer.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="scores"/> is null or empty.
/// </exception>
public override ExplorerDecision<int[]> MapContext(PRG random, float[] scores, int numActions)
{
    if (scores == null || scores.Length < 1)
    {
        throw new ArgumentException("Scores returned by default policy must not be empty.");
    }

    var decision = this.explorer.MapContext(random, scores, numActions);

    int numActionsVariable = scores.Length;
    int[] chosenActions;

    // Note: there might be a way using out generic parameters and explicit interface implementation to avoid the cast
    float actionProbability = ((GenericExplorerState)decision.ExplorerState).Probability;

    if (this.explore)
    {
        // NOTE(review): the probability updated through the ref argument is not
        // written back to the returned explorer state - confirm this is intended.
        chosenActions = MultiActionHelper.SampleWithoutReplacement(scores, numActionsVariable, random, ref actionProbability);
    }
    else
    {
        // avoid linq to optimize perf
        // Fill zero-based slots 0..n-1 with the one-based action ids 1..n.
        // (The previous loop wrote to indices 1..n, which always overran the
        // array and left slot 0 holding the invalid id 0.)
        chosenActions = new int[numActionsVariable];
        for (int i = 0; i < numActionsVariable; i++)
        {
            chosenActions[i] = i + 1;
        }

        // swap max-score action with the first one
        // decision.Value is treated as a one-based action id (matching the ids
        // stored above), so its slot in the array is Value - 1.
        int chosenIndex = decision.Value - 1;
        int firstAction = chosenActions[0];
        chosenActions[0] = chosenActions[chosenIndex];
        chosenActions[chosenIndex] = firstAction;
    }

    return ExplorerDecision.Create(chosenActions, decision.ExplorerState, decision.ShouldRecord);
}