/// <summary>
/// Picks a list of actions by passing the supplied weights to
/// <c>MultiActionHelper.SampleWithoutReplacement</c> and wraps the result in an explorer decision.
/// </summary>
/// <param name="random">Random generator handed to both the wrapped explorer and the sampler.</param>
/// <param name="weights">Per-action weights; the array length is used as the number of actions to sample over.</param>
/// <param name="numActions">Action count forwarded to the wrapped explorer.</param>
/// <returns>
/// A decision holding the sampled actions, a new <c>GenericExplorerState</c> carrying the probability
/// reported by the sampler, and a record flag that is always <c>true</c>.
/// </returns>
public override ExplorerDecision<int[]> MapContext(PRG random, float[] weights, int numActions)
{
    // The wrapped explorer's result is not consumed in this method. The call is kept because it
    // may have side effects (presumably it advances the random generator) — TODO confirm.
    this.explorer.MapContext(random, weights, numActions);

    // The sampler reports the probability through this ref argument.
    float probability = 0f;

    // action id is one-based
    int[] sampledActions = MultiActionHelper.SampleWithoutReplacement(
        weights,
        weights.Length,
        random,
        ref probability);

    var state = new GenericExplorerState { Probability = probability };

    return ExplorerDecision.Create(sampledActions, state, true);
}
/// <summary>
/// Produces a list of actions from the default policy's scores. When exploration is enabled the
/// list comes from <c>MultiActionHelper.SampleWithoutReplacement</c>; otherwise every action id is
/// listed in ascending order with the wrapped explorer's chosen action swapped into first position.
/// </summary>
/// <param name="random">Random generator handed to both the wrapped explorer and the sampler.</param>
/// <param name="scores">Per-action scores; must be non-null and non-empty. Its length is the number of actions.</param>
/// <param name="numActions">Action count forwarded to the wrapped explorer.</param>
/// <returns>
/// A decision holding the chosen actions together with the wrapped explorer's state and
/// should-record flag.
/// </returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="scores"/> is null or empty.</exception>
public override ExplorerDecision<int[]> MapContext(PRG random, float[] scores, int numActions)
{
    if (scores == null || scores.Length < 1)
    {
        throw new ArgumentException("Scores returned by default policy must not be empty.");
    }

    var decision = this.explorer.MapContext(random, scores, numActions);

    int numActionsVariable = scores.Length;
    int[] chosenActions;

    // Note: there might be a way using out generic parameters and explicit interface implementation to avoid the cast
    float actionProbability = ((GenericExplorerState)decision.ExplorerState).Probability;

    if (this.explore)
    {
        // NOTE(review): the sampler may update actionProbability through the ref argument, but the
        // value is not written back to the state returned below — confirm this is intended.
        chosenActions = MultiActionHelper.SampleWithoutReplacement(scores, numActionsVariable, random, ref actionProbability);
    }
    else
    {
        // avoid linq to optimize perf
        // Fill slots 0..n-1 with the one-based action ids 1..n. The previous loop wrote to
        // indices 1..n, which left slot 0 unset and always ran past the end of the array.
        chosenActions = new int[numActionsVariable];
        for (int i = 0; i < numActionsVariable; i++)
        {
            chosenActions[i] = i + 1;
        }

        // swap max-score action with the first one
        // decision.Value is treated as a one-based action id (matching the ids stored above), so
        // its slot is Value - 1 — TODO confirm against the wrapped explorer's contract.
        var chosenIndex = decision.Value - 1;
        int firstAction = chosenActions[0];
        chosenActions[0] = chosenActions[chosenIndex];
        chosenActions[chosenIndex] = firstAction;
    }

    return ExplorerDecision.Create(chosenActions, decision.ExplorerState, decision.ShouldRecord);
}