/// <summary>
/// Drives <c>EpsilonGreedyWithContext</c> with a <c>VariableActionTestContext</c>
/// constructed for a fixed count of 10 actions and an epsilon of 0.
/// </summary>
public void EpsilonGreedyFixedActionUsingVariableActionInterface()
{
    int actionCount = 10;
    float explorationRate = 0f;

    var defaultPolicy = new TestPolicy<VariableActionTestContext>();
    var context = new VariableActionTestContext(actionCount);
    var greedyExplorer = new EpsilonGreedyExplorer(explorationRate);

    // The shared helper performs the actual exploration and checks.
    EpsilonGreedyWithContext(actionCount, context, defaultPolicy, greedyExplorer);
}
/// <summary>
/// Drives <c>GenericWithContext</c> with a <c>VariableActionTestContext</c>
/// (Id 100) constructed for a fixed count of 10 actions, a <c>TestScorer</c>
/// built from (1, action count), and a default <c>GenericExplorer</c>.
/// </summary>
public void GenericFixedActionUsingVariableActionInterface()
{
    int actionCount = 10;

    var actionScorer = new TestScorer<VariableActionTestContext>(1, actionCount);
    var context = new VariableActionTestContext(actionCount)
    {
        Id = 100
    };
    var genericExplorer = new GenericExplorer();

    // The shared helper performs the actual exploration and checks.
    GenericWithContext(actionCount, context, genericExplorer, actionScorer);
}
/// <summary>
/// Drives <c>TauFirstWithContext</c> with a <c>VariableActionTestContext</c>
/// (Id 100) constructed for a fixed count of 10 actions and a
/// <c>TauFirstExplorer</c> created with tau = 0.
/// </summary>
public void TauFirstFixedActionUsingVariableActionInterface()
{
    int actionCount = 10;
    int initialTau = 0;

    var context = new VariableActionTestContext(actionCount)
    {
        Id = 100
    };
    var defaultPolicy = new TestPolicy<VariableActionTestContext>();
    var tauFirstExplorer = new TauFirstExplorer(initialTau);

    // The shared helper performs the actual exploration and checks.
    TauFirstWithContext(actionCount, context, defaultPolicy, tauFirstExplorer);
}
/// <summary>
/// Drives <c>SoftmaxWithContext</c> with an array of
/// <c>VariableActionTestContext</c> instances, each constructed for a fixed
/// count of 10 actions and given its array index as Id.
/// </summary>
/// <remarks>
/// The number of contexts is
/// <c>(int)(n * ln(n) + ln(cover / n) * C * n)</c> with n = 10, cover = 100, C = 5.
/// </remarks>
public void SoftmaxFixedActionUsingVariableActionInterface()
{
    int actionCount = 10;
    float softmaxLambda = 0.5f;
    int coverTarget = 100;
    float scale = 5;

    var actionScorer = new TestScorer<VariableActionTestContext>(1, actionCount);
    var softmaxExplorer = new SoftmaxExplorer(softmaxLambda);

    // Same arithmetic as a single expression, split into its two summands;
    // each factor is promoted to double exactly as before.
    double baseTerm = actionCount * Math.Log(actionCount * 1.0);
    double coverTerm = Math.Log(coverTarget * 1.0 / actionCount) * scale * actionCount;
    int decisionCount = (int)(baseTerm + coverTerm);

    var contextBatch = new VariableActionTestContext[decisionCount];
    for (int id = 0; id < decisionCount; id++)
    {
        contextBatch[id] = new VariableActionTestContext(actionCount)
        {
            Id = id
        };
    }

    // The shared helper performs the actual exploration and checks.
    SoftmaxWithContext(actionCount, softmaxExplorer, actionScorer, contextBatch);
}