/// <summary>
/// Builds an epsilon-greedy explorer over <c>TestVarContext</c> with ten actions
/// and an epsilon of zero, then passes the pieces to
/// <c>EpsilonGreedyWithContext</c>.
/// </summary>
public void EpsilonGreedyFixedActionUsingVariableActionInterface()
{
    uint actionCount = 10;
    float explorationRate = 0f;

    // Construction order is kept as policy -> context -> explorer.
    TestPolicy<TestVarContext> defaultPolicy = new TestPolicy<TestVarContext>();
    TestVarContext context = new TestVarContext(actionCount);
    EpsilonGreedyExplorer<TestVarContext> greedyExplorer =
        new EpsilonGreedyExplorer<TestVarContext>(defaultPolicy, explorationRate);

    EpsilonGreedyWithContext(actionCount, context, defaultPolicy, greedyExplorer);
}
/// <summary>
/// Builds a generic explorer backed by a <c>TestScorer</c> over ten actions and
/// passes it, together with a context whose <c>Id</c> is 100, to
/// <c>GenericWithContext</c>.
/// </summary>
public void GenericFixedActionUsingVariableActionInterface()
{
    uint actionCount = 10;

    // Scorer is created before the context, as in the original sequence.
    TestScorer<TestVarContext> actionScorer = new TestScorer<TestVarContext>(actionCount);
    TestVarContext context = new TestVarContext(actionCount)
    {
        Id = 100
    };
    GenericExplorer<TestVarContext> genericExplorer =
        new GenericExplorer<TestVarContext>(actionScorer);

    GenericWithContext(actionCount, context, genericExplorer);
}
/// <summary>
/// Builds a tau-first explorer with tau set to zero over ten actions and passes
/// it, together with a context whose <c>Id</c> is 100 and the default policy,
/// to <c>TauFirstWithContext</c>.
/// </summary>
public void TauFirstFixedActionUsingVariableActionInterface()
{
    uint actionCount = 10;
    uint explorationRounds = 0;

    // Context first, then policy, then explorer: same order as before.
    TestVarContext context = new TestVarContext(actionCount)
    {
        Id = 100
    };
    TestPolicy<TestVarContext> defaultPolicy = new TestPolicy<TestVarContext>();
    TauFirstExplorer<TestVarContext> tauFirstExplorer =
        new TauFirstExplorer<TestVarContext>(defaultPolicy, explorationRounds);

    TauFirstWithContext(actionCount, context, defaultPolicy, tauFirstExplorer);
}
/// <summary>
/// Builds a softmax explorer (lambda = 0.5) backed by a <c>TestScorer</c> over
/// ten actions, creates one <c>TestVarContext</c> per decision with sequential
/// <c>Id</c> values, and passes everything to <c>SoftmaxWithContext</c>.
/// </summary>
public void SoftmaxFixedActionUsingVariableActionInterface()
{
    uint actionCount = 10;
    float lambda = 0.5f;
    uint coverTarget = 100;
    float scale = 5;

    TestScorer<TestVarContext> actionScorer = new TestScorer<TestVarContext>(actionCount);
    SoftmaxExplorer<TestVarContext> softmaxExplorer =
        new SoftmaxExplorer<TestVarContext>(actionScorer, lambda);

    // Decision count = n*ln(n) + ln(cover/n)*C*n, truncated to an unsigned integer.
    double baseTerm = actionCount * Math.Log(actionCount * 1.0);
    double coverTerm = Math.Log(coverTarget * 1.0 / actionCount) * scale * actionCount;
    uint decisionCount = (uint)(baseTerm + coverTerm);

    TestVarContext[] decisionContexts = new TestVarContext[decisionCount];
    for (int index = 0; index < decisionContexts.Length; index++)
    {
        // Each context carries its own position as Id.
        decisionContexts[index] = new TestVarContext(actionCount)
        {
            Id = index
        };
    }

    SoftmaxWithContext(actionCount, softmaxExplorer, decisionContexts);
}
/// <summary>
/// Builds a bootstrap explorer from two <c>TestPolicy</c> instances (constructed
/// with arguments 0 and 2) over ten actions and passes it, together with two
/// contexts whose <c>Id</c> values are 99 and 100, to
/// <c>BootstrapWithContext</c>.
/// </summary>
public void BootstrapFixedActionUsingVariableActionInterface()
{
    uint actionCount = 10;
    uint bagCount = 2;

    TestVarContext firstContext = new TestVarContext(actionCount)
    {
        Id = 99
    };
    TestVarContext secondContext = new TestVarContext(actionCount)
    {
        Id = 100
    };

    TestPolicy<TestVarContext>[] bagPolicies = new TestPolicy<TestVarContext>[bagCount];
    for (int bag = 0; bag < bagPolicies.Length; bag++)
    {
        // Policy for bag k is constructed with argument 2*k.
        bagPolicies[bag] = new TestPolicy<TestVarContext>(bag * 2);
    }

    BootstrapExplorer<TestVarContext> bootstrapExplorer =
        new BootstrapExplorer<TestVarContext>(bagPolicies);

    BootstrapWithContext(actionCount, firstContext, secondContext, bagPolicies, bootstrapExplorer);
}