/// <summary>
/// Runs an <c>EpsilonGreedyExplorer</c> configured with epsilon = 0 through
/// <c>MwtExplorer.ChooseAction</c> twice and checks that each call returns the
/// same action the policy itself picks for the context. Afterwards the recorder
/// is expected to hold exactly two interactions, the first of which carries the
/// test context's Id.
/// </summary>
public void EpsilonGreedy()
{
    // Arrange
    var actionCount = 10u;
    var explorationRate = 0f;
    var key = "ManagedTestId";

    var interactionLog = new TestRecorder<TestContext>();
    var defaultPolicy = new TestPolicy();
    var mwt = new MwtExplorer<TestContext>("mwt", interactionLog);
    var context = new TestContext { Id = 100 };

    var greedyExplorer = new EpsilonGreedyExplorer<TestContext>(defaultPolicy, explorationRate, actionCount);

    uint policyAction = defaultPolicy.ChooseAction(context);

    // Act + Assert: both consecutive decisions must match the policy's action.
    for (int attempt = 0; attempt < 2; attempt++)
    {
        uint picked = mwt.ChooseAction(greedyExplorer, key, context);
        Assert.AreEqual(policyAction, picked);
    }

    // Assert: one interaction recorded per decision, tied to the same context.
    var recorded = interactionLog.GetAllInteractions();
    Assert.AreEqual(2, recorded.Count);
    Assert.AreEqual(context.Id, recorded[0].Context.Id);
}
/// <summary>
/// Runs a <c>TauFirstExplorer</c> configured with tau = 0 through
/// <c>MwtExplorer.ChooseAction</c> once and checks that the returned action
/// equals the one the policy picks for the context, and that the recorder
/// holds no interactions afterwards.
/// </summary>
public void TauFirst()
{
    // Arrange
    var actionCount = 10u;
    var explorationRounds = 0u;
    var key = "ManagedTestId";

    var interactionLog = new TestRecorder<TestContext>();
    var defaultPolicy = new TestPolicy();
    var mwt = new MwtExplorer<TestContext>("mwt", interactionLog);
    var context = new TestContext { Id = 100 };

    var tauFirstExplorer = new TauFirstExplorer<TestContext>(defaultPolicy, explorationRounds, actionCount);

    uint policyAction = defaultPolicy.ChooseAction(context);

    // Act
    uint picked = mwt.ChooseAction(tauFirstExplorer, key, context);

    // Assert: the policy's action is returned and nothing was recorded.
    Assert.AreEqual(policyAction, picked);
    Assert.AreEqual(0, interactionLog.GetAllInteractions().Count);
}
/// <summary>
/// Shared check for an explorer/policy pair over a context type derived from
/// <c>TestContext</c>. Sets the context's Id to 100, then verifies that two
/// consecutive <c>MwtExplorer.ChooseAction</c> calls return the policy's action
/// and that two interactions are recorded (the first carrying the context's Id).
/// Finally calls <c>EnableExplore(false)</c> on the explorer and verifies the
/// policy's action is returned on each of 1000 further calls.
/// </summary>
/// <typeparam name="TContext">Context type; must derive from <c>TestContext</c>.</typeparam>
/// <param name="numActions">Not referenced in this method's body.</param>
/// <param name="testContext">Context passed to both the policy and the explorer; its Id is overwritten with 100.</param>
/// <param name="policy">Policy whose chosen action is the expected result.</param>
/// <param name="explorer">Explorer under test.</param>
private static void EpsilonGreedyWithContext<TContext>(uint numActions, TContext testContext, TestPolicy<TContext> policy, IExplorer<TContext> explorer)
    where TContext : TestContext
{
    const int InitialDecisions = 2;
    const int NoExploreDecisions = 1000;

    var key = "ManagedTestId";
    var interactionLog = new TestRecorder<TContext>();
    var mwt = new MwtExplorer<TContext>("mwt", interactionLog);

    testContext.Id = 100;

    uint policyAction = policy.ChooseAction(testContext);

    for (int attempt = 0; attempt < InitialDecisions; attempt++)
    {
        uint picked = mwt.ChooseAction(explorer, key, testContext);
        Assert.AreEqual(policyAction, picked);
    }

    var recorded = interactionLog.GetAllInteractions();
    Assert.AreEqual(2, recorded.Count);
    Assert.AreEqual(testContext.Id, recorded[0].Context.Id);

    // Verify that policy action is chosen all the time
    explorer.EnableExplore(false);
    for (int round = 0; round < NoExploreDecisions; round++)
    {
        uint picked = mwt.ChooseAction(explorer, key, testContext);
        Assert.AreEqual(policyAction, picked);
    }
}