/// <summary>
/// Runs the epsilon-greedy scenario over 10 actions with epsilon set to 0,
/// using a default <c>RegularTestContext</c>, by delegating to
/// <c>EpsilonGreedyWithContext</c>.
/// </summary>
public void EpsilonGreedy()
{
    // Scenario parameters.
    float explorationRate = 0f;
    int actionCount = 10;

    // Collaborators handed to the shared scenario helper.
    RegularTestContext context = new RegularTestContext();
    TestPolicy<RegularTestContext> defaultPolicy = new TestPolicy<RegularTestContext>();
    EpsilonGreedyExplorer greedyExplorer = new EpsilonGreedyExplorer(explorationRate);

    EpsilonGreedyWithContext(actionCount, context, defaultPolicy, greedyExplorer);
}
/// <summary>
/// Runs the generic-explorer scenario over 10 actions with a context whose
/// <c>Id</c> is 100, by delegating to <c>GenericWithContext</c>.
/// </summary>
public void Generic()
{
    int actionCount = 10;

    GenericExplorer genericExplorer = new GenericExplorer();
    var context = new RegularTestContext { Id = 100 };
    TestScorer<RegularTestContext> actionScorer =
        new TestScorer<RegularTestContext>(1, actionCount);

    GenericWithContext(actionCount, context, genericExplorer, actionScorer);
}
/// <summary>
/// Runs the tau-first scenario over 10 actions with tau set to 0 and a
/// context whose <c>Id</c> is 100, by delegating to <c>TauFirstWithContext</c>.
/// </summary>
public void TauFirst()
{
    // Scenario parameters.
    int initialExploreCount = 0;
    int actionCount = 10;

    // Collaborators handed to the shared scenario helper.
    TauFirstExplorer tauExplorer = new TauFirstExplorer(initialExploreCount);
    TestPolicy<RegularTestContext> defaultPolicy = new TestPolicy<RegularTestContext>();
    var context = new RegularTestContext { Id = 100 };

    TauFirstWithContext(actionCount, context, defaultPolicy, tauExplorer);
}
/// <summary>
/// Runs the softmax scenario over 10 actions with lambda = 0.5, building one
/// context per decision (ids 0..numDecisions-1) and delegating to
/// <c>SoftmaxWithContext</c>.
/// </summary>
public void Softmax()
{
    int actionCount = 10;
    int coverTarget = 100;
    float coverConstant = 5f;
    float temperature = 0.5f;

    SoftmaxExplorer softmaxExplorer = new SoftmaxExplorer(temperature);
    TestScorer<RegularTestContext> actionScorer =
        new TestScorer<RegularTestContext>(1, actionCount);

    // Number of decisions: n*ln(n) + ln(cover/n)*C*n, truncated to an unsigned integer.
    double baseTerm = actionCount * Math.Log(actionCount * 1.0);
    double coverTerm = Math.Log(coverTarget * 1.0 / actionCount) * coverConstant * actionCount;
    uint decisionCount = (uint)(baseTerm + coverTerm);

    // One context per decision, identified by its position in the array.
    RegularTestContext[] decisionContexts = new RegularTestContext[decisionCount];
    for (int index = 0; index < decisionContexts.Length; index++)
    {
        decisionContexts[index] = new RegularTestContext { Id = index };
    }

    SoftmaxWithContext(actionCount, softmaxExplorer, actionScorer, decisionContexts);
}
/// <summary>
/// Exercises the softmax explorer with a non-uniform scorer over 10 actions.
/// First checks that three recorded interactions carry the expected context
/// ids and a probability different from the uniform value 1/numActions.
/// Then disables exploration and checks that the highest-scoring action
/// (1-based) is chosen on every one of 1000 decisions.
/// </summary>
public void SoftmaxScores()
{
    int numActions = 10;
    float lambda = 0.5f;

    var recorder = new TestRecorder<RegularTestContext>();
    var scorer = new TestScorer<RegularTestContext>(1, numActions, uniform: false);
    var explorer = new SoftmaxExplorer(lambda);
    var mwtt = MwtExplorer.Create("mwt", numActions, recorder, explorer, scorer);

    // The first argument of ChooseAction is a random string per call
    // (presumably a unique event key; confirm against the MwtExplorer API).
    Random rand = new Random();
    for (int id = 100; id <= 102; id++)
    {
        mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext() { Id = id });
    }

    var interactions = recorder.GetAllInteractions();
    Assert.AreEqual(3, interactions.Count);
    for (int i = 0; i < interactions.Count; i++)
    {
        // Scores are not equal therefore probabilities should not be uniform.
        // The reference (uniform) value goes first, the observed value second.
        Assert.AreNotEqual(1.0f / numActions, interactions[i].Probability);
        Assert.AreEqual(100 + i, interactions[i].Context.Id);
    }

    // Verify that policy action is chosen all the time.
    RegularTestContext context = new RegularTestContext { Id = 100 };
    List<float> scores = scorer.MapContext(context).Value.ToList();

    // Arg-max over the scores. Seeding with negative infinity (rather than 0)
    // keeps the search correct even when no score is strictly positive.
    // Strict '<' keeps the first maximum on ties; action ids are 1-based.
    float maxScore = float.NegativeInfinity;
    int highestScoreAction = 0;
    for (int i = 0; i < scores.Count; i++)
    {
        if (maxScore < scores[i])
        {
            maxScore = scores[i];
            highestScoreAction = i + 1;
        }
    }

    explorer.EnableExplore(false);
    for (int i = 0; i < 1000; i++)
    {
        int chosenAction = mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext() { Id = i });
        Assert.AreEqual(highestScoreAction, chosenAction);
    }
}