/// <summary>
/// Runs the supplied explorer through an <c>MwtExplorer</c> for a single context and asserts that
/// every decision equals the action the policy itself picks for that context.
/// </summary>
/// <param name="numActions">Not read by this helper.</param>
/// <param name="testContext">Context used for every decision; its <c>Id</c> is overwritten with 100.</param>
/// <param name="policy">Policy whose choice for <paramref name="testContext"/> is the expected action.</param>
/// <param name="explorer">Explorer under test; exploration is switched off on it part-way through.</param>
private static void EpsilonGreedyWithContext<TContext>(uint numActions, TContext testContext, TestPolicy<TContext> policy, IExplorer<TContext> explorer)
    where TContext : TestContext
{
    const string decisionKey = "ManagedTestId";
    const int repeatCount = 1000;

    var log = new TestRecorder<TContext>();
    var mwt = new MwtExplorer<TContext>("mwt", log);

    testContext.Id = 100;
    uint policyAction = policy.ChooseAction(testContext);

    // Two back-to-back decisions under the same key must both match the policy.
    uint firstPick = mwt.ChooseAction(explorer, decisionKey, testContext);
    Assert.AreEqual(policyAction, firstPick);
    uint secondPick = mwt.ChooseAction(explorer, decisionKey, testContext);
    Assert.AreEqual(policyAction, secondPick);

    // Both decisions are expected in the recorder, the first carrying the context id.
    var recorded = log.GetAllInteractions();
    Assert.AreEqual(2, recorded.Count);
    Assert.AreEqual(testContext.Id, recorded[0].Context.Id);

    // Verify that policy action is chosen all the time once exploration is disabled.
    explorer.EnableExplore(false);
    int remaining = repeatCount;
    while (remaining-- > 0)
    {
        Assert.AreEqual(policyAction, mwt.ChooseAction(explorer, decisionKey, testContext));
    }
}
/// <summary>
/// Makes one decision with a <c>TauFirstExplorer</c> built with tau = 0 and asserts that the
/// chosen action equals the policy's action and that the recorder holds no interactions.
/// </summary>
public void TauFirst()
{
    const uint actionCount = 10;
    const uint tauValue = 0;
    const string decisionKey = "ManagedTestId";

    var log = new TestRecorder<TestContext>();
    var defaultPolicy = new TestPolicy();
    var mwt = new MwtExplorer<TestContext>("mwt", log);

    var ctx = new TestContext();
    ctx.Id = 100;

    var tauFirst = new TauFirstExplorer<TestContext>(defaultPolicy, tauValue, actionCount);

    uint policyAction = defaultPolicy.ChooseAction(ctx);
    uint picked = mwt.ChooseAction(tauFirst, decisionKey, ctx);
    Assert.AreEqual(policyAction, picked);

    // With tau = 0 this test expects nothing to have been recorded.
    var recorded = log.GetAllInteractions();
    Assert.AreEqual(0, recorded.Count);
}
/// <summary>
/// Makes two decisions with an <c>EpsilonGreedyExplorer</c> built with epsilon = 0 and asserts that
/// both equal the policy's action and that both interactions were recorded.
/// </summary>
public void EpsilonGreedy()
{
    const uint actionCount = 10;
    const float exploreRate = 0f;
    const string decisionKey = "ManagedTestId";

    var log = new TestRecorder<TestContext>();
    var defaultPolicy = new TestPolicy();
    var mwt = new MwtExplorer<TestContext>("mwt", log);
    var ctx = new TestContext { Id = 100 };
    var greedy = new EpsilonGreedyExplorer<TestContext>(defaultPolicy, exploreRate, actionCount);

    uint policyAction = defaultPolicy.ChooseAction(ctx);

    // Same key, same context, twice in a row: both picks must match the policy.
    uint firstPick = mwt.ChooseAction(greedy, decisionKey, ctx);
    Assert.AreEqual(policyAction, firstPick);
    uint secondPick = mwt.ChooseAction(greedy, decisionKey, ctx);
    Assert.AreEqual(policyAction, secondPick);

    var recorded = log.GetAllInteractions();
    Assert.AreEqual(2, recorded.Count);
    Assert.AreEqual(ctx.Id, recorded[0].Context.Id);
}
/// <summary>
/// Makes one decision per supplied context, each under a random key, and asserts that every
/// action was chosen at least once and that the recorder holds the interactions in order.
/// </summary>
/// <param name="numActions">Number of actions; sizes the per-action hit counters.</param>
/// <param name="explorer">Explorer under test.</param>
/// <param name="contexts">
/// Contexts to decide on; the final check requires the context at index <c>i</c> to have <c>Id == i</c>.
/// </param>
private static void SoftmaxWithContext<TContext>(uint numActions, IExplorer<TContext> explorer, TContext[] contexts)
    where TContext : TestContext
{
    var log = new TestRecorder<TContext>();
    var mwt = new MwtExplorer<TContext>("mwt", log);
    var hitsPerAction = new uint[numActions];
    var keySource = new Random();

    foreach (TContext ctx in contexts)
    {
        string key = keySource.NextDouble().ToString();
        uint picked = mwt.ChooseAction(explorer, key, ctx);
        hitsPerAction[picked - 1] += 1; // action id is one-based
    }

    // Every action must have been selected at least once.
    foreach (uint hits in hitsPerAction)
    {
        Assert.IsTrue(hits > 0);
    }

    var recorded = log.GetAllInteractions();
    Assert.AreEqual(contexts.Length, recorded.Count);

    int position = 0;
    while (position < contexts.Length)
    {
        Assert.AreEqual(position, recorded[position].Context.Id);
        position++;
    }
}
/// <summary>
/// Runs the shared <c>EndToEnd</c> scenario with a <c>GenericExplorer</c> backed by a <c>TestScorer</c>.
/// </summary>
public void EndToEndGeneric()
{
    const uint actionCount = 10;

    var log = new TestRecorder<SimpleContext>();
    var scoring = new TestScorer<SimpleContext>(actionCount);
    var mwt = new MwtExplorer<SimpleContext>("mwt", log);
    var generic = new GenericExplorer<SimpleContext>(scoring, actionCount);

    EndToEnd(mwt, generic, log);
}
/// <summary>
/// Runs the shared <c>EndToEnd</c> scenario with a <c>SoftmaxExplorer</c> (lambda = 0.5) backed by a <c>TestScorer</c>.
/// </summary>
public void EndToEndSoftmax()
{
    const uint actionCount = 10;
    const float softmaxLambda = 0.5f;

    var log = new TestRecorder<SimpleContext>();
    var scoring = new TestScorer<SimpleContext>(actionCount);
    var mwt = new MwtExplorer<SimpleContext>("mwt", log);
    var softmax = new SoftmaxExplorer<SimpleContext>(scoring, softmaxLambda, actionCount);

    EndToEnd(mwt, softmax, log);
}
/// <summary>
/// Runs the shared <c>EndToEnd</c> scenario with a <c>TauFirstExplorer</c> (tau = 5) backed by a <c>TestSimplePolicy</c>.
/// </summary>
public void EndToEndTauFirst()
{
    const uint actionCount = 10;
    const uint tauValue = 5;

    var log = new TestRecorder<SimpleContext>();
    var simplePolicy = new TestSimplePolicy();
    var mwt = new MwtExplorer<SimpleContext>("mwt", log);
    var tauFirst = new TauFirstExplorer<SimpleContext>(simplePolicy, tauValue, actionCount);

    EndToEnd(mwt, tauFirst, log);
}
/// <summary>
/// Runs the shared <c>EndToEnd</c> scenario with an <c>EpsilonGreedyExplorer</c> (epsilon = 0.5) backed by a <c>TestSimplePolicy</c>.
/// </summary>
public void EndToEndEpsilonGreedy()
{
    const uint actionCount = 10;
    const float exploreRate = 0.5f;

    var log = new TestRecorder<SimpleContext>();
    var mwt = new MwtExplorer<SimpleContext>("mwt", log);
    var simplePolicy = new TestSimplePolicy();
    var greedy = new EpsilonGreedyExplorer<SimpleContext>(simplePolicy, exploreRate, actionCount);

    EndToEnd(mwt, greedy, log);
}
/// <summary>
/// Makes a single decision for the supplied context and asserts that exactly one interaction
/// was recorded and that it carries the context's <c>Id</c>.
/// </summary>
/// <param name="numActions">Not read by this helper.</param>
/// <param name="testContext">Context passed to the decision.</param>
/// <param name="explorer">Explorer under test.</param>
private static void GenericWithContext<TContext>(uint numActions, TContext testContext, IExplorer<TContext> explorer)
    where TContext : TestContext
{
    var log = new TestRecorder<TContext>();
    var mwt = new MwtExplorer<TContext>("mwt", log);

    // Only the recorded interaction is inspected; the returned action is not checked.
    mwt.ChooseAction(explorer, "ManagedTestId", testContext);

    var recorded = log.GetAllInteractions();
    Assert.AreEqual(1, recorded.Count);
    Assert.AreEqual(testContext.Id, recorded[0].Context.Id);
}
/// <summary>
/// Runs the shared <c>EndToEnd</c> scenario with a <c>BaggingExplorer</c> built over two <c>TestSimplePolicy</c> instances.
/// </summary>
public void EndToEndBagging()
{
    const uint actionCount = 10;
    const uint bagCount = 2;

    var log = new TestRecorder<SimpleContext>();

    // One independent policy instance per bag.
    var bagPolicies = new TestSimplePolicy[bagCount];
    for (int slot = 0; slot < bagPolicies.Length; slot++)
    {
        bagPolicies[slot] = new TestSimplePolicy();
    }

    var mwt = new MwtExplorer<SimpleContext>("mwt", log);
    var bagging = new BaggingExplorer<SimpleContext>(bagPolicies, bagCount, actionCount);

    EndToEnd(mwt, bagging, log);
}
/// <summary>
/// Makes a computed number of softmax decisions, each with a fresh context and random key, and
/// asserts that every action was chosen at least once and that the recorder holds every
/// interaction in order.
/// </summary>
public void Softmax()
{
    uint actionCount = 10;
    float softmaxLambda = 0.5f;
    uint coverTarget = 100;
    float scale = 5;

    var log = new TestRecorder<TestContext>();
    var scoring = new TestScorer<TestContext>(actionCount);
    var mwt = new MwtExplorer<TestContext>("mwt", log);
    var softmax = new SoftmaxExplorer<TestContext>(scoring, softmaxLambda, actionCount);

    // Number of decisions: n*ln(n) + ln(cover/n)*C*n, truncated to an unsigned integer.
    double baseTerm = actionCount * Math.Log(actionCount * 1.0);
    double coverTerm = Math.Log(coverTarget * 1.0 / actionCount) * scale * actionCount;
    uint decisionCount = (uint)(baseTerm + coverTerm);

    var hitsPerAction = new uint[actionCount];
    var keySource = new Random();

    for (uint decision = 0; decision < decisionCount; decision++)
    {
        string key = keySource.NextDouble().ToString();
        var ctx = new TestContext { Id = (int)decision };
        uint picked = mwt.ChooseAction(softmax, key, ctx);
        hitsPerAction[picked - 1] += 1; // action id is one-based
    }

    // Every action must have been selected at least once.
    foreach (uint hits in hitsPerAction)
    {
        Assert.IsTrue(hits > 0);
    }

    var recorded = log.GetAllInteractions();
    Assert.AreEqual(decisionCount, (uint)recorded.Count);

    // Contexts were created with Id equal to the decision index, so order can be checked by Id.
    int position = 0;
    while (position < decisionCount)
    {
        Assert.AreEqual(position, recorded[position].Context.Id);
        position++;
    }
}
/// <summary>
/// Makes two decisions with a <c>BootstrapExplorer</c> built over two policies, using two different
/// contexts under the same key, and asserts that both equal the first policy's action for the
/// first context and that both interactions were recorded in order.
/// </summary>
public void Bootstrap()
{
    const uint actionCount = 10;
    const uint bagCount = 2;
    const string decisionKey = "ManagedTestId";

    var log = new TestRecorder<TestContext>();

    // Policies are constructed with arguments 0 and 2 respectively.
    var bagPolicies = new TestPolicy[bagCount];
    for (int slot = 0; slot < bagPolicies.Length; slot++)
    {
        bagPolicies[slot] = new TestPolicy(slot * 2);
    }

    var firstContext = new TestContext { Id = 99 };
    var secondContext = new TestContext { Id = 100 };

    var mwt = new MwtExplorer<TestContext>("mwt", log);
    var bootstrap = new BootstrapExplorer<TestContext>(bagPolicies, actionCount);

    uint policyAction = bagPolicies[0].ChooseAction(firstContext);

    uint firstPick = mwt.ChooseAction(bootstrap, decisionKey, firstContext);
    Assert.AreEqual(policyAction, firstPick);

    // The second context is held to the same expected action as the first.
    uint secondPick = mwt.ChooseAction(bootstrap, decisionKey, secondContext);
    Assert.AreEqual(policyAction, secondPick);

    var recorded = log.GetAllInteractions();
    Assert.AreEqual(2, recorded.Count);
    Assert.AreEqual(firstContext.Id, recorded[0].Context.Id);
    Assert.AreEqual(secondContext.Id, recorded[1].Context.Id);
}
/// <summary>
/// Makes a single decision with a <c>GenericExplorer</c> and asserts that exactly one interaction
/// was recorded and that it carries the context's <c>Id</c>.
/// </summary>
public void Generic()
{
    const uint actionCount = 10;

    var log = new TestRecorder<TestContext>();
    var scoring = new TestScorer<TestContext>(actionCount);
    var mwt = new MwtExplorer<TestContext>("mwt", log);
    var generic = new GenericExplorer<TestContext>(scoring, actionCount);

    var ctx = new TestContext();
    ctx.Id = 100;

    // Only the recorded interaction is inspected; the returned action is not checked.
    mwt.ChooseAction(generic, "ManagedTestId", ctx);

    var recorded = log.GetAllInteractions();
    Assert.AreEqual(1, recorded.Count);
    Assert.AreEqual(ctx.Id, recorded[0].Context.Id);
}
/// <summary>
/// Makes three softmax decisions with a scorer constructed with <c>uniform: false</c> and asserts
/// that each recorded probability differs from the uniform value 1/numActions and that the
/// interactions were recorded in order (context ids 100, 101, 102).
/// </summary>
public void SoftmaxScores()
{
    uint numActions = 10;
    float lambda = 0.5f;

    TestRecorder<TestContext> recorder = new TestRecorder<TestContext>();
    TestScorer<TestContext> scorer = new TestScorer<TestContext>(numActions, uniform: false);
    MwtExplorer<TestContext> mwtt = new MwtExplorer<TestContext>("mwt", recorder);
    var explorer = new SoftmaxExplorer<TestContext>(scorer, lambda, numActions);

    Random rand = new Random();
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 100 });
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 101 });
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 102 });

    var interactions = recorder.GetAllInteractions();
    Assert.AreEqual(3, interactions.Count);

    // Probability every action would get under a uniform distribution; hoisted out of the loop.
    float uniformProbability = 1.0f / numActions;
    for (int i = 0; i < interactions.Count; i++)
    {
        // Scores are not equal therefore probabilities should not be uniform.
        // The not-expected value goes first so a failure message labels the values correctly.
        Assert.AreNotEqual(uniformProbability, interactions[i].Probability);
        Assert.AreEqual(100 + i, interactions[i].Context.Id);
    }
}
/// <summary>
/// Shared end-to-end scenario: makes 1000 decisions on randomly generated feature vectors,
/// converts the recorded interactions to <c>Interaction</c> objects, reports a random reward for
/// each, then optimizes and evaluates a policy through <c>MwtOptimizer</c> using a model file.
/// </summary>
/// <param name="mwtt">Explorer front-end used to make the decisions.</param>
/// <param name="explorer">Exploration algorithm under test.</param>
/// <param name="recorder">Recorder that was passed to <paramref name="mwtt"/>; read back after the decisions.</param>
private void EndToEnd(MwtExplorer<SimpleContext> mwtt, IExplorer<SimpleContext> explorer, TestRecorder<SimpleContext> recorder)
{
    uint numActions = 10;
    Random rand = new Random();
    List<float> rewards = new List<float>();

    // Each decision gets between 800 and 1200 features with one-based ids and random values,
    // plus one random reward to report later.
    for (int i = 0; i < 1000; i++)
    {
        Feature[] f = new Feature[rand.Next(800, 1201)];
        for (int j = 0; j < f.Length; j++)
        {
            f[j].Id = (uint)(j + 1);
            f[j].Value = (float)rand.NextDouble();
        }

        SimpleContext c = new SimpleContext(f);
        mwtt.ChooseAction(explorer, i.ToString(), c);
        rewards.Add((float)rand.NextDouble());
    }

    // Re-express what was recorded as Interaction objects for the reward reporter.
    var testInteractions = recorder.GetAllInteractions();
    Interaction[] partialInteractions = new Interaction[testInteractions.Count];
    for (int i = 0; i < testInteractions.Count; i++)
    {
        partialInteractions[i] = new Interaction()
        {
            ApplicationContext = new OldSimpleContext(testInteractions[i].Context.GetFeatures(), null),
            ChosenAction = testInteractions[i].Action,
            Probability = testInteractions[i].Probability,
            Id = testInteractions[i].UniqueKey
        };
    }

    MwtRewardReporter mrr = new MwtRewardReporter(partialInteractions);
    for (int i = 0; i < partialInteractions.Length; i++)
    {
        Assert.AreEqual(true, mrr.ReportReward(partialInteractions[i].GetId(), rewards[i]));
    }

    Interaction[] completeInteractions = mrr.GetAllInteractions();
    MwtOptimizer mop = new MwtOptimizer(completeInteractions, numActions);

    string modelFile = "model";

    // A file left behind by an earlier run would satisfy the Exists check below on its own,
    // so remove it first. File.Delete does not throw when the file is absent.
    System.IO.File.Delete(modelFile);
    try
    {
        mop.OptimizePolicyVWCSOAA(modelFile);
        Assert.IsTrue(System.IO.File.Exists(modelFile));

        float evaluatedValue = mop.EvaluatePolicyVWCSOAA(modelFile);
        Assert.IsFalse(float.IsNaN(evaluatedValue));
    }
    finally
    {
        // Clean up even when an assertion or the optimizer throws, so the file never leaks.
        System.IO.File.Delete(modelFile);
    }
}
/// <summary>
/// Makes three softmax decisions with a scorer constructed with <c>uniform: false</c> and asserts
/// that each recorded probability differs from the uniform value 1/numActions. Exploration is
/// then disabled and 1000 further decisions are each asserted to equal the highest-scoring action.
/// </summary>
public void SoftmaxScores()
{
    uint numActions = 10;
    float lambda = 0.5f;

    var recorder = new TestRecorder<TestContext>();
    var scorer = new TestScorer<TestContext>(numActions, uniform: false);
    var mwtt = new MwtExplorer<TestContext>("mwt", recorder);
    var explorer = new SoftmaxExplorer<TestContext>(scorer, lambda, numActions);

    Random rand = new Random();
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 100 });
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 101 });
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 102 });

    var interactions = recorder.GetAllInteractions();
    Assert.AreEqual(3, interactions.Count);

    // Probability every action would get under a uniform distribution; hoisted out of the loop.
    float uniformProbability = 1.0f / numActions;
    for (int i = 0; i < interactions.Count; i++)
    {
        // Scores are not equal therefore probabilities should not be uniform.
        // The not-expected value goes first so a failure message labels the values correctly.
        Assert.AreNotEqual(uniformProbability, interactions[i].Probability);
        Assert.AreEqual(100 + i, interactions[i].Context.Id);
    }

    // Verify that policy action is chosen all the time
    TestContext context = new TestContext { Id = 100 };
    List<float> scores = scorer.ScoreActions(context);

    // Seed with negative infinity rather than 0 so the arg-max is still found when every
    // score is zero or negative; the strict comparison keeps the first maximum on ties.
    float maxScore = float.NegativeInfinity;
    uint highestScoreAction = 0;
    for (int i = 0; i < scores.Count; i++)
    {
        if (maxScore < scores[i])
        {
            maxScore = scores[i];
            highestScoreAction = (uint)i + 1; // action id is one-based
        }
    }

    explorer.EnableExplore(false);
    for (int i = 0; i < 1000; i++)
    {
        uint chosenAction = mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = i });
        Assert.AreEqual(highestScoreAction, chosenAction);
    }
}