/// <summary>
/// Verifies that <c>ChooseAction</c> rejects a <c>TestContext</c> when the explorer was
/// constructed without a number-of-actions argument: each of the five explorer types
/// exercised below is expected to raise an <see cref="ArgumentException"/> whose
/// <c>ParamName</c> is "ctx" (compared case-insensitively).
/// </summary>
public void UsageBadVariableActionContext()
{
    int numExceptionsCaught = 0;
    int numExceptionsExpected = 5;

    // Runs the given action and counts only ArgumentExceptions raised for the "ctx" parameter.
    // Other ArgumentExceptions are swallowed without being counted, so they surface as a
    // count mismatch in the final assertion.
    Action<Action> tryCatchArgumentException = action =>
    {
        try
        {
            action();
        }
        catch (ArgumentException ex)
        {
            // string.Equals tolerates a null ParamName (an ArgumentException may be created
            // without one) and the ordinal comparison is independent of the current culture.
            if (string.Equals(ex.ParamName, "ctx", StringComparison.OrdinalIgnoreCase))
            {
                numExceptionsCaught++;
            }
        }
    };

    // Epsilon-greedy explorer.
    tryCatchArgumentException(() =>
    {
        var mwt = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
        var policy = new TestPolicy<TestContext>();
        var explorer = new EpsilonGreedyExplorer<TestContext>(policy, 0.2f);
        mwt.ChooseAction(explorer, "key", new TestContext());
    });

    // Tau-first explorer.
    tryCatchArgumentException(() =>
    {
        var mwt = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
        var policy = new TestPolicy<TestContext>();
        var explorer = new TauFirstExplorer<TestContext>(policy, 10);
        mwt.ChooseAction(explorer, "key", new TestContext());
    });

    // Bootstrap explorer over two policies.
    tryCatchArgumentException(() =>
    {
        var mwt = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
        var policies = new TestPolicy<TestContext>[2];
        for (int i = 0; i < 2; i++)
        {
            policies[i] = new TestPolicy<TestContext>(i * 2);
        }
        var explorer = new BootstrapExplorer<TestContext>(policies);
        mwt.ChooseAction(explorer, "key", new TestContext());
    });

    // Softmax explorer.
    tryCatchArgumentException(() =>
    {
        var mwt = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
        var scorer = new TestScorer<TestContext>(10);
        var explorer = new SoftmaxExplorer<TestContext>(scorer, 0.5f);
        mwt.ChooseAction(explorer, "key", new TestContext());
    });

    // Generic explorer.
    tryCatchArgumentException(() =>
    {
        var mwt = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
        var scorer = new TestScorer<TestContext>(10);
        var explorer = new GenericExplorer<TestContext>(scorer);
        mwt.ChooseAction(explorer, "key", new TestContext());
    });

    Assert.AreEqual(numExceptionsExpected, numExceptionsCaught);
}
/// <summary>
/// Runs the shared <c>EndToEnd</c> scenario with a <c>GenericExplorer</c> configured
/// for ten actions over <c>SimpleContext</c>.
/// </summary>
public void EndToEndGeneric()
{
    uint actionCount = 10;

    var interactionLog = new TestRecorder<SimpleContext>();
    var actionScorer = new TestScorer<SimpleContext>(actionCount);
    var mwt = new MwtExplorer<SimpleContext>("mwt", interactionLog);
    var genericExplorer = new GenericExplorer<SimpleContext>(actionScorer, actionCount);

    EndToEnd(mwt, genericExplorer, interactionLog);
}
/// <summary>
/// Runs the shared <c>EndToEnd</c> scenario with a <c>SoftmaxExplorer</c> configured
/// with lambda 0.5 and ten actions over <c>SimpleContext</c>.
/// </summary>
public void EndToEndSoftmax()
{
    uint actionCount = 10;
    float softmaxLambda = 0.5f;

    var interactionLog = new TestRecorder<SimpleContext>();
    var actionScorer = new TestScorer<SimpleContext>(actionCount);
    var mwt = new MwtExplorer<SimpleContext>("mwt", interactionLog);
    var softmaxExplorer = new SoftmaxExplorer<SimpleContext>(actionScorer, softmaxLambda, actionCount);

    EndToEnd(mwt, softmaxExplorer, interactionLog);
}
/// <summary>
/// Runs <c>GenericWithContext</c> with a <c>TestVarContext</c> that is constructed with
/// the action count, while the <c>GenericExplorer</c> itself is created from the scorer alone.
/// </summary>
public void GenericFixedActionUsingVariableActionInterface()
{
    uint actionCount = 10;

    TestScorer<TestVarContext> actionScorer = new TestScorer<TestVarContext>(actionCount);

    // The context, not the explorer, receives the action count in this variant.
    TestVarContext context = new TestVarContext(actionCount);
    context.Id = 100;

    GenericExplorer<TestVarContext> genericExplorer = new GenericExplorer<TestVarContext>(actionScorer);

    GenericWithContext(actionCount, context, genericExplorer);
}
/// <summary>
/// Runs <c>GenericWithContext</c> with a <c>GenericExplorer</c> that is given an explicit
/// action count of ten and a <c>TestContext</c> whose Id is 100.
/// </summary>
public void Generic()
{
    uint actionCount = 10;

    var actionScorer = new TestScorer<TestContext>(actionCount);

    var context = new TestContext();
    context.Id = 100;

    var genericExplorer = new GenericExplorer<TestContext>(actionScorer, actionCount);

    GenericWithContext(actionCount, context, genericExplorer);
}
/// <summary>
/// Drives a <c>SoftmaxExplorer</c> through a computed number of decisions with random
/// keys and checks that every action is chosen at least once and that the recorder
/// holds one interaction per decision, in call order.
/// </summary>
public void Softmax()
{
    uint actionCount = 10;
    float softmaxLambda = 0.5f;
    uint coverTarget = 100;
    float scale = 5;

    var interactionLog = new TestRecorder<TestContext>();
    var actionScorer = new TestScorer<TestContext>(actionCount);
    var mwt = new MwtExplorer<TestContext>("mwt", interactionLog);
    var softmaxExplorer = new SoftmaxExplorer<TestContext>(actionScorer, softmaxLambda, actionCount);

    // Number of decisions to run, derived from the action count and the constants above.
    uint decisionCount = (uint)(
        actionCount * Math.Log(actionCount * 1.0)
        + Math.Log(coverTarget * 1.0 / actionCount) * scale * actionCount);

    uint[] timesChosen = new uint[actionCount];
    Random keySource = new Random();

    for (uint decision = 0; decision < decisionCount; decision++)
    {
        string uniqueKey = keySource.NextDouble().ToString();
        var context = new TestContext() { Id = (int)decision };
        uint picked = mwt.ChooseAction(softmaxExplorer, uniqueKey, context);

        // action id is one-based
        timesChosen[picked - 1]++;
    }

    // Every action must have been selected at least once.
    foreach (uint count in timesChosen)
    {
        Assert.IsTrue(count > 0);
    }

    var recorded = interactionLog.GetAllInteractions();
    Assert.AreEqual(decisionCount, (uint)recorded.Count);

    // Interactions must appear in the order the decisions were made.
    for (int position = 0; position < decisionCount; position++)
    {
        Assert.AreEqual(position, recorded[position].Context.Id);
    }
}
/// <summary>
/// Makes a single decision with a <c>GenericExplorer</c> and checks that the recorder
/// holds exactly one interaction carrying the Id of the context that was passed in.
/// </summary>
public void Generic()
{
    uint actionCount = 10;
    string decisionKey = "ManagedTestId";

    var interactionLog = new TestRecorder<TestContext>();
    var actionScorer = new TestScorer<TestContext>(actionCount);
    var mwt = new MwtExplorer<TestContext>("mwt", interactionLog);
    var genericExplorer = new GenericExplorer<TestContext>(actionScorer, actionCount);

    var context = new TestContext();
    context.Id = 100;

    // Only the recorded interaction is inspected; the chosen action itself is not asserted on.
    mwt.ChooseAction(genericExplorer, decisionKey, context);

    var recorded = interactionLog.GetAllInteractions();
    Assert.AreEqual(1, recorded.Count);
    Assert.AreEqual(context.Id, recorded[0].Context.Id);
}
/// <summary>
/// Builds a <c>SoftmaxExplorer</c> and an array of <c>TestContext</c> instances with
/// sequential Ids, one per decision, and hands them to <c>SoftmaxWithContext</c>.
/// </summary>
public void Softmax()
{
    uint actionCount = 10;
    float softmaxLambda = 0.5f;
    uint coverTarget = 100;
    float scale = 5;

    TestScorer<TestContext> actionScorer = new TestScorer<TestContext>(actionCount);
    SoftmaxExplorer<TestContext> softmaxExplorer =
        new SoftmaxExplorer<TestContext>(actionScorer, softmaxLambda, actionCount);

    // Number of contexts to create, derived from the action count and the constants above.
    uint decisionCount = (uint)(
        actionCount * Math.Log(actionCount * 1.0)
        + Math.Log(coverTarget * 1.0 / actionCount) * scale * actionCount);

    TestContext[] decisionContexts = new TestContext[decisionCount];
    for (int index = 0; index < decisionContexts.Length; index++)
    {
        TestContext context = new TestContext();
        context.Id = index;
        decisionContexts[index] = context;
    }

    SoftmaxWithContext(actionCount, softmaxExplorer, decisionContexts);
}
/// <summary>
/// Checks that a <c>SoftmaxExplorer</c> driven by a non-uniform scorer records three
/// interactions, in call order, none of which carries the uniform probability
/// 1 / numActions.
/// </summary>
public void SoftmaxScores()
{
    uint numActions = 10;
    float lambda = 0.5f;

    TestRecorder<TestContext> recorder = new TestRecorder<TestContext>();
    TestScorer<TestContext> scorer = new TestScorer<TestContext>(numActions, uniform: false);
    MwtExplorer<TestContext> mwtt = new MwtExplorer<TestContext>("mwt", recorder);
    var explorer = new SoftmaxExplorer<TestContext>(scorer, lambda, numActions);

    // Random doubles serve as the per-decision keys.
    Random rand = new Random();
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 100 });
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 101 });
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 102 });

    var interactions = recorder.GetAllInteractions();
    Assert.AreEqual(3, interactions.Count);

    float uniformProbability = 1.0f / numActions;
    float tolerance = 1e-6f;
    for (int i = 0; i < interactions.Count; i++)
    {
        // Scores are not equal therefore probabilities should not be uniform.
        // Compared with a tolerance: an exact floating-point inequality would also pass
        // for a probability that differs from uniform only by rounding error.
        Assert.IsTrue(
            Math.Abs(interactions[i].Probability - uniformProbability) > tolerance,
            "Probability should not be uniform.");
        Assert.AreEqual(100 + i, interactions[i].Context.Id);
    }
}
/// <summary>
/// Checks two things about a <c>SoftmaxExplorer</c> driven by a non-uniform scorer:
/// first, that three decisions are recorded in call order with non-uniform probabilities;
/// second, that after <c>EnableExplore(false)</c> every decision returns the action with
/// the highest score.
/// </summary>
public void SoftmaxScores()
{
    uint numActions = 10;
    float lambda = 0.5f;

    var recorder = new TestRecorder<TestContext>();
    var scorer = new TestScorer<TestContext>(numActions, uniform: false);
    var mwtt = new MwtExplorer<TestContext>("mwt", recorder);
    var explorer = new SoftmaxExplorer<TestContext>(scorer, lambda, numActions);

    // Random doubles serve as the per-decision keys.
    Random rand = new Random();
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 100 });
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 101 });
    mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = 102 });

    var interactions = recorder.GetAllInteractions();
    Assert.AreEqual(3, interactions.Count);

    float uniformProbability = 1.0f / numActions;
    float tolerance = 1e-6f;
    for (int i = 0; i < interactions.Count; i++)
    {
        // Scores are not equal therefore probabilities should not be uniform.
        // Compared with a tolerance: an exact floating-point inequality would also pass
        // for a probability that differs from uniform only by rounding error.
        Assert.IsTrue(
            Math.Abs(interactions[i].Probability - uniformProbability) > tolerance,
            "Probability should not be uniform.");
        Assert.AreEqual(100 + i, interactions[i].Context.Id);
    }

    // Verify that policy action is chosen all the time
    TestContext context = new TestContext { Id = 100 };
    List<float> scores = scorer.ScoreActions(context);

    // Seed with negative infinity so the scan also finds the maximum when every score
    // is zero or negative; a seed of 0 would leave the invalid action id 0 in that case.
    // The strict comparison keeps the first of any tied maxima.
    float maxScore = float.NegativeInfinity;
    uint highestScoreAction = 0;
    for (int i = 0; i < scores.Count; i++)
    {
        if (maxScore < scores[i])
        {
            maxScore = scores[i];
            highestScoreAction = (uint)i + 1; // action ids are one-based
        }
    }

    explorer.EnableExplore(false);
    for (int i = 0; i < 1000; i++)
    {
        uint chosenAction = mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = i });
        Assert.AreEqual(highestScoreAction, chosenAction);
    }
}