/// <summary>
/// Checks that calling <c>ChooseAction</c> through each of five explorer types
/// (epsilon-greedy, tau-first, bootstrap, softmax, generic), each constructed without an
/// explicit action count, raises an <see cref="ArgumentException"/> whose
/// <c>ParamName</c> is "ctx" (compared case-insensitively).
/// </summary>
/// <remarks>
/// NOTE(review): presumably the exception is raised because TestContext does not expose a
/// variable number of actions - confirm against the explorer implementations.
/// </remarks>
public void UsageBadVariableActionContext()
{
    int numExceptionsCaught = 0;
    int numExceptionsExpected = 5;

    // Runs one scenario and counts it only when it fails with an ArgumentException that
    // names the "ctx" parameter. ParamName may be null, so the comparison is done with the
    // null-safe, culture-independent static string.Equals rather than ToLower().
    var tryCatchArgumentException = (Action<Action>)((action) =>
    {
        try
        {
            action();
        }
        catch (ArgumentException ex)
        {
            if (string.Equals(ex.ParamName, "ctx", StringComparison.OrdinalIgnoreCase))
            {
                numExceptionsCaught++;
            }
        }
    });

    // Epsilon-greedy explorer.
    tryCatchArgumentException(() =>
    {
        var mwt = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
        var policy = new TestPolicy<TestContext>();
        var explorer = new EpsilonGreedyExplorer<TestContext>(policy, 0.2f);
        mwt.ChooseAction(explorer, "key", new TestContext());
    });

    // Tau-first explorer.
    tryCatchArgumentException(() =>
    {
        var mwt = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
        var policy = new TestPolicy<TestContext>();
        var explorer = new TauFirstExplorer<TestContext>(policy, 10);
        mwt.ChooseAction(explorer, "key", new TestContext());
    });

    // Bootstrap explorer over two policies.
    tryCatchArgumentException(() =>
    {
        var mwt = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
        var policies = new TestPolicy<TestContext>[2];
        for (int i = 0; i < 2; i++)
        {
            policies[i] = new TestPolicy<TestContext>(i * 2);
        }
        var explorer = new BootstrapExplorer<TestContext>(policies);
        mwt.ChooseAction(explorer, "key", new TestContext());
    });

    // Softmax explorer.
    tryCatchArgumentException(() =>
    {
        var mwt = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
        var scorer = new TestScorer<TestContext>(10);
        var explorer = new SoftmaxExplorer<TestContext>(scorer, 0.5f);
        mwt.ChooseAction(explorer, "key", new TestContext());
    });

    // Generic explorer.
    tryCatchArgumentException(() =>
    {
        var mwt = new MwtExplorer<TestContext>("test", new TestRecorder<TestContext>());
        var scorer = new TestScorer<TestContext>(10);
        var explorer = new GenericExplorer<TestContext>(scorer);
        mwt.ChooseAction(explorer, "key", new TestContext());
    });

    Assert.AreEqual(numExceptionsExpected, numExceptionsCaught);
}
/// <summary>
/// Builds a softmax explorer over ten actions (lambda 0.5) and hands it, together with its
/// MwtExplorer and recorder, to the shared <c>EndToEnd</c> helper.
/// </summary>
public void EndToEndSoftmax()
{
    uint actionCount = 10;
    float softmaxLambda = 0.5f;

    var interactionRecorder = new TestRecorder<SimpleContext>();
    var actionScorer = new TestScorer<SimpleContext>(actionCount);
    var mwt = new MwtExplorer<SimpleContext>("mwt", interactionRecorder);
    var softmaxExplorer = new SoftmaxExplorer<SimpleContext>(actionScorer, softmaxLambda, actionCount);

    EndToEnd(mwt, softmaxExplorer, interactionRecorder);
}
/// <summary>
/// Repeatedly asks a softmax explorer for an action and checks that every action was
/// selected at least once, that one interaction was recorded per decision, and that the
/// recorded context ids follow the order in which the decisions were made.
/// </summary>
/// <remarks>
/// The number of decisions is computed from the action count, the cover target and the
/// constant factor below; presumably it is sized so that every action is very likely to be
/// drawn at least once - confirm with the test author.
/// </remarks>
public void Softmax()
{
    uint actionCount = 10;
    float softmaxLambda = 0.5f;
    uint coverTarget = 100;
    float coverFactor = 5;

    var interactionRecorder = new TestRecorder<TestContext>();
    var actionScorer = new TestScorer<TestContext>(actionCount);
    var mwt = new MwtExplorer<TestContext>("mwt", interactionRecorder);
    var softmaxExplorer = new SoftmaxExplorer<TestContext>(actionScorer, softmaxLambda, actionCount);

    double baseTerm = actionCount * Math.Log(actionCount * 1.0);
    double coverTerm = Math.Log(coverTarget * 1.0 / actionCount) * coverFactor * actionCount;
    uint decisionCount = (uint)(baseTerm + coverTerm);

    uint[] timesChosen = new uint[actionCount];
    Random keySource = new Random();

    uint decision = 0;
    while (decision < decisionCount)
    {
        string uniqueKey = keySource.NextDouble().ToString();
        var context = new TestContext() { Id = (int)decision };
        uint picked = mwt.ChooseAction(softmaxExplorer, uniqueKey, context);

        // Action ids are one-based, the tally array is zero-based.
        timesChosen[picked - 1]++;
        decision++;
    }

    foreach (uint hits in timesChosen)
    {
        Assert.IsTrue(hits > 0);
    }

    var recorded = interactionRecorder.GetAllInteractions();
    Assert.AreEqual(decisionCount, (uint)recorded.Count);

    for (int index = 0; index < decisionCount; index++)
    {
        Assert.AreEqual(index, recorded[index].Context.Id);
    }
}
/// <summary>
/// Prepares a softmax explorer, a scorer and a batch of <c>RegularTestContext</c> instances
/// with sequential ids, then delegates the actual checks to <c>SoftmaxWithContext</c>.
/// </summary>
public void Softmax()
{
    int actionCount = 10;
    float softmaxLambda = 0.5f;
    int coverTarget = 100;
    float coverFactor = 5;

    var actionScorer = new TestScorer<RegularTestContext>(1, actionCount);
    var softmaxExplorer = new SoftmaxExplorer(softmaxLambda);

    double baseTerm = actionCount * Math.Log(actionCount * 1.0);
    double coverTerm = Math.Log(coverTarget * 1.0 / actionCount) * coverFactor * actionCount;
    uint decisionCount = (uint)(baseTerm + coverTerm);

    // One context per decision; each context's id is its position in the batch.
    var contextBatch = new RegularTestContext[decisionCount];
    for (int index = 0; index < contextBatch.Length; index++)
    {
        contextBatch[index] = new RegularTestContext { Id = index };
    }

    SoftmaxWithContext(actionCount, softmaxExplorer, actionScorer, contextBatch);
}
/// <summary>
/// Same setup as the regular softmax test, but every context is a
/// <c>VariableActionTestContext</c> constructed with the same action count; the checks are
/// delegated to <c>SoftmaxWithContext</c>.
/// </summary>
public void SoftmaxFixedActionUsingVariableActionInterface()
{
    int actionCount = 10;
    float softmaxLambda = 0.5f;
    int coverTarget = 100;
    float coverFactor = 5;

    var actionScorer = new TestScorer<VariableActionTestContext>(1, actionCount);
    var softmaxExplorer = new SoftmaxExplorer(softmaxLambda);

    double baseTerm = actionCount * Math.Log(actionCount * 1.0);
    double coverTerm = Math.Log(coverTarget * 1.0 / actionCount) * coverFactor * actionCount;
    int decisionCount = (int)(baseTerm + coverTerm);

    // One context per decision; each context's id is its position in the batch.
    var contextBatch = new VariableActionTestContext[decisionCount];
    for (int index = 0; index < contextBatch.Length; index++)
    {
        contextBatch[index] = new VariableActionTestContext(actionCount) { Id = index };
    }

    SoftmaxWithContext(actionCount, softmaxExplorer, actionScorer, contextBatch);
}
/// <summary>
/// Makes three decisions with a softmax explorer whose scorer is created with
/// <c>uniform: false</c>, then checks that three interactions were recorded in order
/// (context ids 100, 101, 102) and that none of them carries the uniform probability
/// <c>1 / numActions</c>.
/// </summary>
public void SoftmaxScores()
{
    uint numActions = 10;
    float lambda = 0.5f;
    const int firstContextId = 100;
    const int numDecisions = 3;

    TestRecorder<TestContext> recorder = new TestRecorder<TestContext>();
    TestScorer<TestContext> scorer = new TestScorer<TestContext>(numActions, uniform: false);
    MwtExplorer<TestContext> mwtt = new MwtExplorer<TestContext>("mwt", recorder);
    var explorer = new SoftmaxExplorer<TestContext>(scorer, lambda, numActions);

    Random rand = new Random();
    for (int i = 0; i < numDecisions; i++)
    {
        mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext() { Id = firstContextId + i });
    }

    var interactions = recorder.GetAllInteractions();
    Assert.AreEqual(numDecisions, interactions.Count);

    float uniformProbability = 1.0f / numActions;
    for (int i = 0; i < interactions.Count; i++)
    {
        // Scores are not equal therefore probabilities should not be uniform.
        // The value that must NOT occur goes first so failure messages are labelled correctly.
        Assert.AreNotEqual(uniformProbability, interactions[i].Probability);
        Assert.AreEqual(firstContextId + i, interactions[i].Context.Id);
    }
}
/// <summary>
/// Exercises a softmax explorer created through <c>MwtExplorer.Create</c> with a scorer
/// built with <c>uniform: false</c>.
/// First checks that three decisions are recorded in order (context ids 100, 101, 102) with
/// non-uniform probabilities; then disables exploration and checks that 1000 further
/// decisions all return the highest-scoring action.
/// </summary>
public void SoftmaxScores()
{
    int numActions = 10;
    float lambda = 0.5f;
    const int firstContextId = 100;
    const int numDecisions = 3;

    var recorder = new TestRecorder<RegularTestContext>();
    var scorer = new TestScorer<RegularTestContext>(1, numActions, uniform: false);
    var explorer = new SoftmaxExplorer(lambda);
    var mwtt = MwtExplorer.Create("mwt", numActions, recorder, explorer, scorer);

    Random rand = new Random();
    for (int i = 0; i < numDecisions; i++)
    {
        mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext() { Id = firstContextId + i });
    }

    var interactions = recorder.GetAllInteractions();
    Assert.AreEqual(numDecisions, interactions.Count);

    float uniformProbability = 1.0f / numActions;
    for (int i = 0; i < interactions.Count; i++)
    {
        // Scores are not equal therefore probabilities should not be uniform.
        // The value that must NOT occur goes first so failure messages are labelled correctly.
        Assert.AreNotEqual(uniformProbability, interactions[i].Probability);
        Assert.AreEqual(firstContextId + i, interactions[i].Context.Id);
    }

    // Verify that policy action is chosen all the time once exploration is turned off.
    RegularTestContext context = new RegularTestContext { Id = firstContextId };
    List<float> scores = scorer.MapContext(context).Value.ToList();

    // Arg-max over the scores. Starting from negative infinity (instead of 0) keeps the
    // result a valid one-based action even when no score is positive; ties keep the first
    // maximum because the comparison is strict.
    float maxScore = float.NegativeInfinity;
    int highestScoreAction = 0;
    for (int i = 0; i < scores.Count; i++)
    {
        if (maxScore < scores[i])
        {
            maxScore = scores[i];
            highestScoreAction = i + 1; // action ids are one-based
        }
    }

    explorer.EnableExplore(false);
    for (int i = 0; i < 1000; i++)
    {
        int chosenAction = mwtt.ChooseAction(rand.NextDouble().ToString(), new RegularTestContext() { Id = i });
        Assert.AreEqual(highestScoreAction, chosenAction);
    }
}
/// <summary>
/// Runs softmax exploration once per experimental unit and appends everything the
/// <c>StringRecorder</c> captured to <paramref name="outputFile"/>.
/// </summary>
/// <typeparam name="TContext">
/// Context type. When it is assignable to <c>IVariableActionContext</c> the explorer is
/// created with a <c>VariableActionProvider</c>; otherwise with the fixed
/// <paramref name="numActions"/>.
/// </typeparam>
/// <param name="appId">Application id passed to <c>MwtExplorer.Create</c>.</param>
/// <param name="policyType">
/// 0 = fixed all-equal scorer (score read from <c>configPolicy["Score"]</c>);
/// 1 = integer-progression scorer (start read from <c>configPolicy["Start"]</c>, created
/// with <c>uniform: false</c>). Any other value makes the method return without exploring
/// or writing anything.
/// </param>
/// <param name="configPolicy">JSON configuration holding the scorer parameter.</param>
/// <param name="lambda">Lambda passed to the <c>SoftmaxExplorer</c> constructor.</param>
/// <param name="numActions">Action count given to the scorer and, for non-variable contexts, to the explorer.</param>
/// <param name="experimentalUnitIdList">One unit id per decision; its length drives the loop.</param>
/// <param name="contextList">Contexts indexed in step with <paramref name="experimentalUnitIdList"/>.</param>
/// <param name="outputFile">File the recording is appended to.</param>
static void ExploreSoftmax<TContext>
(
    string appId,
    int policyType,
    JToken configPolicy,
    float lambda,
    int numActions,
    string[] experimentalUnitIdList,
    TContext[] contextList,
    string outputFile
)
{
    var recorder = new StringRecorder<TContext>();
    bool isVariableActionContext = typeof(IVariableActionContext).IsAssignableFrom(typeof(TContext));

    // Only the scorer differs between policy types; everything after the switch is shared.
    TestScorer<TContext> scorer;
    switch (policyType)
    {
        case 0: // fixed all-equal scorer
            scorer = new TestScorer<TContext>(configPolicy["Score"].Value<int>(), numActions);
            break;

        case 1: // integer-progression scorer
            scorer = new TestScorer<TContext>(configPolicy["Start"].Value<int>(), numActions, uniform: false);
            break;

        default:
            // Unknown policy types are ignored, as before.
            return;
    }

    var explorer = new SoftmaxExplorer(lambda);
    var mwt = isVariableActionContext
        ? MwtExplorer.Create(appId, new VariableActionProvider<TContext>(), recorder, explorer, scorer)
        : MwtExplorer.Create(appId, numActions, recorder, explorer, scorer);

    for (int i = 0; i < experimentalUnitIdList.Length; i++)
    {
        mwt.ChooseAction(experimentalUnitIdList[i], contextList[i]);
    }

    File.AppendAllText(outputFile, recorder.GetRecording());
}