public void TauFirst()
        {
            // Builds a TauFirstExplorer with tau = 0 over 10 actions, then checks that
            // MwtExplorer returns the same action the policy picks for the context and
            // that the recorder ends up holding no interactions.
            const string key         = "ManagedTestId";
            const uint   actionCount = 10;
            const uint   tauValue    = 0;

            var interactionLog = new TestRecorder <TestContext>();
            var defaultPolicy  = new TestPolicy();
            var mwt            = new MwtExplorer <TestContext>("mwt", interactionLog);
            var context        = new TestContext();
            context.Id = 100;

            var tauFirst = new TauFirstExplorer <TestContext>(defaultPolicy, tauValue, actionCount);

            // The policy is queried directly first so its answer can serve as the baseline.
            uint policyAction   = defaultPolicy.ChooseAction(context);
            uint exploredAction = mwt.ChooseAction(tauFirst, key, context);
            Assert.AreEqual(policyAction, exploredAction);

            // Nothing is expected to have been recorded for this single call.
            Assert.AreEqual(0, interactionLog.GetAllInteractions().Count);
        }
        public void UsageBadVariableActionContext()
        {
            // Each scenario below builds an explorer through a constructor call that
            // passes no numActions argument and then calls ChooseAction with a plain
            // TestContext. The test passes only when all five scenarios throw an
            // ArgumentException whose ParamName is "ctx" (compared case-insensitively).
            // NOTE(review): presumably the library rejects TestContext here because it
            // does not supply a variable action count -- confirm against the explorers.
            int numExceptionsCaught   = 0;
            int numExceptionsExpected = 5;

            // Runs the scenario and counts it only when the expected exception surfaces.
            // Any other ArgumentException is swallowed and shows up as a count mismatch
            // in the final assertion.
            Action <Action> tryCatchArgumentException = action =>
            {
                try
                {
                    action();
                }
                catch (ArgumentException ex)
                {
                    // Null-safe, culture-independent comparison: ParamName is null when
                    // the exception was created without a parameter name, and calling
                    // ToLower() on it would raise a NullReferenceException that masks
                    // the real assertion failure.
                    if (string.Equals(ex.ParamName, "ctx", StringComparison.OrdinalIgnoreCase))
                    {
                        numExceptionsCaught++;
                    }
                }
            };

            // Epsilon-greedy explorer.
            tryCatchArgumentException(() =>
            {
                var mwt      = new MwtExplorer <TestContext>("test", new TestRecorder <TestContext>());
                var policy   = new TestPolicy <TestContext>();
                var explorer = new EpsilonGreedyExplorer <TestContext>(policy, 0.2f);
                mwt.ChooseAction(explorer, "key", new TestContext());
            });

            // Tau-first explorer.
            tryCatchArgumentException(() =>
            {
                var mwt      = new MwtExplorer <TestContext>("test", new TestRecorder <TestContext>());
                var policy   = new TestPolicy <TestContext>();
                var explorer = new TauFirstExplorer <TestContext>(policy, 10);
                mwt.ChooseAction(explorer, "key", new TestContext());
            });

            // Bootstrap explorer over two policies.
            tryCatchArgumentException(() =>
            {
                var mwt      = new MwtExplorer <TestContext>("test", new TestRecorder <TestContext>());
                var policies = new TestPolicy <TestContext> [2];
                for (int i = 0; i < 2; i++)
                {
                    policies[i] = new TestPolicy <TestContext>(i * 2);
                }
                var explorer = new BootstrapExplorer <TestContext>(policies);
                mwt.ChooseAction(explorer, "key", new TestContext());
            });

            // Softmax explorer.
            tryCatchArgumentException(() =>
            {
                var mwt      = new MwtExplorer <TestContext>("test", new TestRecorder <TestContext>());
                var scorer   = new TestScorer <TestContext>(10);
                var explorer = new SoftmaxExplorer <TestContext>(scorer, 0.5f);
                mwt.ChooseAction(explorer, "key", new TestContext());
            });

            // Generic explorer.
            tryCatchArgumentException(() =>
            {
                var mwt      = new MwtExplorer <TestContext>("test", new TestRecorder <TestContext>());
                var scorer   = new TestScorer <TestContext>(10);
                var explorer = new GenericExplorer <TestContext>(scorer);
                mwt.ChooseAction(explorer, "key", new TestContext());
            });

            Assert.AreEqual(numExceptionsExpected, numExceptionsCaught);
        }
        public void EpsilonGreedy()
        {
            // Runs an EpsilonGreedyExplorer with epsilon = 0 twice on the same context and
            // checks that both calls return the policy's action, that two interactions
            // were recorded, and that the first one carries the context's Id.
            const string key         = "ManagedTestId";
            const uint   actionCount = 10;
            const float  epsilonRate = 0f;

            var interactionLog = new TestRecorder <TestContext>();
            var defaultPolicy  = new TestPolicy();
            var mwt            = new MwtExplorer <TestContext>("mwt", interactionLog);
            var context        = new TestContext()
            {
                Id = 100
            };

            var greedy = new EpsilonGreedyExplorer <TestContext>(defaultPolicy, epsilonRate, actionCount);

            // Baseline: what the policy itself chooses for this context.
            uint policyAction = defaultPolicy.ChooseAction(context);

            uint firstPick = mwt.ChooseAction(greedy, key, context);
            Assert.AreEqual(policyAction, firstPick);

            uint secondPick = mwt.ChooseAction(greedy, key, context);
            Assert.AreEqual(policyAction, secondPick);

            var recorded = interactionLog.GetAllInteractions();
            Assert.AreEqual(2, recorded.Count);
            Assert.AreEqual(context.Id, recorded[0].Context.Id);
        }
        public void EpsilonGreedyFixedActionUsingVariableActionInterface()
        {
            // Same scenario as the fixed-action epsilon-greedy test, except the action
            // count is given to the TestVarContext rather than to the explorer constructor.
            const uint  actionCount = 10;
            const float epsilonRate = 0f;

            TestPolicy <TestVarContext> defaultPolicy = new TestPolicy <TestVarContext>();
            TestVarContext context = new TestVarContext(actionCount);
            EpsilonGreedyExplorer <TestVarContext> greedy =
                new EpsilonGreedyExplorer <TestVarContext>(defaultPolicy, epsilonRate);

            EpsilonGreedyWithContext(actionCount, context, defaultPolicy, greedy);
        }
        public void EpsilonGreedy()
        {
            // Fixed-action epsilon-greedy scenario: the action count is passed to the
            // explorer constructor and the shared helper performs the assertions.
            const uint  actionCount = 10;
            const float epsilonRate = 0f;

            TestPolicy <TestContext> defaultPolicy = new TestPolicy <TestContext>();
            TestContext context = new TestContext();
            EpsilonGreedyExplorer <TestContext> greedy =
                new EpsilonGreedyExplorer <TestContext>(defaultPolicy, epsilonRate, actionCount);

            EpsilonGreedyWithContext(actionCount, context, defaultPolicy, greedy);
        }
        public void TauFirstFixedActionUsingVariableActionInterface()
        {
            // Tau-first scenario (tau = 0) where the action count is given to the
            // TestVarContext rather than to the explorer constructor; the shared helper
            // performs the assertions.
            const uint actionCount = 10;
            const uint tauValue    = 0;

            TestVarContext context = new TestVarContext(actionCount);
            context.Id = 100;

            TestPolicy <TestVarContext> defaultPolicy = new TestPolicy <TestVarContext>();
            TauFirstExplorer <TestVarContext> tauFirst =
                new TauFirstExplorer <TestVarContext>(defaultPolicy, tauValue);

            TauFirstWithContext(actionCount, context, defaultPolicy, tauFirst);
        }
        public void TauFirst()
        {
            // Fixed-action tau-first scenario (tau = 0): the action count is passed to
            // the explorer constructor and the shared helper performs the assertions.
            const uint actionCount = 10;
            const uint tauValue    = 0;

            var context = new TestContext();
            context.Id = 100;

            TestPolicy <TestContext> defaultPolicy = new TestPolicy <TestContext>();
            TauFirstExplorer <TestContext> tauFirst =
                new TauFirstExplorer <TestContext>(defaultPolicy, tauValue, actionCount);

            TauFirstWithContext(actionCount, context, defaultPolicy, tauFirst);
        }
        public void Bootstrap()
        {
            // Drives a BootstrapExplorer built from two policies with two different
            // contexts. Both chosen actions are compared against the first policy's
            // choice for the first context, and the recorder is expected to hold one
            // interaction per call, in call order.
            const string key         = "ManagedTestId";
            const uint   actionCount = 10;
            const uint   bagCount    = 2;

            var interactionLog = new TestRecorder <TestContext>();

            // Policy at index k is constructed with the argument k * 2.
            var bagPolicies = new TestPolicy[bagCount];
            for (int bag = 0; bag < bagPolicies.Length; bag++)
            {
                bagPolicies[bag] = new TestPolicy(bag * 2);
            }

            var firstContext = new TestContext();
            firstContext.Id = 99;
            var secondContext = new TestContext();
            secondContext.Id = 100;

            var mwt       = new MwtExplorer <TestContext>("mwt", interactionLog);
            var bootstrap = new BootstrapExplorer <TestContext>(bagPolicies, actionCount);

            uint policyAction = bagPolicies[0].ChooseAction(firstContext);

            uint firstPick = mwt.ChooseAction(bootstrap, key, firstContext);
            Assert.AreEqual(policyAction, firstPick);

            uint secondPick = mwt.ChooseAction(bootstrap, key, secondContext);
            Assert.AreEqual(policyAction, secondPick);

            var recorded = interactionLog.GetAllInteractions();
            Assert.AreEqual(2, recorded.Count);
            Assert.AreEqual(firstContext.Id, recorded[0].Context.Id);
            Assert.AreEqual(secondContext.Id, recorded[1].Context.Id);
        }
        public void BootstrapFixedActionUsingVariableActionInterface()
        {
            // Bootstrap scenario where the action count is given to each TestVarContext
            // rather than to the explorer constructor; the shared helper performs the
            // assertions.
            const uint actionCount = 10;
            const uint bagCount    = 2;

            var firstContext = new TestVarContext(actionCount);
            firstContext.Id = 99;
            var secondContext = new TestVarContext(actionCount);
            secondContext.Id = 100;

            // Policy at index k is constructed with the argument k * 2.
            TestPolicy <TestVarContext>[] bagPolicies = new TestPolicy <TestVarContext> [bagCount];
            for (int bag = 0; bag < bagPolicies.Length; bag++)
            {
                bagPolicies[bag] = new TestPolicy <TestVarContext>(bag * 2);
            }

            BootstrapExplorer <TestVarContext> bootstrap = new BootstrapExplorer <TestVarContext>(bagPolicies);

            BootstrapWithContext(actionCount, firstContext, secondContext, bagPolicies, bootstrap);
        }
        public void Bootstrap()
        {
            // Fixed-action bootstrap scenario: the action count is passed to the
            // explorer constructor and the shared helper performs the assertions.
            const uint actionCount = 10;
            const uint bagCount    = 2;

            var firstContext = new TestContext();
            firstContext.Id = 99;
            var secondContext = new TestContext();
            secondContext.Id = 100;

            // Policy at index k is constructed with the argument k * 2.
            TestPolicy <TestContext>[] bagPolicies = new TestPolicy <TestContext> [bagCount];
            for (int bag = 0; bag < bagPolicies.Length; bag++)
            {
                bagPolicies[bag] = new TestPolicy <TestContext>(bag * 2);
            }

            BootstrapExplorer <TestContext> bootstrap =
                new BootstrapExplorer <TestContext>(bagPolicies, actionCount);

            BootstrapWithContext(actionCount, firstContext, secondContext, bagPolicies, bootstrap);
        }
        private static void EpsilonGreedyWithContext <TContext>(uint numActions, TContext testContext, TestPolicy <TContext> policy, IExplorer <TContext> explorer)
            where TContext : TestContext
        {
            // Shared assertions for the epsilon-greedy tests. Sets the context Id to 100,
            // calls ChooseAction twice and expects the policy's action both times, checks
            // that two interactions were recorded (the first with the context's Id), then
            // turns exploration off and expects the policy's action on 1000 further calls.
            // numActions is not referenced in this body.
            const string key = "ManagedTestId";

            var interactionLog = new TestRecorder <TContext>();
            var mwt            = new MwtExplorer <TContext>("mwt", interactionLog);

            testContext.Id = 100;

            // Baseline: what the policy itself chooses for this context.
            uint policyAction = policy.ChooseAction(testContext);

            uint pickedAction = mwt.ChooseAction(explorer, key, testContext);
            Assert.AreEqual(policyAction, pickedAction);

            pickedAction = mwt.ChooseAction(explorer, key, testContext);
            Assert.AreEqual(policyAction, pickedAction);

            var recorded = interactionLog.GetAllInteractions();
            Assert.AreEqual(2, recorded.Count);
            Assert.AreEqual(testContext.Id, recorded[0].Context.Id);

            // With exploration disabled, every call must return the policy's action.
            explorer.EnableExplore(false);
            int remainingCalls = 1000;
            while (remainingCalls > 0)
            {
                pickedAction = mwt.ChooseAction(explorer, key, testContext);
                Assert.AreEqual(policyAction, pickedAction);
                remainingCalls--;
            }
        }