private static void EpsilonGreedyWithContext <TContext>(uint numActions, TContext testContext, TestPolicy <TContext> policy, IExplorer <TContext> explorer)
            where TContext : TestContext
        {
            // Shared scenario for explorers that are expected to follow the supplied policy:
            // two decisions made with the same key must both match the policy's action and
            // both be recorded; after exploration is switched off, 1000 further decisions
            // must still match. Note: numActions is not read anywhere in this helper.
            const string key = "ManagedTestId";
            var log = new TestRecorder<TContext>();
            var mwt = new MwtExplorer<TContext>("mwt", log);

            testContext.Id = 100;

            // Baseline: what the policy itself picks for this context.
            uint policyAction = policy.ChooseAction(testContext);

            for (int attempt = 0; attempt < 2; attempt++)
            {
                uint picked = mwt.ChooseAction(explorer, key, testContext);
                Assert.AreEqual(policyAction, picked);
            }

            var recorded = log.GetAllInteractions();
            Assert.AreEqual(2, recorded.Count);
            Assert.AreEqual(testContext.Id, recorded[0].Context.Id);

            // With exploration disabled every decision has to equal the policy's action.
            explorer.EnableExplore(false);
            int remaining = 1000;
            while (remaining-- > 0)
            {
                uint picked = mwt.ChooseAction(explorer, key, testContext);
                Assert.AreEqual(policyAction, picked);
            }
        }
        /// <summary>
        /// Exercises <c>TauFirstExplorer</c> with tau set to 0: the action returned by
        /// <c>MwtExplorer.ChooseAction</c> must equal the policy's own choice, and the
        /// recorder is expected to hold no interactions afterwards.
        /// </summary>
        public void TauFirst()
        {
            uint actionCount = 10;
            uint tau = 0;

            var log = new TestRecorder<TestContext>();
            var policy = new TestPolicy();
            var mwt = new MwtExplorer<TestContext>("mwt", log);
            var context = new TestContext { Id = 100 };
            var explorer = new TauFirstExplorer<TestContext>(policy, tau, actionCount);

            uint fromPolicy = policy.ChooseAction(context);
            uint fromExplorer = mwt.ChooseAction(explorer, "ManagedTestId", context);
            Assert.AreEqual(fromPolicy, fromExplorer);

            // Nothing should have reached the recorder for this decision.
            Assert.AreEqual(0, log.GetAllInteractions().Count);
        }
        /// <summary>
        /// Exercises <c>EpsilonGreedyExplorer</c> with epsilon set to 0: two decisions made
        /// with the same key must both equal the policy's action, both must be recorded,
        /// and the first recorded interaction must carry the context's Id.
        /// </summary>
        public void EpsilonGreedy()
        {
            uint actionCount = 10;
            float epsilon = 0f;
            const string key = "ManagedTestId";

            var log = new TestRecorder<TestContext>();
            var policy = new TestPolicy();
            var mwt = new MwtExplorer<TestContext>("mwt", log);
            var context = new TestContext { Id = 100 };
            var explorer = new EpsilonGreedyExplorer<TestContext>(policy, epsilon, actionCount);

            uint policyAction = policy.ChooseAction(context);

            for (int attempt = 0; attempt < 2; attempt++)
            {
                uint picked = mwt.ChooseAction(explorer, key, context);
                Assert.AreEqual(policyAction, picked);
            }

            var recorded = log.GetAllInteractions();
            Assert.AreEqual(2, recorded.Count);
            Assert.AreEqual(context.Id, recorded[0].Context.Id);
        }
        /// <summary>
        /// Makes one decision per supplied context (each under a fresh random key) and checks
        /// that every action was chosen at least once, that one interaction was recorded per
        /// context, and that the i-th recorded interaction carries context Id i.
        /// </summary>
        /// <param name="numActions">Number of actions; sizes the per-action hit counter.</param>
        /// <param name="explorer">Explorer handed to <c>MwtExplorer.ChooseAction</c>.</param>
        /// <param name="contexts">Contexts to decide on, in order.</param>
        private static void SoftmaxWithContext <TContext>(uint numActions, IExplorer <TContext> explorer, TContext[] contexts)
            where TContext : TestContext
        {
            var log = new TestRecorder<TContext>();
            var mwt = new MwtExplorer<TContext>("mwt", log);

            uint[] hits = new uint[numActions];
            Random keySource = new Random();

            foreach (TContext context in contexts)
            {
                string key = keySource.NextDouble().ToString();
                uint picked = mwt.ChooseAction(explorer, key, context);

                // Action ids are one-based, the counter array is zero-based.
                hits[picked - 1]++;
            }

            foreach (uint hitCount in hits)
            {
                Assert.IsTrue(hitCount > 0);
            }

            var recorded = log.GetAllInteractions();
            Assert.AreEqual(contexts.Length, recorded.Count);

            int position = 0;
            while (position < contexts.Length)
            {
                Assert.AreEqual(position, recorded[position].Context.Id);
                position++;
            }
        }
Exemple #5
0
        /// <summary>
        /// Wires a <c>GenericExplorer</c> backed by a <c>TestScorer</c> over 10 actions and
        /// hands it to the shared <c>EndToEnd</c> routine.
        /// </summary>
        public void EndToEndGeneric()
        {
            uint actionCount = 10;

            var log = new TestRecorder<SimpleContext>();
            var scorer = new TestScorer<SimpleContext>(actionCount);
            var mwt = new MwtExplorer<SimpleContext>("mwt", log);

            EndToEnd(mwt, new GenericExplorer<SimpleContext>(scorer, actionCount), log);
        }
Exemple #6
0
        /// <summary>
        /// Wires a <c>SoftmaxExplorer</c> (lambda 0.5) backed by a <c>TestScorer</c> over
        /// 10 actions and hands it to the shared <c>EndToEnd</c> routine.
        /// </summary>
        public void EndToEndSoftmax()
        {
            uint actionCount = 10;
            float softmaxLambda = 0.5f;

            var log = new TestRecorder<SimpleContext>();
            var scorer = new TestScorer<SimpleContext>(actionCount);
            var mwt = new MwtExplorer<SimpleContext>("mwt", log);

            EndToEnd(mwt, new SoftmaxExplorer<SimpleContext>(scorer, softmaxLambda, actionCount), log);
        }
Exemple #7
0
        /// <summary>
        /// Wires a <c>TauFirstExplorer</c> (tau 5) around a <c>TestSimplePolicy</c> over
        /// 10 actions and hands it to the shared <c>EndToEnd</c> routine.
        /// </summary>
        public void EndToEndTauFirst()
        {
            uint actionCount = 10;
            uint tau = 5;

            var log = new TestRecorder<SimpleContext>();
            var policy = new TestSimplePolicy();
            var mwt = new MwtExplorer<SimpleContext>("mwt", log);

            EndToEnd(mwt, new TauFirstExplorer<SimpleContext>(policy, tau, actionCount), log);
        }
Exemple #8
0
        /// <summary>
        /// Wires an <c>EpsilonGreedyExplorer</c> (epsilon 0.5) around a
        /// <c>TestSimplePolicy</c> over 10 actions and hands it to the shared
        /// <c>EndToEnd</c> routine.
        /// </summary>
        public void EndToEndEpsilonGreedy()
        {
            uint actionCount = 10;
            float epsilon = 0.5f;

            var log = new TestRecorder<SimpleContext>();
            var mwt = new MwtExplorer<SimpleContext>("mwt", log);
            var policy = new TestSimplePolicy();

            EndToEnd(mwt, new EpsilonGreedyExplorer<SimpleContext>(policy, epsilon, actionCount), log);
        }
        /// <summary>
        /// Makes a single decision with the given explorer and context and checks that
        /// exactly one interaction was recorded and that it carries the context's Id.
        /// The chosen action itself is not inspected.
        /// </summary>
        /// <param name="numActions">Not read by this helper.</param>
        /// <param name="testContext">Context passed to the decision call.</param>
        /// <param name="explorer">Explorer handed to <c>MwtExplorer.ChooseAction</c>.</param>
        private static void GenericWithContext <TContext>(uint numActions, TContext testContext, IExplorer <TContext> explorer)
            where TContext : TestContext
        {
            var log = new TestRecorder<TContext>();
            var mwt = new MwtExplorer<TContext>("mwt", log);

            // Only the recording side effect matters here; the returned action is discarded.
            mwt.ChooseAction(explorer, "ManagedTestId", testContext);

            var recorded = log.GetAllInteractions();
            Assert.AreEqual(1, recorded.Count);
            Assert.AreEqual(testContext.Id, recorded[0].Context.Id);
        }
Exemple #10
0
        /// <summary>
        /// Wires a <c>BaggingExplorer</c> over two <c>TestSimplePolicy</c> instances and
        /// 10 actions, then hands it to the shared <c>EndToEnd</c> routine.
        /// </summary>
        public void EndToEndBagging()
        {
            uint actionCount = 10;
            uint bagCount = 2;

            var log = new TestRecorder<SimpleContext>();

            // One independent policy instance per bag.
            var bagPolicies = new TestSimplePolicy[bagCount];
            for (int slot = 0; slot < bagPolicies.Length; slot++)
            {
                bagPolicies[slot] = new TestSimplePolicy();
            }

            var mwt = new MwtExplorer<SimpleContext>("mwt", log);
            var explorer = new BaggingExplorer<SimpleContext>(bagPolicies, bagCount, actionCount);

            EndToEnd(mwt, explorer, log);
        }
        /// <summary>
        /// Runs a <c>SoftmaxExplorer</c> (lambda 0.5, 10 actions) for a computed number of
        /// decisions, each under a fresh random key, and checks that every action was chosen
        /// at least once, that one interaction was recorded per decision, and that the
        /// i-th recorded interaction carries context Id i.
        /// </summary>
        public void Softmax()
        {
            uint actionCount = 10;
            float softmaxLambda = 0.5f;
            uint coverTarget = 100;
            float C = 5;

            var log = new TestRecorder<TestContext>();
            var scorer = new TestScorer<TestContext>(actionCount);
            var mwt = new MwtExplorer<TestContext>("mwt", log);
            var explorer = new SoftmaxExplorer<TestContext>(scorer, softmaxLambda, actionCount);

            // Number of decisions: n*ln(n) plus ln(cover/n)*C*n, truncated to an unsigned integer.
            double baseTerm = actionCount * Math.Log(actionCount * 1.0);
            double coverTerm = Math.Log(coverTarget * 1.0 / actionCount) * C * actionCount;
            uint decisionCount = (uint)(baseTerm + coverTerm);

            uint[] hits = new uint[actionCount];
            Random keySource = new Random();

            for (int decision = 0; decision < decisionCount; decision++)
            {
                var context = new TestContext { Id = decision };
                uint picked = mwt.ChooseAction(explorer, keySource.NextDouble().ToString(), context);

                // Action ids are one-based, the counter array is zero-based.
                hits[picked - 1]++;
            }

            foreach (uint hitCount in hits)
            {
                Assert.IsTrue(hitCount > 0);
            }

            var recorded = log.GetAllInteractions();
            Assert.AreEqual(decisionCount, (uint)recorded.Count);

            int position = 0;
            while (position < decisionCount)
            {
                Assert.AreEqual(position, recorded[position].Context.Id);
                position++;
            }
        }
        /// <summary>
        /// Exercises <c>BootstrapExplorer</c> with two <c>TestPolicy</c> instances: two
        /// decisions made with the same key (on contexts 99 and 100) must both equal the
        /// action the first policy picks for the first context, and both decisions must be
        /// recorded in order with their respective context Ids.
        /// </summary>
        public void Bootstrap()
        {
            uint actionCount = 10;
            uint bagCount = 2;
            const string key = "ManagedTestId";

            var log = new TestRecorder<TestContext>();

            // Each policy is constructed with a different argument (0, 2, ...).
            var bagPolicies = new TestPolicy[bagCount];
            for (int slot = 0; slot < bagPolicies.Length; slot++)
            {
                bagPolicies[slot] = new TestPolicy(slot * 2);
            }

            var firstContext = new TestContext { Id = 99 };
            var secondContext = new TestContext { Id = 100 };

            var mwt = new MwtExplorer<TestContext>("mwt", log);
            var explorer = new BootstrapExplorer<TestContext>(bagPolicies, actionCount);

            uint policyAction = bagPolicies[0].ChooseAction(firstContext);

            uint picked = mwt.ChooseAction(explorer, key, firstContext);
            Assert.AreEqual(policyAction, picked);

            picked = mwt.ChooseAction(explorer, key, secondContext);
            Assert.AreEqual(policyAction, picked);

            var recorded = log.GetAllInteractions();
            Assert.AreEqual(2, recorded.Count);
            Assert.AreEqual(firstContext.Id, recorded[0].Context.Id);
            Assert.AreEqual(secondContext.Id, recorded[1].Context.Id);
        }
        /// <summary>
        /// Makes a single decision with a <c>GenericExplorer</c> backed by a
        /// <c>TestScorer</c> over 10 actions and checks that exactly one interaction was
        /// recorded and that it carries the context's Id. The chosen action is not inspected.
        /// </summary>
        public void Generic()
        {
            uint actionCount = 10;

            var log = new TestRecorder<TestContext>();
            var scorer = new TestScorer<TestContext>(actionCount);
            var mwt = new MwtExplorer<TestContext>("mwt", log);
            var explorer = new GenericExplorer<TestContext>(scorer, actionCount);
            var context = new TestContext { Id = 100 };

            // Only the recording side effect matters here; the returned action is discarded.
            mwt.ChooseAction(explorer, "ManagedTestId", context);

            var recorded = log.GetAllInteractions();
            Assert.AreEqual(1, recorded.Count);
            Assert.AreEqual(context.Id, recorded[0].Context.Id);
        }
        /// <summary>
        /// Makes three decisions with a <c>SoftmaxExplorer</c> whose scorer is built with
        /// <c>uniform: false</c>, then checks that all three were recorded in order
        /// (context Ids 100, 101, 102) and that none of the recorded probabilities equals
        /// the uniform value 1 / numActions.
        /// </summary>
        public void SoftmaxScores()
        {
            uint  numActions = 10;
            float lambda     = 0.5f;
            TestRecorder <TestContext> recorder = new TestRecorder <TestContext>();
            TestScorer <TestContext>   scorer   = new TestScorer <TestContext>(numActions, uniform: false);

            MwtExplorer <TestContext> mwtt = new MwtExplorer <TestContext>("mwt", recorder);
            var explorer = new SoftmaxExplorer <TestContext>(scorer, lambda, numActions);

            // Each decision gets its own random key.
            Random rand = new Random();

            mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
            {
                Id = 100
            });
            mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
            {
                Id = 101
            });
            mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
            {
                Id = 102
            });

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(3, interactions.Count);

            float uniformProbability = 1.0f / numActions;
            for (int i = 0; i < interactions.Count; i++)
            {
                // Scores are not equal therefore probabilities should not be uniform.
                // The value that must NOT occur goes first, the observed value second,
                // so a failure message labels the two correctly.
                Assert.AreNotEqual(uniformProbability, interactions[i].Probability);
                Assert.AreEqual(100 + i, interactions[i].Context.Id);
            }
        }
Exemple #15
0
        /// <summary>
        /// Shared end-to-end scenario: makes 1000 decisions on randomly generated contexts,
        /// converts the recorded interactions for <c>MwtRewardReporter</c>, reports a random
        /// reward for each, then optimizes and evaluates a policy through
        /// <c>MwtOptimizer</c>, checking that a model file is produced and that the
        /// evaluated value is not NaN.
        /// </summary>
        /// <param name="mwtt">Explorer front-end used to make the decisions.</param>
        /// <param name="explorer">Exploration algorithm under test.</param>
        /// <param name="recorder">Recorder that <paramref name="mwtt"/> was constructed with.</param>
        private void EndToEnd(MwtExplorer <SimpleContext> mwtt, IExplorer <SimpleContext> explorer, TestRecorder <SimpleContext> recorder)
        {
            uint numActions = 10;

            Random rand = new Random();

            List <float> rewards = new List <float>();

            for (int i = 0; i < 1000; i++)
            {
                // Between 800 and 1200 features, ids starting at 1, random values.
                Feature[] f = new Feature[rand.Next(800, 1201)];
                for (int j = 0; j < f.Length; j++)
                {
                    f[j].Id    = (uint)(j + 1);
                    f[j].Value = (float)rand.NextDouble();
                }
                SimpleContext c = new SimpleContext(f);

                mwtt.ChooseAction(explorer, i.ToString(), c);

                rewards.Add((float)rand.NextDouble());
            }

            var testInteractions = recorder.GetAllInteractions();

            // Re-express each recorded interaction in the type the reward reporter consumes.
            Interaction[] partialInteractions = new Interaction[testInteractions.Count];
            for (int i = 0; i < testInteractions.Count; i++)
            {
                partialInteractions[i] = new Interaction()
                {
                    ApplicationContext = new OldSimpleContext(testInteractions[i].Context.GetFeatures(), null),
                    ChosenAction       = testInteractions[i].Action,
                    Probability        = testInteractions[i].Probability,
                    Id = testInteractions[i].UniqueKey
                };
            }

            MwtRewardReporter mrr = new MwtRewardReporter(partialInteractions);

            for (int i = 0; i < partialInteractions.Length; i++)
            {
                Assert.IsTrue(mrr.ReportReward(partialInteractions[i].GetId(), rewards[i]));
            }

            Interaction[] completeInteractions = mrr.GetAllInteractions();
            MwtOptimizer  mop = new MwtOptimizer(completeInteractions, numActions);

            string modelFile = "model";

            // A file left behind by an earlier aborted run would satisfy the existence
            // check below without the optimizer having written anything. File.Delete is
            // a no-op when the file does not exist.
            System.IO.File.Delete(modelFile);

            try
            {
                mop.OptimizePolicyVWCSOAA(modelFile);

                Assert.IsTrue(System.IO.File.Exists(modelFile));

                float evaluatedValue = mop.EvaluatePolicyVWCSOAA(modelFile);

                Assert.IsFalse(float.IsNaN(evaluatedValue));
            }
            finally
            {
                // Clean up even when an assertion or the optimizer throws.
                System.IO.File.Delete(modelFile);
            }
        }
        /// <summary>
        /// Makes three decisions with a <c>SoftmaxExplorer</c> whose scorer is built with
        /// <c>uniform: false</c> and checks that they were recorded in order (context Ids
        /// 100, 101, 102) with probabilities different from the uniform value
        /// 1 / numActions. Then, with exploration switched off, checks that 1000 further
        /// decisions all return the action with the highest score for a context with Id 100.
        /// </summary>
        public void SoftmaxScores()
        {
            uint  numActions = 10;
            float lambda     = 0.5f;
            var   recorder   = new TestRecorder <TestContext>();
            var   scorer     = new TestScorer <TestContext>(numActions, uniform: false);

            var mwtt     = new MwtExplorer <TestContext>("mwt", recorder);
            var explorer = new SoftmaxExplorer <TestContext>(scorer, lambda, numActions);

            // Each decision gets its own random key.
            Random rand = new Random();

            mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
            {
                Id = 100
            });
            mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
            {
                Id = 101
            });
            mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
            {
                Id = 102
            });

            var interactions = recorder.GetAllInteractions();

            Assert.AreEqual(3, interactions.Count);

            float uniformProbability = 1.0f / numActions;
            for (int i = 0; i < interactions.Count; i++)
            {
                // Scores are not equal therefore probabilities should not be uniform.
                // The value that must NOT occur goes first, the observed value second,
                // so a failure message labels the two correctly.
                Assert.AreNotEqual(uniformProbability, interactions[i].Probability);
                Assert.AreEqual(100 + i, interactions[i].Context.Id);
            }

            // Verify that the highest-scoring action is chosen all the time once
            // exploration is disabled.
            TestContext context = new TestContext {
                Id = 100
            };
            List <float> scores = scorer.ScoreActions(context);

            // Start below every finite score so the arg-max is also found when no
            // score is positive; the first maximum wins on ties.
            float maxScore           = float.NegativeInfinity;
            uint  highestScoreAction = 0;

            for (int i = 0; i < scores.Count; i++)
            {
                if (maxScore < scores[i])
                {
                    maxScore           = scores[i];
                    highestScoreAction = (uint)i + 1; // action id is one-based
                }
            }

            explorer.EnableExplore(false);
            for (int i = 0; i < 1000; i++)
            {
                uint chosenAction = mwtt.ChooseAction(explorer, rand.NextDouble().ToString(), new TestContext()
                {
                    Id = i
                });
                Assert.AreEqual(highestScoreAction, chosenAction);
            }
        }